AI task test, data cleaning

 Well, so much for AI extracting information from e-mails.

This is why I have my doubts about AI. I'm using Gmail Cloud HQ to separate out my mainly LinkedIn (with some other items) job search from all the spam I get. I'm then pushing that into an Azure SQL database. Using three separate queries looking for "%appli%" from "noreply@" in Gmail (because GMAIL doesn't actually support wildcards) I'm pulling in three slightly different CSV files into SQL, cleaning them, and putting them into my job search. This is *AFTER* CloudHQ used "AI parsing" to fill certain fields to begin with.


Here's the script:

/****** Object: Table [dbo].[CombinedCleand]  Script Date: 7/22/2025 10:59:29 AM ******/

IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[CombinedCleand]') AND type in (N'U'))

DROP TABLE [dbo].[CombinedCleand]

GO


/****** Object: Table [dbo].[CombinedCleand]  Script Date: 7/22/2025 10:59:29 AM ******/

SET ANSI_NULLS ON

GO


SET QUOTED_IDENTIFIER ON

GO


CREATE TABLE [dbo].[CombinedCleand](

[ID] [bigint] IDENTITY(1,1) NOT NULL,

[From] [nvarchar](255) NOT NULL,

[To] [nvarchar](255) NOT NULL,

[Subject] [nvarchar](max) NOT NULL,

[Date_Time_Sent] [datetime2](7) NULL,

[Date_Time_Received] [datetime2](7) NULL,

[Email_Text] [nvarchar](Max) NULL,

[person] [nvarchar](max) NULL,

[contact] [nvarchar](50) NULL,

[results] [nvarchar](200) NULL,

[Location] [nvarchar](1000) NULL,

[Application_date_date] [nvarchar](255) NULL,

[Position] [nvarchar](1000) NULL,

[Company] [nvarchar](50) NULL

) ON [PRIMARY]

GO


INSERT INTO [dbo].[CombinedCleand]

      ([From]

      ,[To]

      ,[Subject]

      ,[Date_Time_Sent]

      ,[Date_Time_Received]

      ,[Email_Text]

      ,[person]

      ,[contact]

      ,[results]

      ,[Location]

      ,[Application_date_date]

      ,[Position]

      ,[Company])

  Select [From]

      ,[To]

      ,[Subject]

      ,[Date_Time_Sent]

      ,[Date_Time_Received]

      ,[Email_Text]

      ,[person]

      ,'email' as [contact]

      ,'unknown' as [results]

      ,[Location]

      ,[date]

      ,[job_title] as [Position]

      ,[Company_Name] as [Company]

from dbo.import1


INSERT INTO [dbo].[CombinedCleand]

      ([From]

      ,[To]

      ,[Subject]

      ,[Date_Time_Sent]

      ,[Date_Time_Received]

      ,[Email_Text]

      ,[person]

      ,[contact]

      ,[results]

      ,[Location]

      ,[Application_date_date]

      ,[Position]

      ,[Company])

select [From]

      ,[To]

      ,[Subject]

      ,[Date_Time_Sent]

      ,[Date_Time_Received]

      ,[Email_Text]

      ,[person]

      ,[contact]

      ,[results]

      ,[Location]

      ,[Application_date_date]

      ,[Position]

      ,[Company]

  from dbo.import2


INSERT INTO [dbo].[CombinedCleand]

      ([From]

      ,[To]

      ,[Subject]

      ,[Date_Time_Sent]

      ,[Date_Time_Received]

      ,[Email_Text]

      ,[person]

      ,[contact]

      ,[results]

      ,[Location]

      ,[Application_date_date]

      ,[Position]

      ,[Company])

select [From]

      ,[To]

      ,[Subject]

      ,[Date_Time_Sent]

      ,[Date_Time_Received]

      ,[Email_Text]

      ,'unknown' as [person]

      ,'email' as [contact]

      ,'unknown' as [results]

      ,[Location]

      ,[Date_Time_Sent] as [date]

      ,[job_title] as [Position]

      ,[Company]

  from dbo.import3



Delete From [dbo].[CombinedCleand]

where Not( [To] in ('seebert42@gmail.com','theodore@seeberfamily.org'))


Delete From [dbo].[CombinedCleand]

where [Subject] like '%looking for a new job?%'


Delete From [dbo].[CombinedCleand]

where [Subject] like '%verification%'


Delete From [dbo].[CombinedCleand]

where [Subject] like 'New jobs similar%'


Delete From [dbo].[CombinedCleand]

where [Email_text] like 'Check out the status of your applications on%'


Delete From [dbo].[CombinedCleand]

where [Subject] like 'Keep track of your application%'


Delete From [dbo].[CombinedCleand]

where [Subject] like 'You look like a good fit for the job%'


Delete From [dbo].[CombinedCleand]

where [Subject] like 'Fast apply%'


Delete From [dbo].[CombinedCleand]

where [Subject] like 'News Feature%'



Delete From [dbo].[CombinedCleand]

where [Subject] like '%apply now%'


Delete From [dbo].[CombinedCleand]

where [Subject] like '%to complete your%'


Delete From [dbo].[CombinedCleand]

where [Subject] like '%want you!%'


Delete From [dbo].[CombinedCleand]

where [Subject] like '%Lead at%'

Delete From [dbo].[CombinedCleand]

where [Subject] like '%Software Design Princi%'

Delete From [dbo].[CombinedCleand]

where [Subject] like '%Business Loan%'


Delete From [dbo].[CombinedCleand]

where [Subject] like '%password%'


Delete From [dbo].[CombinedCleand]

where [Subject] like '%Matching%'


Update [dbo].[CombinedCleand] set Date_time_received=Convert(datetime,substring(Application_date_date,13,10))

where Date_time_Received Is Null


Update [dbo].[CombinedCleand] set Application_date_date=Date_time_received

where Application_date_date Is Null


Update [dbo].[CombinedCleand] set person=contact where person like '%Theodore%' 


Update [dbo].[CombinedCleand] set person=Isnull(IsNull(person,contact),'email')


Update [dbo].[CombinedCleand] set contact=Isnull(IsNull(contact,person),'email')


Update [dbo].[CombinedCleand] set Location=Left(substring(results, CharIndex('Location:',results)+9,50),CharIndex('Appl',substring(results, CharIndex('Location:',results)+9,50))-1)

where Location is Null and Results Like 'Location:%'


Update [dbo].[CombinedCleand] set Location=substring(results, CharIndex('Location:',results)+9,50)

where Location is Null and Results Like 'Appli%'


Update [dbo].[CombinedCleand] set Location=substring(results, CharIndex('Location:',results)+9,50)

where Location is Null and Results Like 'Position%'


Update [dbo].[CombinedCleand] set Location='Remote'

where Location is Null



Update [dbo].[CombinedCleand] set Position=Case when CharIndex('Company', results)>-1 then Left(substring(results, CharIndex('Position:',results)+9,100),CharIndex('Company',substring(results, CharIndex('Position:',results)+9,100))-1)  

else substring(results, CharIndex('Position:',results)+9,50) end

where Position is Null

and Results Like '%Position%'



Update [dbo].[CombinedCleand] set results='applied' where (results is null) and Email_Text Like '%was sent%' 


Update [dbo].[CombinedCleand] set results='applied' where [Subject] Like 'Thank%' 



Update [dbo].[CombinedCleand] set results='applied' where [results] Like '%was sent%' 

Update [dbo].[CombinedCleand] set results='viewed' where [results] Like '%view%' 

Update [dbo].[CombinedCleand] set results='rejected' where [results] Like '%will not be%' 


Update [dbo].[CombinedCleand] set results='rejected' where [results] Like '%not moving forward%' 





INSERT INTO [dbo].[JobSearch]

      ([Date]

      ,[Company]

      ,[Results]

      ,[Person]

      ,[Position]

      ,[Contact type]

      ,[Location])

  Select Date_Time_Received as date,

  Company,

  results,

  IsNull(contact, person) as Person,

  Position,

  'Linked In' as contactType,

  [location]

From [dbo].[CombinedCleand]


Declare @DeleteMe Table (ID bigint,rnk int)


Insert Into @DeleteMe 

Select ID, rnk 

from (

Select ID, count(*) over (Partition by Date, Company, Results, Location Order By Date ) as rnk

from dbo.Jobsearch)x

where x.rnk>1


Delete from dbo.JobSearch where ID in (Select ID from @Deleteme)

Comments

Popular posts from this blog

Father claims 15 year old girl is capable of consent in Lesbian Rape Case

Fundamentalist Atheism

Call them what you like, Broken Families are still a TRAGEDY!