AI task test, data cleaning
Well, so much for AI extracting information from e-mails.
This is why I have my doubts about AI. I'm using Gmail Cloud HQ to separate out my mainly LinkedIn (with some other items) job search from all the spam I get. I'm then pushing that into an Azure SQL database. Using three separate queries looking for "%appli%" from "noreply@" in Gmail (because GMAIL doesn't actually support wildcards) I'm pulling in three slightly different CSV files into SQL, cleaning them, and putting them into my job search. This is *AFTER* CloudHQ used "AI parsing" to fill certain fields to begin with.
Here's the script:
/****** Object: Table [dbo].[CombinedCleand] Script Date: 7/22/2025 10:59:29 AM ******/
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[CombinedCleand]') AND type in (N'U'))
DROP TABLE [dbo].[CombinedCleand]
GO
/****** Object: Table [dbo].[CombinedCleand] Script Date: 7/22/2025 10:59:29 AM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[CombinedCleand](
[ID] [bigint] IDENTITY(1,1) NOT NULL,
[From] [nvarchar](255) NOT NULL,
[To] [nvarchar](255) NOT NULL,
[Subject] [nvarchar](max) NOT NULL,
[Date_Time_Sent] [datetime2](7) NULL,
[Date_Time_Received] [datetime2](7) NULL,
[Email_Text] [nvarchar](Max) NULL,
[person] [nvarchar](max) NULL,
[contact] [nvarchar](50) NULL,
[results] [nvarchar](200) NULL,
[Location] [nvarchar](1000) NULL,
[Application_date_date] [nvarchar](255) NULL,
[Position] [nvarchar](1000) NULL,
[Company] [nvarchar](50) NULL
) ON [PRIMARY]
GO
INSERT INTO [dbo].[CombinedCleand]
([From]
,[To]
,[Subject]
,[Date_Time_Sent]
,[Date_Time_Received]
,[Email_Text]
,[person]
,[contact]
,[results]
,[Location]
,[Application_date_date]
,[Position]
,[Company])
Select [From]
,[To]
,[Subject]
,[Date_Time_Sent]
,[Date_Time_Received]
,[Email_Text]
,[person]
,'email' as [contact]
,'unknown' as [results]
,[Location]
,[date]
,[job_title] as [Position]
,[Company_Name] as [Company]
from dbo.import1
INSERT INTO [dbo].[CombinedCleand]
([From]
,[To]
,[Subject]
,[Date_Time_Sent]
,[Date_Time_Received]
,[Email_Text]
,[person]
,[contact]
,[results]
,[Location]
,[Application_date_date]
,[Position]
,[Company])
select [From]
,[To]
,[Subject]
,[Date_Time_Sent]
,[Date_Time_Received]
,[Email_Text]
,[person]
,[contact]
,[results]
,[Location]
,[Application_date_date]
,[Position]
,[Company]
from dbo.import2
INSERT INTO [dbo].[CombinedCleand]
([From]
,[To]
,[Subject]
,[Date_Time_Sent]
,[Date_Time_Received]
,[Email_Text]
,[person]
,[contact]
,[results]
,[Location]
,[Application_date_date]
,[Position]
,[Company])
select [From]
,[To]
,[Subject]
,[Date_Time_Sent]
,[Date_Time_Received]
,[Email_Text]
,'unknown' as [person]
,'email' as [contact]
,'unknown' as [results]
,[Location]
,[Date_Time_Sent] as [date]
,[job_title] as [Position]
,[Company]
from dbo.import3
Delete From [dbo].[CombinedCleand]
where Not( [To] in ('seebert42@gmail.com','theodore@seeberfamily.org'))
Delete From [dbo].[CombinedCleand]
where [Subject] like '%looking for a new job?%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%verification%'
Delete From [dbo].[CombinedCleand]
where [Subject] like 'New jobs similar%'
Delete From [dbo].[CombinedCleand]
where [Email_text] like 'Check out the status of your applications on%'
Delete From [dbo].[CombinedCleand]
where [Subject] like 'Keep track of your application%'
Delete From [dbo].[CombinedCleand]
where [Subject] like 'You look like a good fit for the job%'
Delete From [dbo].[CombinedCleand]
where [Subject] like 'Fast apply%'
Delete From [dbo].[CombinedCleand]
where [Subject] like 'News Feature%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%apply now%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%to complete your%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%want you!%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%Lead at%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%Software Design Princi%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%Business Loan%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%password%'
Delete From [dbo].[CombinedCleand]
where [Subject] like '%Matching%'
Update [dbo].[CombinedCleand] set Date_time_received=Convert(datetime,substring(Application_date_date,13,10))
where Date_time_Received Is Null
Update [dbo].[CombinedCleand] set Application_date_date=Date_time_received
where Application_date_date Is Null
Update [dbo].[CombinedCleand] set person=contact where person like '%Theodore%'
Update [dbo].[CombinedCleand] set person=Isnull(IsNull(person,contact),'email')
Update [dbo].[CombinedCleand] set contact=Isnull(IsNull(contact,person),'email')
Update [dbo].[CombinedCleand] set Location=Left(substring(results, CharIndex('Location:',results)+9,50),CharIndex('Appl',substring(results, CharIndex('Location:',results)+9,50))-1)
where Location is Null and Results Like 'Location:%'
Update [dbo].[CombinedCleand] set Location=substring(results, CharIndex('Location:',results)+9,50)
where Location is Null and Results Like 'Appli%'
Update [dbo].[CombinedCleand] set Location=substring(results, CharIndex('Location:',results)+9,50)
where Location is Null and Results Like 'Position%'
Update [dbo].[CombinedCleand] set Location='Remote'
where Location is Null
Update [dbo].[CombinedCleand] set Position=Case when CharIndex('Company', results)>-1 then Left(substring(results, CharIndex('Position:',results)+9,100),CharIndex('Company',substring(results, CharIndex('Position:',results)+9,100))-1)
else substring(results, CharIndex('Position:',results)+9,50) end
where Position is Null
and Results Like '%Position%'
Update [dbo].[CombinedCleand] set results='applied' where (results is null) and Email_Text Like '%was sent%'
Update [dbo].[CombinedCleand] set results='applied' where [Subject] Like 'Thank%'
Update [dbo].[CombinedCleand] set results='applied' where [results] Like '%was sent%'
Update [dbo].[CombinedCleand] set results='viewed' where [results] Like '%view%'
Update [dbo].[CombinedCleand] set results='rejected' where [results] Like '%will not be%'
Update [dbo].[CombinedCleand] set results='rejected' where [results] Like '%not moving forward%'
INSERT INTO [dbo].[JobSearch]
([Date]
,[Company]
,[Results]
,[Person]
,[Position]
,[Contact type]
,[Location])
Select Date_Time_Received as date,
Company,
results,
IsNull(contact, person) as Person,
Position,
'Linked In' as contactType,
[location]
From [dbo].[CombinedCleand]
Declare @DeleteMe Table (ID bigint,rnk int)
Insert Into @DeleteMe
Select ID, rnk
from (
Select ID, count(*) over (Partition by Date, Company, Results, Location Order By Date ) as rnk
from dbo.Jobsearch)x
where x.rnk>1
Delete from dbo.JobSearch where ID in (Select ID from @Deleteme)
Comments