However, we run a LOT of SSIS jobs, multiple per minute, and so the query now takes over 30 seconds to return. Which sucks, because SQL Sentry's "execute SQL" has a hard cut-off of 30 seconds. So we stopped getting the useful emails. I wanted to fix that.
First attempt: go look in sysjobhistory for failed jobs, and then walk through the jobs that failed and run my SSISDB code explicitly.
Enjoy!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE PROCEDURE ssis_job_failure__find_failures_to_report | |
AS | |
/* | |
2016/04/13 MDB - 1.00 - Since SQLSentry doesn't always work with the stored procedure (performance issues because we have so many jobs running frequently) | |
, we're trying this. Scan sysjobhistory for errors. Run each job failure through the SSISDB SP I wrote. | |
Set the job to run every minute. | |
this is a companion to https://gist.github.com/mbourgon/5502095, which will send useful SSISDB emails when a job fails. | |
CREATE TABLE ssisdb_failure_email_last_instance_id_checked (id INT IDENTITY PRIMARY KEY, instance_id int, insert_datetime DATETIME) | |
--109911 | |
INSERT INTO | |
ssisdb_failure_email_last_instance_id_checked (instance_id, insert_datetime) VALUES (6354527, GETDATE()) | |
that value should be a recent instance_id - basically that's the starting point for where it checks. | |
*/ | |
DECLARE @min_instance_id INT, @max_instance_id INT, @last_instance_checked INT | |
DECLARE @list_of_jobs_to_send_emails_for TABLE (id int IDENTITY, job_name sysname) | |
--get the last instance_id inserted in MSDB sysjobhistory, and search all the newer ones for errors. | |
SELECT @last_instance_checked = instance_id FROM ssisdb_failure_email_last_instance_id_checked | |
WHERE id = (SELECT MAX(id) FROM ssisdb_failure_email_last_instance_id_checked) | |
SELECT @last_instance_checked | |
--this sets a bound - we're not looking at anything else after the records we're grabbing now. | |
SELECT @min_instance_id = MIN(instance_id), @max_instance_id = MAX(instance_id) FROM msdb.dbo.sysjobhistory | |
WHERE instance_id > @last_instance_checked --not >=, because then it would process the same row on two subsequent runs. | |
--Get all the jobs that have failed. | |
--Insert into a temp table so that we can handle each. | |
--Doing an EXISTS instead of INNER JOIN to make the logic as simple as possible. | |
INSERT INTO @list_of_jobs_to_send_emails_for | |
(job_name) | |
SELECT sj.name FROM msdb.dbo.sysjobs sj WHERE EXISTS | |
( | |
SELECT 1 FROM msdb.dbo.sysjobhistory sjh WHERE step_id = 0 | |
AND message LIKE '%failed%' AND sjh.job_id = sj.job_id | |
--examine the entire range that we got above. Hence the <= and >=. | |
AND sjh.instance_id >= @min_instance_id AND sjh.instance_id <=@max_instance_id | |
) | |
--This way the details show up in the job log, so we know what _should_ have occurred. | |
SELECT * FROM @list_of_jobs_to_send_emails_for | |
--now go through each, in order, and run the SSISDB email proc for each. | |
declare @min INT, @max INT, @msg NVARCHAR(50), @job_to_do VARCHAR(500) | |
SELECT @min = MIN(id), @max = MAX(id) FROM @list_of_jobs_to_send_emails_for | |
while @min <= @max | |
BEGIN | |
SET @job_to_do = NULL | |
--log when & what we're doing. | |
SET @msg = (select CONVERT(VARCHAR(20), GETDATE(), 120)) | |
RAISERROR (@msg, 0, 1) WITH NOWAIT | |
SELECT @job_to_do = job_name FROM @list_of_jobs_to_send_emails_for WHERE id = @min | |
PRINT 'EXEC ssis_job_failure_info_fromjobname_SqlSentry @job_name = ' + @job_to_do -- varchar(500) | |
EXEC ssis_job_failure_info_fromjobname_SqlSentry @job_name = @job_to_do | |
set @min = @min+1 | |
END | |
--In case there aren't any jobs run between instances of this job, since it'd be NULL (hit in testing) | |
IF @max_instance_id IS NOT NULL | |
BEGIN | |
INSERT INTO ssisdb_failure_email_last_instance_id_checked | |
(instance_id, insert_datetime) | |
VALUES (@max_instance_id, | |
GETDATE() | |
) | |
END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE PROCEDURE ssis_job_failure__find_failures_to_report | |
AS | |
/* | |
2016/04/13 MDB - 1.00 - Since SQLSentry doesn't always work with the stored procedure (performance issues because we have so many jobs running frequently) | |
, we're trying this. Scan sysjobhistory for errors. Run each job failure through the SSISDB SP I wrote. | |
Set the job to run every minute. | |
this is a companion to https://gist.github.com/mbourgon/5502095, which will send useful SSISDB emails when a job fails. | |
CREATE TABLE ssisdb_failure_email_last_instance_id_checked (id INT IDENTITY PRIMARY KEY, instance_id int, insert_datetime DATETIME) | |
--109911 | |
INSERT INTO | |
ssisdb_failure_email_last_instance_id_checked (instance_id, insert_datetime) VALUES (6354527, GETDATE()) | |
that value should be a recent instance_id - basically that's the starting point for where it checks. | |
*/ | |
DECLARE @min_instance_id INT, @max_instance_id INT, @last_instance_checked INT | |
DECLARE @list_of_jobs_to_send_emails_for TABLE (id int IDENTITY, job_name sysname) | |
--get the last instance_id inserted in MSDB sysjobhistory, and search all the newer ones for errors. | |
SELECT @last_instance_checked = instance_id FROM ssisdb_failure_email_last_instance_id_checked | |
WHERE id = (SELECT MAX(id) FROM ssisdb_failure_email_last_instance_id_checked) | |
SELECT @last_instance_checked | |
--this sets a bound - we're not looking at anything else after the records we're grabbing now. | |
SELECT @min_instance_id = MIN(instance_id), @max_instance_id = MAX(instance_id) FROM msdb.dbo.sysjobhistory | |
WHERE instance_id > @last_instance_checked --not >=, because then it would process the same row on two subsequent runs. | |
--Get all the jobs that have failed. | |
--Insert into a temp table so that we can handle each. | |
--Doing an EXISTS instead of INNER JOIN to make the logic as simple as possible. | |
INSERT INTO @list_of_jobs_to_send_emails_for | |
(job_name) | |
SELECT sj.name FROM msdb.dbo.sysjobs sj WHERE EXISTS | |
( | |
SELECT 1 FROM msdb.dbo.sysjobhistory sjh WHERE step_id = 0 | |
AND message LIKE '%failed%' AND sjh.job_id = sj.job_id | |
--examine the entire range that we got above. Hence the <= and >=. | |
AND sjh.instance_id >= @min_instance_id AND sjh.instance_id <=@max_instance_id | |
) | |
--This way the details show up in the job log, so we know what _should_ have occurred. | |
SELECT * FROM @list_of_jobs_to_send_emails_for | |
--now go through each, in order, and run the SSISDB email proc for each. | |
declare @min INT, @max INT, @msg NVARCHAR(50), @job_to_do VARCHAR(500) | |
SELECT @min = MIN(id), @max = MAX(id) FROM @list_of_jobs_to_send_emails_for | |
while @min <= @max | |
BEGIN | |
SET @job_to_do = NULL | |
--log when & what we're doing. | |
SET @msg = (select CONVERT(VARCHAR(20), GETDATE(), 120)) | |
RAISERROR (@msg, 0, 1) WITH NOWAIT | |
SELECT @job_to_do = job_name FROM @list_of_jobs_to_send_emails_for WHERE id = @min | |
PRINT 'EXEC ssis_job_failure_info_fromjobname_SqlSentry @job_name = ' + @job_to_do -- varchar(500) | |
EXEC ssis_job_failure_info_fromjobname_SqlSentry @job_name = @job_to_do | |
set @min = @min+1 | |
END | |
--In case there aren't any jobs run between instances of this job, since it'd be NULL (hit in testing) | |
IF @max_instance_id IS NOT NULL | |
BEGIN | |
INSERT INTO ssisdb_failure_email_last_instance_id_checked | |
(instance_id, insert_datetime) | |
VALUES (@max_instance_id, | |
GETDATE() | |
) | |
END |
1 comment:
And let me add that I haven't fixed the problem that one of the regulars here has pointed out. I need to, yes, just haven't gotten back to it yet.
Post a Comment