Friday, October 8, 2010

[Replication] Better alternative to Replication Monitor

(changed on 10/12 - whoever woulda thunk a UNION wouldn't work right?)
(changed 2013/04/08 - realized the IF cluase at the end can be confusion if you're not thinking about it)
I hate Replication Monitor. Here are a couple of scripts that, embedded in a job, will better help you. I need to look into adding special alerts for replication. The main change is the addition of snapshot detection - we have missed issues because it somehow falls offline and doesn't notify that the replication needs to be snapshotted.

IF OBJECT_ID('tempdb.dbo.##replication_command_count') IS NOT NULL
DROP TABLE ##replication_command_count

SELECT SUM(UndelivCmdsInDistDB) AS UndelivCmdsInDistDB,
INTO ##replication_command_count
FROM MSDistribution_Status
INNER JOIN MSdistribution_agents
ON MSDistribution_Status.agent_id =
WHERE UndelivCmdsInDistDB > 0 --show only those that are backed up
AND subscriber_id > 0 --negative subscriber IDs are for those that
--always have a snapshot ready
AND MSdistribution_agents.NAME NOT LIKE '%someserverthatthrowserrors%'
GROUP BY MSdistribution_agents.NAME, MSdistribution_agents.publication,
subscriber_id, subscriber_db
ORDER BY MSdistribution_agents.NAME, MSdistribution_agents.publication,
subscriber_id, subscriber_db
--delete from stuff we don't care about; refinement of the IF below.
delete from ##replication_command_count
where publication = 'a_busy_publication'
and UndelivCmdsInDistDB <>

delete from ##replication_command_count
where (publication = 'abusypublication' and UndelivCmdsInDistDB < 100)
--add whatever OR clauses are needed to remove your busy publications

IF (SELECT MAX(UndelivCmdsInDistDB) FROM ##replication_command_count) > 20
--20 is our threshold to send
EXEC msdb.dbo.sp_send_dbmail
@profile_name = 'DBA',
@recipients = '',
@subject = 'Replication is backed up on REPL server'
,@query = 'select left(publication,20), convert(varchar(9),UndelivCmdsInDistDB), left(name,90) from ##replication_command_count where UndelivCmdsInDistDB >20'
,@query_result_header = 0

Script 2 -- looking for errors.

IF OBJECT_ID('tempdb.dbo.##replication_errors') IS NOT NULL
DROP TABLE ##replication_errors

error_messages.comments AS ERROR
INTO ##replication_errors
--find our errors; note that a runstatus 3 can be the last message, even if it's actually idle and good
(SELECT agent_id, MAX(TIME) AS last_time FROM distribution.dbo.MSdistribution_history with (nolock)
WHERE (runstatus IN (3,5,6) AND comments NOT LIKE '%were delivered.' AND comments NOT LIKE '
or (runstatus = 4 and comments like 'The initial snapshot%is not yet available.') GROUP BY agent_id) errors
(SELECT agent_id, MAX(TIME) AS last_time FROM distribution.dbo.MSdistribution_history with (nolock)
WHERE (runstatus IN (1,2,4) and comments not like 'The initial snapshot %is not yet available.')
OR comments LIKE '%were delivered.' GROUP BY agent_id
) clean
ON errors.agent_id = clean.agent_id
--grab the agent information
LEFT OUTER JOIN distribution.dbo.MSdistribution_agents agentinfo
ON = errors.agent_id
--and the actual message we'd see in the monitor
LEFT OUTER JOIN distribution.dbo.MSdistribution_history error_messages
ON error_messages.agent_id = errors.agent_id AND error_messages.time = errors.last_time
where errors.last_TIME > ISNULL(clean.last_time,'20100101')
AND comments NOT LIKE '%TCP Provider%'
AND comments NOT LIKE '%Delivering replicated transactions%'
AND name NOT LIKE '%suckyservername%'

IF (SELECT COUNT(*) FROM ##replication_errors) > 0
EXEC msdb.dbo.sp_send_dbmail
@profile_name = 'dba',
@recipients = '',
@subject = 'Replication errors on REPL server'
,@query = 'select * from ##replication_errors'
,@query_result_header = 0

DROP TABLE ##replication_errors

No comments: