This is SQL Server syntax. If you figure this out for Oracle or another database then please post it. getdate() is the current date. datepart(DW, getdate()) will get the day of the week for today. stddev() is the standard deviation for the current avg(). Most numbers should be within one standard deviation of the average.
Next how do you use this information?
You need to find all jobs where the number of rows was greater then the avg(number of rows) + stdev(number of rows).
Code: Select all
select
LAST_RUN.JOB_NAME,
LAST_RUN.LINK_NAME,
LAST_RUN.START_TS,
LAST_RUN.ELAPSED_TIME,
LAST_RUN.ROWS_PER_SEC,
ROWS_MAX.AVG_ROWS_PER_SEC,
convert(decimal(18,0),ROWS_MAX.STD_ROWS_PER_SEC) as STD_ROWS_PER_SEC,
LAST_RUN.ROW_COUNT,
ROWS_MAX.AVG_ROW_COUNT,
convert(decimal(18,0),ROWS_MAX.STD_ROW_COUNT) as STD_ROW_COUNT
from
ETL_ROW_HIST LAST_RUN,
(
select
PROJECT_NAME,
JOB_NAME,
LINK_NAME,
max(START_TS) as MAX_START_TS,
avg(ROW_COUNT) as AVG_ROW_COUNT,
stdev(ROW_COUNT) as STD_ROW_COUNT,
avg(ROWS_PER_SEC) as AVG_ROWS_PER_SEC,
stdev(ROWS_PER_SEC) as STD_ROWS_PER_SEC
from
ETL_ROW_HIST
where
datepart(DW, getdate()) = datepart(DW, convert(datetime, START_TS))
group by
PROJECT_NAME,
JOB_NAME,
LINK_NAME
) ROWS_MAX,
ETL_JOB
where
LAST_RUN.PROJECT_NAME = ETL_JOB.PROJECT_NAME and
LAST_RUN.JOB_NAME = ETL_JOB.JOB_NAME and
LAST_RUN.PROJECT_NAME = ROWS_MAX.PROJECT_NAME and
LAST_RUN.JOB_NAME = ROWS_MAX.JOB_NAME and
LAST_RUN.LINK_NAME = ROWS_MAX.LINK_NAME and
LAST_RUN.START_TS = ROWS_MAX.MAX_START_TS and
LAST_RUN.ROW_COUNT > (ROWS_MAX.AVG_ROW_COUNT + ROWS_MAX.STD_ROW_COUNT)
order by
ETL_JOB.JOB_CATEGORY,
LAST_RUN.JOB_NAME,
LAST_RUN.LINK_NAME,
LAST_RUN.START_TS
;