Search This Blog

Tuesday, August 23, 2011

What is Predictive Analytics and how does it relate to BI?


The "predictive" approach stems from modeling data and storing in such a way an analyst/statistician can use it to make those decisions "openly" rather than boxing a decision maker into a silo of data which can limit them. You'll see a tool such as SAS, SPSS (and there are other approaches) used to allow a person to perform"what-ifs" that'll evolve the data to help them drive to conclusions.

Traditional BI reporting does a fine job at aggregating and showing how a company has done to date but lacks the ability to make decisions on what one should do. Predictive analytics builds on the practice of data mining and the principles of building decisions on what's in front of a company based on existing data, market conditions and the driving conclusions of model scores.

Tuesday, April 5, 2011

db backups

USE [Operations]
GO

/****** Object:  StoredProcedure [dbo].[db_backups]    Script Date: 04/05/2011 15:13:47 ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

--exec db_backups 'model'
CREATE procedure [dbo].[db_backups]
@db_name varchar(50)
AS
-->declare working variables
DECLARE
@db nvarchar(255),
@cleanup varchar(4000),
@device nvarchar(4000),
@ddevice nvarchar(4000),
@backup varchar(4000),
@bkp_path varchar(1000),
@bkp_device varchar(1000),
@bkp_l_device varchar(2000),
@bkp_sql nvarchar(4000)

-->@bkp_l_device default setting
set @bkp_l_device=null

-->bkp file cleaner
declare cleaner cursor  for select bkp_partition_path  from operations.dbo.ADMIN_db_bkp_meta BM
inner join operations.dbo.ADMIN_db_bkp_partitions BP on BM.bkp_partition_id=BP.bkp_partition_id
where [db_name] =(''+@db_name+'')
order by 1

OPEN cleaner

FETCH NEXT FROM cleaner INTO @bkp_path
WHILE @@FETCH_STATUS = 0
BEGIN
-->purge old backup files.  full purge, no dynamic bkp file names
SET @cleanup='del /Q ' + '"'+@bkp_path+''+ @db_name + '*.bak"'
PRINT @cleanup
exec master..xp_cmdshell @cleanup
FETCH NEXT FROM cleaner INTO @bkp_path
END
CLOSE cleaner
DEALLOCATE cleaner

-->build backup device(s)
declare devices cursor  for select BP.bkp_partition_id  from operations.dbo.ADMIN_db_bkp_meta BM
inner join operations.dbo.ADMIN_db_bkp_partitions BP on BM.bkp_partition_id=BP.bkp_partition_id
where BM.[db_name] =(''+@db_name+'')
order by 1

OPEN devices

FETCH NEXT FROM devices INTO @bkp_device
WHILE @@FETCH_STATUS = 0
BEGIN
-->build bkp devices
set @bkp_path=(select  bkp_partition_path from operations.dbo.ADMIN_db_bkp_partitions BP
where bkp_partition_id=@bkp_device)


SET @device='exec master..sp_addumpdevice ''disk'', ''bkp_'+@db_name+'_device_'+@bkp_device+''', '''+@bkp_path+''+@db_name++@bkp_device+'.bak'''--+@bkp_device+
print @device

SET @bkp_l_device = --N'EXECUTE ' + @proc_name
CASE WHEN @bkp_l_device IS NOT NULL
THEN @bkp_l_device + ',' + 'bkp_'+@db_name+'_device_'+@bkp_device+''
ELSE 'bkp_'+@db_name+'_device_'+@bkp_device+''
END
exec sp_executesql @device
FETCH NEXT FROM devices INTO @bkp_device
END
CLOSE devices
DEALLOCATE devices
print @bkp_l_device

-->backup the database
set @bkp_sql='BACKUP DATABASE ['+@db_name+'] TO
'+@bkp_l_device+' WITH NOFORMAT, INIT,
NAME = N'''+@db_name+'-Full Database Backup'', SKIP, NOREWIND, NOUNLOAD, COMPRESSION,  STATS = 10'
print @bkp_sql
exec sp_executesql @bkp_sql

-->drop backup device(s)
declare ddevices cursor  for select BP.bkp_partition_id  from operations.dbo.ADMIN_db_bkp_meta BM
inner join operations.dbo.ADMIN_db_bkp_partitions BP on BM.bkp_partition_id=BP.bkp_partition_id
where BM.[db_name] =(''+@db_name+'')
order by 1

OPEN ddevices

FETCH NEXT FROM ddevices INTO @bkp_device
WHILE @@FETCH_STATUS = 0
BEGIN
-->drop bkp devices

SET @ddevice='exec sp_dropdevice bkp_'+@db_name+'_device_'+@bkp_device+''
print @ddevice

exec sp_executesql @ddevice
FETCH NEXT FROM ddevices INTO @bkp_device
END
CLOSE ddevices
DEALLOCATE ddevices


/*scripts for meta tables

CREATE TABLE operations.[dbo].[ADMIN_db_bkp_meta](
[db_name] [varchar](50) NOT NULL,
[bkp_partition_id] [int] NOT NULL
) ON [PRIMARY]


CREATE TABLE operations.[dbo].[ADMIN_db_bkp_partitions](
[bkp_partition_id] [int] NOT NULL,
[bkp_partition_path] [varchar](max) NOT NULL
) ON [PRIMARY]

insert ADMIN_db_bkp_partitions
(bkp_partition_id,bkp_partition_path)
values(1,
'\\HQFINSQL90\E$\backup\')

insert ADMIN_db_bkp_meta
(db_name,bkp_partition_id)
values ('model','1')
*/
GO

Friday, March 25, 2011

SQL Server: create table indexes from meta data

How to create a meta data driven table index routine using T-SQL.

1)  Metadata container


CREATE TABLE [dbo].[ETL_INDEXES](
[TABLE_NAME] [varchar](100) NOT NULL,
[INDEX_TYPE] [varchar](2) NOT NULL,
[INDEX_NAME] [varchar](100) NOT NULL,
[DROP_FL] [bit] NOT NULL,
[CREATE_FL] [bit] NOT NULL,
[REBUILD_FL] [bit] NOT NULL,
[COLUMNS] [varchar](max) NOT NULL
) ON [PRIMARY]

2)  Create T SQL routine to loop through meta data table based on passed table variable


CREATE PROCEDURE [dbo].[sp_loop_create_table_indexes]
@table_name varchar(100)
AS

/*
@table_name: target table

*/
-->This stored procedure will read meta data stored in etl_indexes
--and perform creates where flagged

--*declare working variables
DECLARE @cursor CURSOR
DECLARE @vTABLE_NAME varchar(100)
DECLARE @vINDEX_TYPE varchar(2)
DECLARE @vINDEX_NAME varchar(100)
DECLARE @vDROP_FL bit
DECLARE @vCREATE_FL bit
DECLARE @vREBUILD_FL bit
DECLARE @vCOLUMNS varchar(max)

SET @cursor  = CURSOR FOR
Select distinct TABLE_NAME
, INDEX_TYPE
, INDEX_NAME
, DROP_FL
, CREATE_FL
, REBUILD_FL
, COLUMNS
from operations.dbo.ETL_INDEXES where TABLE_NAME=@table_name
OPEN @cursor

FETCH NEXT FROM @cursor INTO  @vTABLE_NAME ,@vINDEX_TYPE ,@vINDEX_NAME
 ,@vDROP_FL ,@vCREATE_FL ,@vREBUILD_FL  ,@vCOLUMNS

WHILE @@FETCH_STATUS <> -1
BEGIN

exec dbo.sp_create_table_indexes @vTABLE_NAME ,@vINDEX_TYPE ,@vINDEX_NAME
 ,@vDROP_FL ,@vCREATE_FL ,@vREBUILD_FL  ,@vCOLUMNS

FETCH NEXT FROM @cursor INTO @vTABLE_NAME ,@vINDEX_TYPE ,@vINDEX_NAME
 ,@vDROP_FL ,@vCREATE_FL ,@vREBUILD_FL  ,@vCOLUMNS
END
CLOSE @cursor  
DEALLOCATE @cursor
GO

3) Create T SQL routine to create indexes (sub routine to previous step)


CREATE PROCEDURE [dbo].[sp_create_table_indexes]
@table_name varchar(100)
,@index_type varchar(2)
,@index_name varchar(100)
,@drop_fl bit
,@create_fl bit
,@rebuild_fl bit
,@columns varchar(max)

AS

/*
@table_name: target table name
@index_type: IX (nonclustered), CX (clustered), UX (unique nonclustered),
PK (primary key constraint, unique clustered)
**NOTE: PK's can only be applied to non-null columns, there can be only
one clustered index, by default PK's are clustered with this script
@index_name: Name of table index
@drop_fl: triggers index drop
@create_fl: triggers index creation
@rebuild_fl: triggers index rebuild
@columns: list of columns to index
*/

-->This stored procedure executes a create index based on passed variables

--*declare working variables
declare @PK varchar(4000)
declare @IX varchar(4000)
declare @CX varchar(4000)
declare @UX varchar(4000)

IF @index_type='PK' BEGIN
set @PK='
ALTER TABLE [dbo].['+@table_name+'] ADD  CONSTRAINT ['+@index_name+'] PRIMARY KEY CLUSTERED
('+@columns+') WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
'
exec (@PK)
END

IF @index_type ='IX' BEGIN
SET @IX='
CREATE NONCLUSTERED INDEX ['+@index_name+'] ON [dbo].['+@table_name+']
('+@columns+')WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
'
exec (@IX)
END

IF @index_type ='UX' BEGIN
SET @UX='
CREATE UNIQUE NONCLUSTERED INDEX ['+@index_name+'] ON [dbo].['+@table_name+']
('+@columns+')WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
'
exec (@UX)
END

IF @index_type ='CX' BEGIN
SET @CX='
CREATE CLUSTERED INDEX ['+@index_name+'] ON [dbo].['+@table_name+']
('+@columns+')WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
'
exec (@CX)
END

4)  Create T SQL drop index procedure (this is sub routine to steps above)


CREATE PROCEDURE [dbo].[sp_drop_table_indexes]
@table_name varchar(100)
,@index_type varchar(2)
,@index_name varchar(100)
,@drop_fl bit
,@create_fl bit
,@rebuild_fl bit
,@columns varchar(max)

AS

/*
@table_name: target table name
@index_type: IX (nonclustered), CX (clustered), UX (unique nonclustered),
PK (primary key constraint, unique clustered)
@index_name: Name of table index
@drop_fl: triggers index drop
@create_fl: triggers index creation
@rebuild_fl: triggers index rebuild
@columns: list of columns to index
*/

-->This stored procedure executes a drop index based on passed variables

--*declare working variables
declare @PK varchar(4000)
declare @I varchar(4000)

IF @index_type='PK' BEGIN
set @PK='
IF  EXISTS (SELECT * FROM sys.indexes WHERE object_id = OBJECT_ID(N''[dbo].['+@table_name+']'') AND name = N'''+@index_name+''')
ALTER TABLE [dbo].['+@table_name+'] DROP CONSTRAINT ['+@index_name+']
'
exec (@PK)
END

IF @index_type IN ('IX','UX','CX') BEGIN
SET @I='
IF  EXISTS (SELECT * FROM sys.indexes WHERE object_id = OBJECT_ID(N''[dbo].['+@table_name+']'') AND name = N'''+@index_name+''')
DROP INDEX ['+@index_name+'] ON [dbo].['+@table_name+'] WITH ( ONLINE = OFF )'
exec (@I)
END

Thursday, March 24, 2011

Informatica: dynamic filelist generation from metadata

Purpose: How to build dynamic filelist for Informatica.
Required:  File and Source metadata

Step1:  Create Informatica mapping.
Step2:  Drop source(s) in mapping that will drive unique filename, source name, filelist name.  One example would be building a filelist of files to load that have not been logged (loaded in ETL system) or run but logged a failure (re-run due to failure).
Step3: I suggest using an aggregator post joiner (if used) to get a list of unique filenames, source names, filelist names.
Step4: Add a sort transform to put data in order by filelist name
Step5: Add function transform to generate output columns.
Needed: full file path (from variable) appended to filename (ex: $$landing_dir||'/'||FILENAME).
Add 3 ports.
vNEW_FILENAME_IND (var): IIF(FILELIST_NAME = vPREV_FILELIST_NAME,0,1)
vPREV_FILELIST_NAME (var): FILELIST_NAME 
NEW_FILENAME_IND (output port): vNEW_FILENAME_IND
Step6: Add transaction control transform
Map the filename+full_path port, New Filename Ind port, Filelist name port
For the TX control condition: IIF(NEW_FILENAME_IND=1,TC_COMMIT_BEFORE,TC_CONTINUE_TRANSACTION)
Step7:  Map to Flat file target
Filelist name port and Filename+path port

Unix shell: preps meta load to a logging repository. collects file meta.

#!/bin/bash/

#ADMIN_fileprep_sh
#preps meta load to a logging repository.  collects file meta.


#Set variables
vPath=/source_directory
nPipe="|"
nVar="##_"

#Set file perms
#chmod -f 666 $vPath/ADMIN_filelist_prep
chmod -f 666 $vPath/ADMIN_filelist_rename

#Rename files and insert into Admin_filelist_prep
while read line
do
mv $vPath/$line $vPath/$nVar`date +"%Y%m%d%H%M%S"`_$line
echo "$nVar""`date +"%Y%m%d%H%M%S"`_$line">>$vPath/ADMIN_filelist_prep
done<$vPath/ADMIN_filelist_rename

#Loop for permissions
while read line
do
chmod -f 666 $vPath/$line
done<$vPath/ADMIN_filelist_prep

#Loop for dos2unix conversions, uncomment if desired
#while read line
#do
#isitdos='cat $vPath/$line|head -1| od -c|grep "\r"|wc -l'
#if [$isitdos > 0]
#then
#dos2unix -k $vPath/$line
#fi
#done<$vPath/ADMIN_filelist_prep

#Create filestats
>$vPath/ADMIN_filestats

#Loop for stats
while read line
do
     nCount=`wc -l<$vPath/$line`
     nBytes=`wc -c<$vPath/$line`
     nDate=`date -r $vPath/$line +%F`
     echo "$line""$nPipe""$nCount""$nPipe""$nBytes""$nPipe""$nDate">>$vPath/ADMIN_filestats
done<$vPath/ADMIN_filelist_prep

#Loop for Bytes
#while read line
#do
#     nBytes=`wc -c<$vPath/$line`
#     echo "$nBytes" "$line">>$vPath/ADMIN_filebytes
#done<$vPath/ADMIN_filelist_prep

#Loop for Dates
#while read line
#do
#     nDate=`date -r $vPath/$line +%F`
#     echo "$line" "$nDate" >>$vPath/ADMIN_filedates
#done<$vPath/ADMIN_filelist_prep

#Adjust permissions for files
#chmod -f 666 $vPath/ADMIN_filestats
#chmod -f 666 $vPath/ADMIN_filebytes
#chmod -f 666 $vPath/ADMIN_filedates

#Move files to prep folder
#while read line
#do
#mv $vPath/$line $vPath/prep/$line
#done<$vPath/ADMIN_filelist_prep

Unix shell: moves file from working dir to target dir. works from working file.


#!/bin/bash/sh
#moves file from working dir to target dir.  works from working file
#ADMIN_filemove_sh

#Declare Variables
vPath=/start_dir
vDest=/end_dir

#Change perms on file
#chmod 666 $vPath/ADMIN_filelist_move

#Move
while read line
do
mv $vPath/$line $vDest/$line
done<$vPath/ADMIN_filelist_move

Unix shell: scans working directory for files to process

#!/bin/bash/
#scans working directory for files to process
#ADMIN_filelist_sh

#Set path variable
vPath=/directory_to_scan

#Remove ADMIN_filelist
rm -f $vPath/ADMIN_filelist
rm -f $vPath/ADMIN_filelist_prep
rm -f $vPath/ADMIN_filelist_rename
rm -f $vPath/ADMIN_filestats
rm -f $vPath/ADMIN_filelist_move


#Rename file in current directory with spaces
find $vPath/. -name "* *"|while read file
do
     #vFile=$vPath/$file
     mv "$file" "`echo "$file"|sed 's/ /##/g'`"
done

#Create ADMIN_filelist
find $vPath/. -maxdepth 1 -type f -printf %f\\n>>$vPath/ADMIN_filelist

#Adjust file permissions
#chmod 666 $vPath/ADMIN_filelist

SQL Server: archive target table

/*purpose:   archive passed target table with date time stamp.  original table is copied as a shell
this is referred to as a table swap*/


ALTER procedure [dbo].[sp_archive_target_table]
@target varchar(100)
--set @target='table_name'
AS
--=====================
declare @CREATE varchar(4000)
declare @RENAME_ORIG varchar(4000)
declare @RENAME_NEW varchar(4000)
declare @append varchar(16)
--========================
set @append=CONVERT(varchar(8),getdate(),112)+REPLACE(CONVERT(VARCHAR(8),GETDATE(),114),':','')
--========================
set @CREATE='
select top 0 *
into '+@target+'_new
from '+@target+''
exec (@CREATE)
--========================
set @RENAME_ORIG='
sp_rename '''+@target+''', '''+@target+''+'_'+''+@append+'''
'
exec (@RENAME_ORIG)

set @RENAME_NEW='
sp_rename '''+@target+'_new'', '''+@target+'''
'
exec (@RENAME_NEW)
--=======================