Saturday, April 18, 2020

SQL SERVER DATA PROFILING WITH JUST QUERY

 Data Profiling is the first step in understanding data more.  In any data-related project which involves SQL server but not having any data profiling tools below query helpful to analysis.



Metod 1: If you don't have access to create table


SET NOCOUNT ON
SET ANSI_WARNINGS OFF
-- Suppresses the "Null value is eliminated by an aggregate..." warning

DECLARE @TableName sysname
DECLARE @TableType VARCHAR(15)
DECLARE @Schema sysname
DECLARE @Catalog sysname
DECLARE @ColumnName sysname
DECLARE @OrdinalPosition INT
DECLARE @DataType sysname
DECLARE @char INT
DECLARE @num TINYINT
DECLARE @date SMALLINT
DECLARE @sql VARCHAR(MAX)
DECLARE @stmtString VARCHAR(MAX)
DECLARE @stmtNum VARCHAR(MAX)
DECLARE @stmtDate VARCHAR(MAX)
DECLARE @stmtOther VARCHAR(MAX)
DECLARE @stmtUnsup VARCHAR(MAX)
DECLARE @q CHAR(1)
 -- single quote
DECLARE @qq CHAR(2)
 -- double quote

-----------------------------------------------------------------------------------
-- Table variable to collect the final results
-----------------------------------------------------------------------------------
DECLARE @Results TABLE
    (
      [Schema] sysname ,
      [Catalog] sysname ,
      [Table Name] sysname ,
      [Table Type] VARCHAR(10) ,
      [Column Name] sysname ,
      [Seq] INT ,
      [Data Type] sysname ,
      [Avg Len/Val] NUMERIC ,
      [Min Len/Val] NUMERIC ,
      [Max Len/Val] NUMERIC ,
      [Min Date] DATETIME ,
      [Max Date] DATETIME ,
      [Distinct Values] NUMERIC ,
      [Num NULL] NUMERIC
    )

-----------------------------------------------------------------------------------
-- quote char
-----------------------------------------------------------------------------------
SET @q = ''''
SET @qq = @q + @q

-----------------------------------------------------------------------------------
-- The dynamic replacement strings for various data types
-----------------------------------------------------------------------------------
SET @stmtUnsup = 'null, null, null, null, null, null, 0'
SET @stmtString = 'avg(len([@@replace])), ' + 'min(len([@@replace])), ' + 'max(len([@@replace])), ' + 'null, null, count(distinct [@@replace]), '
    + 'sum(case when [@@replace] is null then 1 else 0 end)'
SET @stmtNum = 'avg(CAST(isnull([@@replace], 0) AS FLOAT)), ' + 'min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
    + 'null, null, count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtDate = 'null, null, null, min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
    + 'count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtOther = 'null, null, null, null, null, count(distinct @@replace) AS [Dist Count], '
    + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'

-----------------------------------------------------------------------------------
-- The cursor to read through the schema.  Change the WHERE clause to control the tables/views used
-----------------------------------------------------------------------------------
DECLARE TableCursor CURSOR
FOR
    SELECT  c.TABLE_SCHEMA ,
            c.TABLE_CATALOG ,
            c.TABLE_NAME ,
            t.TABLE_TYPE ,
            ---c.COLUMN_NAME ,
            case when SUBSTRING(c.COLUMN_NAME,1,1) NOT LIKE '['  then '['+c.COLUMN_NAME+']' else c.COLUMN_NAME end AS COLUMN_NAME,
            c.ORDINAL_POSITION ,
            c.DATA_TYPE ,
            c.CHARACTER_MAXIMUM_LENGTH ,
            c.NUMERIC_PRECISION ,
            c.DATETIME_PRECISION
    FROM    INFORMATION_SCHEMA.COLUMNS c
            INNER JOIN INFORMATION_SCHEMA.TABLES t ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
                                                      AND t.TABLE_NAME = c.TABLE_NAME
    WHERE   c.TABLE_SCHEMA IN ( 'dbo' )  --------------------------------
            AND c.TABLE_NAME  LIKE 'ABC%'  -- <<< Schema, table and view names to analyze go here
            AND t.TABLE_TYPE NOT IN ( 'VIEW' ) --------------------------------
    ORDER BY c.TABLE_NAME ,
            c.ORDINAL_POSITION

OPEN TableCursor

FETCH NEXT
FROM TableCursor
INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date

-----------------------------------------------------------------------------------
-- Process through the database schema
-----------------------------------------------------------------------------------
WHILE @@FETCH_STATUS = 0
    BEGIN
        SET @sql = CASE WHEN @DataType = 'image' THEN @stmtUnsup
                        WHEN @DataType = 'text' THEN @stmtUnsup
                        WHEN @DataType = 'ntext' THEN @stmtUnsup
                        WHEN @char IS NOT NULL THEN @stmtString
                        WHEN @num IS NOT NULL THEN @stmtNum
                        WHEN @date IS NOT NULL THEN @stmtDate
                        ELSE @stmtOther
                   END
        IF (SUBSTRING(@ColumnName,1,1)='[')
        BEGIN
        SET @sql = REPLACE(@sql, '['+'@@replace'+']',@ColumnName)
        END
          SET @sql = REPLACE(@sql, '@@replace',@ColumnName)
                PRINT(@sql)
        IF @sql <> ''
            BEGIN
                SET @Schema = @q + @Schema + @q
                SET @Catalog = @q + @Catalog + @q
                SET @TableName = @q + @TableName + @q
                SET @TableType = @q + @TableType + @q
                SET @ColumnName = @q + REPLACE(@ColumnName, @q, @qq) + @q
                SET @DataType = @q + @DataType + @q

                SET @sql = 'SELECT ' + @Schema + ', ' + @Catalog + ', ' + @TableName + ', ' + @TableType + ', ' + @ColumnName + ', '
                    + CONVERT(VARCHAR(5), @OrdinalPosition) + ', ' + @DataType + ', ' + @sql + ' FROM [' + REPLACE(@Schema, '''', '') + '].['
                    + REPLACE(@TableName, '''', '') + ']'

                PRINT @sql

                INSERT  INTO @Results
                        EXECUTE ( @sql
                               )
            END

        FETCH NEXT
  FROM TableCursor
  INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date
    END

-----------------------------------------------------------------------------------
-- Clean-up
-----------------------------------------------------------------------------------
CLOSE TableCursor

DEALLOCATE TableCursor

-----------------------------------------------------------------------------------
-- Display the results
-----------------------------------------------------------------------------------
SELECT  [Schema] ,
        [Catalog] ,
        [Table Name] ,
        CASE [Table Type]
          WHEN 'BASE TABLE' THEN 'TABLE'
          ELSE [Table Type]
        END AS 'Table Type' ,
        [Column Name] ,
        [Seq] ,
        [Data Type] ,
        [Avg Len/Val] ,
        [Min Len/Val] ,
        [Max Len/Val] ,
        [Min Date] ,
        [Max Date] ,
        [Distinct Values] ,
        [Num NULL]
FROM    @Results
ORDER BY [Table Name] ,
        [Seq] ,
        [Column Name]


-----------------------------------------------------------------------------------
-- Reset
-----------------------------------------------------------------------------------
SET NOCOUNT OFF
SET ANSI_WARNINGS ON;




Metod 2: You can create #temp table



CREATE TABLE #TEMP 
    (
      [Schema] sysname ,
      [Catalog] sysname ,
      [Table Name] sysname ,
      [Table Type] VARCHAR(10) ,
      [Column Name] sysname ,
      [Seq] INT ,
      [Data Type] sysname ,
      [Avg Len/Val] NUMERIC ,
      [Min Len/Val] NUMERIC ,
      [Max Len/Val] NUMERIC ,
      [Min Date] DATETIME ,
      [Max Date] DATETIME ,
      [Distinct Values] NUMERIC ,
      [Num NULL] NUMERIC
    );


SET NOCOUNT ON
SET ANSI_WARNINGS OFF
-- Suppresses the "Null value is eliminated by an aggregate..." warning

DECLARE @TableName sysname
DECLARE @TableType VARCHAR(15)
DECLARE @Schema sysname
DECLARE @Catalog sysname
DECLARE @ColumnName sysname
DECLARE @OrdinalPosition INT
DECLARE @DataType sysname
DECLARE @char INT
DECLARE @num TINYINT
DECLARE @date SMALLINT
DECLARE @sql VARCHAR(MAX)
DECLARE @stmtString VARCHAR(MAX)
DECLARE @stmtNum VARCHAR(MAX)
DECLARE @stmtDate VARCHAR(MAX)
DECLARE @stmtOther VARCHAR(MAX)
DECLARE @stmtUnsup VARCHAR(MAX)
DECLARE @q CHAR(1)
 -- single quote
DECLARE @qq CHAR(2)
 -- double quote
DECLARE @cnt INT=0

-----------------------------------------------------------------------------------
-- Table variable to collect the final results
-----------------------------------------------------------------------------------
DECLARE @Results TABLE
    (
      [Schema] sysname ,
      [Catalog] sysname ,
      [Table Name] sysname ,
      [Table Type] VARCHAR(10) ,
      [Column Name] sysname ,
      [Seq] INT ,
      [Data Type] sysname ,
      [Avg Len/Val] NUMERIC ,
      [Min Len/Val] NUMERIC ,
      [Max Len/Val] NUMERIC ,
      [Min Date] DATETIME ,
      [Max Date] DATETIME ,
      [Distinct Values] NUMERIC ,
      [Num NULL] NUMERIC
    )

-----------------------------------------------------------------------------------
-- quote char
-----------------------------------------------------------------------------------
SET @q = ''''
SET @qq = @q + @q

-----------------------------------------------------------------------------------
-- The dynamic replacement strings for various data types
-----------------------------------------------------------------------------------
SET @stmtUnsup = 'null, null, null, null, null, null, 0'
SET @stmtString = 'avg(len([@@replace])), ' + 'min(len([@@replace])), ' + 'max(len([@@replace])), ' + 'null, null, count(distinct [@@replace]), '
    + 'sum(case when [@@replace] is null then 1 else 0 end)'
SET @stmtNum = 'avg(CAST(isnull([@@replace], 0) AS FLOAT)), ' + 'min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
    + 'null, null, count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtDate = 'null, null, null, min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
    + 'count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtOther = 'null, null, null, null, null, count(distinct @@replace) AS [Dist Count], '
    + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'

-----------------------------------------------------------------------------------
-- The cursor to read through the schema.  Change the WHERE clause to control the tables/views used
-----------------------------------------------------------------------------------
DECLARE TableCursor CURSOR
FOR
    SELECT  c.TABLE_SCHEMA ,
            c.TABLE_CATALOG ,
            c.TABLE_NAME ,
            t.TABLE_TYPE ,
            ---c.COLUMN_NAME ,
            case when SUBSTRING(c.COLUMN_NAME,1,1) NOT LIKE '['  then '['+c.COLUMN_NAME+']' else c.COLUMN_NAME end AS COLUMN_NAME,
            c.ORDINAL_POSITION ,
            c.DATA_TYPE ,
            c.CHARACTER_MAXIMUM_LENGTH ,
            c.NUMERIC_PRECISION ,
            c.DATETIME_PRECISION
    FROM    INFORMATION_SCHEMA.COLUMNS c
            INNER JOIN INFORMATION_SCHEMA.TABLES t ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
                                                      AND t.TABLE_NAME = c.TABLE_NAME
    WHERE   c.TABLE_SCHEMA IN ( 'dbo' )  --------------------------------
            AND c.TABLE_NAME  LIKE 'ABC%'  -- <<< Schema, table and view names to analyze go here
            AND t.TABLE_TYPE NOT IN ( 'VIEW' ) --------------------------------
    ORDER BY c.TABLE_NAME ,
            c.ORDINAL_POSITION

OPEN TableCursor

FETCH NEXT
FROM TableCursor
INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date

PRINT(CAST(@cnt AS VARCHAR) + 'TABLE PROFLING DONE')
-----------------------------------------------------------------------------------
-- Process through the database schema
-----------------------------------------------------------------------------------
WHILE @@FETCH_STATUS = 0
    BEGIN
        SET @sql = CASE WHEN @DataType = 'image' THEN @stmtUnsup
                        WHEN @DataType = 'text' THEN @stmtUnsup
                        WHEN @DataType = 'ntext' THEN @stmtUnsup
                        WHEN @char IS NOT NULL THEN @stmtString
                        WHEN @num IS NOT NULL THEN @stmtNum
                        WHEN @date IS NOT NULL THEN @stmtDate
                        ELSE @stmtOther
                   END
        IF (SUBSTRING(@ColumnName,1,1)='[')
        BEGIN
        SET @sql = REPLACE(@sql, '['+'@@replace'+']',@ColumnName)
        END
          SET @sql = REPLACE(@sql, '@@replace',@ColumnName)
                ---PRINT(@sql)
        IF @sql <> ''
            BEGIN
                SET @Schema = @q + @Schema + @q
                SET @Catalog = @q + @Catalog + @q
                SET @TableName = @q + @TableName + @q
                SET @TableType = @q + @TableType + @q
                SET @ColumnName = @q + REPLACE(@ColumnName, @q, @qq) + @q
                SET @DataType = @q + @DataType + @q

                SET @sql = 'SELECT ' + @Schema + ', ' + @Catalog + ', ' + @TableName + ', ' + @TableType + ', ' + @ColumnName + ', '
                    + CONVERT(VARCHAR(5), @OrdinalPosition) + ', ' + @DataType + ', ' + @sql + ' FROM [' + REPLACE(@Schema, '''', '') + '].['
                    + REPLACE(@TableName, '''', '') + ']'

                PRINT @sql

                INSERT  INTO #TEMP
                        EXECUTE ( @sql
                               )
            END

        FETCH NEXT
  FROM TableCursor
  INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date
    END

-----------------------------------------------------------------------------------
-- Clean-up
-----------------------------------------------------------------------------------
CLOSE TableCursor

DEALLOCATE TableCursor


-----------------------------------------------------------------------------------
-- Reset
-----------------------------------------------------------------------------------
SET NOCOUNT OFF
SET ANSI_WARNINGS ON;

---------

SELECT * FROM #TEMP

This article inspired by https://www.codeproject.com/Articles/1082891/High-Level-Data-Profiler-Script and one problem I faced in real-time projects.
If column names have space between my modified query will work perfectly





Sunday, February 11, 2018

ORACLE CERTIFICATIONS

Or
Download from:

Tuesday, September 26, 2017

SAP BIRTH DAY DASHBOARD




Click on the link to download dashboard template.

https://drive.google.com/file/d/0B0gyBIgZMKM7Nk5JLUZGY0dGcU0/view?usp=sharing

Sunday, April 10, 2016

Sound Enabled SAP Xcelsius DASHBOARD....!!!



Dear Pal,

This is simple and innovative example of sound enabled Dashboard.


Click below URL, download and give your valuable  feedback. My next post will how to make this type of Dashboard. 

You can download this dashboard below.

Download 

Sunday, March 13, 2016

USE SSIS TO ANALYSE YOUR BANK ACCOUNTS

                    LOADING MULTIPLE EXCELS WITH SSIS

      I worked most of time on   SAP BO/ETL technology. I love to solve real time problems with technology. I just started learning SSIS by watching some videos,referring Microsoft official blogs then a thought comes to my mind how can I use this tool to analyse my Bank Account data. I do small investments in share market /PPF/RD so I am sometime confused where my money is going..!!:)

This is what I did.
Downloaded all account statements year wise.

So here come solution how you can load multiple excels with same format into SQL SERVER. 


1)Create table in the SQL SERVER DB(try to capture all columns in bank statement)

Below is the excel which I downloaded form Bank
















Now we are ready to go to SSIS. Open BIDS create new SSIS Project and package.

Logic: Load excels to table one after another and once loading of excel completed move excel to some other folder 


1)Create table with same number of columns

CREATE TABLE [dbo].[MH_SAVINGS_ACC](
[ValueDate] [date] NULL,
[TransactionDate] [date] NULL,
[ChequeNumber] [varchar](200) NULL,
[TransactionRemarks] [varchar](1000) NULL,
[WithdrawalAmount] [numeric](18, 2) NULL,
[DepositAmount] [numeric](18, 2) NULL,
[BalanceAmount] [numeric](18, 2) NULL
) ON [PRIMARY]


2)   Open BIDS create New project and package




















3)Create variables as
InputFolder:C:\Users\Mahantesh\Desktop\Extra\ACC ANALYSYS\---This is where excel files are located
InputFullPath:C:\Users\Mahantesh\Desktop\Extra\ACC ANALYSYS\Test.xls-----Full path selecting sample .xls file
FileName:Tet.xls--Sample xls file
ArchiveFolder:C:\Users\Mahantesh\Desktop\Extra\Archive\--After loadig of the file move to this path
AchiveFullPath:C:\Users\Mahantesh\Desktop\Extra\Archive\Test.xls---After loadig of the file move to this path


4)Drop Foreach loop container to the control flow area set all parameters and the path of the files





5)Drag dataflow into foreachloop and inside dataflow just define the source and destination. 



Set parameters as bellow
OpenRowset:OpTransactionHistory$C13:I--Reason data present between these row and coulmns

$C13 means data starts from C13 (see excel image) "I" means complete I column 

Defining full paths
AchiveFullPath:@[User::ArchiveFolder]+@[User::FileName]

InputFullPath:@[User::InputFolder]+ @[User::FileName]

6)Set delay validation for excel connection and excel source true.

Note:Starting Filename is empty so for next loop we need to pic file don't for get to change 
expression of the Excelfilepath to @[User::InputFolder]+ @[User::FileName] 

7)The drag File system task and set source and destination as below.




For more detail please see the video. Happy learning..:)