Data Profiling is the first step in understanding data more. In any data-related project which involves SQL server but not having any data profiling tools below query helpful to analysis.
Metod 1: If you don't have access to create table
SET NOCOUNT ON
SET ANSI_WARNINGS OFF
-- Suppresses the "Null value is eliminated by an aggregate..." warning
DECLARE @TableName sysname
DECLARE @TableType VARCHAR(15)
DECLARE @Schema sysname
DECLARE @Catalog sysname
DECLARE @ColumnName sysname
DECLARE @OrdinalPosition INT
DECLARE @DataType sysname
DECLARE @char INT
DECLARE @num TINYINT
DECLARE @date SMALLINT
DECLARE @sql VARCHAR(MAX)
DECLARE @stmtString VARCHAR(MAX)
DECLARE @stmtNum VARCHAR(MAX)
DECLARE @stmtDate VARCHAR(MAX)
DECLARE @stmtOther VARCHAR(MAX)
DECLARE @stmtUnsup VARCHAR(MAX)
DECLARE @q CHAR(1)
-- single quote
DECLARE @qq CHAR(2)
-- double quote
-----------------------------------------------------------------------------------
-- Table variable to collect the final results
-----------------------------------------------------------------------------------
DECLARE @Results TABLE
(
[Schema] sysname ,
[Catalog] sysname ,
[Table Name] sysname ,
[Table Type] VARCHAR(10) ,
[Column Name] sysname ,
[Seq] INT ,
[Data Type] sysname ,
[Avg Len/Val] NUMERIC ,
[Min Len/Val] NUMERIC ,
[Max Len/Val] NUMERIC ,
[Min Date] DATETIME ,
[Max Date] DATETIME ,
[Distinct Values] NUMERIC ,
[Num NULL] NUMERIC
)
-----------------------------------------------------------------------------------
-- quote char
-----------------------------------------------------------------------------------
SET @q = ''''
SET @qq = @q + @q
-----------------------------------------------------------------------------------
-- The dynamic replacement strings for various data types
-----------------------------------------------------------------------------------
SET @stmtUnsup = 'null, null, null, null, null, null, 0'
SET @stmtString = 'avg(len([@@replace])), ' + 'min(len([@@replace])), ' + 'max(len([@@replace])), ' + 'null, null, count(distinct [@@replace]), '
+ 'sum(case when [@@replace] is null then 1 else 0 end)'
SET @stmtNum = 'avg(CAST(isnull([@@replace], 0) AS FLOAT)), ' + 'min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
+ 'null, null, count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtDate = 'null, null, null, min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
+ 'count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtOther = 'null, null, null, null, null, count(distinct @@replace) AS [Dist Count], '
+ 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
-----------------------------------------------------------------------------------
-- The cursor to read through the schema. Change the WHERE clause to control the tables/views used
-----------------------------------------------------------------------------------
DECLARE TableCursor CURSOR
FOR
SELECT c.TABLE_SCHEMA ,
c.TABLE_CATALOG ,
c.TABLE_NAME ,
t.TABLE_TYPE ,
---c.COLUMN_NAME ,
case when SUBSTRING(c.COLUMN_NAME,1,1) NOT LIKE '[' then '['+c.COLUMN_NAME+']' else c.COLUMN_NAME end AS COLUMN_NAME,
c.ORDINAL_POSITION ,
c.DATA_TYPE ,
c.CHARACTER_MAXIMUM_LENGTH ,
c.NUMERIC_PRECISION ,
c.DATETIME_PRECISION
FROM INFORMATION_SCHEMA.COLUMNS c
INNER JOIN INFORMATION_SCHEMA.TABLES t ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
AND t.TABLE_NAME = c.TABLE_NAME
WHERE c.TABLE_SCHEMA IN ( 'dbo' ) --------------------------------
AND c.TABLE_NAME LIKE 'ABC%' -- <<< Schema, table and view names to analyze go here
AND t.TABLE_TYPE NOT IN ( 'VIEW' ) --------------------------------
ORDER BY c.TABLE_NAME ,
c.ORDINAL_POSITION
OPEN TableCursor
FETCH NEXT
FROM TableCursor
INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date
-----------------------------------------------------------------------------------
-- Process through the database schema
-----------------------------------------------------------------------------------
WHILE @@FETCH_STATUS = 0
BEGIN
SET @sql = CASE WHEN @DataType = 'image' THEN @stmtUnsup
WHEN @DataType = 'text' THEN @stmtUnsup
WHEN @DataType = 'ntext' THEN @stmtUnsup
WHEN @char IS NOT NULL THEN @stmtString
WHEN @num IS NOT NULL THEN @stmtNum
WHEN @date IS NOT NULL THEN @stmtDate
ELSE @stmtOther
END
IF (SUBSTRING(@ColumnName,1,1)='[')
BEGIN
SET @sql = REPLACE(@sql, '['+'@@replace'+']',@ColumnName)
END
SET @sql = REPLACE(@sql, '@@replace',@ColumnName)
PRINT(@sql)
IF @sql <> ''
BEGIN
SET @Schema = @q + @Schema + @q
SET @Catalog = @q + @Catalog + @q
SET @TableName = @q + @TableName + @q
SET @TableType = @q + @TableType + @q
SET @ColumnName = @q + REPLACE(@ColumnName, @q, @qq) + @q
SET @DataType = @q + @DataType + @q
SET @sql = 'SELECT ' + @Schema + ', ' + @Catalog + ', ' + @TableName + ', ' + @TableType + ', ' + @ColumnName + ', '
+ CONVERT(VARCHAR(5), @OrdinalPosition) + ', ' + @DataType + ', ' + @sql + ' FROM [' + REPLACE(@Schema, '''', '') + '].['
+ REPLACE(@TableName, '''', '') + ']'
PRINT @sql
INSERT INTO @Results
EXECUTE ( @sql
)
END
FETCH NEXT
FROM TableCursor
INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date
END
-----------------------------------------------------------------------------------
-- Clean-up
-----------------------------------------------------------------------------------
CLOSE TableCursor
DEALLOCATE TableCursor
-----------------------------------------------------------------------------------
-- Display the results
-----------------------------------------------------------------------------------
SELECT [Schema] ,
[Catalog] ,
[Table Name] ,
CASE [Table Type]
WHEN 'BASE TABLE' THEN 'TABLE'
ELSE [Table Type]
END AS 'Table Type' ,
[Column Name] ,
[Seq] ,
[Data Type] ,
[Avg Len/Val] ,
[Min Len/Val] ,
[Max Len/Val] ,
[Min Date] ,
[Max Date] ,
[Distinct Values] ,
[Num NULL]
FROM @Results
ORDER BY [Table Name] ,
[Seq] ,
[Column Name]
-----------------------------------------------------------------------------------
-- Reset
-----------------------------------------------------------------------------------
SET NOCOUNT OFF
SET ANSI_WARNINGS ON;
Metod 2: You can create #temp table
CREATE TABLE #TEMP
(
[Schema] sysname ,
[Catalog] sysname ,
[Table Name] sysname ,
[Table Type] VARCHAR(10) ,
[Column Name] sysname ,
[Seq] INT ,
[Data Type] sysname ,
[Avg Len/Val] NUMERIC ,
[Min Len/Val] NUMERIC ,
[Max Len/Val] NUMERIC ,
[Min Date] DATETIME ,
[Max Date] DATETIME ,
[Distinct Values] NUMERIC ,
[Num NULL] NUMERIC
);
SET NOCOUNT ON
SET ANSI_WARNINGS OFF
-- Suppresses the "Null value is eliminated by an aggregate..." warning
DECLARE @TableName sysname
DECLARE @TableType VARCHAR(15)
DECLARE @Schema sysname
DECLARE @Catalog sysname
DECLARE @ColumnName sysname
DECLARE @OrdinalPosition INT
DECLARE @DataType sysname
DECLARE @char INT
DECLARE @num TINYINT
DECLARE @date SMALLINT
DECLARE @sql VARCHAR(MAX)
DECLARE @stmtString VARCHAR(MAX)
DECLARE @stmtNum VARCHAR(MAX)
DECLARE @stmtDate VARCHAR(MAX)
DECLARE @stmtOther VARCHAR(MAX)
DECLARE @stmtUnsup VARCHAR(MAX)
DECLARE @q CHAR(1)
-- single quote
DECLARE @qq CHAR(2)
-- double quote
DECLARE @cnt INT=0
-----------------------------------------------------------------------------------
-- Table variable to collect the final results
-----------------------------------------------------------------------------------
DECLARE @Results TABLE
(
[Schema] sysname ,
[Catalog] sysname ,
[Table Name] sysname ,
[Table Type] VARCHAR(10) ,
[Column Name] sysname ,
[Seq] INT ,
[Data Type] sysname ,
[Avg Len/Val] NUMERIC ,
[Min Len/Val] NUMERIC ,
[Max Len/Val] NUMERIC ,
[Min Date] DATETIME ,
[Max Date] DATETIME ,
[Distinct Values] NUMERIC ,
[Num NULL] NUMERIC
)
-----------------------------------------------------------------------------------
-- quote char
-----------------------------------------------------------------------------------
SET @q = ''''
SET @qq = @q + @q
-----------------------------------------------------------------------------------
-- The dynamic replacement strings for various data types
-----------------------------------------------------------------------------------
SET @stmtUnsup = 'null, null, null, null, null, null, 0'
SET @stmtString = 'avg(len([@@replace])), ' + 'min(len([@@replace])), ' + 'max(len([@@replace])), ' + 'null, null, count(distinct [@@replace]), '
+ 'sum(case when [@@replace] is null then 1 else 0 end)'
SET @stmtNum = 'avg(CAST(isnull([@@replace], 0) AS FLOAT)), ' + 'min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
+ 'null, null, count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtDate = 'null, null, null, min([@@replace]) AS [Min], ' + 'max([@@replace]) AS [Max], '
+ 'count(distinct @@replace) AS [Dist Count], ' + 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
SET @stmtOther = 'null, null, null, null, null, count(distinct @@replace) AS [Dist Count], '
+ 'sum(case when @@replace is null then 1 else 0 end) AS [Num Null]'
-----------------------------------------------------------------------------------
-- The cursor to read through the schema. Change the WHERE clause to control the tables/views used
-----------------------------------------------------------------------------------
DECLARE TableCursor CURSOR
FOR
SELECT c.TABLE_SCHEMA ,
c.TABLE_CATALOG ,
c.TABLE_NAME ,
t.TABLE_TYPE ,
---c.COLUMN_NAME ,
case when SUBSTRING(c.COLUMN_NAME,1,1) NOT LIKE '[' then '['+c.COLUMN_NAME+']' else c.COLUMN_NAME end AS COLUMN_NAME,
c.ORDINAL_POSITION ,
c.DATA_TYPE ,
c.CHARACTER_MAXIMUM_LENGTH ,
c.NUMERIC_PRECISION ,
c.DATETIME_PRECISION
FROM INFORMATION_SCHEMA.COLUMNS c
INNER JOIN INFORMATION_SCHEMA.TABLES t ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
AND t.TABLE_NAME = c.TABLE_NAME
WHERE c.TABLE_SCHEMA IN ( 'dbo' ) --------------------------------
AND c.TABLE_NAME LIKE 'ABC%' -- <<< Schema, table and view names to analyze go here
AND t.TABLE_TYPE NOT IN ( 'VIEW' ) --------------------------------
ORDER BY c.TABLE_NAME ,
c.ORDINAL_POSITION
OPEN TableCursor
FETCH NEXT
FROM TableCursor
INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date
PRINT(CAST(@cnt AS VARCHAR) + 'TABLE PROFLING DONE')
-----------------------------------------------------------------------------------
-- Process through the database schema
-----------------------------------------------------------------------------------
WHILE @@FETCH_STATUS = 0
BEGIN
SET @sql = CASE WHEN @DataType = 'image' THEN @stmtUnsup
WHEN @DataType = 'text' THEN @stmtUnsup
WHEN @DataType = 'ntext' THEN @stmtUnsup
WHEN @char IS NOT NULL THEN @stmtString
WHEN @num IS NOT NULL THEN @stmtNum
WHEN @date IS NOT NULL THEN @stmtDate
ELSE @stmtOther
END
IF (SUBSTRING(@ColumnName,1,1)='[')
BEGIN
SET @sql = REPLACE(@sql, '['+'@@replace'+']',@ColumnName)
END
SET @sql = REPLACE(@sql, '@@replace',@ColumnName)
---PRINT(@sql)
IF @sql <> ''
BEGIN
SET @Schema = @q + @Schema + @q
SET @Catalog = @q + @Catalog + @q
SET @TableName = @q + @TableName + @q
SET @TableType = @q + @TableType + @q
SET @ColumnName = @q + REPLACE(@ColumnName, @q, @qq) + @q
SET @DataType = @q + @DataType + @q
SET @sql = 'SELECT ' + @Schema + ', ' + @Catalog + ', ' + @TableName + ', ' + @TableType + ', ' + @ColumnName + ', '
+ CONVERT(VARCHAR(5), @OrdinalPosition) + ', ' + @DataType + ', ' + @sql + ' FROM [' + REPLACE(@Schema, '''', '') + '].['
+ REPLACE(@TableName, '''', '') + ']'
PRINT @sql
INSERT INTO #TEMP
EXECUTE ( @sql
)
END
FETCH NEXT
FROM TableCursor
INTO @Schema, @Catalog, @TableName, @TableType, @ColumnName, @OrdinalPosition, @DataType, @char, @num, @date
END
-----------------------------------------------------------------------------------
-- Clean-up
-----------------------------------------------------------------------------------
CLOSE TableCursor
DEALLOCATE TableCursor
-----------------------------------------------------------------------------------
-- Reset
-----------------------------------------------------------------------------------
SET NOCOUNT OFF
SET ANSI_WARNINGS ON;
---------
SELECT * FROM #TEMP
This article inspired by https://www.codeproject.com/Articles/1082891/High-Level-Data-Profiler-Script and one problem I faced in real-time projects.
If column names have space between my modified query will work perfectly
If column names have space between my modified query will work perfectly