Context
- Netezza as databases
- Cognos as report tool
- Netezza as databases
- Cognos as report tool
Goal
- Make development easier – All ETLs will use the centralized table to get dimension business key, also all these map will be used to validate dimension tables as needed
- Make maintenance easier – when some changes applies duo to business logic change, we don’t need to change ETL and Cognos model
- Make development easier – All ETLs will use the centralized table to get dimension business key, also all these map will be used to validate dimension tables as needed
- Make maintenance easier – when some changes applies duo to business logic change, we don’t need to change ETL and Cognos model
Assumptions
- Focused area is from staging data to data mart data
- SCD concept with surrogate key is NOT applied
- Focused area is from staging data to data mart data
- SCD concept with surrogate key is NOT applied
Detail assumption of staging table
- All these dimension related columns are not NULL.
- If the column is NULL, then setup as ‘-99’ for string, or as -99 for number.
- If the column cannot be found from reference table, then setup as ‘-1’ for string, or as -1 for number.
Capabilities
- Handle single value mapping
- Handle data range mapping
- Handle default
- Generate exception between map and dimensions
- Handle single value mapping
- Handle data range mapping
- Handle default
- Generate exception between map and dimensions
Mapping table
-- DDL
DROP TABLE MPL_BI.REFERENCE_DATA_MAP IF EXISTS;
CREATE TABLE MPL_BI.REFERENCE_DATA_MAP
(
SOURCE CHARACTER VARYING(100) NOT NULL,
PARAMETER_NAME CHARACTER VARYING(100) NOT NULL,
SINGLE_VALUE CHARACTER VARYING(100),
LOW_BOUND_VALUE DECIMAL(20,2),
LOW_BOUND_INCL_VALUE DECIMAL(20,2),
HIGH_BOUND_VALUE DECIMAL(20,2),
HIGH_BOUND_INCL_VALUE DECIMAL(20,2),
DIMENSION_NAME CHARACTER VARYING(100) NOT NULL,
DIMENSION_ITEM_NAME CHARACTER VARYING(100) NOT NULL,
NOTE CHARACTER VARYING(1000),
LAST_UPDATE_USER CHARACTER VARYING(100) ,
LAST_MOFIFIED_DATE DATE ,
START_DATE DATE NOT NULL,
END_DATE DATE
);
-- DDL
DROP TABLE MPL_BI.REFERENCE_DATA_MAP IF EXISTS;
CREATE TABLE MPL_BI.REFERENCE_DATA_MAP
(
SOURCE CHARACTER VARYING(100) NOT NULL,
PARAMETER_NAME CHARACTER VARYING(100) NOT NULL,
SINGLE_VALUE CHARACTER VARYING(100),
LOW_BOUND_VALUE DECIMAL(20,2),
LOW_BOUND_INCL_VALUE DECIMAL(20,2),
HIGH_BOUND_VALUE DECIMAL(20,2),
HIGH_BOUND_INCL_VALUE DECIMAL(20,2),
DIMENSION_NAME CHARACTER VARYING(100) NOT NULL,
DIMENSION_ITEM_NAME CHARACTER VARYING(100) NOT NULL,
NOTE CHARACTER VARYING(1000),
LAST_UPDATE_USER CHARACTER VARYING(100) ,
LAST_MOFIFIED_DATE DATE ,
START_DATE DATE NOT NULL,
END_DATE DATE
);
Sample Data
INSERT INTO MPL_BI.REFERENCE_DATA_MAP values('ALL','MAP_SINGLE_VALUE','Current (0)',NULL,NULL,NULL,NULL,'DIM_SINGLE_VALUE','NOT DELINQUENT',NULL,NULL,NULL,'2018-03-01',NULL);
.............
INSERT INTO MPL_BI.REFERENCE_DATA_MAP values('ALL','MAP_SINGLE_VALUE','Current (0)',NULL,NULL,NULL,NULL,'DIM_SINGLE_VALUE','NOT DELINQUENT',NULL,NULL,NULL,'2018-03-01',NULL);
.............
select * from MPL_BI.REFERENCE_DATA_MAP;
Apply for ETL
DROP TABLE MPL_BI.STG_TABLE IF EXISTS;
CREATE TABLE MPL_BI.STG_TABLE
(
ACCOUNT int,
DELQ CHARACTER VARYING(100),
ORIGINAL_BUREAU_SCORE int
);
SOURCE | PARAMETER_NAME | SINGLE_VALUE | LOW_BOUND_VALUE | LOW_BOUND_INCL_VALUE | HIGH_BOUND_VALUE | HIGH_BOUND_INCL_VALUE | DIMENSION_NAME | DIMENSION_ITEM_NAME | NOTE | LAST_UPDATE_USER | LAST_MOFIFIED_DATE | START_DATE | END_DATE |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ALL | MAP_SINGLE_VALUE | Current (0) | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | NOT DELINQUENT | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | CYC1. 1-30 | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | CLASS 0 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | CYC2. 31-60 | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | CLASS 1 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | CYC3. 61-90 | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | CLASS 2 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | CYC4. 90+ | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | CLASS 3 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | NPNA | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | NPNA | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | DWO | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | DWO | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_SINGLE_VALUE | DEFAULT | NULL | NULL | NULL | NULL | DIM_SINGLE_VALUE | NOT IDENTIFIED | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | -1.00 | NULL | -999.00 | DIM_BAND | MISSING | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 0.00 | NULL | 0.00 | DIM_BAND | ZERO | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 1.00 | NULL | 50.00 | DIM_BAND | FORCED | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 51.00 | NULL | 599.00 | DIM_BAND | <600 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 600.00 | NULL | 619.00 | DIM_BAND | 600-619 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 620.00 | NULL | 639.00 | DIM_BAND | 620-639 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 640.00 | NULL | 659.00 | DIM_BAND | 640-659 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 660.00 | NULL | 679.00 | DIM_BAND | 660-679 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 680.00 | NULL | 699.00 | DIM_BAND | 680-699 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 700.00 | NULL | 719.00 | DIM_BAND | 700-719 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 720.00 | NULL | 739.00 | DIM_BAND | 720-739 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 740.00 | NULL | 759.00 | DIM_BAND | 740-759 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 760.00 | NULL | 779.00 | DIM_BAND | 760-779 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 780.00 | NULL | 799.00 | DIM_BAND | 780-799 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 800.00 | NULL | 819.00 | DIM_BAND | 800-819 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 820.00 | NULL | 839.00 | DIM_BAND | 820-839 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 840.00 | NULL | 859.00 | DIM_BAND | 840-859 | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | NULL | NULL | 860.00 | NULL | 1000.00 | DIM_BAND | 860+ | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
ALL | MAP_BAND | DEFAULT | NULL | NULL | NULL | NULL | DIM_BAND | MISSING | NULL | NULL | NULL | 2018-03-01 00:00:00 | NULL |
Constraints
SELECT PARAMETER_NAME,SINGLE_VALUE, count(*) FROM MPL_BI.REFERENCE_DATA_MAP
WHERE END_DATE IS NULL AND SINGLE_VALUE IS NOT NULL
GROUP BY PARAMETER_NAME,SINGLE_VALUE HAVING COUNT(*) > 1;
SELECT PARAMETER_NAME,SINGLE_VALUE, count(*) FROM MPL_BI.REFERENCE_DATA_MAP
WHERE END_DATE IS NULL AND SINGLE_VALUE IS NOT NULL
GROUP BY PARAMETER_NAME,SINGLE_VALUE HAVING COUNT(*) > 1;
SELECT PARAMETER_NAME,LOW_BOUND_VALUE,LOW_BOUND_INCL_VALUE,HIGH_BOUND_VALUE,HIGH_BOUND_INCL_VALUE, count(*) FROM MPL_BI.REFERENCE_DATA_MAP
WHERE END_DATE IS NULL AND (LOW_BOUND_VALUE IS NOT NULL OR LOW_BOUND_INCL_VALUE IS NOT NULL OR HIGH_BOUND_VALUE IS NOT NULL OR HIGH_BOUND_INCL_VALUE IS NOT NULL)
GROUP BY PARAMETER_NAME,LOW_BOUND_VALUE,LOW_BOUND_INCL_VALUE,HIGH_BOUND_VALUE,HIGH_BOUND_INCL_VALUE HAVING COUNT(*) > 1;
WHERE END_DATE IS NULL AND (LOW_BOUND_VALUE IS NOT NULL OR LOW_BOUND_INCL_VALUE IS NOT NULL OR HIGH_BOUND_VALUE IS NOT NULL OR HIGH_BOUND_INCL_VALUE IS NOT NULL)
GROUP BY PARAMETER_NAME,LOW_BOUND_VALUE,LOW_BOUND_INCL_VALUE,HIGH_BOUND_VALUE,HIGH_BOUND_INCL_VALUE HAVING COUNT(*) > 1;
Apply for ETL
DROP TABLE MPL_BI.STG_TABLE IF EXISTS;
CREATE TABLE MPL_BI.STG_TABLE
(
ACCOUNT int,
DELQ CHARACTER VARYING(100),
ORIGINAL_BUREAU_SCORE int
);
INSERT INTO MPL_BI.STG_TABLE VALUES (1,'Current (0)',-1);
INSERT INTO MPL_BI.STG_TABLE VALUES (2,'CYC1. 1-30',610);
INSERT INTO MPL_BI.STG_TABLE VALUES (3,'Other Value',NULL);
INSERT INTO MPL_BI.STG_TABLE VALUES (2,'CYC1. 1-30',610);
INSERT INTO MPL_BI.STG_TABLE VALUES (3,'Other Value',NULL);
SELECT
ACCOUNT,
NVL(MAP_SINGLE_VALUE.DIMENSION_ITEM_NAME,MAP_DELQ_STATUS_COPS_0.DIMENSION_ITEM_NAME) AS MEMBER_DELQ,
NVL(MAP_BAND.DIMENSION_ITEM_NAME,MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0.DIMENSION_ITEM_NAME) AS MEMBER_ORIGINAL_BUREAU_SCORE
FROM MPL_BI.STG_TABLE S
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_SINGLE_VALUE ON (MAP_SINGLE_VALUE.PARAMETER_NAME ='MAP_SINGLE_VALUE' AND S.DELQ=MAP_SINGLE_VALUE.SINGLE_VALUE)
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_DELQ_STATUS_COPS_0 ON (MAP_DELQ_STATUS_COPS_0.PARAMETER_NAME ='MAP_SINGLE_VALUE' AND MAP_DELQ_STATUS_COPS_0.SINGLE_VALUE = 'DEFAULT')
ACCOUNT,
NVL(MAP_SINGLE_VALUE.DIMENSION_ITEM_NAME,MAP_DELQ_STATUS_COPS_0.DIMENSION_ITEM_NAME) AS MEMBER_DELQ,
NVL(MAP_BAND.DIMENSION_ITEM_NAME,MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0.DIMENSION_ITEM_NAME) AS MEMBER_ORIGINAL_BUREAU_SCORE
FROM MPL_BI.STG_TABLE S
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_SINGLE_VALUE ON (MAP_SINGLE_VALUE.PARAMETER_NAME ='MAP_SINGLE_VALUE' AND S.DELQ=MAP_SINGLE_VALUE.SINGLE_VALUE)
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_DELQ_STATUS_COPS_0 ON (MAP_DELQ_STATUS_COPS_0.PARAMETER_NAME ='MAP_SINGLE_VALUE' AND MAP_DELQ_STATUS_COPS_0.SINGLE_VALUE = 'DEFAULT')
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_BAND ON (MAP_BAND.PARAMETER_NAME ='MAP_BAND' AND S.ORIGINAL_BUREAU_SCORE BETWEEN MAP_BAND.LOW_BOUND_INCL_VALUE AND MAP_BAND.HIGH_BOUND_INCL_VALUE)
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0 ON (MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0.PARAMETER_NAME ='MAP_BAND' AND MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0.SINGLE_VALUE = 'DEFAULT');
LEFT JOIN MPL_BI.REFERENCE_DATA_MAP MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0 ON (MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0.PARAMETER_NAME ='MAP_BAND' AND MAP_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED_0.SINGLE_VALUE = 'DEFAULT');
Apply for Cognos Dimensions
select * FROM MPL_BI.REFERENCE_DATA_MAP
WHERE PARAMETER_NAME IN ('MAP_SINGLE_VALUE')
AND DIMENSION_ITEM_NAME NOT IN (SELECT DELQ_STATUS from MPL_BI.DIM_CONFORMED_DELQ_STATUS);
select * FROM MPL_BI.REFERENCE_DATA_MAP
WHERE PARAMETER_NAME IN ('MAP_SINGLE_VALUE')
AND DIMENSION_ITEM_NAME NOT IN (SELECT DELQ_STATUS from MPL_BI.DIM_CONFORMED_DELQ_STATUS);
select * FROM MPL_BI.REFERENCE_DATA_MAP
WHERE PARAMETER_NAME IN ('MAP_BAND')
AND DIMENSION_ITEM_NAME NOT IN (SELECT CURR_BUREAU_SCORE___20PT_BAND_INCLUDING_FORCED from MPL_BI.DIM_CONFORMED_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED);
WHERE PARAMETER_NAME IN ('MAP_BAND')
AND DIMENSION_ITEM_NAME NOT IN (SELECT CURR_BUREAU_SCORE___20PT_BAND_INCLUDING_FORCED from MPL_BI.DIM_CONFORMED_BUREAU_SCORE_20PT_BAND_INCLUDING_FORCED);
No comments:
Post a Comment