/
SMART Bank of Kigali Feature Engineering
133 lines (112 loc) · 4.98 KB
/
SMART Bank of Kigali Feature Engineering
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
---- Kindly not that the below script were executed in an oracle database
--- create a table for the data set to be used
CREATE TABLE RAILS
(
ID VARCHAR2(100),
CUSTOMER_ID VARCHAR2(100),
DATE_OF_BIRTH DATE,
PROVINCE VARCHAR2(100),
DISTRICT VARCHAR2(100),
CUSTOMER_BRANCH VARCHAR2(100),
PRINCIPAL_AMT NUMBER,
PAID_PRINCIPAL NUMBER,
PAID_INTEREST NUMBER,
PAID_PENALTY NUMBER,
TOTAL_REMAINING_PRINIPAL NUMBER,
REMAINING_PRINCIPAL NUMBER,
AMOUNT_DUE NUMBER,
DUE_PRINCIPAL NUMBER,
DUE_INTEREST NUMBER,
DUE_PEN_INTEREST NUMBER,
DUE_FEE NUMBER,
PAID_FEE NUMBER,
DUE_TAX NUMBER,
PAID_TAX NUMBER,
PREVIOUS_DAYS NUMBER,
OVERDUE_DAYS NUMBER,
EFFECTIVE_DATE DATE,
MATURITY_DATE DATE,
CREDITSCOREGROUP VARCHAR2(100),
PAYMENTSTATUS VARCHAR2(100),
DURATION NUMBER,
RETURNINGCUSTOMER VARCHAR2(100),
CLASS VARCHAR2(100)
);
----Import the data into the table created above
---- Create package for custom dependent functions first
---- SMART_PKG contains functions for data analytics modelling for both demographic and transactinal segmentation. They accept parameters to produce an output.
---- create package specification
CREATE OR REPLACE package SMART_PKG
AS
/*
-- func_age takes in the data of birth in a given data set and computes the age.
-- func_age_band takes in the data of birth in a given data set and segments the data entries into age band segments.
*/
function func_age(p_dob date) return varchar2;
function func_age_band (p_dob date) return varchar2;
END SMART_PKG;
/
---- create package body
CREATE OR REPLACE PACKAGE BODY SMART_PKG
AS
function func_age(p_dob date) return varchar2
as
v_dob date;
v_age varchar2(50);
begin
SELECT p_dob
INTO v_dob
FROM dual;
v_age := floor((trunc(sysdate) - v_dob)/365);
return(v_age);
end ;
function func_age_band (p_dob date) return varchar2
as
v_dob date;
v_age_band varchar2(50);
begin
SELECT p_dob
INTO v_dob
FROM dual;
if floor((trunc(sysdate) - v_dob)/365) < 15 then v_age_band := '14 and Below';
elsif floor((trunc(sysdate) - v_dob)/365) between 15 and 24 then v_age_band := '15 - 24';
elsif floor((trunc(sysdate) - v_dob)/365) between 25 and 34 then v_age_band := '25 - 34';
elsif floor((trunc(sysdate) - v_dob)/365) between 35 and 44 then v_age_band := '35 - 44';
elsif floor((trunc(sysdate) - v_dob)/365) between 45 and 54 then v_age_band := '45 - 54';
elsif floor((trunc(sysdate) - v_dob)/365) between 55 and 64 then v_age_band := '55 - 64';
elsif floor((trunc(sysdate) - v_dob)/365) > 64 then v_age_band := '65 and Above';
--- else v_age_band IS NULL;
END if;
return(v_age_band);
end ;
END SMART_PKG;
/
---- Query that extract already existing features and generate new features
----- SMART_PKG.FUNC_AGE() is a custom package that takes in date (date of birth) and evaluates age
select * from (
select
CUSTOMER_ID,
SMART_PKG.FUNC_AGE(DATE_OF_BIRTH)age,-- OCTAVE_PKG.FUNC_AGE_BAND(DATE_OF_BIRTH) age_band,
PROVINCE, DISTRICT, CUSTOMER_BRANCH,
PRINCIPAL_AMT, PAID_PRINCIPAL, PAID_INTEREST, PAID_PENALTY, TOTAL_REMAINING_PRINIPAL, REMAINING_PRINCIPAL, AMOUNT_DUE, DUE_PRINCIPAL, DUE_INTEREST, DUE_PEN_INTEREST,
case when PAID_PRINCIPAL < PRINCIPAL_AMT then 1 else 0 end f1,
case when REMAINING_PRINCIPAL < TOTAL_REMAINING_PRINIPAL then 1 else 0 end f2,
case when AMOUNT_DUE > DUE_PRINCIPAL then 1 else 0 end f3,
PAID_FEE, PAID_TAX, PAID_FEE+PAID_TAX total_tax,
PREVIOUS_DAYS, OVERDUE_DAYS, PREVIOUS_DAYS + OVERDUE_DAYS TOTAL_OVERDUE_DAYS,MATURITY_DATE-EFFECTIVE_DATE tenure,round((MATURITY_DATE-EFFECTIVE_DATE)/30) ee, DURATION,
case when (PREVIOUS_DAYS =0 and OVERDUE_DAYS = 0) then 1
when PREVIOUS_DAYS > OVERDUE_DAYS then 2
when (PREVIOUS_DAYS < 0 and OVERDUE_DAYS = 0) then 3 else 4 end f4,
PAYMENTSTATUS, RETURNINGCUSTOMER, CLASS, CREDITSCOREGROUP
from aff.rails order by 1
);
----- The script below is a reblica of the above script but tweaked to execute on Microsoft Azure Machine Learning Platform
select a.*,case when [Paid principal]
< [PRINCIPAL AMT] then 1 else 0 end DVar1,
case when [REMAINING PRINCIPAL] < [TOTAL REMAINING PRINIPAL] then 1 else 0 end DVar2,
case when [AMOUNT DUE] > [DUE PRINCIPAL] then 1 else 0 end DVar7,
[PREVIOUS DAYS] + [OVERDUE DAYS] as DVar3,[MATURITY DATE]-[EFFECTIVE DATE] as DVar4,round(([MATURITY DATE]-[EFFECTIVE DATE])/30) DVar5,
case when ([PREVIOUS DAYS] =0 and [OVERDUE DAYS] = 0) then 1
when [PREVIOUS DAYS] > [OVERDUE DAYS] then 2
when ([PREVIOUS DAYS] < 0 and [OVERDUE DAYS] = 0) then 3 else 4 end DVar6
from t1 as a;