- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathoml4sql-classification-text-mining-svm.sql
167 lines (150 loc) · 5.57 KB
/
oml4sql-classification-text-mining-svm.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
-----------------------------------------------------------------------
-- Oracle Machine Learning for SQL (OML4SQL) 21c
--
-- Classification - SVM Algorithm with Text Mining - dmtxtsvm.sql
--
-- Copyright (c) 2021 Oracle Corporation and/or its affilitiates.
--
-- The Universal Permissive License (UPL), Version 1.0
--
-- https://oss.oracle.com/licenses/upl/
-----------------------------------------------------------------------
SET serveroutput ON
SET trimspool ON
SET pages 10000
SET echo ON
-- Create a policy for text feature extraction
BEGIN
ctx_ddl.drop_policy('dmdemo_svm_policy');
EXCEPTION WHEN OTHERS THEN NULL; END;
/
EXECUTE ctx_ddl.create_policy('dmdemo_svm_policy');
-----------------------------------------------------------------------
-- SAMPLE PROBLEM
-----------------------------------------------------------------------
-- Mine text features using SVM algorithm.
-----------------------------------------------------------------------
-- BUILD THE MODEL
-----------------------------------------------------------------------
-- Cleanup old model and objects for repeat runs
BEGINDBMS_DATA_MINING.DROP_MODEL('T_SVM_Clas_sample');
EXCEPTION WHEN OTHERS THEN NULL; END;
/
BEGIN EXECUTE IMMEDIATE 'DROP TABLE t_svmc_sample_settings';
EXCEPTION WHEN OTHERS THEN NULL; END;
/
-- Create settings table to choose linear kernel
CREATETABLEt_svmc_sample_settings (
setting_name VARCHAR2(30),
setting_value VARCHAR2(4000));
BEGIN
-- Populate settings table
INSERT INTO t_svmc_sample_settings VALUES
(dbms_data_mining.algo_name,
dbms_data_mining.algo_support_vector_machines);
INSERT INTO t_svmc_sample_settings VALUES
(dbms_data_mining.prep_auto, dbms_data_mining.prep_auto_on);
INSERT INTO t_svmc_sample_settings VALUES
(dbms_data_mining.svms_kernel_function,dbms_data_mining.svms_linear);
INSERT INTO t_svmc_sample_settings VALUES
(dbms_data_mining.svms_complexity_factor, 100);
INSERT INTO t_svmc_sample_settings VALUES
(dbms_data_mining.odms_text_policy_name, 'DMDEMO_SVM_POLICY');
INSERT INTO t_svmc_sample_settings VALUES
(dbms_data_mining.svms_solver, dbms_data_mining.svms_solver_sgd);
COMMIT;
END;
/
---------------
-- CREATE MODEL
-- Create SVM model
-- Note the transform makes the 'comments' attribute
-- to be treated as unstructured text data
DECLARE
xformlist dbms_data_mining_transform.TRANSFORM_LIST;
BEGIN
dbms_data_mining_transform.SET_TRANSFORM(
xformlist, 'comments', null, 'comments', null, 'TEXT');
DBMS_DATA_MINING.CREATE_MODEL(
model_name =>'T_SVM_Clas_sample',
mining_function =>dbms_data_mining.classification,
data_table_name =>'mining_build_text',
case_id_column_name =>'cust_id',
target_column_name =>'affinity_card',
settings_table_name =>'t_svmc_sample_settings',
xform_list => xformlist);
END;
/
-- Display the model settings
column setting_name format a30;
column setting_value format a30;
SELECT setting_name, setting_value
FROM user_mining_model_settings
WHERE model_name ='T_SVM_CLAS_SAMPLE'
ORDER BY setting_name;
-- Display the model signature
column attribute_name format a40
column attribute_type format a20
SELECT attribute_name, attribute_type
FROM user_mining_model_attributes
WHERE model_name ='T_SVM_CLAS_SAMPLE'
ORDER BY attribute_name;
-- Display model details
-- Get a list of model views
col view_name format a30
col view_type format a50
SELECT view_name, view_type FROM user_mining_model_views
WHERE model_name='T_SVM_CLAS_SAMPLE'
ORDER BY view_name;
-- Note how several text terms extracted from the COMMENTs documents
-- show up as influential predictors.
--
SETline120
column attribute_name format a25
column attribute_subname format a25
column attribute_value format a25
column coefficient format 9.99
SELECT*from
(SELECT target_value, attribute_name, attribute_subname,
attribute_value, coefficient,
rank() over (order by abs(coefficient) desc) rnk
FROM DM$VLT_SVM_CLAS_SAMPLE)
WHERE rnk <=10
ORDER BY rnk, attribute_name, attribute_subname;
-----------------------------------------------------------------------
-- TEST THE MODEL
-----------------------------------------------------------------------
-- See dmsvcdem.sql for examples.
-----------------------------------------------------------------------
-- SCORE NEW DATA USING SQL DATA MINING FUNCTIONS
-----------------------------------------------------------------------
------------------
-- BUSINESS CASE 1
--
-- Find the 5 customers that are most likely to use an affinity card.
-- Note that the SQL data mining functions seamless work against
-- tables that contain textual data (comments).
-- Also explain why they are likely to use an affinity card.
--
set long 20000
SELECT cust_id, pd FROM
( SELECT cust_id,
PREDICTION_DETAILS(T_SVM_Clas_sample, 1 USING *) pd,
rank() over (order by PREDICTION_PROBABILITY(T_SVM_Clas_sample, 1 USING *) DESC,
cust_id) rnk
FROM mining_apply_text)
WHERE rnk <=5
order by rnk;
------------------
-- BUSINESS CASE 2
-- Find the average age of customers who are likely to use an
-- affinity card. Break out the results by gender.
--
column cust_gender format a12
SELECT cust_gender,
COUNT(*) AS cnt,
ROUND(AVG(age)) AS avg_age
FROM mining_apply_text
WHERE PREDICTION(T_SVM_Clas_sample USING *) =1
GROUP BY cust_gender
ORDER BY cust_gender;