-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsvm_rfe.py
75 lines (62 loc) · 1.99 KB
/
svm_rfe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import RFE
# Load the train/test splits from the working directory and separate the
# predictor columns from the class-label column.
_FEATURE_COLUMNS = ['HMM', 'SSD', 'OGS']
train, test = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))
X_train, X_test = (frame[_FEATURE_COLUMNS].values for frame in (train, test))
# Selecting with a one-element list gives a 2-D array; ravel() flattens it
# to the 1-D label vector.
y_train, y_test = (frame[['Target']].values.ravel() for frame in (train, test))
# (a) Baseline: fit a linear-kernel SVC on X_train, then report the
# test-set accuracy and the fitted coefficients.
estimator = svm.SVC(kernel="linear").fit(X_train, y_train)
a_result = estimator.predict(X_test)
baseline_accuracy = accuracy_score(y_test, a_result)
print("accuracy: ", baseline_accuracy)
baseline_weights = estimator.coef_
print("weights of HMM, SSD, OGS: ", baseline_weights)
# (b)1 Recursive feature elimination down to two features, followed by a
# refit of the SVM on only the selected columns.
# n_features_to_select is passed by keyword: it is keyword-only in
# scikit-learn >= 1.0, where the positional form raises TypeError.
selector = RFE(estimator, n_features_to_select=2, step=1)
selector = selector.fit(X_train, y_train)
# Column indices of the kept features; support_ is a boolean mask with one
# entry per column of X_train.
summary = np.flatnonzero(selector.support_).tolist()
# new X based on the selected variables
X_train1 = X_train[:, summary]
X_test1 = X_test[:, summary]
# Refit on the reduced matrices. After this fit, estimator.coef_ holds one
# weight per *selected* column, not one per original feature.
estimator.fit(X_train1, y_train)
a_result = estimator.predict(X_test1)
print("accuracy: ", accuracy_score(y_test,a_result))
print("seleted variable",selector.support_)
print("weights of HMM, SSD, OGS: ", estimator.coef_)
# (b)2 Recursive feature elimination down to a single feature, followed by
# a refit of the SVM on only that column.
# n_features_to_select is passed by keyword: it is keyword-only in
# scikit-learn >= 1.0, where the positional form raises TypeError.
selector = RFE(estimator, n_features_to_select=1, step=1)
selector = selector.fit(X_train, y_train)
# Column index of the kept feature; support_ is a boolean mask with one
# entry per column of X_train.
summary = np.flatnonzero(selector.support_).tolist()
# new X based on the selected variable (indexing with a list keeps it 2-D)
X_train2 = X_train[:, summary]
X_test2 = X_test[:, summary]
# Refit on the reduced matrices. After this fit, estimator.coef_ holds a
# weight for the selected column only, not one per original feature.
estimator.fit(X_train2, y_train)
a_result = estimator.predict(X_test2)
print("accuracy: ", accuracy_score(y_test,a_result))
print("seleted variable",selector.support_)
print("weights of HMM, SSD, OGS: ", estimator.coef_)
# Scatter plots of the reduced test sets, coloured by the true class label.
# NOTE(review): no figure is created between the two calls and no
# plt.show()/savefig is issued here -- presumably both scatters land on the
# same axes and display relies on an interactive backend; confirm intent.
_scatter_style = {'marker': 'o', 'c': y_test}
# Single selected feature: place every point on the horizontal line y = 0.
_flat_line = np.zeros(len(X_test2))
plt.scatter(X_test2, _flat_line, **_scatter_style)
# Two selected features: first selected column against the second.
_first_feature, _second_feature = X_test1[:, 0], X_test1[:, 1]
plt.scatter(_first_feature, _second_feature, **_scatter_style)