-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel_selection.py
More file actions
99 lines (72 loc) · 2.8 KB
/
model_selection.py
File metadata and controls
99 lines (72 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from preprocess import init
from plot import index_to_color
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
# Labels of the synthesized (model-generated) network classes. separator()
# treats every label that is not listed here as a real-world class.
Synthesized = [
    "Scale Free",
    "ER Network",
    "Small World",
    "Forest Fire Network",
]
def make_layers(bp_tuple_L, base_Ys):
    """
    Count how often each layer label is paired with each base class.

    bp_tuple_L: iterable of 2-item pairs (base class, layer label). It is
        traversed exactly once, so a one-shot iterable such as a Python 3
        zip object is counted correctly.
    base_Ys: iterable of base class labels; duplicates are ignored.

    Returns (Ys, accum_dic):
        Ys        -- sorted list of the distinct base classes.
        accum_dic -- maps each layer label (second item of a pair) to a list
                     of counts aligned with Ys. Layers appear in the order
                     they are first seen in bp_tuple_L.

    Raises ValueError if the first item of a pair is not one of base_Ys.
    """
    Ys = sorted(set(base_Ys))
    accum_dic = {}
    for base, predicted in bp_tuple_L:
        if predicted not in accum_dic:
            # First time this layer shows up: start it with a zero per base class.
            accum_dic[predicted] = [0] * len(Ys)
        accum_dic[predicted][Ys.index(base)] += 1
    return Ys, accum_dic
def plot_accumulation(Ys, accum_dic):
    """
    Draw the counts in accum_dic as a horizontal stacked bar chart and show it.

    Ys: base class labels; each one gets its own row (y tick).
    accum_dic: maps a layer label to a list of counts aligned with Ys. The
        layers are stacked left to right in the dict's iteration order, and
        each layer gets one legend entry. An empty accum_dic yields a chart
        with no bars.

    Colors come from index_to_color(layer labels); its result is called with
    the layer's index to obtain that layer's color.
    """
    layer_names = list(accum_dic.keys())
    color_map = index_to_color(layer_names)
    rows = range(len(Ys))

    # Left edge of the next layer in every row. This is kept as a real list:
    # a lazy map object (what map() returns on Python 3) cannot be both
    # passed to barh and summed again for the following layer.
    offsets = [0] * len(Ys)
    ps = []  # bar containers, used as legend handles
    for i, name in enumerate(layer_names):
        counts = accum_dic[name]
        ps.append(plt.barh(rows, counts, 0.35, color=color_map(i), left=offsets))
        offsets = [start + count for start, count in zip(offsets, counts)]

    plt.legend(ps, layer_names, bbox_to_anchor=(1.12, 0.4), prop={'size': 12})
    plt.yticks(rows, Ys)
    plt.ylabel('Base Classes')
    plt.xlabel('Frequency')
    plt.show()
def separator(X, Y):
    """
    Split the samples into real-world and synthesized groups by label.

    X and Y are walked in lockstep; a sample whose label is listed in
    Synthesized goes to the synthesized group, every other sample goes to
    the real-world group. Order within each group follows the input order.

    Returns (real_X, real_Y, synthesized_X, synthesized_Y) as four lists.
    """
    real_X, real_Y = [], []
    synthesized_X, synthesized_Y = [], []
    for sample, label in zip(X, Y):
        # Pick the destination pair of lists first, then append once.
        if label not in Synthesized:
            samples, labels = real_X, real_Y
        else:
            samples, labels = synthesized_X, synthesized_Y
        samples.append(sample)
        labels.append(label)
    return real_X, real_Y, synthesized_X, synthesized_Y
def base_to_predict(base_X, base_Y, predict_X, predict_Y):
    """
    Train on the base networks, classify predict networks

    A RandomForestClassifier with default settings is fitted on
    (base_X, base_Y) and then asked to classify predict_X.

    Returns a list of (assigned base class, original label) pairs, one per
    predict sample, pairing each prediction with the matching entry of
    predict_Y.
    """
    random_forest = RandomForestClassifier()
    random_forest.fit(base_X, base_Y)
    y_pred = random_forest.predict(predict_X)
    # Return a list rather than a bare zip: on Python 3 zip is a one-shot
    # iterator, and the result must survive being traversed repeatedly.
    return list(zip(y_pred, predict_Y))
def main():
    """
    Load the network features and plot two cross-classification charts.

    The samples are split into real-world and synthesized networks. A model
    trained on one group classifies the other group, and the outcome is
    shown as a stacked bar chart; this is done once in each direction.
    """
    column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity", "m4_1", "m4_2", "m4_3",
                    "m4_4", "m4_5", "m4_6"]
    isSubType = True
    at_least = 1
    # Only the samples and labels are needed here; the other two results of
    # init() are not used by this script.
    X, Y, _sub_to_main_type, _feature_order = init(
        "features.csv", column_names, isSubType, at_least)

    real_X, real_Y, synthesized_X, synthesized_Y = separator(X, Y)

    # synthesized to real
    # Materialize so the pairs can be traversed more than once.
    bp_tuple_L = list(base_to_predict(synthesized_X, synthesized_Y, real_X, real_Y))
    Ys, accum_dic = make_layers(bp_tuple_L, Synthesized)
    plot_accumulation(Ys, accum_dic)

    # real to synthesized
    bp_tuple_L = list(base_to_predict(real_X, real_Y, synthesized_X, synthesized_Y))
    # The model was trained on the real networks only, so those labels (not
    # every label in Y) are the base classes of this chart.
    Ys, accum_dic = make_layers(bp_tuple_L, real_Y)
    plot_accumulation(Ys, accum_dic)
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()