# NetworkClassification/model_selection.py at master · kansuke231/NetworkClassification · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from preprocess import init
from plot import index_to_color
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

# Class labels of the synthesized (network-model) classes; separator() treats
# every label that is not listed here as a real-world network class.
Synthesized = ["Scale Free", "ER Network", "Small World", "Forest Fire Network"]


def make_layers(bp_tuple_L, base_Ys):
    """
    Counts, for every predicted label, how often each base label occurs.

    bp_tuple_L: iterable of (base, predicted) pairs. It may be a one-shot
                iterator such as a zip object; it is materialized once here.
    base_Ys:    iterable of base labels (duplicates are ignored).

    Returns (Ys, accum_dic) where Ys is the sorted list of distinct base
    labels and accum_dic maps each predicted label to a list of counts
    aligned with Ys.

    Raises ValueError if a pair carries a base label that is not in base_Ys.
    """
    # The pairs are traversed twice below, so an iterator must be stored
    # first; otherwise the second pass would see nothing and all counts
    # would stay at zero.
    pairs = list(bp_tuple_L)

    Ys = sorted(set(base_Ys))

    # One zero-filled row per distinct predicted label (first-seen order).
    accum_dic = {predicted: [0] * len(Ys) for _, predicted in pairs}

    for base, predicted in pairs:
        accum_dic[predicted][Ys.index(base)] += 1

    return Ys, accum_dic


def plot_accumulation(Ys, accum_dic):
    """
    Draws a horizontal stacked bar chart from the output of make_layers.

    Ys:        labels shown on the y axis, one bar per label.
    accum_dic: maps each legend label to a list of counts aligned with Ys;
               the lists are stacked left to right in the dict's key order.

    Shows the figure with plt.show() and returns nothing.
    """
    iterate = list(accum_dic.keys())
    color_map = index_to_color(iterate)
    positions = range(len(Ys))

    # Running left edge of the next layer. It has to be a real list: a lazy
    # map object is not a usable sequence of offsets for barh.
    prev = [0] * len(Ys)
    ps = []  # storing the bar containers for the legend

    for i, k in enumerate(iterate):
        p = plt.barh(positions, accum_dic[k], 0.35, color=color_map(i), left=prev)
        ps.append(p)
        prev = [done + width for done, width in zip(prev, accum_dic[k])]

    plt.legend(ps, iterate, bbox_to_anchor=(1.12, 0.4), prop={'size': 12})
    plt.yticks(positions, Ys)
    plt.ylabel('Base Classes')
    plt.xlabel('Frequency')
    plt.show()


def separator(X, Y):
    """
    Splits the samples into real-world and synthesized (network model) groups.

    A sample belongs to the synthesized group when its label is listed in
    Synthesized. Returns real_X, real_Y, synthesized_X, synthesized_Y, each
    keeping the input order.
    """
    real = ([], [])
    synthesized = ([], [])

    for features, label in zip(X, Y):
        # Pick the (features, labels) pair of lists this sample goes into.
        xs, ys = synthesized if label in Synthesized else real
        xs.append(features)
        ys.append(label)

    return real[0], real[1], synthesized[0], synthesized[1]


def base_to_predict(base_X, base_Y, predict_X, predict_Y):
    """
    Train on the base networks, classify predict networks

    Fits a RandomForestClassifier with default settings on (base_X, base_Y)
    and predicts a base label for every sample in predict_X.

    Returns a list of (label predicted by the forest, actual label from
    predict_Y) pairs. It is a list rather than a lazy zip so that callers
    can traverse it more than once.
    """
    random_forest = RandomForestClassifier()
    random_forest.fit(base_X, base_Y)
    y_pred = random_forest.predict(predict_X)
    return list(zip(y_pred, predict_Y))


def main():
    """
    Runs both cross-classification experiments and plots each result.

    First a classifier trained on the synthesized networks labels the
    real-world networks, then a classifier trained on the real-world networks
    labels the synthesized ones. Each result is shown as a stacked bar chart.
    """
    column_names = ["NetworkType", "SubType", "ClusteringCoefficient", "DegreeAssortativity", "m4_1", "m4_2", "m4_3",
                    "m4_4", "m4_5", "m4_6"]

    isSubType = True
    at_least = 1
    # The last two values returned by init are not needed here.
    X, Y, _sub_to_main_type, _feature_order = init("features.csv", column_names, isSubType, at_least)

    # The split is the same for both experiments, so compute it once.
    real_X, real_Y, synthesized_X, synthesized_Y = separator(X, Y)

    # synthesized to real
    bp_tuple_L = base_to_predict(synthesized_X, synthesized_Y, real_X, real_Y)
    Ys, accum_dic = make_layers(bp_tuple_L, Synthesized)
    plot_accumulation(Ys, accum_dic)

    # real to synthesized
    bp_tuple_L = base_to_predict(real_X, real_Y, synthesized_X, synthesized_Y)
    # NOTE(review): Y also contains the synthesized labels, which a model
    # trained on real_Y cannot output, so those rows stay empty in the plot.
    # real_Y may have been intended here -- confirm before changing.
    Ys, accum_dic = make_layers(bp_tuple_L, Y)
    plot_accumulation(Ys, accum_dic)


# Run the experiments only when this file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    main()