2929import eli5
3030from eli5 .lime import TextExplainer
3131from eli5 import explain_prediction
32- from eli5 .formatters import format_as_text
32+ from eli5 .formatters import format_as_text , format_as_html
3333import pandas as pd
34+ from IPython .display import display
35+ from keras .callbacks import ModelCheckpoint
36+ from keras .wrappers .scikit_learn import KerasClassifier
37+ from keras .models import Sequential
38+ from keras .layers import Dense
39+ from keras .models import load_model
3440
35-
# Feature flags for the interactive classification loop below.
# `keras`: when True, the pipeline's classifier is a Keras model that must be
# saved/loaded separately from the pickled pipeline (Keras models don't pickle).
keras = False
# `increment`: presumably gates the partial_fit incremental-learning path
# further down — TODO confirm against the full file.
increment = False
3743
# Document-level embedding: pools the word embeddings of each token in a
# Sentence into one fixed-size vector (default pooling; the mode='max'
# alternative is left commented out). Heavier contextual embeddings
# (ELMo/BERT/Flair) were tried and disabled.
stacked_embeddings = DocumentPoolEmbeddings([
    WordEmbeddings('en'),
    WordEmbeddings('glove'),
    WordEmbeddings('extvec'),#ELMoEmbeddings('original'),
    #BertEmbeddings('bert-base-cased'),
    #FlairEmbeddings('news-forward-fast'),
    #FlairEmbeddings('news-backward-fast'),
    ])#, mode='max')
4652
def create_model(optimizer='adam', kernel_initializer='glorot_uniform', epochs=5):
    """Build and compile the dense softmax classifier for KerasClassifier.

    Architecture: one ReLU hidden layer sized to the embedding dimension
    (taken from the module-level ``list_of_embeddings``), then an 11-way
    softmax output layer.

    NOTE(review): ``kernel_initializer`` is applied only to the output layer;
    the hidden layer is hard-wired to 'he_uniform'. ``epochs`` is unused in
    this builder — presumably it is captured here so the scikit-learn wrapper
    can pass it through to fit(); confirm before removing.
    """
    hidden = Dense(
        list_of_embeddings[1].size,
        activation='relu',
        kernel_initializer='he_uniform',
        use_bias=True,
    )
    output = Dense(
        11,
        activation='softmax',
        kernel_initializer=kernel_initializer,
        use_bias=True,
    )
    net = Sequential([hidden, output])
    net.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return net
59+
60+
4761def parse_string (a_str ):
4862 to_ret = "" .join ([c .lower () for c in a_str if c in string .ascii_letters or c in string .whitespace ])
4963 to_ret2 = to_ret .split ()
@@ -69,7 +83,7 @@ def transform(self, X):
6983 else :
7084 a_set = Sentence (p_str )
7185 stacked_embeddings .embed (a_set )
72- list_of_emb .append (a_set .get_embedding ().detach ().numpy ())
86+ list_of_emb .append (a_set .get_embedding ().cpu (). detach ().numpy ())
7387 to_ret = np .array (list_of_emb )
7488 else :
7589 try :
@@ -79,7 +93,7 @@ def transform(self, X):
7993 else :
8094 a_set = Sentence (p_str )
8195 stacked_embeddings .embed (a_set )
82- to_ret = a_set .get_embedding ().detach ().numpy ().reshape (1 , - 1 )
96+ to_ret = a_set .get_embedding ().cpu (). detach ().numpy ().reshape (1 , - 1 )
8397 except :
8498 print (type (X ))
8599 print (X )
@@ -89,18 +103,23 @@ def transform(self, X):
89103
# Restore the persisted scikit-learn pipeline (embedding transformer + model).
pipe = joblib.load('saved_card_classification.pkl')

# Keras models cannot be pickled inside the pipeline, so on the keras path the
# network weights are stored in a separate HDF5 file and re-attached here.
if keras:
    pipe.named_steps['model'].model = load_model('keras_model.h5')


# LIME-style text explainer; fitted per-sentence in explain_pred() below.
te = TextExplainer(random_state=42, n_samples=10000, position_dependent=True)
93111
def explain_pred(sentence):
    """Explain the pipeline's prediction for one sentence via LIME.

    Fits the module-level TextExplainer ``te`` against ``pipe.predict_proba``,
    appends an HTML rendering of the top-20 feature explanation to
    'latest_prediction.html', and prints the explainer's fit metrics.

    Side effects only; returns None.
    """
    te.fit(sentence, pipe.predict_proba)
    #txt = format_as_text(te.explain_prediction(target_names=["green", "neutral", "red"]))
    t_pred = te.explain_prediction(
        top=20,
        target_names=["ANB", "CAP", "ECON", "EDU", "ENV", "EX",
                      "FED", "HEG", "NAT", "POL", "TOP"],
    )
    html = format_as_html(t_pred)
    # Context manager guarantees the file is closed even if write() raises
    # (the original left the handle open on failure). Append mode preserves
    # the running log of explanations.
    with open("latest_prediction.html", "a+") as html_file:
        html_file.write(html)
    print(te.metrics_)
100122
101- def direct_explain_pred (sentence ):
102- txt = format_as_text (eli5 .explain_prediction (model , doc = sentence , target_names = ["green" , "neutral" , "red" ], vec = Text2Vec ())) #get vector importances
103- print (txt )
104123
105124def print_misclass ():
106125 print ("misclassified examples!!!" )
@@ -124,6 +143,9 @@ def print_misclass():
124143 break
125144 elif label == "stop" :
126145 csvfile .close ()
146+ if keras :
147+ pipe .named_steps ['model' ].model .save ('keras_model.h5' )
148+ pipe .named_steps ['model' ].model = None
127149 joblib .dump (pipe , 'saved_card_classification.pkl' )
128150 print ("Model Dumped!!!!" )
129151 done = True
@@ -134,8 +156,8 @@ def print_misclass():
134156 t_model = pipe .named_steps ['model' ]
135157 ppset = Sentence (str (to_process ))
136158 stacked_embeddings .embed (ppset )
137- the_emb = ppset .get_embedding ().detach ().numpy ().reshape (1 , - 1 )
159+ the_emb = ppset .get_embedding ().cpu (). detach ().numpy ().reshape (1 , - 1 )
138160 t_model .partial_fit (the_emb , the_labels ) ##INCREMENTAL LEARNING MODE ENGAGED
139161 the_labels .append (str (to_process ))
140162 spamwriter .writerow (the_labels )
141- csvfile .flush ()
163+ csvfile .flush ()
0 commit comments