Skip to content

Commit 1f5abcf

Browse files
Add files via upload
1 parent 26b6670 commit 1f5abcf

1 file changed

Lines changed: 18 additions & 7 deletions

File tree

classification.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import csv
33
from flair.data import Sentence
44
from flair.models import SequenceTagger
5-
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings, DocumentPoolEmbeddings, BertEmbeddings, ELMoEmbeddings, OpenAIGPTEmbeddings, RoBERTaEmbeddings, XLNetEmbeddings
5+
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings, DocumentPoolEmbeddings, BertEmbeddings, ELMoEmbeddings, OpenAIGPTEmbeddings, RoBERTaEmbeddings, XLNetEmbeddings, BytePairEmbeddings
66
import torch
77
from torch import tensor
88
import numpy as np
@@ -43,6 +43,11 @@ def parse_string(a_str):
4343
to_ret3 = " ".join(to_ret2)
4444
return to_ret3
4545

46+
47+
def get_misclass():
48+
print("misclassified examples!!!")
49+
return np.where(Y_val != pipe.predict(X_val))
50+
4651
class Text2Vec( BaseEstimator, TransformerMixin):
4752
'''
4853
def __init__():
@@ -82,9 +87,11 @@ def transform(self, X):
8287

8388

8489

85-
stacked_embeddings = DocumentPoolEmbeddings([#WordEmbeddings('en'),
90+
stacked_embeddings = DocumentPoolEmbeddings([WordEmbeddings('en'),
8691
#WordEmbeddings('glove'),
87-
WordEmbeddings('en-crawl')])
92+
#WordEmbeddings('en-crawl',
93+
#BytePairEmbeddings('en', 300),
94+
])
8895

8996
with open('card_classification.csv') as csvfile:
9097
reader = csv.reader(csvfile)
@@ -109,10 +116,10 @@ def create_model(optimizer='adam', kernel_initializer='glorot_uniform', epochs =
109116
model = Sequential()
110117
#model.add(InputLayer(input_shape=(153, 1, 300)))
111118
#print(model.output_shape)
112-
model.add(Reshape((1, list_of_embeddings[1].size), input_shape = Emb_train.shape[1:])) ##magical fucking stupid keras BS
119+
model.add(Reshape((1, list_of_embeddings[1].size), input_shape = Emb_train.shape[1:])) ##magical fucking stupid keras BS needed for RNN/CNN
113120
#print(model.output_shape)
114-
#model.add(Conv1D(filters=12, kernel_size=1, activation='relu')) ##works now
115-
model.add(GRU(list_of_embeddings[1].size))
121+
#model.add(Conv1D(filters=20, kernel_size=1, activation='relu')) ##works now
122+
model.add(GRU(list_of_embeddings[1].size)) ##this works too - seems to be better for smaller datasets too!
116123
#print(model.output_shape)
117124
#model.add(Flatten())
118125
#model.add(Dense(list_of_embeddings[1].size, activation='relu',kernel_initializer='he_uniform', use_bias = False))
@@ -125,7 +132,7 @@ def create_model(optimizer='adam', kernel_initializer='glorot_uniform', epochs =
125132

126133
if keras:
127134
checkpointer = ModelCheckpoint(filepath='/tmp/weights.hdf5', verbose=1, save_best_only=True)
128-
model = KerasClassifier(build_fn=create_model, batch_size = 32, epochs = 150, callbacks=[checkpointer], validation_split = 0.2)
135+
model = KerasClassifier(build_fn=create_model, batch_size = 32, epochs = 50, callbacks=[checkpointer], validation_split = 0.2)
129136

130137

131138

@@ -152,11 +159,15 @@ def create_model(optimizer='adam', kernel_initializer='glorot_uniform', epochs =
152159

153160
print(pd.DataFrame(conf, index=labels, columns=labels))
154161

162+
163+
predicts = pipe.predict(X_val)
155164
probs = pipe.predict_proba(X_val)
156165
a_df = pd.DataFrame(probs, index=Y_val, columns=labels)
157166
a_df[a_df.eq(0)] = np.nan
158167
print(a_df.round(2))
159168

169+
print(a_df.iloc[get_misclass()])
170+
160171
if keras:
161172
pipe.named_steps['model'].model.save('keras_model.h5')
162173
pipe.named_steps['model'].model = None

0 commit comments

Comments (0)