Skip to content

Commit f3fd80a

Browse files
author
Ali Hamdi
committed
Second week
Natural Language Processing (NLP) Text Classification Sentiment Analysis Feature Extraction Machine Learning Evaluation
1 parent 3f7386c commit f3fd80a

12 files changed

+61389
-0
lines changed

20 Python CSV.ipynb

Lines changed: 1490 additions & 0 deletions
Large diffs are not rendered by default.

21 Python BeautifulSoup4.ipynb

Lines changed: 48472 additions & 0 deletions
Large diffs are not rendered by default.

22 Python NLTK.ipynb

Lines changed: 6496 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {
7+
"collapsed": true
8+
},
9+
"outputs": [],
10+
"source": [
11+
"from nltk.metrics import *"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": 4,
17+
"metadata": {
18+
"collapsed": false
19+
},
20+
"outputs": [
21+
{
22+
"data": {
23+
"text/plain": [
24+
"['DET', 'NN', 'VB', 'DET', 'JJ', 'NN', 'NN', 'IN', 'DET', 'NN']"
25+
]
26+
},
27+
"execution_count": 4,
28+
"metadata": {},
29+
"output_type": "execute_result"
30+
}
31+
],
32+
"source": [
33+
"reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()\n",
34+
"reference"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 5,
40+
"metadata": {
41+
"collapsed": false
42+
},
43+
"outputs": [
44+
{
45+
"data": {
46+
"text/plain": [
47+
"['DET', 'VB', 'VB', 'DET', 'NN', 'NN', 'NN', 'IN', 'DET', 'NN']"
48+
]
49+
},
50+
"execution_count": 5,
51+
"metadata": {},
52+
"output_type": "execute_result"
53+
}
54+
],
55+
"source": [
56+
"test = 'DET VB VB DET NN NN NN IN DET NN'.split()\n",
57+
"test"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 6,
63+
"metadata": {
64+
"collapsed": false
65+
},
66+
"outputs": [
67+
{
68+
"data": {
69+
"text/plain": [
70+
"0.8"
71+
]
72+
},
73+
"execution_count": 6,
74+
"metadata": {},
75+
"output_type": "execute_result"
76+
}
77+
],
78+
"source": [
79+
"accuracy(reference, test)"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": 8,
85+
"metadata": {
86+
"collapsed": false
87+
},
88+
"outputs": [
89+
{
90+
"data": {
91+
"text/plain": [
92+
"1.0"
93+
]
94+
},
95+
"execution_count": 8,
96+
"metadata": {},
97+
"output_type": "execute_result"
98+
}
99+
],
100+
"source": [
101+
"precision(set(reference), set(test))"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": 9,
107+
"metadata": {
108+
"collapsed": false
109+
},
110+
"outputs": [
111+
{
112+
"data": {
113+
"text/plain": [
114+
"0.8"
115+
]
116+
},
117+
"execution_count": 9,
118+
"metadata": {},
119+
"output_type": "execute_result"
120+
}
121+
],
122+
"source": [
123+
"recall(set(reference), set(test))"
124+
]
125+
},
126+
{
127+
"cell_type": "code",
128+
"execution_count": 10,
129+
"metadata": {
130+
"collapsed": false
131+
},
132+
"outputs": [
133+
{
134+
"data": {
135+
"text/plain": [
136+
"0.8888888888888888"
137+
]
138+
},
139+
"execution_count": 10,
140+
"metadata": {},
141+
"output_type": "execute_result"
142+
}
143+
],
144+
"source": [
145+
"f_measure(set(reference), set(test))"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": 11,
151+
"metadata": {
152+
"collapsed": false
153+
},
154+
"outputs": [
155+
{
156+
"data": {
157+
"text/plain": [
158+
"3"
159+
]
160+
},
161+
"execution_count": 11,
162+
"metadata": {},
163+
"output_type": "execute_result"
164+
}
165+
],
166+
"source": [
167+
"edit_distance(\"rain\",\"shine\")"
168+
]
169+
},
170+
{
171+
"cell_type": "code",
172+
"execution_count": 14,
173+
"metadata": {
174+
"collapsed": false
175+
},
176+
"outputs": [
177+
{
178+
"data": {
179+
"text/plain": [
180+
"1"
181+
]
182+
},
183+
"execution_count": 14,
184+
"metadata": {},
185+
"output_type": "execute_result"
186+
}
187+
],
188+
"source": [
189+
"edit_distance(\"rain\",\"sain\")"
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": 15,
195+
"metadata": {
196+
"collapsed": false
197+
},
198+
"outputs": [
199+
{
200+
"data": {
201+
"text/plain": [
202+
"2"
203+
]
204+
},
205+
"execution_count": 15,
206+
"metadata": {},
207+
"output_type": "execute_result"
208+
}
209+
],
210+
"source": [
211+
"edit_distance(\"raine\",\"shine\")"
212+
]
213+
},
214+
{
215+
"cell_type": "code",
216+
"execution_count": 16,
217+
"metadata": {
218+
"collapsed": false
219+
},
220+
"outputs": [
221+
{
222+
"data": {
223+
"text/plain": [
224+
"1.0"
225+
]
226+
},
227+
"execution_count": 16,
228+
"metadata": {},
229+
"output_type": "execute_result"
230+
}
231+
],
232+
"source": [
233+
"s1 = set([1,2,3,4])\n",
234+
"s2 = set([3,4,5])\n",
235+
"binary_distance(s1,s2)"
236+
]
237+
},
238+
{
239+
"cell_type": "code",
240+
"execution_count": 17,
241+
"metadata": {
242+
"collapsed": false
243+
},
244+
"outputs": [
245+
{
246+
"data": {
247+
"text/plain": [
248+
"0.0"
249+
]
250+
},
251+
"execution_count": 17,
252+
"metadata": {},
253+
"output_type": "execute_result"
254+
}
255+
],
256+
"source": [
257+
"s1 = set([1,2,3,4])\n",
258+
"s2 = set([1,2,3,4])\n",
259+
"binary_distance(s1,s2)"
260+
]
261+
},
262+
{
263+
"cell_type": "code",
264+
"execution_count": 24,
265+
"metadata": {
266+
"collapsed": false
267+
},
268+
"outputs": [
269+
{
270+
"name": "stdout",
271+
"output_type": "stream",
272+
"text": [
273+
" | n p |\n",
274+
" | e o |\n",
275+
" | g s |\n",
276+
"----+-----+\n",
277+
"neg |<2>1 |\n",
278+
"pos | 1<2>|\n",
279+
"----+-----+\n",
280+
"(row = reference; col = test)\n",
281+
"\n"
282+
]
283+
}
284+
],
285+
"source": [
286+
"reference, test = ['pos', 'neg', 'pos', 'neg', 'pos', 'neg'], ['pos', 'neg','pos', 'pos', 'neg','neg']\n",
287+
"print (ConfusionMatrix(reference, test))"
288+
]
289+
},
290+
{
291+
"cell_type": "code",
292+
"execution_count": null,
293+
"metadata": {
294+
"collapsed": true
295+
},
296+
"outputs": [],
297+
"source": []
298+
},
299+
{
300+
"cell_type": "code",
301+
"execution_count": null,
302+
"metadata": {
303+
"collapsed": true
304+
},
305+
"outputs": [],
306+
"source": []
307+
},
308+
{
309+
"cell_type": "code",
310+
"execution_count": null,
311+
"metadata": {
312+
"collapsed": true
313+
},
314+
"outputs": [],
315+
"source": []
316+
},
317+
{
318+
"cell_type": "code",
319+
"execution_count": null,
320+
"metadata": {
321+
"collapsed": true
322+
},
323+
"outputs": [],
324+
"source": []
325+
}
326+
],
327+
"metadata": {
328+
"anaconda-cloud": {},
329+
"kernelspec": {
330+
"display_name": "Python [Root]",
331+
"language": "python",
332+
"name": "Python [Root]"
333+
},
334+
"language_info": {
335+
"codemirror_mode": {
336+
"name": "ipython",
337+
"version": 3
338+
},
339+
"file_extension": ".py",
340+
"mimetype": "text/x-python",
341+
"name": "python",
342+
"nbconvert_exporter": "python",
343+
"pygments_lexer": "ipython3",
344+
"version": "3.5.2"
345+
}
346+
},
347+
"nbformat": 4,
348+
"nbformat_minor": 0
349+
}

0 commit comments

Comments
 (0)