-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathingest to elasticsearch.py
More file actions
39 lines (35 loc) · 916 Bytes
/
ingest to elasticsearch.py
File metadata and controls
39 lines (35 loc) · 916 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import sys
sys.setrecursionlimit(5000)
import simplejson as json
from elasticsearch import Elasticsearch
import pandas as pd
from elasticsearch.helpers import bulk
# Client built with no arguments, i.e. the library's default connection settings.
es = Elasticsearch()

# Request body for index creation. Despite the variable name it only carries
# index *settings*; no field mappings are declared here.
shard_layout = {
    "number_of_shards": 5,
    "number_of_replicas": 1,
}
mappings = {
    "settings": {
        "index": shard_layout,
        # Raised per-index field ceiling; presumably needed because tweet
        # documents expand into many distinct fields -- TODO confirm.
        "index.mapping.total_fields.limit": 10000,
    },
}

# ignore=400 keeps the script going when the create call is answered with
# HTTP 400 (presumably "index already exists" on a re-run). NOTE(review): this
# also hides any other 400, such as a rejected settings body.
es.indices.create(index="trump", body=mappings, ignore=400)
# Input file: one JSON-encoded tweet object per line; blank lines are allowed.
TWEET_FILE = 'tweetfile.json'
# Index every document is written to.
TARGET_INDEX = 'trump'
# Number of buffered documents that triggers a bulk request.
BATCH_SIZE = 1000

tweets = []
# The context manager closes the file even if parsing or indexing raises.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of depending on the platform's locale default.
with open(TWEET_FILE, 'r', encoding='utf-8') as tweet_file:
    for line in tweet_file:
        if not line.strip():
            # Nothing to parse on an empty / whitespace-only line.
            continue
        message = json.loads(line)
        # Move the tweet's own "id" out of the document body and use it as
        # the action's _id, so it is not stored a second time in _source.
        tweet_id = message.pop("id")
        tweets.append({
            '_id': tweet_id,
            '_source': message,
            '_index': TARGET_INDEX,
        })
        if len(tweets) >= BATCH_SIZE:
            # Send the full batch and reuse the same list for the next one,
            # keeping memory bounded regardless of file size.
            bulk(es, tweets)
            tweets.clear()

# Flush whatever is left after the last full batch; skipped when the file
# length was an exact multiple of BATCH_SIZE (or the file was empty).
if tweets:
    bulk(es, tweets)
    tweets.clear()