-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path_init_user.py
More file actions
180 lines (144 loc) · 4.94 KB
/
_init_user.py
File metadata and controls
180 lines (144 loc) · 4.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
import csv
import time
import logging
import datetime
import re
from datetime import tzinfo
import pytz
from pytz import timezone
from dateutil import parser
from twitter import *
# module-wide logger plus a colourised debug format that shows the level,
# the file, the function and the line number ahead of each message
logger = logging.getLogger("root")
logging.basicConfig(
    level=logging.DEBUG,
    format="\033[1;36m%(levelname)s: %(filename)s (def %(funcName)s %(lineno)s): \033[1;37m %(message)s",
)

# twitter api credentials come from the environment
# each one is None when its variable is not set
TWITTER_CONSUMER_KEY = os.getenv("TWITTER_CONSUMER_KEY")
TWITTER_CONSUMER_SECRET = os.getenv("TWITTER_CONSUMER_SECRET")
TWITTER_ACCESS_TOKEN = os.getenv("TWITTER_ACCESS_TOKEN")
TWITTER_ACCESS_TOKEN_SECRET = os.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# timezone objects: one for local reporting, one attached to tweet timestamps
LOCAL_TIMEZONE = timezone("US/Pacific")
TWITTER_TIMEZONE = pytz.timezone("UTC")
class TwitterUserSearch(object):
    """
    pull a user's timeline from the twitter api and write it out as a csv

    set ``user`` to the screen name you want, then call ``_init()``; one row
    is written per tweet, using the columns listed in ``csv_headers``
    """

    # username to search
    user = ""

    # column names for our csv
    # this will change if you pull in more data
    csv_headers = [
        "tweet_utc_date",
        "user_name",
        "user_screen_name",
        "tweet_text",
        "tweet_url",
        "tweet_id",
        "user_profile_image_url",
        "user_location",
        "source",
        "in_reply_to_screen_name",
        "in_reply_to_status_id",
        "image_link",
        "retweet_count",
        "favorite_count",
        "time_zone",
        "geo_enabled",
        "geography",
        "coordinates",
        "lang",
    ]

    # pattern for the csv name, filled in with the username
    _csv_filename_template = "_%s_tweets.csv"

    # what we'll name our csv file
    # heads up: this is computed once, while ``user`` is still empty, so
    # _resolve_csv_filename() rebuilds it when a user is assigned later
    csv_filename = _csv_filename_template % (user)

    def _init(self, *args, **kwargs):
        """
        start the whole twitter user search a rollin

        fetches the timeline for ``self.user`` and writes a header row plus
        one row per tweet to the csv file; extra arguments are accepted and
        ignored
        """
        # return our tweets
        # (fetched before the file is opened so a failed api call does not
        # truncate a csv left over from an earlier run)
        tweet_results = self.construct_twitter_search(self.user)

        # open a file
        with self._open_csv_file(self._resolve_csv_filename()) as csv_file:

            # that will become our csv
            csv_output = csv.writer(csv_file, delimiter=',', quoting=csv.QUOTE_ALL)

            # write the header row to the csv file
            csv_output.writerow(self.csv_headers)

            # for each status
            for tweet in tweet_results:

                # get the UTC time for each
                tweet_date = parser.parse(tweet["created_at"])

                # set some timezone information
                tweet_date = tweet_date.replace(tzinfo=TWITTER_TIMEZONE)

                # build a new csv row
                csv_row = self.build_csv_row_from(tweet, tweet_date)

                # write the new csv row
                csv_output.writerow(csv_row)

    def _resolve_csv_filename(self):
        """
        work out which file the csv should be written to

        an explicitly customised ``csv_filename`` always wins; the untouched
        class default is rebuilt from the current ``user`` so the file is
        actually named after the account being searched
        """
        untouched_default = self._csv_filename_template % ""
        if self.user and self.csv_filename == untouched_default:
            return self._csv_filename_template % self.user
        return self.csv_filename

    @staticmethod
    def _open_csv_file(path):
        """
        open ``path`` for writing in the mode the csv module expects
        """
        import sys

        if sys.version_info[0] < 3:
            # python 2's csv module writes bytes
            return open(path, "wb")

        # python 3's csv module writes text and handles newlines itself
        return open(path, "w", newline="")

    @staticmethod
    def _to_ascii(value):
        """
        drop non-ascii characters from a text value, passing None through
        """
        if value is None:
            # some fields (e.g. the user location) can be missing
            return None

        # decode back to text so the same code runs on python 2 and python 3
        return value.encode('ascii', 'ignore').decode('ascii')

    @staticmethod
    def _find_image_link(tweet):
        """
        return the https url of the tweet's first image, or None
        """
        # a top level "media" key is honoured first, as before
        media = tweet.get("media")

        # NOTE(review): twitter's documented tweet object nests media as a
        # list under "entities" - confirm against a live payload
        if not media:
            media = (tweet.get("entities") or {}).get("media")

        # a list of media objects: use the first one
        if isinstance(media, (list, tuple)):
            media = media[0] if media else None

        if not media:
            return None

        return media.get("media_url_https")

    def construct_twitter_search(self, user):
        """
        function to auth with twitter and return tweets

        asks for up to 200 statuses from the timeline of the screen name
        given in ``user`` and returns whatever the api client hands back
        """
        # build the authorization for the twitter api
        twitter_object = Twitter(
            auth=OAuth(
                TWITTER_ACCESS_TOKEN,
                TWITTER_ACCESS_TOKEN_SECRET,
                TWITTER_CONSUMER_KEY,
                TWITTER_CONSUMER_SECRET
            )
        )

        # retrieve the tweets
        tweet_results = twitter_object.statuses.user_timeline(
            screen_name=user,
            count=200
        )

        # return them
        return tweet_results

    def build_csv_row_from(self, tweet, tweet_date):
        """
        create a csv row from tweet results

        ``tweet`` is one status dictionary from the api and ``tweet_date``
        is its creation time; the returned list lines up one-to-one with
        ``csv_headers``
        """
        tweet_user = tweet["user"]
        screen_name = self._to_ascii(tweet_user["screen_name"])

        # construct url format
        tweet_url = "https://twitter.com/" + screen_name + "/status/" + str(tweet["id"])

        # output some information
        print("%s - %s - %s" % (
            tweet_date,
            tweet_user["screen_name"],
            tweet_url,
        ))

        # see if an image is present in the dictionary
        tweet_image = self._find_image_link(tweet)

        # build a row of tweet data
        csv_row_data = [
            tweet_date,
            self._to_ascii(tweet_user["name"]),
            screen_name,
            self._to_ascii(tweet["text"]),
            tweet_url,
            tweet["id"],
            self._to_ascii(tweet_user["profile_image_url"]),
            self._to_ascii(tweet_user["location"]),
            self._to_ascii(tweet["source"]),
            tweet["in_reply_to_screen_name"],
            tweet["in_reply_to_status_id_str"],
            tweet_image,
            tweet["retweet_count"],
            tweet["favorite_count"],
            tweet_user["time_zone"],
            tweet_user["geo_enabled"],
            tweet["geo"],
            tweet["coordinates"],
            tweet["lang"],
        ]

        # print the row
        print(csv_row_data)

        # return the row
        return csv_row_data
if __name__ == '__main__':
    # run the search for the class-level default user
    task_run = TwitterUserSearch()
    task_run._init()

    # single-argument print() gives the same output on python 2 and python 3
    print("\nTask finished at %s\n" % str(datetime.datetime.now()))