python - Filtering GEO location for Twitter Streaming API- using IPython and Mongo DB -
i new programming, , trying head around code in jupyter notebook stream tweets specific location mongo db database. having difficulty doing this. please tell me if i'm using correct geocode call filter twitter stream?
thank you
the full code using below:
import numpy np import pandas pd import tweepy import time import math import os import sys geopy import geocoders tweepy import stream tweepy import oauthhandler tweepy.streaming import streamlistener import matplotlib.pyplot plt import ipywidgets wgt ipython.display import display sklearn.feature_extraction.text import countvectorizer import re datetime import datetime %matplotlib inline api_key = "*****" # <---- add api key api_secret = "****" # <---- add api secret access_token = "****" # <---- add access token access_token_secret = "****" # <---- add access token secret auth = tweepy.oauthhandler(api_key, api_secret) auth.set_access_token(access_token, access_token_secret) class listener(streamlistener): def __init__(self, start_time, time_limit=60): self.time = start_time self.limit = time_limit self.tweet_data = [] def on_data(self, data): savefile = io.open('raw_tweets.json', 'a', encoding='utf-8') while (time.time() - self.time) < self.limit: try: self.tweet_data.append(data) return true except baseexception e: print ('failed ondata,', str(e)) time.sleep(5) pass savefile = io.open('raw_tweets.json', 'w', encoding='utf-8') savefile.write(u'[\n') savefile.write(','.join(self.tweet_data)) savefile.write(u'\n]') savefile.close() exit() def on_error(self, status): print (statuses) api = tweepy.api(auth) api.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1) import pymongo pymongo import mongoclient import json start_time = time.time() #grabs system time twitterstream = stream(auth, streamlistener) mystreamlistener = streamlistener#(max_tweets=1000) mystream = tweepy.stream(auth = api.auth, listener=mystreamlistener) mystream.filter(track=['api.reverse_geocode'], async=true) class listener(streamlistener): counter = 0 def __init__(self, max_tweets=1000, *args, **kwargs): self.max_tweets = max_tweets self.counter = 0 super().__init__(*args, **kwargs) def on_connect(self): self.counter = 0 self.start_time = datetime.now() def on_status(self, status): # increment counter self.counter += 1 collection.insert_many if self.counter % 1 == 0: value = int(100.00 * self.counter / self.max_tweets) mining_time = datetime.now() - self.start_time progress_bar.value = value html_value = """<span class="label label-primary">tweets/sec: %.1f</span>""" % (self.counter / max([1,mining_time.seconds])) html_value += """ <span class="label label-success">progress: %.1f%%</span>""" % (self.counter / self.max_tweets * 100.0) html_value += """ <span class="label label-info">eta: %.1f sec</span>""" % ((self.max_tweets - self.counter) / (self.counter / max([1,mining_time.seconds]))) wgt_status.value = html_value if self.counter >= self.max_tweets: mystream.disconnect() print("finished") print("total mining time: %s" % (mining_time)) print("tweets/sec: %.1f" % (self.max_tweets / mining_time.seconds)) progress_bar.value = 0 try: client = pymongo.mongoclient('localhost', 27017) db = client['happycitydb'] collection = db['happycitytweets_collection'] tweet = json.loads(data) collection.insert(tweet) return true except baseexception e: print ('failed ondata,', str(e)) time.sleep(5) pass exit() keywords = ["happy"] progress_bar = wgt.intprogress(value=0) display(progress_bar) wgt_status = wgt.html(value="""<span class="label label primary">tweets/sec: 0.0</span>""") display(wgt_status) error_counter in range(5): try: mystream.filter(track=keywords) print("tweets collected: %s" % mystream.listener.counter) print("total tweets in collection: %s" % col.count()) break except: print("error# %s" % (error_counter + 1))
Comments
Post a Comment