python - Filtering GEO location for Twitter Streaming API- using IPython and Mongo DB -


i new programming, , trying head around code in jupyter notebook stream tweets specific location mongo db database. having difficulty doing this. please tell me if i'm using correct geocode call filter twitter stream?

thank you

the full code using below:

import numpy np import pandas pd import tweepy import time import math import os import sys geopy import geocoders  tweepy import stream tweepy import oauthhandler tweepy.streaming import streamlistener   import matplotlib.pyplot plt  import ipywidgets wgt ipython.display import display sklearn.feature_extraction.text import countvectorizer import re datetime import datetime  %matplotlib inline  api_key = "*****" # <---- add api key api_secret = "****" # <---- add api secret access_token = "****" # <---- add access token access_token_secret = "****" # <---- add access token secret  auth = tweepy.oauthhandler(api_key, api_secret) auth.set_access_token(access_token, access_token_secret)  class listener(streamlistener):  def __init__(self, start_time, time_limit=60):      self.time = start_time      self.limit = time_limit      self.tweet_data = []  def on_data(self, data):  savefile = io.open('raw_tweets.json', 'a', encoding='utf-8')       while (time.time() - self.time) < self.limit:           try:              self.tweet_data.append(data)              return true          except baseexception e:             print ('failed ondata,', str(e))             time.sleep(5)             pass      savefile = io.open('raw_tweets.json', 'w', encoding='utf-8')     savefile.write(u'[\n')     savefile.write(','.join(self.tweet_data))     savefile.write(u'\n]')     savefile.close()     exit()  def on_error(self, status):      print (statuses)   api = tweepy.api(auth) api.reverse_geocode(51.4545 , -2.5879 , 2000 , 'city' , 1)   import pymongo pymongo import mongoclient import json   start_time = time.time() #grabs system time  twitterstream = stream(auth, streamlistener)  mystreamlistener = streamlistener#(max_tweets=1000) mystream = tweepy.stream(auth = api.auth, listener=mystreamlistener)  mystream.filter(track=['api.reverse_geocode'], async=true)  class listener(streamlistener):  counter = 0  def __init__(self, max_tweets=1000, *args, **kwargs):     self.max_tweets = max_tweets     self.counter = 0     super().__init__(*args, **kwargs)  def on_connect(self):     self.counter = 0     self.start_time = datetime.now()  def on_status(self, status):     # increment counter     self.counter += 1     collection.insert_many       if self.counter % 1 == 0:         value = int(100.00 * self.counter / self.max_tweets)         mining_time = datetime.now() - self.start_time         progress_bar.value = value         html_value = """<span class="label label-primary">tweets/sec: %.1f</span>""" % (self.counter / max([1,mining_time.seconds]))         html_value += """ <span class="label label-success">progress: %.1f%%</span>""" % (self.counter / self.max_tweets * 100.0)         html_value += """ <span class="label label-info">eta: %.1f sec</span>""" % ((self.max_tweets - self.counter) / (self.counter / max([1,mining_time.seconds])))         wgt_status.value = html_value          if self.counter >= self.max_tweets:             mystream.disconnect()             print("finished")             print("total mining time: %s" % (mining_time))             print("tweets/sec: %.1f" % (self.max_tweets / mining_time.seconds))             progress_bar.value = 0              try:                 client = pymongo.mongoclient('localhost', 27017)                 db = client['happycitydb']                 collection = db['happycitytweets_collection']                 tweet = json.loads(data)                 collection.insert(tweet)                  return true             except baseexception e:                 print ('failed ondata,', str(e))                 time.sleep(5)                 pass             exit()   keywords = ["happy"]  progress_bar = wgt.intprogress(value=0) display(progress_bar) wgt_status = wgt.html(value="""<span class="label label primary">tweets/sec: 0.0</span>""") display(wgt_status)  error_counter in range(5): try:     mystream.filter(track=keywords)     print("tweets collected: %s" % mystream.listener.counter)     print("total tweets in collection: %s" % col.count())     break except:     print("error# %s" % (error_counter + 1)) 


Comments

Popular posts from this blog

javascript - jQuery: Add class depending on URL in the best way -

caching - How to check if a url path exists in the service worker cache -

Redirect to a HTTPS version using .htaccess -