Skip to content

Commit

Permalink
crawler: if arguments are not given on the CLI, ask for them
Browse files Browse the repository at this point in the history
also, add the ability to search for a search term instead of a hashtag

Signed-off-by: Erik Castricum <git@cas-online.nl>
  • Loading branch information
erikcas committed Nov 24, 2021
1 parent f23f9a9 commit b670692
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 10 deletions.
29 changes: 23 additions & 6 deletions hashtag-crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ def writetweets(hashtag, tweets):
writer.writerow(["screen_name", "id", "created_at", "trucated text"])
writer.writerows(tweets)

def getweets(hashtag, datumi, tabel):
def getweets(tek, hashtag, datumi, tabel):
hashtweets, sorttweets, datatweets = [], [], []
twitter_datum = datetime.strptime(datum, '%d-%m-%Y').strftime('%Y-%m-%d')

hashtwit =f'#{hashtag}'
hashtwit =f'{tek}{hashtag}'
counter = 0
for tweet in tweepy.Cursor(api.search, q = hashtwit,
since=twitter_datum,count=200).items():
Expand All @@ -74,20 +74,37 @@ def getweets(hashtag, datumi, tabel):

writetweets(hashtag, hashtweets)
tag_data(hashtag, datatweets)
plot_data(hashtag, datatweets, datum)
plot_data(tek, hashtag, datatweets, datum)
leaders(hashtag, sorttweets)
if tabel == 'ja':
print_table(hashtag, sorttweets)
post_twitter(hashtag, counter, tabel, datum)
post_twitter(tek, hashtag, counter, tabel, datum)


# Script entry point: collect the search criteria and run the crawl.
#
# The criteria are taken from the command line when all three positional
# arguments (search term, start date, table flag) are present; otherwise the
# user is offered interactive prompts instead.
try:
    hashtek = sys.argv[1]
    datum = sys.argv[2]
    # A missing third argument raises IndexError and falls through to the
    # prompts below; an explicitly empty third argument means "no table".
    tabel = sys.argv[3] or 'nee'
    # Searches started from the command line are always hashtag searches.
    tek = '#'
except IndexError:
    q = input('Geen of onvoldoende criteria ingegeven. Handmatig invoeren (j|n): ')
    if q == 'n':
        sys.exit()
    q = input('Zoeken naar hastag (j|n): ')
    # An empty prefix turns the query into a plain search term.
    tek = '#' if q == 'j' else ''
    hashtek = input('Zoekterm (let op: voor een hastag 1 woord zonder "#" invoeren): ')
    datum = input('Sinds welke datum (dd-mm-jjjj): ')
    # The answer has to be read with input(); assigning the bare prompt string
    # would never equal 'j', so the table could never be requested.
    q = input('Tabel maken van de top 10 posters (j|n): ')
    tabel = 'ja' if q == 'j' else 'nee'

# The crawl runs outside the try block so that an IndexError raised while
# processing tweets is not mistaken for missing command-line arguments.
getweets(tek, hashtek, datum, tabel)
4 changes: 2 additions & 2 deletions post.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
# calling the api
api = tweepy.API(auth)

def post_twitter(hashtag, counter, tabel, datum):
text = f'#{hashtag} fun facts\n\n \
def post_twitter(tek, hashtag, counter, tabel, datum):
text = f'{tek}{hashtag} fun facts\n\n \
Aantal tweets sinds {datum}: {counter}\n \
Grafische voorstelling in de plaatje(s) hieronder\n\n \
Volg ook @inter_crap voor breaking news!'
Expand Down
4 changes: 2 additions & 2 deletions tag_hourly.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def tag_data(hashtag, datatweets):
stand = counts.items()
pd.DataFrame(stand).to_csv(filename, header=['Tijd', 'Aantal'])

def plot_data(hashtag, datatweets, datum):
def plot_data(tek, hashtag, datatweets, datum):
filename = f'{hashtag}_tweet_graph.csv'
with open(filename) as f:
reader = csv.reader(f)
Expand All @@ -36,7 +36,7 @@ def plot_data(hashtag, datatweets, datum):
ax.plot(tijd, aantal, c='red')

# Format plot
titel = f'{hashtag} tweets per uur sinds {datum}'
titel = f'{tek}{hashtag} tweets per uur sinds {datum}'
plt.title(titel, fontsize=20)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
Expand Down

0 comments on commit b670692

Please sign in to comment.