/
app.py
107 lines (86 loc) · 5 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
import analytics
from analytics import MetsTwitter
import charts
# Shared MetsTwitter instance; every data query in this script goes through it.
mt = MetsTwitter()

# Page chrome: browser-tab title, headline, and intro blurb.
st.set_page_config(page_title="#MetsTwitter Feels")
st.title("#MetsTwitter Mood")
st.markdown(
    "How's #MetsTwitter feeling today? Here's a real-ish time dashboard "
    "keeping a pulse on all the highs and lows that define #MetsTwitter!"
)

# Timeframe picker. The selected label is passed verbatim to the analytics
# and chart helpers; index=1 preselects "Last 24 Hours".
lookback_choice = st.selectbox(
    "Pick a Timeframe.",
    ["Last 12 Hours", "Last 24 Hours", "Last 7 Days", "Season"],
    index=1,
)
# Headline score is always queried over a fixed "now-12h" window, regardless
# of the timeframe picked above.
# NOTE(review): presumably intentional for a "current" mood -- confirm.
sentiment_today = mt.sentiment_window("now-12h")
st.markdown(f"### Current Mood: {sentiment_today['score']}")

# Sentiment history for the selected timeframe, drawn first as a time series
# and then as a bar chart.
sentiment_trend = mt.sentiment_history(lookback_choice)

trend_line_chart = charts.time_series_chart(sentiment_trend, lookback_choice)
st.altair_chart(altair_chart=trend_line_chart, use_container_width=True)

trend_bar_chart = charts.bar_chart(sentiment_trend, lookback_choice)
st.altair_chart(altair_chart=trend_bar_chart, use_container_width=True)
# Show table of player sentiment.
st.subheader("Player Sentiment Rankings")
st.markdown("Player ranking based on the ratio of positive to negative tweets on Mets Twitter.")
player_sentiment = mt.player_sentiment(lookback_choice)

# Podium: the three best rows by "Overall Sentiment"; the row index labels are
# what gets displayed as the player name.
top_3 = player_sentiment.sort_values("Overall Sentiment", ascending=False)[:3]

# zip() stops at the shorter input, so a timeframe with fewer than three
# players renders fewer medals instead of raising IndexError on .iloc[n].
for medal, player_name in zip(("🥇", "🥈", "🥉"), top_3.index):
    st.markdown(f"### {medal} {player_name}")

with st.expander("Full Player Sentiment Rankings..."):
    # method="min" gives tied players the same (best) rank number.
    # NOTE(review): .astype(int) will raise if "Overall Sentiment" contains
    # NaN -- confirm the analytics layer never returns missing scores.
    player_sentiment["Rank"] = player_sentiment["Overall Sentiment"].rank(
        method="min",
        ascending=False
    ).astype(int)
    # Only the Rank column is shown; the index labels serve as the row names.
    st.table(player_sentiment.sort_values("Overall Sentiment", ascending=False)[["Rank"]])
# Display player sentiment history.
st.subheader("Player Trends")

# Roster names are offered in sorted order; the chosen name drives the query.
player = st.selectbox("Select a Player", sorted(analytics.roster))
player_data = mt.player_history(player=player)

player_chart = charts.pos_neg_bar_chart(player_data)
st.altair_chart(altair_chart=player_chart, use_container_width=True)
# A note about the data.
st.subheader("Let's Get Nerdy 🤓")
with st.expander("View Technical Notes..."):
    # Each markdown string below is built by implicit concatenation of adjacent
    # literals, so every fragment must end with its own trailing space.

    # Data source and sampling caveats.
    st.markdown("Thank you for checking out my dashboard! The purpose is to track the mood of Mets Twitter. The data is "
                "extracted from the Twitter API using the filtered stream. The stream is filtered for tweets where the "
                "Mets are mentioned or the #LGM hashtag is used. The intention is to get a pulse on what's happening "
                "in the Mets Twitter universe. Tweets from other fanbases talking about the Mets are still being read. "
                "I'm working on identifying such tweets to get a more accurate sentiment of the New York Mets fan base. "
                "Additionally, the tweets represented here are just a sample of the total Twitter activity.")
    # Ingestion/processing architecture.
    st.markdown("One problem I faced was keeping up with the stream of tweets, especially given the variability in tweet "
                "volume (large spikes during games). Originally I naively tried to run the NLP models against each tweet "
                "as they were received. Over time the processing was creating a backlog of tweets coming from the stream. "
                "To keep up with the tweets being received I separated reading the stream and processing the tweets into "
                "separate processes. When the stream is read each tweet is written to an Elastic index. A process was "
                "scheduled every minute to retrieve new documents from the database. The NLP models process the new "
                "documents in a batch and their entries updated. The entire process results in less than 2 minutes of "
                "latency between when the tweet is received and when it's represented in the dashboard. And it all runs on "
                "a small EC2 machine!")
    # Sentiment model and its known weakness with sarcasm.
    st.markdown("To measure sentiment the Huggingface [bertweet-base-sentiment-analysis](https://huggingface.co/finiteautomata/bertweet-base-sentiment-analysis) "
                "model is used. Reviewing tweets coming from the stream the model works reasonably well out-of-the-box. "
                "Sports-related tweets can be dripping in sarcasm that the base model might not pickup. For example the "
                "tweet 'Can’t wait to see Dom go 0-4 2Ks and 2 Ground ball outs one GIDP and a groundout to first' "
                "is classified as **POSITIVE**. The tweet drips with sarcasm and is actually a negative tweet. Having a "
                "model adjust for sarcasm can help generate more accurate sentiment scores.")
    # Player-name extraction model.
    st.markdown("A custom NER model was trained using [spaCy](https://spacy.io/). The model was trained on a sample of "
                "tweets to extract player names. Right now the model has an F-Score of 0.81. The model is updated daily with "
                "tweets from the previous day.")
    # Feedback call-to-action.
    st.markdown("Have any ideas for what you'd like to see? Let me know on [Twitter](https://twitter.com/MatthewVielkind)! "
                "I am working on a few more features to incorporate, but always open to new ideas!")