/
bar_chart.py
executable file
·174 lines (156 loc) · 5.53 KB
/
bar_chart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly
import plotly.graph_objs as go
from plotly import figure_factory as FF
import pandas as pd
import functools
from common import app
from common import graphconfig
from tools import get_facet_group_options
import bwypy
# NOTE(review): 'supress' (single "p") looks like the spelling used by older
# Dash releases; newer ones spell it 'suppress_callback_exceptions' -- confirm
# against the installed Dash version before renaming.
app.config.supress_callback_exceptions = True

# Configure bwypy's database and endpoint before any query object is built.
bwypy.set_options(
    database='Bookworm2016',
    endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py',
)

# Shared query object driven by get_results(); field verification is off.
bw = bwypy.BWQuery(verify_fields=False)

# Options offered by the 'group-dropdown' selector in the layout below.
facet_opts = get_facet_group_options(bw)
# Identical calls are answered from the cache instead of re-running the query.
@functools.lru_cache(maxsize=32)
def get_results(group):
    """Run the count query for one facet group on the shared ``bw`` object.

    The query asks for both ``WordCount`` and ``TextCount``, grouped by
    ``'*' + group`` and limited to rows whose ``<group>__id`` is below 60.

    Args:
        group: Name of the facet field; must be hashable because it is the
            cache key.

    Returns:
        Whatever ``bw.run()`` returns.  Up to 32 distinct ``group`` values
        are memoised, so repeated calls share the same result object.
    """
    bw.counttype = ['WordCount', 'TextCount']
    bw.groups = ['*' + group]
    id_limit = {"$lt": 60}
    bw.search_limits = {group + '__id': id_limit}
    return bw.run()
# Second query object, configured only by get_date_distribution(), kept apart
# from the ``bw`` object that get_results() configures.
bw_date = bwypy.BWQuery(verify_fields=False)
@functools.lru_cache(maxsize=32)
def get_date_distribution(group, facet):
    """Return per-year text counts for one value of a facet field.

    Configures the shared ``bw_date`` query to count texts by ``date_year``
    with ``{group: facet}`` as the search limit, then post-processes the
    resulting frame.

    Args:
        group: Facet field name used as the search-limit key.
        facet: Value of that field to filter on (must be hashable, since the
            argument pair is the cache key).

    Returns:
        A DataFrame restricted to 1800 < date_year < 2016, sorted by year,
        with an extra ``smoothed`` column holding a 10-row rolling mean of
        ``TextCount``.  The same object is returned for repeated identical
        calls, so callers should copy it before modifying it.
    """
    bw_date.groups = ['date_year']
    bw_date.counttype = ['TextCount']
    bw_date.search_limits = {group: facet}

    frame = bw_date.run().frame(index=False)
    frame.date_year = pd.to_numeric(frame.date_year)

    in_range = frame.query('(date_year > 1800) and (date_year < 2016)')
    by_year = in_range.sort_values('date_year', ascending=True)

    # min_periods=0 lets the first rows average over fewer than 10 values.
    by_year['smoothed'] = by_year.TextCount.rolling(window=10, min_periods=0).mean()
    return by_year
# Markdown text rendered by dcc.Markdown at the top of the controls column.
header = '''
# Bookworm Bar Chart
Select a field and see the raw counts in the Bookworm database
'''
# Control column: facet-group selector, result-count slider, the
# "ignore unknown values" toggle and the count-type toggle.  The component ids
# below are the ones the callbacks in this file listen to.
controls = html.Div(
    className='col-md-3',
    children=[
        dcc.Markdown(header),
        html.Label("Facet Group"),
        dcc.Dropdown(id='group-dropdown', options=facet_opts, value='language'),
        html.Label("Number of results to show"),
        dcc.Slider(
            id='trim-slider', min=10, max=60, value=20, step=5,
            marks={str(n): str(n) for n in range(10, 61, 10)},
        ),
        # Dash style dictionaries use camelCased CSS property names, so the
        # key is 'paddingTop' rather than the hyphenated 'padding-top'.
        html.Label("Ignore unknown values:", style={'paddingTop': '15px'}),
        dcc.RadioItems(
            id='drop-radio',
            options=[
                {'label': 'Yes', 'value': 'drop'},
                {'label': 'No', 'value': 'keep'},
            ],
            value='drop',
        ),
        html.Label("Count by:"),
        # The id says "dropdown" but callbacks reference it, so it is kept.
        dcc.RadioItems(
            id='counttype-dropdown',
            options=[
                {'label': '# of Texts', 'value': 'TextCount'},
                {'label': '# of Words', 'value': 'WordCount'},
            ],
            value='TextCount',
        ),
    ],
)
# Page layout: two rows inside a fluid container.
app.layout = html.Div(
    className='container-fluid',
    children=[
        # Row 1: the controls column beside the main bar chart.
        html.Div(
            className='row',
            children=[
                controls,
                html.Div(
                    className='col-md-9',
                    children=[dcc.Graph(id='bar-chart-main-graph', config=graphconfig)],
                ),
            ],
        ),
        # Row 2: the data table beside the date-distribution graph.
        html.Div(
            className='row',
            children=[
                html.Div(
                    id='data-table',
                    className='col-md-5',
                    children=[html.H2("Data"), dcc.Graph(id='bar-data-table')],
                ),
                html.Div(
                    id='graph-wrapper',
                    className='col-md-7',
                    children=[dcc.Graph(id='date-distribution')],
                ),
            ],
        ),
    ],
)
@app.callback(
    Output('bar-chart-main-graph', 'figure'),
    [Input('group-dropdown', 'value'), Input('trim-slider', 'value'),
     Input('drop-radio', 'value'), Input('counttype-dropdown', 'value')]
)
def update_figure(group, trim_at, drop_radio, counttype):
    """Build the main bar chart for the selected facet group.

    Args:
        group: Facet field chosen in 'group-dropdown'; used both as the query
            group and as the x column of the result frame.
        trim_at: Number of leading rows of the result frame to plot.
        drop_radio: 'drop' to ask the result frame to drop unknown values,
            anything else to keep them.
        counttype: Result column to plot on the y axis ('TextCount' or
            'WordCount' per the radio options).

    Returns:
        A figure dict with one bar trace and a layout carrying the chart
        title and the y-axis title.
    """
    # get_results() configures the shared ``bw`` query itself, so nothing is
    # written to ``bw`` here; an extra write would only race with a query
    # another request thread is in the middle of building.
    results = get_results(group)
    frame = results.frame(index=False, drop_unknowns=(drop_radio == 'drop'))
    shown = frame.head(trim_at)

    bars = [
        go.Bar(
            x=shown[group],
            y=shown[counttype]
        )
    ]
    return {
        'data': bars,
        'layout': {
            # Plotly has no 'yTitle' layout attribute; the axis label lives
            # under yaxis.title.
            'yaxis': {'title': counttype},
            'title': group.replace('_', ' ').title()
        }
    }
@app.callback(
    Output('bar-data-table', 'figure'),
    [Input('group-dropdown', 'value'), Input('drop-radio', 'value')]
)
def update_table(group, drop_radio):
    """Render the query results for ``group`` as a Plotly table figure.

    Args:
        group: Facet field chosen in 'group-dropdown'.
        drop_radio: 'drop' to ask the result frame to drop unknown values,
            anything else to keep them.

    Returns:
        The figure produced by ``FF.create_table`` for the result frame.
    """
    skip_unknowns = drop_radio == 'drop'
    frame = get_results(group).frame(index=False, drop_unknowns=skip_unknowns)
    # Hand the figure factory its own copy of the frame.
    # NOTE: an html.Table rendering (header row plus up to 100 body rows) was
    # previously sketched here as an alternative to the figure-factory table.
    return FF.create_table(frame.copy())
@app.callback(
    Output('date-distribution', 'figure'),
    [Input('bar-chart-main-graph', 'hoverData'), Input('group-dropdown', 'value')])
def print_hover_data(clickData, group):
    """Draw the date distribution for the bar currently hovered in the chart.

    Args:
        clickData: The main graph's ``hoverData`` (despite the parameter
            name); falsy when no bar has been hovered yet.
        group: Facet field chosen in 'group-dropdown'.

    Returns:
        A figure dict.  Without hover data it is a flat placeholder line with
        a prompt as title; otherwise it plots the ``smoothed`` yearly counts
        for the hovered facet value.
    """
    if not clickData:
        years = list(range(1800, 2016))
        return {
            # One zero per year so x and y always have the same length.
            'data': [go.Scatter(x=years, y=[0] * len(years))],
            'layout': {
                'height': 300,
                'yaxis': {'range': [0, 100000]},
                'title': 'Select a ' + group.replace('_', ' ') + ' to see date distribution'
            }
        }

    facet_value = clickData['points'][0]['x']
    # Work on a copy so the frame cached by get_date_distribution stays intact.
    df = get_date_distribution(group, facet_value).copy()

    # max() is NaN when no rows fall in the plotted year range; int(NaN)
    # would raise, so fall back to 0 in that case.
    peak = df['smoothed'].max()
    y_top = (0 if pd.isnull(peak) else int(peak)) + 100

    return {
        'data': [
            go.Scatter(
                x=df['date_year'],
                y=df['smoothed']
            )
        ],
        'layout': {
            'height': 300,
            'yaxis': {'range': [0, y_top]},
            # str() keeps the title working when the hovered x value is not a
            # string (e.g. a numeric facet value).
            'title': 'Date Distribution for ' + str(facet_value).replace('_', ' ').title()
        }
    }
if __name__ == '__main__':
    # app.scripts.config.serve_locally = False
    # Repeats the module-level setting for the direct-run path.
    app.config.supress_callback_exceptions = True
    # NOTE(review): debug=True combined with host='0.0.0.0' makes the debug
    # server reachable from other machines -- confirm this is only ever run
    # in a trusted development environment.
    app.run_server(
        host='0.0.0.0',
        port=8080,
        debug=True,
        threaded=True,
    )