/
pubs.py
241 lines (182 loc) · 10.7 KB
/
pubs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# -*- coding: utf-8 -*-
# an ugly hack to convert some stuff into other stuff...
import sys
from datetime import datetime
try:
import anvio.utils as u
from anvio.errors import ConfigError
except:
sys.stderr.write("This program requires anvi'o to be installed :/\n")
sys.exit(-1)
class Publications:
    """Turn a YAML description of publications into a markdown page.

    Typical use is `parse_pubs_txt()` (reads and normalizes the YAML input)
    followed by `store_markdown_output_for_pubs(path)` (writes the page).
    """

    # HTML attributes put on the <span> that wraps a highlighted author name.
    _MEMBER_ATTRS = """class="pub-member-author" title='An official member of the lab at the time of publication'"""
    _COLLAB_ATTRS = """class="pub-collaborator-author" title='A key collaborator of the lab at the time of publication'"""

    def __init__(self, pubs_data_file_path='_data/pubs.yaml'):
        """Takes a YAML file, generates pubs output

        Nothing is read from disk here; `parse_pubs_txt` does the loading.
        """
        # the attributes `info`, `journals_list`, `authors_list`,
        # `recent_authors_list` and `author_links` are initialized here but
        # are not used by any method of this class.
        self.info = {}

        # to be populated from the YAML input:
        self.yaml_data = {}

        # the final data structure to be populated from
        # the parsing of the YAML input (year -> list of publication dicts):
        self.pubs_data = {}

        self.journals_list = []
        self.authors_list = []
        self.recent_authors_list = []
        self.author_links = {}

        self.pubs_data_file_path = pubs_data_file_path

    def get_author_highlights(self, pub, year):
        """Return the authors of `pub` as a comma-separated HTML string.

        Co-first authors get a '☯' superscript and co-senior authors a '‡'
        superscript. Authors that are listed under the `people` key of the
        YAML data are additionally wrapped in a <span>:

          - no `tenure` entry: collaborator span if the person has a
            `collaborator` key, member span otherwise.
          - `tenure` entry ("START-END"): member span if `year` falls in the
            tenure, otherwise collaborator span if the person has a
            `collaborator` key, otherwise no span at all.

        `pub` must already carry `authors`, `co_first_authors` and
        `co_senior_authors` lists (as set by `parse_pubs_txt`). `year` is
        converted with `int()` before the comparison.
        """
        # a missing or empty `people` section simply means nobody is highlighted
        people = self.yaml_data.get('people') or {}

        authors_html = []
        for author in pub['authors']:
            if author in pub['co_first_authors']:
                label = author + '<sup>☯</sup>'
            elif author in pub['co_senior_authors']:
                label = author + '<sup>‡</sup>'
            else:
                label = author

            if author not in people:
                authors_html.append(label)
                continue

            # if we are here, `author` is an author name to highlight. a person
            # listed without any attribute comes out of YAML as None, hence the
            # `or {}`.
            person = people[author] or {}

            if 'tenure' in person:
                # the author is to be highlighted as a member only for a given
                # range of years.
                # NOTE(review): the end year is exclusive here (range
                # semantics), so a tenure of '2015-2020' does not cover 2020.
                # confirm that this is intended.
                tenure = [int(i) for i in person['tenure'].split('-')]
                is_member = int(year) in range(tenure[0], tenure[1])
            else:
                is_member = 'collaborator' not in person

            if is_member:
                authors_html.append(f'<span {self._MEMBER_ATTRS}>{label}</span>')
            elif 'collaborator' in person:
                # not an official member in the year of publication, but still
                # a collaborator
                authors_html.append(f'<span {self._COLLAB_ATTRS}>{label}</span>')
            else:
                # neither
                authors_html.append(label)

        return ', '.join(authors_html)

    def get_abbreviated_name_from_full_name(self, author_name):
        """Takes full name (First Middle Last), return abbreviated name (Last FM)

        Dots are dropped before abbreviation. A '*' (co-first) or '+'
        (co-senior) marker found in the abbreviated name is moved to its very
        end. A single-word name is returned as is (without a trailing space),
        and an empty / whitespace-only name yields an empty string.
        """
        names = author_name.replace('.', '').split()

        if not names:
            return ''

        initials = ''.join(n[0] for n in names[:-1])

        # strip so that a name without initials does not end with a space
        abbreviated_name = f"{names[-1]} {initials}".strip()

        for marker in ('*', '+'):
            if marker in abbreviated_name:
                abbreviated_name = abbreviated_name.replace(marker, '') + marker

        return abbreviated_name

    def parse_pubs_txt(self):
        """Read the YAML file and populate `self.yaml_data` and `self.pubs_data`.

        Every entry under the `publications` key is normalized in place:

          - `authors` becomes a list of "LAST FM" names without markers,
          - `co_first_authors` / `co_senior_authors` list the names that were
            marked with '*' / '+',
          - `issue` is built from `volume`, `number` and `pages` (None when
            there is no volume),

        and is then appended to `self.pubs_data[<year>]`.
        """
        self.yaml_data = u.get_yaml_as_dict(self.pubs_data_file_path)

        # an example entry:
        #
        # {'doi': '10.1101/2020.11.01.361691',
        # 'title': 'The genetic and ecological landscape of plasmids in the human gut',
        # 'authors': 'Michael K Yu*, Emily C Fogarty*, A. Murat Eren',
        # 'journal': 'bioRxiv',
        # 'year': 2022,
        # 'volume': None,
        # 'number': None,
        # 'pages': None,
        # 'additional_info': {
        # 'highlights': ['A study that aims to shed light on <b>the ecology and evolution of one of the most critical yet poorly studied aspects of microbial life -- naturally occurring plasmids</b>.',
        # 'Uses state-of-the-art machine learning strategies to identify <b>over 60,000 plasmids</b> from human gut metagenomes, which represents a <b>200-fold increase</b> in the number of known plasmids to date that were detectable in healthy humans.',
        # "Defines hundreds of '<b>plasmid systems</b>', and demonstrates that naturally occurring plasmids are not static entities, but <b>their evolution is driven by the need to respond to the environment, and their ecology cannot be simply explained by bacterial taxonomy and distribution patterns of their putative hosts</b>."],
        # 'featured_image': '/images/pubs/plasmid_systems.png'
        # }
        # }
        for pub in self.yaml_data['publications']:
            # turn author names from "FIRST M LAST" form to "LAST FM" form.
            # empty entries (i.e., due to a trailing comma) are skipped.
            marked_names = [self.get_abbreviated_name_from_full_name(a.strip()) for a in pub['authors'].split(',')]
            marked_names = [name for name in marked_names if name]

            pub['co_first_authors'] = [name[:-1] for name in marked_names if name.endswith('*')]
            pub['co_senior_authors'] = [name[:-1] for name in marked_names if name.endswith('+')]
            pub['authors'] = [name[:-1] if name.endswith(('*', '+')) else name for name in marked_names]

            pub['issue'] = self._get_issue_text(pub)

            self.pubs_data.setdefault(pub['year'], []).append(pub)

    @staticmethod
    def _get_issue_text(pub):
        """Return "VOLUME(NUMBER):PAGES" using only the parts present, or None without a volume."""
        volume = pub.get('volume')

        if not volume:
            return None

        issue = f"{volume}"

        if pub.get('number'):
            issue += f"({pub['number']})"

        if pub.get('pages'):
            issue += f":{pub['pages']}"

        return issue

    def get_markdown_text_for_pub(self, pub):
        """Return the HTML/markdown snippet for a single, already parsed `pub`.

        Reads `doi`, `title`, `journal`, `year`, `issue`, `authors`,
        `co_first_authors` and `co_senior_authors` from `pub`. The optional
        `read_link` overrides the DOI link on the title, and the optional
        `additional_info` (with optional `highlights` and `featured_image`)
        adds the highlights box.
        """
        pub_md = []
        A = pub_md.append

        doi = pub['doi']
        read_link = pub.get('read_link', f"https://doi.org/{doi}")

        A(f'''<a id="{doi}"> </a>''')
        A('<div class="pub">')
        A(f'''<div class='altmetric-embed' data-badge-type='donut' data-doi="{doi}"></div>''')
        A(f'''<div class="__dimensions_badge_embed__" data-doi="{doi}" data-hide-zero-citations="true" data-legend="hover-bottom" data-style="small_circle"></div>''')
        A(f''' <span class="pub-title"><a href="{read_link}" target="_new">{pub['title']}</a></span>''')
        A(f''' <span class="pub-authors">{self.get_author_highlights(pub, pub['year'])}</span>''')

        # take care of co-first / co-senior authors
        legend = []
        if pub['co_first_authors']:
            legend.append('<sup>☯</sup>Co-first authors')
        if pub['co_senior_authors']:
            legend.append('<sup>‡</sup>Co-senior authors')
        if legend:
            A(' <span class="pub-co-first-authors">%s</span>' % '; '.join(legend))

        # add the publication highlights. `additional_info` may be missing or
        # empty, and either of its two keys may be missing.
        additional_info = pub.get('additional_info') or {}
        highlights = additional_info.get('highlights')
        featured_image = additional_info.get('featured_image')

        if highlights:
            A(' <div class="%s">' % ('pub-info' if featured_image else 'pub-info-no-image'))

            if featured_image:
                A(' <div class="pub-featured-image">')
                A(f' <a href="{featured_image}"><img src="{featured_image}" style="max-width: 100px; max-height: 80px; width: auto; border: none; height: auto; margin: 0 auto; display: block; transform: translateY(15%);"/></a>')
                A(' </div>')

            A(' <div class="%s">' % ('pub-highlights' if featured_image else 'pub-highlights-no-image'))
            A(' %s' % '<br>'.join([f'<span style="display: inline-block; padding-bottom: 5px;">- {h}</span>' for h in highlights]))
            A(' </div>')

            A(' </div>')

        # NOTE(review): only spaces are replaced in the title, so it is not
        # URL-encoded beyond that.
        scholar_link = f'''http://scholar.google.com/scholar?hl=en&q={pub['title'].replace(' ', '+')}'''
        additional_links = f'''| 🔍 <a href="{scholar_link}" target="_blank">Google Scholar</a> | 🔗 <a href="https://doi.org/{doi}" target="_blank">doi:{doi}</a>'''

        if pub['issue']:
            A(f''' <span class="pub-journal"> 📚 <b>{pub['journal']}</b>, {pub['issue']} {additional_links}</span>''')
        else:
            A(f''' <span class="pub-journal"> 📚 <b>{pub['journal']}</b> {additional_links}</span>''')

        A('</div>\n')

        return '\n'.join(pub_md)

    def store_markdown_output_for_pubs(self, output_file_path):
        """Write the publications page to `output_file_path`.

        The file starts with a YAML front matter (with today's date as
        `modified`), followed by the two badge scripts, a notice paragraph,
        and one `## YEAR` section per key of `self.pubs_data`, most recent
        year first.
        """
        # the page contains non-ASCII characters, so the encoding is explicit
        # rather than left to the locale. the `with` block makes sure the
        # file is closed (and flushed) even if rendering a publication fails.
        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            def W(s):
                output_file.write(s + '\n')

            W('---')
            W('layout: publications')
            W('modified: %s' % datetime.today().strftime('%Y-%m-%d'))
            W('comments: false')
            W('image:')
            W(' display: true')
            W(' feature: eel-pond.jpg')
            W('---\n')

            W('''<script type='text/javascript' src='https://d1bxh8uas1mnw7.cloudfront.net/assets/embed.js'></script>\n''')
            W('''<script async src="https://badge.dimensions.ai/badge.js" charset="utf-8"></script>\n''')

            # NOTE: a per-year navigation box (<div class="category-box">) used
            # to be written here; it is currently disabled.

            W('{:.notice}\n')
            W("This page lists publications that are most reflective of our interests. For a complete list, please see <a href='https://scholar.google.com/citations?user=GtLLuxoAAAAJ&view_op=list_works&sortby=pubdate' target='_blank'>Meren's Google Scholar page</a>.\n")

            for year in sorted(self.pubs_data, reverse=True):
                W('## %s\n' % (year))

                for pub in self.pubs_data[year]:
                    W(self.get_markdown_text_for_pub(pub))

                W('')
if __name__ == '__main__':
    # Script entry point: read the default YAML file and (re)generate the
    # publications page. A ConfigError is printed and turned into a
    # non-zero exit status instead of a traceback.
    publications = Publications()

    try:
        publications.parse_pubs_txt()
        publications.store_markdown_output_for_pubs('publications/index.md')
    except ConfigError as config_error:
        print(config_error)
        sys.exit(-1)