/
cvserver.py
99 lines (82 loc) · 3.36 KB
/
cvserver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# coding: utf-8
from __future__ import print_function
from __future__ import unicode_literals
import requests
import bs4
import re
# Set to True to print the raw response object and intermediate parse
# results (next-sibling markup, full caption list) while debugging.
DEBUG = False
def response_for_image(image_url, client_name):
    """Post an image URL to the captioning endpoint and return the reply text.

    :param image_url: URL of the image, sent in the ``urllink`` form field.
    :param client_name: Name interpolated into the ``User-agent`` header.
    :returns: The response body with surrounding whitespace stripped, or
        ``None`` when the read times out or the body is empty.
    """
    endpoint = 'http://deeplearning.cs.toronto.edu/api/url.php'
    # Values are handed to ``files=`` as (filename, content) tuples with an
    # empty filename, so they are sent as multipart form fields.
    form_fields = {
        'urllink': ('', image_url),
        'url-2txt': ('', ''),
    }
    request_headers = {
        'connection': 'keep-alive',
        'X-Requested-With': 'XMLHttpRequest',
        'User-agent': "@interesting_jpg %s v. 1.0" % client_name,
    }

    try:
        reply = requests.post(
            endpoint,
            files=form_fields,
            headers=request_headers,
            timeout=5 * 60,
        )
    except requests.exceptions.ReadTimeout:
        print("read time out")
        return None

    body = reply.text.strip()
    if DEBUG:
        print(reply)

    if body:
        return body

    # Empty body: report the HTTP status so the failure can be diagnosed.
    print('no text in response. status: %d %s' % (reply.status_code, reply.reason))
    return None
def nearest_neighbour(raw_text):
    """Return the text of the first ``<li>`` element found in *raw_text*.

    :param raw_text: HTML fragment to parse; may be ``None`` or empty.
    :returns: Text of the first list item, or ``None`` when *raw_text* is
        falsy or no list item can be read (the error and the parsed
        document are printed in that case).
    """
    if not raw_text:
        return None

    parsed = bs4.BeautifulSoup(raw_text, 'html.parser')
    try:
        item_text = parsed.li.get_text()
    except AttributeError as exc:
        # ``parsed.li`` is None when the document has no <li> element.
        print(exc)
        print(parsed.prettify())
        return None
    return item_text
def captions(raw_text):
    """Return the captions listed after the first "Top..." ``<h4>`` heading.

    Locates the first ``<h4>`` whose text matches ``Top`` and collects the
    text of every ``<li>`` inside the element that follows it.

    :param raw_text: HTML fragment to parse; may be ``None`` or empty.
    :returns: List of caption strings, or ``None`` when *raw_text* is falsy,
        the heading is missing, or no list items are found. Parse failures
        are reported on stdout together with the parsed document.
    """
    if not raw_text:
        # Nothing to parse (e.g. the request failed upstream); handing None
        # to BeautifulSoup would raise instead of yielding "no captions".
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(raw_text, 'html.parser')
    header = soup.find('h4', text=re.compile(r'Top'))
    if not header:
        print('error parsing text')
        print(soup.prettify())
        return None

    next_sib = header.find_next_sibling()
    if next_sib is not None:
        # Debug output happens only after the None check; the heading may be
        # the last element, in which case there is nothing to prettify.
        if DEBUG:
            print(next_sib.prettify())
        items = next_sib.find_all('li')
        if items:
            return [item.text for item in items]

    print("no captions found?")
    print(soup.prettify())
    return None
def top_caption(raw_text):
    """Return the best available caption for *raw_text*.

    Tries the older list-based format first and falls back to the newer
    single-caption format when that yields nothing truthy.
    """
    from_list = old_caption(raw_text)
    if from_list:
        return from_list
    return new_caption(raw_text)
def old_caption(raw_text):
    """Return the first caption from the generated-captions list.

    :param raw_text: HTML fragment to parse.
    :returns: The first caption string, or ``None`` when ``captions()``
        produces nothing.
    """
    found = captions(raw_text)
    if DEBUG:
        print(found)
    return found[0] if found else None
def new_caption(raw_text):
    """Return the single caption carried in the TAGS field.

    As of Tue Jun 30 17:14:11 2015 a single caption is being returned in
    the tags field; this extracts that caption from the element following
    the ``TAGS`` heading.

    :param raw_text: HTML fragment to parse; may be ``None`` or empty.
    :returns: The stripped caption text, or ``None`` when *raw_text* is
        falsy, there is no ``TAGS`` heading, or no element follows it.
    """
    if not raw_text:
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(raw_text, 'html.parser')
    tag = soup.find('h4', text=re.compile(r'TAGS'))
    if not tag:
        return None

    # find_next_sibling() returns the next *element*, skipping any text node
    # between the tags, and returns None when the heading is the last one.
    caption_tag = tag.find_next_sibling()
    if caption_tag is None:
        return None
    return caption_tag.text.strip()
def main():
    """Run the caption parsers against two canned responses and print results.

    Prints the top caption for the newer single-caption response, then every
    caption and the top caption for the older top-5 response. Only the
    embedded sample strings are parsed.
    """
    sample_response = """<img id="result-img" src="../tmpfiles/20150107-10:35:13.jpg" height="300"/><h4>TAGS:</h4><h4> cycler peddler salesman rucksack pedicab </h4><br/><h4>Nearest Neighbor Sentence:</h4><ul><li>a woman outside with an umbrella riding a motor cart .</li></ul><br/><h4>Top-5 Generated:</h4><ul><li>two men are wearing a hat , riding on a bicycle with a backpack .</li><li>a man in a cart filled with bikes .</li><li>a man wearing a hat while trying to ride a bicycle on a bike .</li><li>a man riding a bicycle with a cart attached .</li><li>a man wearing a hat on a bicycle and carrying a cart .
</li></ul>"""
    new_response = '''<img id="result-img" src="../tmpfiles/20150630-17:10:37.jpg" height="300"/><h4>TAGS:</h4><h4> A group of people sitting in front of a television .\n </h4><br/><h4>Nearest Neighbor Sentence:</h4><ul></ul><br/><h4>Top-5 Generated:</h4><ul></ul>'''

    # Newer format: the caption lives in the TAGS field.
    print(top_caption(new_response))

    # Older format: a top-5 list. captions() may return None, so fall back
    # to an empty list rather than letting join() raise.
    print("\n".join(captions(sample_response) or []))
    print(top_caption(sample_response))
# Script entry point: run the parsing demo only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()