/
scraping.py
101 lines (83 loc) · 3.43 KB
/
scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from urllib.parse import quote_plus

import psycopg2
import requests
from bs4 import BeautifulSoup
from discord import Embed, Colour

import processing
import settings
class HandSpeak:
    """Scrape handspeak.com for dictionary searches and the word of the day.

    Also builds the Discord embed used to display search results.
    """

    # Seconds to wait for handspeak.com before giving up; without a timeout
    # a stalled server would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def search(self, input, current_page=1):
        """Search the HandSpeak dictionary and format the hits as Markdown.

        Args:
            input: The search phrase. (The name shadows the builtin, but it
                is kept because callers may pass it by keyword.)
            current_page: 1-based page of results to request.

        Returns:
            A dict with two keys:
            ``'queryResults'`` - a Markdown bullet list of links, or the
            string ``"No results found."`` when the result list is absent;
            ``'numPages'`` - the number of result pages (0 when there are
            no results, 1 when the pagination list cannot be parsed).

        Raises:
            requests.RequestException: If the HTTP request fails or times
                out.
        """
        url = "https://www.handspeak.com/word/search/app/app-dictionary.php"
        # Passing a mapping lets requests form-encode the values, so a query
        # containing '&', '=', '+' or non-ASCII text is transmitted intact.
        payload = {'page': current_page, 'query': input}
        headers = {
            'Accept': '*/*',
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        }
        response = requests.request(
            "POST",
            url,
            headers=headers,
            data=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.text, 'html.parser')

        # The result list is missing from the page when nothing matched.
        result_list = soup.find('ul', class_='col-abc')
        if result_list is None:
            return {
                'queryResults': "No results found.",
                'numPages': 0,
            }

        results_string = "".join(
            f"• [{link.get_text()}](https://www.handspeak.com{link.get('href')})\n"
            for link in result_list.find_all('a')
        )
        return {
            'queryResults': results_string,
            'numPages': self._parse_num_pages(soup),
        }

    @staticmethod
    def _parse_num_pages(soup):
        """Return the page count read from the pagination list, or 1 if unreadable."""
        try:
            items = soup.find('ul', class_='pagination').find_all('li')
            # Presumably the last <li> is a "next" control and the one before
            # it holds the final page number - TODO confirm against the site.
            return int(items[-2].get_text())
        except (AttributeError, IndexError, ValueError):
            # Results exist but the pagination markup is absent or not
            # numeric: report a single page instead of crashing.
            return 1

    def wordOfTheDay(self):
        """Fetch the HandSpeak home page and return the word-of-the-day message.

        Returns:
            A Markdown string containing the upper-cased word followed by
            the absolute URL of its video.

        Raises:
            requests.RequestException: If the HTTP request fails or times
                out.
            IndexError, AttributeError: If the page does not have the
                expected structure.
        """
        url = "https://www.handspeak.com"
        response = requests.request(
            method="GET", url=url, timeout=self.REQUEST_TIMEOUT
        )
        soup = BeautifulSoup(response.text, 'html.parser')
        # NOTE(review): relies on the second <section> of the home page being
        # the word-of-the-day panel - verify if the site layout changes.
        wotd_section = soup.find_all('section')[1]
        relative_video_url = wotd_section.find('video').get('src')
        english_equivalent = wotd_section.find('span', class_='asl').get_text().upper()
        video_url = url + relative_video_url
        return f"**The Word of the Day is:** `{english_equivalent}`\n{video_url}"

    @staticmethod
    def _google_query(search_input):
        """Return *search_input* as a URL-safe, '+'-separated query string."""
        # Collapse whitespace runs first so each gap becomes a single '+',
        # then percent-encode characters such as '&' or ')' that would
        # otherwise break the URL or the surrounding Markdown link.
        return quote_plus(' '.join(search_input.split()))

    def makeSearchEmbed(self, description, search_input):
        """Build the Discord embed that presents search results.

        Args:
            description: Text for the embed body (the formatted results).
            search_input: The phrase the user searched for; shown in the
                title and used for the Google fallback link.

        Returns:
            A ``discord.Embed`` with a title, description, footer, and an
            "Additional info" field linking to a site-restricted Google
            search.
        """
        query_formatted = self._google_query(search_input)
        embed = Embed(
            title=f"Search results: {search_input}",
            description=description,
            colour=Colour.blue(),
        )
        embed.set_footer(text="HandSpeak.com", icon_url="https://i.imgur.com/TBq0Afu.png")
        embed.add_field(
            name='Additional info',
            value=f'[See Google search results ⯈](https://www.google.com/search?hl=en&q=site%3Ahandspeak.com+{query_formatted})',
        )
        return embed
class LifePrint:
    """Query the ``lifeprint_vids`` PostgreSQL table."""

    # Connection parameters, read from settings.DB_SETTINGS when the class
    # is defined.
    hostname = settings.DB_SETTINGS['host']
    username = settings.DB_SETTINGS['user']
    password = settings.DB_SETTINGS['password']
    database = settings.DB_SETTINGS['db']

    def sqlQuery(self, query, params=None):
        """Run *query* on a fresh connection and return every row.

        Args:
            query: SQL text. Use ``%s`` placeholders for values and supply
                them through *params* rather than formatting them in.
            params: Optional sequence or mapping of values bound to the
                placeholders. When omitted, the query is executed as
                written.

        Returns:
            The list of row tuples from ``cursor.fetchall()``.
        """
        conn = psycopg2.connect(
            host=self.hostname,
            user=self.username,
            password=self.password,
            dbname=self.database,
        )
        # Close the connection even when execute/fetchall raises, so a
        # failing query does not leak a database connection.
        try:
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        finally:
            conn.close()

    def randomVid(self):
        """Return the ``video_url`` of one randomly chosen row.

        Raises:
            IndexError: If the table yields no rows.
        """
        query = """
            SELECT video_url FROM lifeprint_vids
            ORDER BY RANDOM()
            LIMIT 1
        """
        rows = self.sqlQuery(query)
        return rows[0][0]

    def search(self, search_input):
        """Return all rows whose ``phrase`` contains *search_input*.

        The value is sent as a bound parameter, so quotes or SQL fragments
        in user input cannot alter the statement. ``%`` and ``_`` inside
        *search_input* still act as LIKE wildcards, as before.
        """
        query = """
            SELECT * FROM lifeprint_vids
            WHERE phrase LIKE %s
        """
        return self.sqlQuery(query, (f"%{search_input}%",))