# denver.py -- scrape azlyrics song pages and pull the nouns out of the lyrics.
from urllib2 import urlopen
from bs4 import BeautifulSoup
from pattern.en import tag
import string
import random
def random_denver_song(link):
    """Pick a random song linked from an artist page and return its nouns.

    The page at ``link`` is downloaded and parsed, every ``<a href=...>``
    inside the ``<div id="listAlbum">`` element is collected, one of the
    remaining links is chosen at random and handed to ``denver_lyrics``.

    Args:
        link: URL of the artist page to scrape.

    Returns:
        Whatever ``denver_lyrics`` returns for the chosen song link.

    Raises:
        AttributeError: if the page has no ``listAlbum`` div (``find``
            returns ``None`` in that case).
        IndexError: if no usable song link is found on the page.
    """
    # The response object is closed explicitly; on Python 2 it is not a
    # context manager, so try/finally is used instead of ``with``.
    response = urlopen(link)
    try:
        html = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(html)
    anchors = soup.find('div', {'id': 'listAlbum'}).findAll('a')
    hrefs = [a['href'] for a in anchors if a.has_attr('href')]

    # Drop links whose 15th character is 'z'.
    # NOTE(review): index 14 is the 'z' of "http://www.amazon.com/...", so
    # this presumably filters out Amazon store links -- confirm.
    # Hrefs too short to have a 15th character (e.g. "#") are skipped as
    # well instead of raising IndexError.
    song_links = [h for h in hrefs if len(h) > 14 and h[14] != 'z']
    if not song_links:
        # Same exception type random.choice raises on an empty sequence,
        # but with a message that says what actually went wrong.
        raise IndexError('no song links found on %s' % link)

    return denver_lyrics(random.choice(song_links))
def denver_lyrics(link):
    """Download a song page and return the distinct nouns in its lyrics.

    The lyrics are carved out of the concatenated text of every ``<div>``
    on the page, stripped of punctuation, lower-cased and split into
    words.  Each word longer than two characters is run through
    ``pattern.en.tag``; results whose first tag is ``NN`` or ``NNP`` are
    collected without duplicates, in order of first appearance.

    Args:
        link: URL of the song page.  A leading ``..`` is replaced by the
            azlyrics host (presumably the relative form found on artist
            pages -- confirm against the caller).

    Returns:
        A list of ``tag(word)`` results, one per distinct noun.
    """
    # Only a leading ".." is rewritten; replacing every ".." would corrupt
    # URLs containing it elsewhere and produced a double slash after the
    # host.
    if link.startswith('..'):
        link = 'http://www.azlyrics.com' + link[2:]

    # Close the response explicitly (not a context manager on Python 2).
    response = urlopen(link)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    # Text of all divs, then everything after the first "lyrics" marker
    # (later occurrences of the marker are removed by the join).
    page_text = ''.join([div.text for div in soup.findAll('div')])
    after_marker = ''.join(page_text.split('lyrics')[1:])
    # Cut at the marker string that follows the lyrics text.
    # NOTE(review): the "if" presumably starts an inline script -- confirm.
    body = after_marker.split('\n\n\n\n\r\nif')[0].strip()
    # The text is deliberately NOT passed through str(): on Python 2 that
    # raises UnicodeEncodeError as soon as the lyrics contain a non-ASCII
    # character.
    body = body.replace('LYRICS', '').replace('JOHN DENVER', '')

    # Keep non-empty lines that do not start with '[' or a space.
    kept_lines = [line for line in body.splitlines()
                  if line and line[0] not in ('[', ' ')]
    text = ' '.join(kept_lines)

    exclude = set(string.punctuation)
    text = ''.join(ch.lower() for ch in text if ch not in exclude)

    nounlist = []
    tagged_words = set()  # words already sent through tag()
    for word in text.split():
        # Cheap checks first so tag() runs at most once per distinct word.
        if len(word) <= 2 or word in tagged_words:
            continue
        tagged_words.add(word)
        tagged = tag(word)
        # Guard against an empty tagging result before indexing into it.
        if tagged and tagged[0][1] in ('NN', 'NNP') and tagged not in nounlist:
            nounlist.append(tagged)
    return nounlist
# Example artist-page URLs (presumably inputs for random_denver_song):
#   http://www.azlyrics.com/n/neildiamond.html
#   http://www.azlyrics.com/j/johndenver.html