forked from thuandt/OnlineMusicDownloader
/
ZingRadioParser.py
63 lines (54 loc) · 2.07 KB
/
ZingRadioParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" ZingRadioParser - Parse data from http://mp3.zing.vn/radio
Parse the web page to get the xml url
Get the xml file and parse it to get the data
"""
from urllib import urlopen
from HTMLParser import HTMLParser
from xml.etree import ElementTree as ET
# Module metadata: authorship, licensing and release status of this parser.
__author__ = "Thuan.D.T (MrTux)"
__copyright__ = "Copyright (c) 2011 Thuan.D.T (MrTux) "
__credits__ = ["Thuan.D.T"]
__license__ = "GPL"
__version__ = "0.0.1"
__maintainer__ = "Thuan.D.T (MrTux)"
__email__ = "mrtux@ubuntu-vn.org"
__status__ = "Development"
class ZingRadioParser(HTMLParser):
    """Collect the playlist published by an mp3.zing.vn radio page.

    The constructor downloads the page at ``url``, looks for the
    ``xmlURL: '...'`` setting embedded in it, downloads that XML playlist and
    stores one entry per ``<item>`` element in four parallel lists, so index
    ``i`` of every list describes the same song.

    xml_url: playlist address extracted from the web page
    song_name: stripped <title> text of every item ("" when absent)
    song_artist: stripped <performer> text of every item ("" when absent)
    song_link: <source> text of every item, unmodified (None when absent)
    song_type: "type" attribute of every item ("" when absent)
    """

    def __init__(self, url):
        """Download the radio page at ``url`` and load its playlist.

        url: link to mp3.zing.vn/radio web page

        Raises IOError when the page contains no quoted ``xmlURL:`` value;
        errors from the downloads and from XML parsing propagate unchanged.
        """
        HTMLParser.__init__(self)
        self.song_name = []
        self.song_artist = []
        self.song_link = []
        self.song_type = []
        self.xml_url = self._find_xml_url(self._read_text(url))
        if not self.xml_url:
            # Fail here with a clear message instead of letting urlopen("")
            # produce an unrelated-looking error further down.
            raise IOError("no quoted xmlURL value found in %s" % url)
        self._load_playlist(self.xml_url)

    def _read_text(self, url):
        """Return the body of ``url`` as text and close the connection."""
        page = urlopen(url)
        try:
            body = page.read()
        finally:
            page.close()
        # read() gives ``str`` on Python 2 but ``bytes`` on Python 3; only the
        # latter has to be decoded before it can be searched as text.
        if not isinstance(body, str):
            body = body.decode("utf-8", "replace")
        return body

    def _find_xml_url(self, page_text):
        """Return the single-quoted value following ``xmlURL:``, or ""."""
        for line in page_text.split("\n"):
            _, marker, tail = line.partition("xmlURL:")
            if not marker:
                continue
            # Only look after the marker so that an earlier quoted value on
            # the same line is not mistaken for the playlist address.
            pieces = tail.split("'")
            if len(pieces) > 1:
                return pieces[1]
        return ""

    def _load_playlist(self, xml_url):
        """Download the XML playlist and append one entry per <item>."""
        feed = urlopen(xml_url)
        try:
            root = ET.parse(feed).getroot()
        finally:
            feed.close()
        # Walk item by item (rather than tag by tag) so that an item missing
        # one field still contributes to every list and the lists stay aligned.
        for item in root.findall("./item"):
            self.song_name.append((item.findtext("title") or "").strip())
            self.song_artist.append((item.findtext("performer") or "").strip())
            source = item.find("source")
            self.song_link.append(source.text if source is not None else None)
            self.song_type.append(item.get("type", ""))

    def music_data(self):
        """Returns data of Object
        song_name: list of song name
        song_artist: list of artist
        song_link: list of mp3 media link
        song_type: list of media file type
        """
        return self.song_name, self.song_artist, self.song_link, self.song_type