Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved song matching, other updates #21

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
75 changes: 60 additions & 15 deletions retrieve-identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,72 @@
import struct
import urllib.parse, urllib.request
import json
import time
from difflib import SequenceMatcher

DEBUG = 0 # Set to 1 for closer inspection

def similar(a, b):
    """Return the similarity ratio of two sequences.

    Thin wrapper around ``difflib.SequenceMatcher`` with no junk filter.

    Args:
        a: First sequence (the callers in this file pass lower-cased strings).
        b: Second sequence.

    Returns:
        A float in [0.0, 1.0]; 1.0 means the sequences are identical.
    """
    return SequenceMatcher(None, a, b).ratio()


def retrieve_itunes_identifier(title, artist):
    """Look up a song in the iTunes store and return its identifier.

    The store is searched for "<artist> <title>" and every result of kind
    "song" is examined. Matches are ranked, best first:

    * ``'Primary'``   - artist equal (case-insensitive) and the store's song
      name contained in the requested title.
    * ``'Secondary'`` - artist equal and title similarity above 70%.
    * ``'Fuzzy'``     - artist similarity above 80% and title similarity
      above 70%.

    Args:
        title: Song title to look for.
        artist: Artist name to look for.

    Returns:
        A ``(song_id, match_kind)`` tuple for the best match, or ``None`` when
        nothing suitable is found or the lookup fails.
    """
    song_threshold = 0.7
    artist_threshold = 0.8

    headers = {
        "X-Apple-Store-Front" : "143446-10,32 ab:rSwnYxS0 t:music2",
        "X-Apple-Tz" : "7200"
    }

    search_string = str(artist) + " " + str(title)
    url = "https://itunes.apple.com/WebObjects/MZStore.woa/wa/search?clientApplication=MusicPlayer&term=" + urllib.parse.quote(search_string)
    request = urllib.request.Request(url, None, headers)

    wanted_title = title.lower()
    wanted_artist = artist.lower()

    try:
        # Close the HTTP response deterministically once it has been read.
        with urllib.request.urlopen(request) as response:
            data = json.loads(response.read().decode('utf-8'))
        songs = [result for result in data["storePlatformData"]["lockup"]["results"].values() if result["kind"] == "song"]

        # Remember the first lower-ranked candidates while scanning, so that a
        # Primary match later in the result list still wins over them.
        secondary_id = None
        fuzzy_id = None

        for song in songs:
            song_name = song["name"].lower()
            song_artist = song["artistName"].lower()

            song_match = similar(song_name, wanted_title)
            artist_match = similar(song_artist, wanted_artist)

            if DEBUG == 1:
                print ("Requested:",artist, " : ",title," => Received:",song["artistName"]," : " ,song["name"])
                print ("Confidence : Artist " + str(round(artist_match*100)),"% Song " + str(round(song_match*100)) + "%")
                input("Press Enter to continue...")

            if song_artist == wanted_artist:
                # Primary: the artist string must match and the store's name
                # must appear inside the requested title.
                if song_name in wanted_title:
                    return (song["id"],'Primary')
                # Secondary: same artist, title merely similar.
                if secondary_id is None and song_match > song_threshold:
                    secondary_id = song["id"]
            elif fuzzy_id is None and artist_match > artist_threshold and song_match > song_threshold:
                # Fuzzy: neither string is equal, but both are close.
                fuzzy_id = song["id"]

        if secondary_id is not None:
            return (secondary_id,'Secondary')
        if fuzzy_id is not None:
            return (fuzzy_id,'Fuzzy')

        print("FAIL: Could not find suitable match for: {} - {}".format(artist, title))
        return None

    except KeyError:
        # The response did not have the expected structure (e.g. no results).
        print("FAIL: Nothing returned for: {} - {}".format(artist, title))
        return None
    except (OSError, ValueError) as e:
        # We don't do any fancy error handling.. Just return None if something
        # went wrong. OSError covers urllib's URLError/HTTPError; ValueError
        # covers undecodable or non-JSON response bodies.
        print("FAIL: Lookup failed for: {} - {} ({})".format(artist, title, e))
        return None



itunes_identifiers = []


Expand All @@ -44,12 +80,21 @@ def retrieve_itunes_identifier(title, artist):
itunes_identifier = retrieve_itunes_identifier(title, artist)

if itunes_identifier:
itunes_identifiers.append(itunes_identifier)
print("{} - {} => {}".format(title, artist, itunes_identifier))
if itunes_identifier[1] == 'Primary':
itunes_identifiers.append(itunes_identifier)
print("SUCCESS: Exact match: {} - {} => {}".format(title, artist, itunes_identifier[0]))

elif itunes_identifier[1] == 'Secondary':
itunes_identifiers.append(itunes_identifier)
print("SUCCESS: Secondary match: {} - {} => {}".format(title, artist, itunes_identifier[0]))

elif itunes_identifier[1] == 'Fuzzy':
itunes_identifiers.append(itunes_identifier)
print("SUCCESS: Fuzzy match: {} - {} => {}".format(title, artist, itunes_identifier[0]))
else:
print("{} - {} => Not Found".format(title, artist))
pass


# Entries in itunes_identifiers are the tuples returned by
# retrieve_itunes_identifier; only element 0 (the identifier) is written,
# one per line.
with open('itunes.csv', 'w', encoding='utf-8') as output_file:
    for itunes_identifier in itunes_identifiers:
        output_file.write(str(itunes_identifier[0]) + "\n")