summaryrefslogtreecommitdiffstats
path: root/addons/metadata.generic.albums/lib/allmusic.py
blob: 7d55324f1b25813a16bf6d6450a564310abadda2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-

import datetime
import difflib
import time
import re

def allmusic_albumfind(data, artist, album):
    """Find the best matching album in an AllMusic search result page.

    data   -- UTF-8 encoded bytes of the search result page
    artist -- artist name to look for
    album  -- album title to look for

    Returns a list holding at most one dict with the keys 'artist', 'album'
    and 'url'; the list is empty when no entry matches closely enough.
    Names are compared case-insensitively, exactly as they appear in the
    markup (HTML entities are not unescaped).
    """
    data = data.decode('utf-8')
    albums = []
    # lowercase the search terms once instead of on every iteration
    wanted_artist = artist.lower()
    wanted_album = album.lower()
    albumlist = re.findall(r'class="album">\s*(.*?)\s*</li', data, re.S)
    for item in albumlist:
        albumdata = {}
        albumartist = re.search(r'class="artist">.*?>(.*?)</a', item, re.S)
        if not albumartist:
            # classical album
            continue
        albumdata['artist'] = albumartist.group(1)
        albumname = re.search(r'class="title">.*?>(.*?)</a', item, re.S)
        if not albumname:
            # not likely to happen, but just in case
            continue
        albumdata['album'] = albumname.group(1)
        # filter inaccurate results
        artistmatch = difflib.SequenceMatcher(None, wanted_artist, albumdata['artist'].lower()).ratio()
        albummatch = difflib.SequenceMatcher(None, wanted_album, albumdata['album'].lower()).ratio()
        if artistmatch > 0.90 and albummatch > 0.90:
            albumurl = re.search(r'class="title">\s*<a href="(.*?)"', item)
            if not albumurl:
                # not likely to happen, but just in case
                continue
            albumdata['url'] = albumurl.group(1)
            albums.append(albumdata)
            # we are only interested in the top result
            break
    return albums

def _allmusic_releasedate(text):
    """Normalise a release date string; return None when it cannot be parsed."""
    text = text.strip()
    if len(text) <= 4:
        # year only: pass through unchanged
        return text
    # NOTE: %B matches the month names of the current locale
    # 'month day, year' first, then 'month, year'
    for informat, outformat in (('%B %d, %Y', '%Y-%m-%d'), ('%B, %Y', '%Y-%m')):
        try:
            parsed = time.strptime(text, informat)
        except ValueError:
            continue
        return datetime.datetime(*parsed[0:3]).strftime(outformat)
    return None

def allmusic_albumdetails(data):
    """Extract album details from an AllMusic album page.

    data -- UTF-8 encoded bytes of the album page

    Returns a dict. 'votes' is always present (empty string); the keys
    'releasedate', 'year', 'genre', 'styles', 'moods', 'themes', 'rating',
    'album', 'label', 'artist', 'artist_description' and 'thumb' are only
    set when the corresponding markup is found. A release date in an
    unrecognised format is left out instead of raising.
    """
    data = data.decode('utf-8')
    albumdata = {}
    releasedata = re.search(r'class="release-date">.*?<span>(.*?)<', data, re.S)
    if releasedata:
        releasedate = _allmusic_releasedate(releasedata.group(1))
        if releasedate is not None:
            albumdata['releasedate'] = releasedate
    yeardata = re.search(r'class="year".*?>\s*(.*?)\s*<', data)
    if yeardata:
        albumdata['year'] = yeardata.group(1)
    genredata = re.search(r'class="genre">.*?">(.*?)<', data, re.S)
    if genredata:
        albumdata['genre'] = genredata.group(1)
    # styles, moods and themes share the same markup: a div holding a list of links
    for key in ('styles', 'moods', 'themes'):
        section = re.search(r'class="%s">.*?div>\s*(.*?)\s*</div' % key, data, re.S)
        if section:
            names = re.findall(r'">(.*?)<', section.group(1))
            if names:
                albumdata[key] = ' / '.join(names)
    ratingdata = re.search(r'itemprop="ratingValue">\s*(.*?)\s*</div', data)
    if ratingdata:
        albumdata['rating'] = ratingdata.group(1)
    albumdata['votes'] = ''
    titledata = re.search(r'class="album-title".*?>\s*(.*?)\s*<', data, re.S)
    if titledata:
        albumdata['album'] = titledata.group(1)
    labeldata = re.search(r'class="label-catalog".*?<.*?>(.*?)<', data, re.S)
    if labeldata:
        albumdata['label'] = labeldata.group(1)
    artistdata = re.search(r'class="album-artist".*?<span.*?>\s*(.*?)\s*</span', data, re.S)
    if artistdata:
        artistlist = re.findall(r'">(.*?)<', artistdata.group(1))
        artists = [{'artist': item} for item in artistlist]
        if artists:
            albumdata['artist'] = artists
            albumdata['artist_description'] = ' / '.join(artistlist)
    thumbsdata = re.search(r'class="album-contain".*?src="(.*?)"', data, re.S)
    if thumbsdata:
        thumb = thumbsdata.group(1)
        # drop the partner suffix; str.rstrip() would strip a character set
        # and could eat trailing characters of the url itself
        partner = '?partner=allrovi.com'
        if thumb.endswith(partner):
            thumb = thumb[:-len(partner)]
        # ignore internal blank thumb
        if thumb.startswith('http'):
            thumbdata = {}
            # 0=largest / 1=75 / 2=150 / 3=250 / 4=400 / 5=500 / 6=1080
            sizeflag = 'f=5'
            if thumb.endswith(sizeflag):
                # only swap the trailing size flag, not other occurrences in the url
                base = thumb[:-len(sizeflag)]
                thumbdata['image'] = base + 'f=0'
                thumbdata['preview'] = base + 'f=2'
            else:
                thumbdata['image'] = thumb
                thumbdata['preview'] = thumb
            thumbdata['aspect'] = 'thumb'
            albumdata['thumb'] = [thumbdata]
    return albumdata