summaryrefslogtreecommitdiffstats
path: root/addons/metadata.themoviedb.org.python/python/lib/tmdbscraper/tmdb.py
blob: 2852f9eaa3df1c73f45b7537a057281072298bf1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
from datetime import datetime, timedelta
from . import tmdbapi


class TMDBMovieScraper(object):
    def __init__(self, url_settings, language, certification_country):
        self.url_settings = url_settings
        self.language = language
        self.certification_country = certification_country
        self._urls = None

    @property
    def urls(self):
        if not self._urls:
            self._urls = _load_base_urls(self.url_settings)
        return self._urls

    def search(self, title, year=None):
        search_media_id = _parse_media_id(title)
        if search_media_id:
            if search_media_id['type'] == 'tmdb':
                result = _get_movie(search_media_id['id'], self.language, True)
                result = [result]
            else:
                response = tmdbapi.find_movie_by_external_id(search_media_id['id'], language=self.language)
                theerror = response.get('error')
                if theerror:
                    return 'error: {}'.format(theerror)
                result = response.get('movie_results')
            if 'error' in result:
                return result
        else:
            response = tmdbapi.search_movie(query=title, year=year, language=self.language)
            theerror = response.get('error')
            if theerror:
                return 'error: {}'.format(theerror)
            result = response['results']
        urls = self.urls

        def is_best(item):
            return item['title'].lower() == title and (
                not year or item.get('release_date', '').startswith(year))
        if result and not is_best(result[0]):
            best_first = next((item for item in result if is_best(item)), None)
            if best_first:
                result = [best_first] + [item for item in result if item is not best_first]

        for item in result:
            if item.get('poster_path'):
                item['poster_path'] = urls['preview'] + item['poster_path']
            if item.get('backdrop_path'):
                item['backdrop_path'] = urls['preview'] + item['backdrop_path']
        return result

    def get_details(self, uniqueids):
        media_id = uniqueids.get('tmdb') or uniqueids.get('imdb')
        details = self._gather_details(media_id)
        if not details:
            return None
        if details.get('error'):
            return details
        return self._assemble_details(**details)

    def _gather_details(self, media_id):
        movie = _get_movie(media_id, self.language)
        if not movie or movie.get('error'):
            return movie

        # don't specify language to get English text for fallback
        movie_fallback = _get_movie(media_id)

        collection = _get_moviecollection(movie['belongs_to_collection'].get('id'), self.language) if \
            movie['belongs_to_collection'] else None
        collection_fallback = _get_moviecollection(movie['belongs_to_collection'].get('id')) if \
            movie['belongs_to_collection'] else None

        return {'movie': movie, 'movie_fallback': movie_fallback, 'collection': collection,
            'collection_fallback': collection_fallback}

    def _assemble_details(self, movie, movie_fallback, collection, collection_fallback):
        info = {
            'title': movie['title'],
            'originaltitle': movie['original_title'],
            'plot': movie.get('overview') or movie_fallback.get('overview'),
            'tagline': movie.get('tagline') or movie_fallback.get('tagline'),
            'studio': _get_names(movie['production_companies']),
            'genre': _get_names(movie['genres']),
            'country': _get_names(movie['production_countries']),
            'credits': _get_cast_members(movie['casts'], 'crew', 'Writing', ['Screenplay', 'Writer', 'Author']),
            'director': _get_cast_members(movie['casts'], 'crew', 'Directing', ['Director']),
            'premiered': movie['release_date'],
            'tag': _get_names(movie['keywords']['keywords'])
        }

        if 'countries' in movie['releases']:
            certcountry = self.certification_country.upper()
            for country in movie['releases']['countries']:
                if country['iso_3166_1'] == certcountry and country['certification']:
                    info['mpaa'] = country['certification']
                    break

        trailer = _parse_trailer(movie.get('trailers', {}), movie_fallback.get('trailers', {}))
        if trailer:
            info['trailer'] = trailer
        if collection:
            info['set'] = collection.get('name') or collection_fallback.get('name')
            info['setoverview'] = collection.get('overview') or collection_fallback.get('overview')
        if movie.get('runtime'):
            info['duration'] = movie['runtime'] * 60

        ratings = {'themoviedb': {'rating': float(movie['vote_average']), 'votes': int(movie['vote_count'])}}
        uniqueids = {'tmdb': movie['id'], 'imdb': movie['imdb_id']}
        cast = [{
                'name': actor['name'],
                'role': actor['character'],
                'thumbnail': self.urls['original'] + actor['profile_path']
                    if actor['profile_path'] else "",
                'order': actor['order']
            }
            for actor in movie['casts'].get('cast', [])
        ]
        available_art = _parse_artwork(movie, collection, self.urls, self.language)

        _info = {'set_tmdbid': movie['belongs_to_collection'].get('id')
            if movie['belongs_to_collection'] else None}

        return {'info': info, 'ratings': ratings, 'uniqueids': uniqueids, 'cast': cast,
            'available_art': available_art, '_info': _info}

def _parse_media_id(title):
    if title.startswith('tt') and title[2:].isdigit():
        return {'type': 'imdb', 'id':title} # IMDB ID works alone because it is clear
    title = title.lower()
    if title.startswith('tmdb/') and title[5:].isdigit(): # TMDB ID
        return {'type': 'tmdb', 'id':title[5:]}
    elif title.startswith('imdb/tt') and title[7:].isdigit(): # IMDB ID with prefix to match
        return {'type': 'imdb', 'id':title[5:]}
    return None

def _get_movie(mid, language=None, search=False):
    details = None if search else \
        'trailers,images,releases,casts,keywords' if language is not None else \
        'trailers'
    response = tmdbapi.get_movie(mid, language=language, append_to_response=details)
    theerror = response.get('error')
    if theerror:
        return 'error: {}'.format(theerror)
    else:
        return response

def _get_moviecollection(collection_id, language=None):
    if not collection_id:
        return None
    details = 'images'
    response = tmdbapi.get_collection(collection_id, language=language, append_to_response=details)
    theerror = response.get('error')
    if theerror:
        return 'error: {}'.format(theerror)
    else:
        return response

def _parse_artwork(movie, collection, urlbases, language):
    if language:
        # Image languages don't have regional variants
        language = language.split('-')[0]
    posters = []
    landscape = []
    fanart = []
    if 'images' in movie:
        posters = _get_images_with_fallback(movie['images']['posters'], urlbases, language)
        landscape = _get_images(movie['images']['backdrops'], urlbases, language)
        fanart = _get_images(movie['images']['backdrops'], urlbases, None)

    setposters = []
    setlandscape = []
    setfanart = []
    if collection and 'images' in collection:
        setposters = _get_images_with_fallback(collection['images']['posters'], urlbases, language)
        setlandscape = _get_images(collection['images']['backdrops'], urlbases, language)
        setfanart = _get_images(collection['images']['backdrops'], urlbases, None)

    return {'poster': posters, 'landscape': landscape, 'fanart': fanart,
        'set.poster': setposters, 'set.landscape': setlandscape, 'set.fanart': setfanart}

def _get_images_with_fallback(imagelist, urlbases, language, language_fallback='en'):
    images = _get_images(imagelist, urlbases, language)

    # Add backup images
    if language != language_fallback:
        images.extend(_get_images(imagelist, urlbases, language_fallback))

    # Add any images if nothing set so far
    if not images:
        images = _get_images(imagelist, urlbases)

    return images

def _get_images(imagelist, urlbases, language='_any'):
    result = []
    for img in imagelist:
        if language != '_any' and img['iso_639_1'] != language:
            continue
        result.append({
            'url': urlbases['original'] + img['file_path'],
            'preview': urlbases['preview'] + img['file_path'],
        })
    return result

def _get_date_numeric(datetime_):
    return (datetime_ - datetime(1970, 1, 1)).total_seconds()

def _load_base_urls(url_settings):
    urls = {}
    urls['original'] = url_settings.getSettingString('originalUrl')
    urls['preview'] = url_settings.getSettingString('previewUrl')
    last_updated = url_settings.getSettingString('lastUpdated')
    if not urls['original'] or not urls['preview'] or not last_updated or \
            float(last_updated) < _get_date_numeric(datetime.now() - timedelta(days=30)):
        conf = tmdbapi.get_configuration()
        if conf:
            urls['original'] = conf['images']['secure_base_url'] + 'original'
            urls['preview'] = conf['images']['secure_base_url'] + 'w780'
            url_settings.setSetting('originalUrl', urls['original'])
            url_settings.setSetting('previewUrl', urls['preview'])
            url_settings.setSetting('lastUpdated', str(_get_date_numeric(datetime.now())))
    return urls

def _parse_trailer(trailers, fallback):
    if trailers.get('youtube'):
        return 'plugin://plugin.video.youtube/?action=play_video&videoid='+trailers['youtube'][0]['source']
    if fallback.get('youtube'):
        return 'plugin://plugin.video.youtube/?action=play_video&videoid='+fallback['youtube'][0]['source']
    return None

def _get_names(items):
    return [item['name'] for item in items] if items else []

def _get_cast_members(casts, casttype, department, jobs):
    result = []
    if casttype in casts:
        for cast in casts[casttype]:
            if cast['department'] == department and cast['job'] in jobs and cast['name'] not in result:
                result.append(cast['name'])
    return result