add imdb scraper
This commit is contained in:
parent
6b07963b51
commit
b192f32d4d
5 changed files with 118 additions and 7 deletions
|
@ -21,6 +21,7 @@ class SourceSiteEnum(models.TextChoices):
|
|||
IN_SITE = "in-site", CLIENT_NAME
|
||||
DOUBAN = "douban", _("豆瓣")
|
||||
SPOTIFY = "spotify", _("Spotify")
|
||||
IMDB = "imdb", _("IMDb")
|
||||
|
||||
|
||||
class Entity(models.Model):
|
||||
|
|
|
@ -9,7 +9,7 @@ import time
|
|||
from lxml import html
|
||||
from mimetypes import guess_extension
|
||||
from threading import Thread
|
||||
from boofilsic.settings import LUMINATI_USERNAME, LUMINATI_PASSWORD, DEBUG
|
||||
from boofilsic.settings import LUMINATI_USERNAME, LUMINATI_PASSWORD, DEBUG, IMDB_API_KEY
|
||||
from boofilsic.settings import SPOTIFY_CREDENTIAL
|
||||
from django.utils import timezone
|
||||
from django.utils.translation import ugettext_lazy as _
|
||||
|
@ -645,7 +645,6 @@ spotify_token_expire_time = time.time()
|
|||
|
||||
class SpotifyTrackScraper(AbstractScraper):
|
||||
site_name = SourceSiteEnum.SPOTIFY.value
|
||||
# API URL
|
||||
host = 'https://open.spotify.com/track/'
|
||||
data_class = Song
|
||||
form_class = SongForm
|
||||
|
@ -918,3 +917,100 @@ def invoke_spotify_token():
|
|||
# minus 2 for execution time error
|
||||
spotify_token_expire_time = int(data['expires_in']) + time.time() - 2
|
||||
spotify_token = data['access_token']
|
||||
|
||||
|
||||
class ImdbMovieScraper(AbstractScraper):
    """Scraper for IMDb title pages, backed by the imdb-api.com JSON API.

    Only items whose API ``type`` is ``Movie`` or ``TVSeries`` are accepted.
    """

    site_name = SourceSiteEnum.IMDB.value
    host = 'https://www.imdb.com/title/'
    data_class = Movie
    form_class = MovieForm

    # Matches the alphanumeric title code (e.g. ``tt0111161``) that directly
    # follows the ``https://www.imdb.com/title/`` prefix.
    regex = re.compile(r"(?<=https://www\.imdb\.com/title/)[a-zA-Z0-9]+")

    # Seconds to wait for the API before giving up, so that an unresponsive
    # server cannot block the caller indefinitely.
    api_timeout = 60

    def scrape(self, url):
        """Fetch the metadata and cover image of the IMDb title at ``url``.

        Args:
            url: A string containing ``https://www.imdb.com/title/<code>``.

        Returns:
            A ``(data, raw_img)`` tuple. ``data`` is the dict of fields built
            from the API response; ``raw_img`` is the first value returned by
            ``self.download_image``. Both, together with the image extension,
            are also stored on ``self`` as ``raw_data``, ``raw_img`` and
            ``img_ext``.

        Raises:
            ValueError: If ``url`` contains no IMDb title code, if the API
                reports a type other than ``Movie``/``TVSeries``, or if the
                reported year is not an integer string.
        """
        effective_url = self.get_effective_url(url)
        if effective_url is None:
            raise ValueError("not valid url")

        api_url = self.get_api_url(effective_url)
        r = requests.get(api_url, timeout=self.api_timeout)
        res_data = r.json()

        item_type = res_data['type']
        if item_type not in ('Movie', 'TVSeries'):
            raise ValueError("not movie/series item")
        is_series = item_type == 'TVSeries'

        title = res_data['title']
        orig_title = res_data['originalTitle']
        imdb_code = self.regex.findall(effective_url)[0]

        director = self._names(res_data['directorList'])
        playwright = self._names(res_data['writerList'])
        actor = self._names(res_data['actorList'])

        genre = self._split_list(res_data['genres'])
        area = self._split_list(res_data['countries'])
        language = self._split_list(res_data['languages'])

        # A missing year becomes None instead of crashing int().
        raw_year = res_data['year']
        year = int(raw_year) if raw_year else None

        duration = res_data['runtimeStr']
        # Prefer the localized plot; fall back to the default one.
        brief = res_data['plotLocal'] or res_data['plot']

        release_date = res_data['releaseDate']
        showtime = [{release_date: "发布日期"}] if release_date else None

        # Empty values are normalized to None.
        other_info = {
            '分级': res_data['contentRating'] or None,
            'IMDb评分': res_data['imDbRating'] or None,
            'Metacritic评分': res_data['metacriticRating'] or None,
            '奖项': res_data['awards'] or None,
        }

        raw_img, ext = self.download_image(res_data['image'])

        data = {
            'title': title,
            'orig_title': orig_title,
            'other_title': None,
            'imdb_code': imdb_code,
            'director': director,
            'playwright': playwright,
            'actor': actor,
            'genre': genre,
            'showtime': showtime,
            'site': None,
            'area': area,
            'language': language,
            'year': year,
            'duration': duration,
            'season': None,
            'episodes': None,
            'single_episode_length': None,
            'brief': brief,
            'is_series': is_series,
            'other_info': other_info,
            'source_site': self.site_name,
            'source_url': effective_url,
        }
        self.raw_data, self.raw_img, self.img_ext = data, raw_img, ext
        return data, raw_img

    @staticmethod
    def _names(people):
        """Return the ``name`` of each entry in ``people``; ``[]`` if it is ``None``."""
        return [person['name'] for person in people or []]

    @staticmethod
    def _split_list(text):
        """Split a ``", "``-separated string; ``None`` if it is empty or ``None``."""
        return text.split(', ') if text else None

    @classmethod
    def get_effective_url(cls, raw_url):
        """Return the canonical title URL found in ``raw_url``, or ``None``.

        The result has the form ``https://www.imdb.com/title/<code>/``, built
        from the first title code that ``cls.regex`` finds.
        """
        code = cls.regex.findall(raw_url)
        if code:
            return f"https://www.imdb.com/title/{code[0]}/"
        return None

    @classmethod
    def get_api_url(cls, url):
        """Build the imdb-api.com ``Title`` endpoint URL for the title in ``url``.

        Raises:
            IndexError: If ``url`` contains no title code matching
                ``cls.regex``.
        """
        return f"https://imdb-api.com/zh/API/Title/{IMDB_API_KEY}/{cls.regex.findall(url)[0]}/FullActor,"
|
||||
|
|
|
@ -1216,7 +1216,7 @@ select::placeholder {
|
|||
font-weight: lighter;
|
||||
letter-spacing: 0.1rem;
|
||||
word-break: keep-all;
|
||||
opacity: 0.8;
|
||||
opacity: 1;
|
||||
position: relative;
|
||||
top: -1px;
|
||||
}
|
||||
|
@ -1239,6 +1239,13 @@ select::placeholder {
|
|||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* Source label variant for IMDb: dark text on a yellow background, bold, no border. */
.source-label.source-label__imdb {
|
||||
background-color: #F5C518;
|
||||
color: #121212;
|
||||
border: none;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.main-section-wrapper {
|
||||
padding: 32px 48px 32px 36px;
|
||||
background-color: #f7f7f7;
|
||||
|
|
2
common/static/css/boofilsic.min.css
vendored
2
common/static/css/boofilsic.min.css
vendored
File diff suppressed because one or more lines are too long
|
@ -1,10 +1,12 @@
|
|||
// source label name should match the enum value in `common.models.SourceSiteEnum`
|
||||
|
||||
$in-site-color: $color-primary
|
||||
$douban-color-primary: #319840
|
||||
$douban-color-secondary: white
|
||||
$in-site-color: $color-primary
|
||||
$spotify-color-primary: #1ed760
|
||||
$spotify-color-secondary: black
|
||||
$imdb-color-primary: #F5C518
|
||||
$imdb-color-secondary: #121212
|
||||
|
||||
.source-label
|
||||
display: inline
|
||||
|
@ -20,7 +22,7 @@ $spotify-color-secondary: black
|
|||
font-weight: lighter
|
||||
letter-spacing: 0.1rem
|
||||
word-break: keep-all
|
||||
opacity: 0.8
|
||||
opacity: 1
|
||||
|
||||
|
||||
position: relative
|
||||
|
@ -38,4 +40,9 @@ $spotify-color-secondary: black
|
|||
background-color: $spotify-color-primary
|
||||
color: $spotify-color-secondary
|
||||
border: none
|
||||
font-weight: bold
|
||||
font-weight: bold
|
||||
&.source-label__imdb
|
||||
background-color: $imdb-color-primary
|
||||
color: $imdb-color-secondary
|
||||
border: none
|
||||
font-weight: bold
|
||||
|
|
Loading…
Add table
Reference in a new issue