scrape content from other language codes

This commit is contained in:
Your Name 2024-06-05 00:00:54 -04:00 committed by Henri Dickson
parent 6decb37ae3
commit 9dbfd5fd8e
7 changed files with 34 additions and 13 deletions

View file

@ -51,5 +51,6 @@ jobs:
NEODB_SITE_NAME: test
NEODB_SITE_DOMAIN: test.domain
NEODB_SECRET_KEY: test
NEODB_LANGUAGE: zh-hans
run: |
python manage.py test

View file

@ -122,11 +122,21 @@ class DownloadError(Exception):
class BasicDownloader:
@staticmethod
def get_accept_language():
    """Return the ``Accept-Language`` header value for the configured site language.

    The value is chosen from ``settings.LANGUAGE_CODE``: Simplified Chinese
    and Traditional Chinese each get their own preference list, and every
    other language code falls back to an English-only list.
    """
    site_language = settings.LANGUAGE_CODE
    if site_language == "zh-hans":
        # Simplified Chinese first, then other Chinese variants, then English.
        return "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"
    if site_language == "zh-hant":
        # Traditional Chinese first, then generic Chinese, then English.
        return "zh-TW,zh-HK;q=0.7,zh;q=0.5,en-US;q=0.3,en;q=0.2"
    # Any other site language: English only.
    return "en-US;q=0.3,en;q=0.2"
headers = {
# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
"User-Agent": "Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
"Accept-Language": get_accept_language(),
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"DNT": "1",

View file

@ -63,7 +63,7 @@ class Goodreads:
headers={
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
"Accept-Language": BasicDownloader.get_accept_language(),
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"DNT": "1",

View file

@ -35,7 +35,7 @@ class AppleMusic(AbstractSite):
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
"Accept-Language": BasicDownloader.get_accept_language(),
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"DNT": "1",

View file

@ -16,8 +16,18 @@ from .douban import *
_logger = logging.getLogger(__name__)
def get_language_code():
    """Return the language tag to request for the configured site language.

    ``settings.LANGUAGE_CODE`` values ``"zh-hans"`` and ``"zh-hant"`` map to
    ``"zh-CN"`` and ``"zh-TW"`` respectively; anything else yields ``"en-US"``.
    """
    tags_by_site_language = {
        "zh-hans": "zh-CN",
        "zh-hant": "zh-TW",
    }
    return tags_by_site_language.get(settings.LANGUAGE_CODE, "en-US")
def search_tmdb_by_imdb_id(imdb_id):
    """Query the TMDB ``/3/find`` endpoint for an IMDb id.

    Args:
        imdb_id: IMDb identifier, interpolated into the request path and
            looked up with ``external_source=imdb_id``.

    Returns:
        The JSON-decoded body of the downloaded response.
    """
    # The response language follows the site setting via get_language_code()
    # instead of a fixed zh-CN value.
    tmdb_api_url = f"https://api.themoviedb.org/3/find/{imdb_id}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&external_source=imdb_id"
    return BasicDownloader(tmdb_api_url).download().json()
@ -50,9 +60,9 @@ class TMDB_Movie(AbstractSite):
def scrape(self):
is_series = False
if is_series:
api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
else:
api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
res_data = BasicDownloader(api_url).download().json()
@ -184,9 +194,9 @@ class TMDB_TV(AbstractSite):
def scrape(self):
is_series = True
if is_series:
api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
else:
api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
res_data = BasicDownloader(api_url).download().json()
@ -347,7 +357,7 @@ class TMDB_TVSeason(AbstractSite):
show_resource = site.get_resource_ready(auto_create=False, auto_link=False)
if not show_resource:
raise ValueError(f"TMDB: failed to get show for season {self.url}")
api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
d = BasicDownloader(api_url).download().json()
if not d.get("id"):
raise ParseError(self, "id")
@ -419,7 +429,7 @@ class TMDB_TVSeason(AbstractSite):
)
else:
ep = pd.metadata["episode_number_list"][0]
api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
d2 = BasicDownloader(api_url2).download().json()
if not d2.get("id"):
raise ParseError(self, "first episode id for season")
@ -459,7 +469,7 @@ class TMDB_TVEpisode(AbstractSite):
episode_id = v[2]
site = TMDB_TV(TMDB_TV.id_to_url(show_id))
show_resource = site.get_resource_ready(auto_create=False, auto_link=False)
api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}/episode/{episode_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}/episode/{episode_id}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
d = BasicDownloader(api_url).download().json()
if not d.get("id"):
raise ParseError(self, "id")

View file

@ -104,7 +104,7 @@ neodb-shell neodb-init
Run unit tests:
```
neodb-manage test
NEODB_LANGUAGE=zh-hans neodb-manage test
```
Update translations:

View file

@ -183,7 +183,7 @@ class MarkTest(TestCase):
mark = Mark(self.user1.identity, self.book1)
self.assertEqual(mark.shelf_type, ShelfType.WISHLIST)
self.assertEqual(mark.shelf_label, "books to read")
self.assertEqual(mark.shelf_label, "想读的书")
self.assertEqual(mark.comment_text, "a gentle comment")
self.assertEqual(mark.rating_grade, 9)
self.assertEqual(mark.visibility, 1)