From e45980a85abcdf25af79bf25f5b03ed9d9011ca7 Mon Sep 17 00:00:00 2001 From: mein Name Date: Sun, 9 Mar 2025 11:23:58 -0400 Subject: [PATCH] fix apple music --- boofilsic/settings.py | 3 +- catalog/music/tests.py | 24 +- catalog/sites/apple_music.py | 100 +- ...ttps___music_apple_com_cn_album_1284391545 | 209 ++ ...ttps___music_apple_com_fr_album_1284391545 | 201 ++ ...ttps___music_apple_com_jp_album_1284391545 | 197 ++ ...ttps___music_apple_com_kr_album_1284391545 | 197 ++ ...ttps___music_apple_com_us_album_1284391545 | 1920 +---------------- 8 files changed, 977 insertions(+), 1874 deletions(-) create mode 100644 test_data/https___music_apple_com_cn_album_1284391545 create mode 100644 test_data/https___music_apple_com_fr_album_1284391545 create mode 100644 test_data/https___music_apple_com_jp_album_1284391545 create mode 100644 test_data/https___music_apple_com_kr_album_1284391545 diff --git a/boofilsic/settings.py b/boofilsic/settings.py index 04b1df5f..3f74a9cb 100644 --- a/boofilsic/settings.py +++ b/boofilsic/settings.py @@ -449,6 +449,7 @@ LANGUAGE_CODE, PREFERRED_LANGUAGES = _init_language_settings( if TESTING: # force en if testing LANGUAGE_CODE = "en" + PREFERRED_LANGUAGES = ["en"] LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")] @@ -580,7 +581,7 @@ SEARCH_INDEX_NEW_ONLY = False INDEX_ALIASES = env("INDEX_ALIASES") -DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="/tmp") # type: ignore +DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="") # type: ignore DISABLE_MODEL_SIGNAL = False # disable index and social feeds during importing/etc diff --git a/catalog/music/tests.py b/catalog/music/tests.py index 0bfd8a98..d489fe7a 100644 --- a/catalog/music/tests.py +++ b/catalog/music/tests.py @@ -6,6 +6,8 @@ from catalog.music.utils import * class BasicMusicTest(TestCase): + databases = "__all__" + def test_gtin(self): self.assertIsNone(upc_to_gtin_13("018771208112X")) self.assertIsNone(upc_to_gtin_13("999018771208112")) @@ -15,6 +17,8 @@ class BasicMusicTest(TestCase): class SpotifyTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Spotify_Album t_id_value = "65KwtzkJXw7oT819NFWmEP" @@ -48,6 +52,8 @@ class SpotifyTestCase(TestCase): class DoubanMusicTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.DoubanMusic t_id_value = "33551231" @@ -74,6 +80,8 @@ class DoubanMusicTestCase(TestCase): class MultiMusicSitesTestCase(TestCase): + databases = "__all__" + @use_local_response def test_albums(self): url1 = "https://music.douban.com/subject/33551231/" @@ -92,6 +100,8 @@ class MultiMusicSitesTestCase(TestCase): class BandcampTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Bandcamp t_id_value = "intlanthem.bandcamp.com/album/in-these-times" @@ -119,6 +129,8 @@ class BandcampTestCase(TestCase): class DiscogsReleaseTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Discogs_Release t_id_value = "25829341" @@ -155,6 +167,8 @@ class DiscogsReleaseTestCase(TestCase): class DiscogsMasterTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Discogs_Master t_id_value = "469004" @@ -182,6 +196,8 @@ class DiscogsMasterTestCase(TestCase): class AppleMusicTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.AppleMusic t_id_value = "1284391545" @@ -201,8 +217,10 @@ class AppleMusicTestCase(TestCase): self.assertEqual(site.ready, False) site.get_resource_ready() self.assertEqual(site.ready, True) - self.assertEqual(site.resource.metadata["title"], "Kids Only") + self.assertEqual( + site.resource.metadata["localized_title"][0]["text"], "Kids Only" + ) self.assertEqual(site.resource.metadata["artist"], ["Leah Dou"]) self.assertIsInstance(site.resource.item, Album) - self.assertEqual(site.resource.item.genre, ["Pop"]) - self.assertEqual(site.resource.item.duration, 2371628) + self.assertEqual(site.resource.item.genre, ["Pop", "Music"]) + self.assertEqual(site.resource.item.duration, 2368000) diff --git a/catalog/sites/apple_music.py b/catalog/sites/apple_music.py index 19b89d98..194944d8 100644 --- a/catalog/sites/apple_music.py +++ b/catalog/sites/apple_music.py @@ -9,8 +9,9 @@ Scraping the website directly. """ import json +from datetime import timedelta -import dateparser +from django.utils.dateparse import parse_duration from loguru import logger from catalog.common import * @@ -18,7 +19,6 @@ from catalog.models import * from common.models.lang import ( SITE_DEFAULT_LANGUAGE, SITE_PREFERRED_LANGUAGES, - detect_language, ) from common.models.misc import uniq @@ -39,7 +39,6 @@ class AppleMusic(AbstractSite): headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Accept-Language": BasicDownloader.get_accept_language(), "Accept-Encoding": "gzip, deflate", "Connection": "keep-alive", "DNT": "1", @@ -70,80 +69,63 @@ class AppleMusic(AbstractSite): return locales def scrape(self): - matched_content = None + matched_schema_data = None localized_title = [] localized_desc = [] for lang, locales in self.get_locales().items(): for loc in locales: # waterfall thru all locales url = f"https://music.apple.com/{loc}/album/{self.id_value}" try: + tl = f"{lang}-{loc}" if lang == "zh" else lang + headers = { + "Accept-Language": tl, + } + headers.update(self.headers) content = ( BasicDownloader(url, headers=self.headers).download().html() ) - logger.info(f"got localized content from {url}") - elem = content.xpath( - "//script[@id='serialized-server-data']/text()" - ) - txt: str = elem[0] # type:ignore - page_data = json.loads(txt)[0] - album_data = page_data["data"]["sections"][0]["items"][0] - title = album_data["title"] - brief = album_data.get("modalPresentationDescriptor", {}).get( - "paragraphText", "" - ) - tl = detect_language(title + " " + brief) - localized_title.append({"lang": tl, "text": title}) - if brief: - localized_desc.append({"lang": tl, "text": brief}) - if lang == SITE_DEFAULT_LANGUAGE or not matched_content: - matched_content = content + logger.debug(f"got localized content from {url}") + txt: str = content.xpath( + "//script[@id='schema:music-album']/text()" + )[0] # type:ignore + schema_data = json.loads(txt) + title = schema_data["name"] + if title: + localized_title.append({"lang": tl, "text": title}) + try: + txt: str = content.xpath( + "//script[@id='serialized-server-data']/text()" + )[0] # type:ignore + server_data = json.loads(txt) + brief = server_data[0]["data"]["sections"][0]["items"][0][ + "modalPresentationDescriptor" + ]["paragraphText"] + if brief: + localized_desc.append({"lang": tl, "text": brief}) + except Exception: + server_data = brief = None + if lang == SITE_DEFAULT_LANGUAGE or not matched_schema_data: + matched_schema_data = schema_data break except Exception: pass - if matched_content is None: + if matched_schema_data is None: # no schema data found raise ParseError(self, f"localized content for {self.url}") - elem = matched_content.xpath("//script[@id='serialized-server-data']/text()") - txt: str = elem[0] # type:ignore - page_data = json.loads(txt)[0] - album_data = page_data["data"]["sections"][0]["items"][0] - title = album_data["title"] - brief = album_data.get("modalPresentationDescriptor") - brief = brief.get("paragraphText") if brief else None - artist_list = album_data["subtitleLinks"] - artist = [item["title"] for item in artist_list] - - track_data = page_data["data"]["seoData"] - date_elem = track_data.get("musicReleaseDate") - release_datetime = dateparser.parse(date_elem.strip()) if date_elem else None - release_date = ( - release_datetime.strftime("%Y-%m-%d") if release_datetime else None + artist = [a["name"] for a in matched_schema_data.get("byArtist", [])] + release_date = matched_schema_data.get("datePublished", None) + genre = matched_schema_data.get("genre", []) + image_url = matched_schema_data.get("image", None) + track_list = [t["name"] for t in matched_schema_data.get("tracks", [])] + duration = round( + sum( + (parse_duration(t["duration"]) or timedelta()).total_seconds() * 1000 + for t in matched_schema_data.get("tracks", []) + ) ) - - track_list = [ - f"{i}. {track['attributes']['name']}" - for i, track in enumerate(track_data["ogSongs"], 1) - ] - duration_list = [ - track["attributes"].get("durationInMillis", 0) - for track in track_data["ogSongs"] - ] - duration = int(sum(duration_list)) - genre = track_data["schemaContent"].get("genre") - if genre: - genre = [ - genre[0] - ] # apple treat "Music" as a genre. Thus, only the first genre is obtained. - - images = matched_content.xpath("//source[@type='image/jpeg']/@srcset") - image_elem: str = images[0] if images else "" # type:ignore - image_url = image_elem.split(" ")[0] if image_elem else None - pd = ResourceContent( metadata={ "localized_title": uniq(localized_title), "localized_description": uniq(localized_desc), - "title": title, - "brief": brief, "artist": artist, "genre": genre, "release_date": release_date, diff --git a/test_data/https___music_apple_com_cn_album_1284391545 b/test_data/https___music_apple_com_cn_album_1284391545 new file mode 100644 index 00000000..daf7d8f4 --- /dev/null +++ b/test_data/https___music_apple_com_cn_album_1284391545 @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎《Kids Only》- 窦靖童的专辑 - Apple Music + + + + + + + + + + + + + + + + + + + +
+

Kids Only

Kids Only

首张原创专辑《Stone Café》惊艳亮相后,窦靖童便开始了在音乐上的深入探索,与世界顶级音乐人合作,到世界各地演出,不断历练自己。时隔一年推出了全新的概念专辑《Kids Only》。她包办了这张专辑的词曲创作,大部分歌曲都采用了同期录制,每首歌都邀请不同的音乐人合作,多样的音乐风格加上大家现场的自由发挥,每首歌都散发着鲜活可爱而又自由肆意的气息。整张专辑最明显的一点就是对人声的处理,将人声与器乐声融合,低吟恰到好处。似她母亲王菲的唱腔,似她父亲窦唯的创作风格,但她又是如此与众不同,顽皮的孩子气、少女心事的烦忧、童年往昔的回顾、对自由的向往追求,小小年龄有大大的思虑,皆呈现于此。

选择国家或地区

非洲、中东和印度

亚太地区

欧洲

拉丁美洲和加勒比海地区

美国和加拿大

+
+ + + + + diff --git a/test_data/https___music_apple_com_fr_album_1284391545 b/test_data/https___music_apple_com_fr_album_1284391545 new file mode 100644 index 00000000..d4b43a89 --- /dev/null +++ b/test_data/https___music_apple_com_fr_album_1284391545 @@ -0,0 +1,201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎Kids Only – Album par 竇靖童 – Apple Music + + + + + + + + + + + + + + + + + + + +
+

Kids Only

Kids Only
Choisissez un pays ou une région

Afrique, Moyen‑Orient et Inde

Asie‑Pacifique

Europe

Amérique latine et Caraïbes

États‑Unis et Canada

+
+ + + + + diff --git a/test_data/https___music_apple_com_jp_album_1284391545 b/test_data/https___music_apple_com_jp_album_1284391545 new file mode 100644 index 00000000..d6fa4749 --- /dev/null +++ b/test_data/https___music_apple_com_jp_album_1284391545 @@ -0,0 +1,197 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎Kids Only - リア・ドウのアルバム - Apple Music + + + + + + + + + + + + + + + + + + + +
+

Kids Only

Kids Only
国または地域を選択

アフリカ、中東、インド

アジア太平洋

ヨーロッパ

ラテンアメリカ、カリブ海地域

米国およびカナダ

+
+ + + + + diff --git a/test_data/https___music_apple_com_kr_album_1284391545 b/test_data/https___music_apple_com_kr_album_1284391545 new file mode 100644 index 00000000..c8241613 --- /dev/null +++ b/test_data/https___music_apple_com_kr_album_1284391545 @@ -0,0 +1,197 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎Kids Only - 竇靖童의 앨범 - Apple Music + + + + + + + + + + + + + + + + + + + +
+
국가 또는 지역 선택

아프리카, 중동 및 인도

아시아 태평양

유럽

라틴 아메리카 및 카리브해

미국 및 캐나다

+
+ + + + + diff --git a/test_data/https___music_apple_com_us_album_1284391545 b/test_data/https___music_apple_com_us_album_1284391545 index b07debbd..c8fde71e 100644 --- a/test_data/https___music_apple_com_us_album_1284391545 +++ b/test_data/https___music_apple_com_us_album_1284391545 @@ -1,244 +1,55 @@ - - - - - - + + + + + + + + + + + + > + + > + + > + - + > + + - ‎Kids Only by Leah Dou on Apple Music - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + ‎Kids Only - Album by Leah Dou - Apple Music - - - - + + + @@ -272,1636 +83,123 @@ /> - - - -
- - -
- -
- - -
- -
- - -
-
- - - - - - - - -
- -
- - -
- -

Kids Only

- -
- Leah Dou -
- -
-
- -
- -
-
-
- -
-
- -
-
+

Kids Only

- - - - - - -
-
- - - - - - Kids Only -
- -
- - -

Psychedelic electro-soul colors Dou’s second album.

-
- -
- -
- -
- - -
-
- -
- - - -
- -
- - - - - - - - - -
- - - - -
- - - - -
-
- - - -
- -
1
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
2
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
3
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
4
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
5
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
6
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
7
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
8
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
9
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
10
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - -
- - - -
- -
- -

Psychedelic electro-soul colors Dou’s second album.

- - - -
- -
- -
- - - -
- -
- - -
- -

More By Leah Dou

- -
- - - -
- - - -

More By Leah Dou

-
- - -
-
-
- - - -
- -
- - -
- -

Featured On

- -
- - - -
- - - -

Featured On

-
- - -
-
-
- - - -
- - - - - -
-
+ ">
- - - - -
- - - - - - - - - - - -
-

Select a country or region

-

Africa, Middle East, and India

-
-

Asia Pacific

-
-

Europe

-
-

Latin America and the Caribbean

-
-

The United States and Canada

-
-
-
-
- -
+ ">
Select a country or region

Africa, Middle East, and India

Asia Pacific

Europe

Latin America and the Caribbean

The United States and Canada

+ - - + +