diff --git a/boofilsic/settings.py b/boofilsic/settings.py
index 04b1df5f..3f74a9cb 100644
--- a/boofilsic/settings.py
+++ b/boofilsic/settings.py
@@ -449,6 +449,7 @@ LANGUAGE_CODE, PREFERRED_LANGUAGES = _init_language_settings(
if TESTING: # force en if testing
LANGUAGE_CODE = "en"
+ PREFERRED_LANGUAGES = ["en"]
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]
@@ -580,7 +581,7 @@ SEARCH_INDEX_NEW_ONLY = False
INDEX_ALIASES = env("INDEX_ALIASES")
-DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="/tmp") # type: ignore
+DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="") # type: ignore
DISABLE_MODEL_SIGNAL = False # disable index and social feeds during importing/etc
diff --git a/catalog/music/tests.py b/catalog/music/tests.py
index 0bfd8a98..d489fe7a 100644
--- a/catalog/music/tests.py
+++ b/catalog/music/tests.py
@@ -6,6 +6,8 @@ from catalog.music.utils import *
class BasicMusicTest(TestCase):
+ databases = "__all__"
+
def test_gtin(self):
self.assertIsNone(upc_to_gtin_13("018771208112X"))
self.assertIsNone(upc_to_gtin_13("999018771208112"))
@@ -15,6 +17,8 @@ class BasicMusicTest(TestCase):
class SpotifyTestCase(TestCase):
+ databases = "__all__"
+
def test_parse(self):
t_id_type = IdType.Spotify_Album
t_id_value = "65KwtzkJXw7oT819NFWmEP"
@@ -48,6 +52,8 @@ class SpotifyTestCase(TestCase):
class DoubanMusicTestCase(TestCase):
+ databases = "__all__"
+
def test_parse(self):
t_id_type = IdType.DoubanMusic
t_id_value = "33551231"
@@ -74,6 +80,8 @@ class DoubanMusicTestCase(TestCase):
class MultiMusicSitesTestCase(TestCase):
+ databases = "__all__"
+
@use_local_response
def test_albums(self):
url1 = "https://music.douban.com/subject/33551231/"
@@ -92,6 +100,8 @@ class MultiMusicSitesTestCase(TestCase):
class BandcampTestCase(TestCase):
+ databases = "__all__"
+
def test_parse(self):
t_id_type = IdType.Bandcamp
t_id_value = "intlanthem.bandcamp.com/album/in-these-times"
@@ -119,6 +129,8 @@ class BandcampTestCase(TestCase):
class DiscogsReleaseTestCase(TestCase):
+ databases = "__all__"
+
def test_parse(self):
t_id_type = IdType.Discogs_Release
t_id_value = "25829341"
@@ -155,6 +167,8 @@ class DiscogsReleaseTestCase(TestCase):
class DiscogsMasterTestCase(TestCase):
+ databases = "__all__"
+
def test_parse(self):
t_id_type = IdType.Discogs_Master
t_id_value = "469004"
@@ -182,6 +196,8 @@ class DiscogsMasterTestCase(TestCase):
class AppleMusicTestCase(TestCase):
+ databases = "__all__"
+
def test_parse(self):
t_id_type = IdType.AppleMusic
t_id_value = "1284391545"
@@ -201,8 +217,10 @@ class AppleMusicTestCase(TestCase):
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata["title"], "Kids Only")
+ self.assertEqual(
+ site.resource.metadata["localized_title"][0]["text"], "Kids Only"
+ )
self.assertEqual(site.resource.metadata["artist"], ["Leah Dou"])
self.assertIsInstance(site.resource.item, Album)
- self.assertEqual(site.resource.item.genre, ["Pop"])
- self.assertEqual(site.resource.item.duration, 2371628)
+ self.assertEqual(site.resource.item.genre, ["Pop", "Music"])
+ self.assertEqual(site.resource.item.duration, 2368000)
diff --git a/catalog/sites/apple_music.py b/catalog/sites/apple_music.py
index 19b89d98..194944d8 100644
--- a/catalog/sites/apple_music.py
+++ b/catalog/sites/apple_music.py
@@ -9,8 +9,9 @@ Scraping the website directly.
"""
import json
+from datetime import timedelta
-import dateparser
+from django.utils.dateparse import parse_duration
from loguru import logger
from catalog.common import *
@@ -18,7 +19,6 @@ from catalog.models import *
from common.models.lang import (
SITE_DEFAULT_LANGUAGE,
SITE_PREFERRED_LANGUAGES,
- detect_language,
)
from common.models.misc import uniq
@@ -39,7 +39,6 @@ class AppleMusic(AbstractSite):
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
- "Accept-Language": BasicDownloader.get_accept_language(),
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"DNT": "1",
@@ -70,80 +69,63 @@ class AppleMusic(AbstractSite):
return locales
def scrape(self):
- matched_content = None
+ matched_schema_data = None
localized_title = []
localized_desc = []
for lang, locales in self.get_locales().items():
for loc in locales: # waterfall thru all locales
url = f"https://music.apple.com/{loc}/album/{self.id_value}"
try:
+ tl = f"{lang}-{loc}" if lang == "zh" else lang
+ headers = {
+ "Accept-Language": tl,
+ }
+ headers.update(self.headers)
content = (
- BasicDownloader(url, headers=self.headers).download().html()
+ BasicDownloader(url, headers=headers).download().html()  # per-locale headers, so Accept-Language is actually sent
)
- logger.info(f"got localized content from {url}")
- elem = content.xpath(
- "//script[@id='serialized-server-data']/text()"
- )
- txt: str = elem[0] # type:ignore
- page_data = json.loads(txt)[0]
- album_data = page_data["data"]["sections"][0]["items"][0]
- title = album_data["title"]
- brief = album_data.get("modalPresentationDescriptor", {}).get(
- "paragraphText", ""
- )
- tl = detect_language(title + " " + brief)
- localized_title.append({"lang": tl, "text": title})
- if brief:
- localized_desc.append({"lang": tl, "text": brief})
- if lang == SITE_DEFAULT_LANGUAGE or not matched_content:
- matched_content = content
+ logger.debug(f"got localized content from {url}")
+ txt: str = content.xpath(
+ "//script[@id='schema:music-album']/text()"
+ )[0] # type:ignore
+ schema_data = json.loads(txt)
+ title = schema_data["name"]
+ if title:
+ localized_title.append({"lang": tl, "text": title})
+ try:
+ txt: str = content.xpath(
+ "//script[@id='serialized-server-data']/text()"
+ )[0] # type:ignore
+ server_data = json.loads(txt)
+ brief = server_data[0]["data"]["sections"][0]["items"][0][
+ "modalPresentationDescriptor"
+ ]["paragraphText"]
+ if brief:
+ localized_desc.append({"lang": tl, "text": brief})
+ except Exception:
+ server_data = brief = None
+ if lang == SITE_DEFAULT_LANGUAGE or not matched_schema_data:
+ matched_schema_data = schema_data
break
except Exception:
pass
- if matched_content is None:
+ if matched_schema_data is None: # no schema data found
raise ParseError(self, f"localized content for {self.url}")
- elem = matched_content.xpath("//script[@id='serialized-server-data']/text()")
- txt: str = elem[0] # type:ignore
- page_data = json.loads(txt)[0]
- album_data = page_data["data"]["sections"][0]["items"][0]
- title = album_data["title"]
- brief = album_data.get("modalPresentationDescriptor")
- brief = brief.get("paragraphText") if brief else None
- artist_list = album_data["subtitleLinks"]
- artist = [item["title"] for item in artist_list]
-
- track_data = page_data["data"]["seoData"]
- date_elem = track_data.get("musicReleaseDate")
- release_datetime = dateparser.parse(date_elem.strip()) if date_elem else None
- release_date = (
- release_datetime.strftime("%Y-%m-%d") if release_datetime else None
+ artist = [a["name"] for a in matched_schema_data.get("byArtist", [])]
+ release_date = matched_schema_data.get("datePublished", None)
+ genre = matched_schema_data.get("genre", [])
+ image_url = matched_schema_data.get("image", None)
+ track_list = [t["name"] for t in matched_schema_data.get("tracks", [])]
+ duration = round(
+ sum(
+ (parse_duration(t["duration"]) or timedelta()).total_seconds() * 1000
+ for t in matched_schema_data.get("tracks", [])
+ )
)
-
- track_list = [
- f"{i}. {track['attributes']['name']}"
- for i, track in enumerate(track_data["ogSongs"], 1)
- ]
- duration_list = [
- track["attributes"].get("durationInMillis", 0)
- for track in track_data["ogSongs"]
- ]
- duration = int(sum(duration_list))
- genre = track_data["schemaContent"].get("genre")
- if genre:
- genre = [
- genre[0]
- ] # apple treat "Music" as a genre. Thus, only the first genre is obtained.
-
- images = matched_content.xpath("//source[@type='image/jpeg']/@srcset")
- image_elem: str = images[0] if images else "" # type:ignore
- image_url = image_elem.split(" ")[0] if image_elem else None
-
pd = ResourceContent(
metadata={
"localized_title": uniq(localized_title),
"localized_description": uniq(localized_desc),
- "title": title,
- "brief": brief,
"artist": artist,
"genre": genre,
"release_date": release_date,
diff --git a/test_data/https___music_apple_com_cn_album_1284391545 b/test_data/https___music_apple_com_cn_album_1284391545
new file mode 100644
index 00000000..daf7d8f4
--- /dev/null
+++ b/test_data/https___music_apple_com_cn_album_1284391545
@@ -0,0 +1,209 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ âãKids Onlyã- 窦éç«¥çä¸è¾ - Apple Music
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_data/https___music_apple_com_fr_album_1284391545 b/test_data/https___music_apple_com_fr_album_1284391545
new file mode 100644
index 00000000..d4b43a89
--- /dev/null
+++ b/test_data/https___music_apple_com_fr_album_1284391545
@@ -0,0 +1,201 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ âKids Only â Album par ç«éç«¥ â Apple Music
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_data/https___music_apple_com_jp_album_1284391545 b/test_data/https___music_apple_com_jp_album_1284391545
new file mode 100644
index 00000000..d6fa4749
--- /dev/null
+++ b/test_data/https___music_apple_com_jp_album_1284391545
@@ -0,0 +1,197 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ âKids Only - ãªã¢ã»ãã¦ã®ã¢ã«ãã - Apple Music
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_data/https___music_apple_com_kr_album_1284391545 b/test_data/https___music_apple_com_kr_album_1284391545
new file mode 100644
index 00000000..c8241613
--- /dev/null
+++ b/test_data/https___music_apple_com_kr_album_1284391545
@@ -0,0 +1,197 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ âKids Only - ç«éç«¥ì ì¨ë² - Apple Music
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test_data/https___music_apple_com_us_album_1284391545 b/test_data/https___music_apple_com_us_album_1284391545
index b07debbd..c8fde71e 100644
--- a/test_data/https___music_apple_com_us_album_1284391545
+++ b/test_data/https___music_apple_com_us_album_1284391545
@@ -1,244 +1,55 @@
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+ >
+
+ >
+
+ >
+
-
+ >
+
+
- âKids Only by Leah Dou on Apple Music
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ âKids Only - Album by Leah Dou - Apple Music
-
-
-
-
+
+
+
@@ -272,1636 +83,123 @@
/>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ ">
+
-
-
+
+