fix apple music
This commit is contained in:
parent
4758af58a6
commit
e45980a85a
8 changed files with 977 additions and 1874 deletions
|
@ -449,6 +449,7 @@ LANGUAGE_CODE, PREFERRED_LANGUAGES = _init_language_settings(
|
||||||
|
|
||||||
if TESTING: # force en if testing
|
if TESTING: # force en if testing
|
||||||
LANGUAGE_CODE = "en"
|
LANGUAGE_CODE = "en"
|
||||||
|
PREFERRED_LANGUAGES = ["en"]
|
||||||
|
|
||||||
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]
|
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]
|
||||||
|
|
||||||
|
@ -580,7 +581,7 @@ SEARCH_INDEX_NEW_ONLY = False
|
||||||
|
|
||||||
INDEX_ALIASES = env("INDEX_ALIASES")
|
INDEX_ALIASES = env("INDEX_ALIASES")
|
||||||
|
|
||||||
DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="/tmp") # type: ignore
|
DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="") # type: ignore
|
||||||
|
|
||||||
DISABLE_MODEL_SIGNAL = False # disable index and social feeds during importing/etc
|
DISABLE_MODEL_SIGNAL = False # disable index and social feeds during importing/etc
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,8 @@ from catalog.music.utils import *
|
||||||
|
|
||||||
|
|
||||||
class BasicMusicTest(TestCase):
|
class BasicMusicTest(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_gtin(self):
|
def test_gtin(self):
|
||||||
self.assertIsNone(upc_to_gtin_13("018771208112X"))
|
self.assertIsNone(upc_to_gtin_13("018771208112X"))
|
||||||
self.assertIsNone(upc_to_gtin_13("999018771208112"))
|
self.assertIsNone(upc_to_gtin_13("999018771208112"))
|
||||||
|
@ -15,6 +17,8 @@ class BasicMusicTest(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class SpotifyTestCase(TestCase):
|
class SpotifyTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
t_id_type = IdType.Spotify_Album
|
t_id_type = IdType.Spotify_Album
|
||||||
t_id_value = "65KwtzkJXw7oT819NFWmEP"
|
t_id_value = "65KwtzkJXw7oT819NFWmEP"
|
||||||
|
@ -48,6 +52,8 @@ class SpotifyTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class DoubanMusicTestCase(TestCase):
|
class DoubanMusicTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
t_id_type = IdType.DoubanMusic
|
t_id_type = IdType.DoubanMusic
|
||||||
t_id_value = "33551231"
|
t_id_value = "33551231"
|
||||||
|
@ -74,6 +80,8 @@ class DoubanMusicTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class MultiMusicSitesTestCase(TestCase):
|
class MultiMusicSitesTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
@use_local_response
|
@use_local_response
|
||||||
def test_albums(self):
|
def test_albums(self):
|
||||||
url1 = "https://music.douban.com/subject/33551231/"
|
url1 = "https://music.douban.com/subject/33551231/"
|
||||||
|
@ -92,6 +100,8 @@ class MultiMusicSitesTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class BandcampTestCase(TestCase):
|
class BandcampTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
t_id_type = IdType.Bandcamp
|
t_id_type = IdType.Bandcamp
|
||||||
t_id_value = "intlanthem.bandcamp.com/album/in-these-times"
|
t_id_value = "intlanthem.bandcamp.com/album/in-these-times"
|
||||||
|
@ -119,6 +129,8 @@ class BandcampTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class DiscogsReleaseTestCase(TestCase):
|
class DiscogsReleaseTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
t_id_type = IdType.Discogs_Release
|
t_id_type = IdType.Discogs_Release
|
||||||
t_id_value = "25829341"
|
t_id_value = "25829341"
|
||||||
|
@ -155,6 +167,8 @@ class DiscogsReleaseTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class DiscogsMasterTestCase(TestCase):
|
class DiscogsMasterTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
t_id_type = IdType.Discogs_Master
|
t_id_type = IdType.Discogs_Master
|
||||||
t_id_value = "469004"
|
t_id_value = "469004"
|
||||||
|
@ -182,6 +196,8 @@ class DiscogsMasterTestCase(TestCase):
|
||||||
|
|
||||||
|
|
||||||
class AppleMusicTestCase(TestCase):
|
class AppleMusicTestCase(TestCase):
|
||||||
|
databases = "__all__"
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
t_id_type = IdType.AppleMusic
|
t_id_type = IdType.AppleMusic
|
||||||
t_id_value = "1284391545"
|
t_id_value = "1284391545"
|
||||||
|
@ -201,8 +217,10 @@ class AppleMusicTestCase(TestCase):
|
||||||
self.assertEqual(site.ready, False)
|
self.assertEqual(site.ready, False)
|
||||||
site.get_resource_ready()
|
site.get_resource_ready()
|
||||||
self.assertEqual(site.ready, True)
|
self.assertEqual(site.ready, True)
|
||||||
self.assertEqual(site.resource.metadata["title"], "Kids Only")
|
self.assertEqual(
|
||||||
|
site.resource.metadata["localized_title"][0]["text"], "Kids Only"
|
||||||
|
)
|
||||||
self.assertEqual(site.resource.metadata["artist"], ["Leah Dou"])
|
self.assertEqual(site.resource.metadata["artist"], ["Leah Dou"])
|
||||||
self.assertIsInstance(site.resource.item, Album)
|
self.assertIsInstance(site.resource.item, Album)
|
||||||
self.assertEqual(site.resource.item.genre, ["Pop"])
|
self.assertEqual(site.resource.item.genre, ["Pop", "Music"])
|
||||||
self.assertEqual(site.resource.item.duration, 2371628)
|
self.assertEqual(site.resource.item.duration, 2368000)
|
||||||
|
|
|
@ -9,8 +9,9 @@ Scraping the website directly.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
import dateparser
|
from django.utils.dateparse import parse_duration
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from catalog.common import *
|
from catalog.common import *
|
||||||
|
@ -18,7 +19,6 @@ from catalog.models import *
|
||||||
from common.models.lang import (
|
from common.models.lang import (
|
||||||
SITE_DEFAULT_LANGUAGE,
|
SITE_DEFAULT_LANGUAGE,
|
||||||
SITE_PREFERRED_LANGUAGES,
|
SITE_PREFERRED_LANGUAGES,
|
||||||
detect_language,
|
|
||||||
)
|
)
|
||||||
from common.models.misc import uniq
|
from common.models.misc import uniq
|
||||||
|
|
||||||
|
@ -39,7 +39,6 @@ class AppleMusic(AbstractSite):
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
|
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": BasicDownloader.get_accept_language(),
|
|
||||||
"Accept-Encoding": "gzip, deflate",
|
"Accept-Encoding": "gzip, deflate",
|
||||||
"Connection": "keep-alive",
|
"Connection": "keep-alive",
|
||||||
"DNT": "1",
|
"DNT": "1",
|
||||||
|
@ -70,80 +69,63 @@ class AppleMusic(AbstractSite):
|
||||||
return locales
|
return locales
|
||||||
|
|
||||||
def scrape(self):
|
def scrape(self):
|
||||||
matched_content = None
|
matched_schema_data = None
|
||||||
localized_title = []
|
localized_title = []
|
||||||
localized_desc = []
|
localized_desc = []
|
||||||
for lang, locales in self.get_locales().items():
|
for lang, locales in self.get_locales().items():
|
||||||
for loc in locales: # waterfall thru all locales
|
for loc in locales: # waterfall thru all locales
|
||||||
url = f"https://music.apple.com/{loc}/album/{self.id_value}"
|
url = f"https://music.apple.com/{loc}/album/{self.id_value}"
|
||||||
try:
|
try:
|
||||||
|
tl = f"{lang}-{loc}" if lang == "zh" else lang
|
||||||
|
headers = {
|
||||||
|
"Accept-Language": tl,
|
||||||
|
}
|
||||||
|
headers.update(self.headers)
|
||||||
content = (
|
content = (
|
||||||
BasicDownloader(url, headers=self.headers).download().html()
|
BasicDownloader(url, headers=self.headers).download().html()
|
||||||
)
|
)
|
||||||
logger.info(f"got localized content from {url}")
|
logger.debug(f"got localized content from {url}")
|
||||||
elem = content.xpath(
|
txt: str = content.xpath(
|
||||||
"//script[@id='serialized-server-data']/text()"
|
"//script[@id='schema:music-album']/text()"
|
||||||
)
|
)[0] # type:ignore
|
||||||
txt: str = elem[0] # type:ignore
|
schema_data = json.loads(txt)
|
||||||
page_data = json.loads(txt)[0]
|
title = schema_data["name"]
|
||||||
album_data = page_data["data"]["sections"][0]["items"][0]
|
if title:
|
||||||
title = album_data["title"]
|
localized_title.append({"lang": tl, "text": title})
|
||||||
brief = album_data.get("modalPresentationDescriptor", {}).get(
|
try:
|
||||||
"paragraphText", ""
|
txt: str = content.xpath(
|
||||||
)
|
"//script[@id='serialized-server-data']/text()"
|
||||||
tl = detect_language(title + " " + brief)
|
)[0] # type:ignore
|
||||||
localized_title.append({"lang": tl, "text": title})
|
server_data = json.loads(txt)
|
||||||
if brief:
|
brief = server_data[0]["data"]["sections"][0]["items"][0][
|
||||||
localized_desc.append({"lang": tl, "text": brief})
|
"modalPresentationDescriptor"
|
||||||
if lang == SITE_DEFAULT_LANGUAGE or not matched_content:
|
]["paragraphText"]
|
||||||
matched_content = content
|
if brief:
|
||||||
|
localized_desc.append({"lang": tl, "text": brief})
|
||||||
|
except Exception:
|
||||||
|
server_data = brief = None
|
||||||
|
if lang == SITE_DEFAULT_LANGUAGE or not matched_schema_data:
|
||||||
|
matched_schema_data = schema_data
|
||||||
break
|
break
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if matched_content is None:
|
if matched_schema_data is None: # no schema data found
|
||||||
raise ParseError(self, f"localized content for {self.url}")
|
raise ParseError(self, f"localized content for {self.url}")
|
||||||
elem = matched_content.xpath("//script[@id='serialized-server-data']/text()")
|
artist = [a["name"] for a in matched_schema_data.get("byArtist", [])]
|
||||||
txt: str = elem[0] # type:ignore
|
release_date = matched_schema_data.get("datePublished", None)
|
||||||
page_data = json.loads(txt)[0]
|
genre = matched_schema_data.get("genre", [])
|
||||||
album_data = page_data["data"]["sections"][0]["items"][0]
|
image_url = matched_schema_data.get("image", None)
|
||||||
title = album_data["title"]
|
track_list = [t["name"] for t in matched_schema_data.get("tracks", [])]
|
||||||
brief = album_data.get("modalPresentationDescriptor")
|
duration = round(
|
||||||
brief = brief.get("paragraphText") if brief else None
|
sum(
|
||||||
artist_list = album_data["subtitleLinks"]
|
(parse_duration(t["duration"]) or timedelta()).total_seconds() * 1000
|
||||||
artist = [item["title"] for item in artist_list]
|
for t in matched_schema_data.get("tracks", [])
|
||||||
|
)
|
||||||
track_data = page_data["data"]["seoData"]
|
|
||||||
date_elem = track_data.get("musicReleaseDate")
|
|
||||||
release_datetime = dateparser.parse(date_elem.strip()) if date_elem else None
|
|
||||||
release_date = (
|
|
||||||
release_datetime.strftime("%Y-%m-%d") if release_datetime else None
|
|
||||||
)
|
)
|
||||||
|
|
||||||
track_list = [
|
|
||||||
f"{i}. {track['attributes']['name']}"
|
|
||||||
for i, track in enumerate(track_data["ogSongs"], 1)
|
|
||||||
]
|
|
||||||
duration_list = [
|
|
||||||
track["attributes"].get("durationInMillis", 0)
|
|
||||||
for track in track_data["ogSongs"]
|
|
||||||
]
|
|
||||||
duration = int(sum(duration_list))
|
|
||||||
genre = track_data["schemaContent"].get("genre")
|
|
||||||
if genre:
|
|
||||||
genre = [
|
|
||||||
genre[0]
|
|
||||||
] # apple treat "Music" as a genre. Thus, only the first genre is obtained.
|
|
||||||
|
|
||||||
images = matched_content.xpath("//source[@type='image/jpeg']/@srcset")
|
|
||||||
image_elem: str = images[0] if images else "" # type:ignore
|
|
||||||
image_url = image_elem.split(" ")[0] if image_elem else None
|
|
||||||
|
|
||||||
pd = ResourceContent(
|
pd = ResourceContent(
|
||||||
metadata={
|
metadata={
|
||||||
"localized_title": uniq(localized_title),
|
"localized_title": uniq(localized_title),
|
||||||
"localized_description": uniq(localized_desc),
|
"localized_description": uniq(localized_desc),
|
||||||
"title": title,
|
|
||||||
"brief": brief,
|
|
||||||
"artist": artist,
|
"artist": artist,
|
||||||
"genre": genre,
|
"genre": genre,
|
||||||
"release_date": release_date,
|
"release_date": release_date,
|
||||||
|
|
209
test_data/https___music_apple_com_cn_album_1284391545
Normal file
209
test_data/https___music_apple_com_cn_album_1284391545
Normal file
File diff suppressed because one or more lines are too long
201
test_data/https___music_apple_com_fr_album_1284391545
Normal file
201
test_data/https___music_apple_com_fr_album_1284391545
Normal file
File diff suppressed because one or more lines are too long
197
test_data/https___music_apple_com_jp_album_1284391545
Normal file
197
test_data/https___music_apple_com_jp_album_1284391545
Normal file
File diff suppressed because one or more lines are too long
197
test_data/https___music_apple_com_kr_album_1284391545
Normal file
197
test_data/https___music_apple_com_kr_album_1284391545
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Add table
Reference in a new issue