diff --git a/catalog/common/models.py b/catalog/common/models.py
index a863392c..9fe8d0c0 100644
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@@ -36,7 +36,7 @@ class SiteName(models.TextChoices):
     # ApplePodcast = "apple_podcast", _("苹果播客")
     RSS = "rss", _("RSS")
     Discogs = "discogs", _("Discogs")
-    AppleMusic = "apple_music", _("苹果音乐")
+    AppleMusic = "applemusic", _("苹果音乐")
 
 
 class IdType(models.TextChoices):
diff --git a/catalog/music/models.py b/catalog/music/models.py
index 9ffddbb0..13ffd43f 100644
--- a/catalog/music/models.py
+++ b/catalog/music/models.py
@@ -83,6 +83,8 @@ class Album(Item):
                 return f"https://bandcamp.com/EmbeddedPlayer/album={res.metadata.get('bandcamp_album_id')}/size=large/bgcol=ffffff/linkcol=19A2CA/artwork=small/transparent=true/"
             if res.id_type == IdType.Spotify_Album.value:
                 return res.url.replace("open.spotify.com/", "open.spotify.com/embed/")
+            if res.id_type == IdType.AppleMusic.value:
+                return res.url.replace("music.apple.com/", "embed.music.apple.com/us/")
         return None
 
     @classmethod
diff --git a/catalog/music/tests.py b/catalog/music/tests.py
index cd0e9e58..0b1754d7 100644
--- a/catalog/music/tests.py
+++ b/catalog/music/tests.py
@@ -172,7 +172,7 @@ class AppleMusicTestCase(TestCase):
         t_id_type = IdType.AppleMusic
         t_id_value = "1284391545"
         t_url = "https://music.apple.com/us/album/kids-only/1284391545"
-        t_url_2 = "https://music.apple.com/us/album/1284391545"
+        t_url_2 = "https://music.apple.com/album/1284391545"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)
diff --git a/catalog/sites/apple_music.py b/catalog/sites/apple_music.py
index 4833cc07..2fda1f86 100644
--- a/catalog/sites/apple_music.py
+++ b/catalog/sites/apple_music.py
@@ -22,40 +22,44 @@ _logger = logging.getLogger(__name__)
 class AppleMusic(AbstractSite):
     SITE_NAME = SiteName.AppleMusic
     ID_TYPE = IdType.AppleMusic
-    URL_PATTERNS = [r"https://music\.apple\.com/[a-z]{2}/album/[\d\w%-]+/(\d+)[^\d]*"]
-    DOMAIN_PATTERNS = [
-        r"(https://music\.apple\.com/[a-z]{2})/album/[\d\w%-]+/\d+[^\d]*"
+    URL_PATTERNS = [
+        r"https://music\.apple\.com/[a-z]{2}/album/[\w%-]+/(\d+)",
+        r"https://music\.apple\.com/[a-z]{2}/album/(\d+)",
+        r"https://music\.apple\.com/album/(\d+)",
     ]
     WIKI_PROPERTY_ID = "?"
     DEFAULT_MODEL = Album
 
-    @classmethod
-    def url_to_id(cls, url: str):
-        """
-        Transform url to id. Find the domain of the provided url.
-        """
-        domain = next(
-            iter([re.match(p, url) for p in cls.DOMAIN_PATTERNS if re.match(p, url)]),
-            None,
-        )
-        cls.domain = domain[1] if domain else None
-        u = next(
-            iter([re.match(p, url) for p in cls.URL_PATTERNS if re.match(p, url)]),
-            None,
-        )
-        return u[1] if u else None
-
     @classmethod
     def id_to_url(cls, id_value):
-        # find albums according to the domain of the provided link
-        return f"{cls.domain}/album/{id_value}"
+        return f"https://music.apple.com/album/{id_value}"
+
+    def get_localized_urls(self):
+        return [
+            f"https://music.apple.com/{locale}/album/{self.id_value}"
+            for locale in ["hk", "tw", "us", "sg", "cn", "gb", "ca", "fr"]
+        ]
 
     def scrape(self):
-        content = BasicDownloader(self.url).download().html()
+        content = None
+        # it's less than ideal to waterfall through locales; a better solution
+        # would be to change ExternalResource to store a preferred locale,
+        # or to find an AppleMusic API to get available locales for an album
+        for url in self.get_localized_urls():
+            try:
+                content = BasicDownloader(url).download().html()
+                _logger.info(f"got localized content from {url}")
+                break
+            except Exception as e:
+                _logger.debug(f"failed to get localized content from {url}: {e}")
+        if content is None:
+            raise ParseError(self, f"localized content for {self.url}")
         elem = content.xpath("//script[@id='serialized-server-data']/text()")
         page_data = json.loads(elem[0])[0]
         album_data = page_data["data"]["sections"][0]["items"][0]
         title = album_data["title"]
+        brief = album_data.get("modalPresentationDescriptor")
+        brief = brief.get("paragraphText") if brief else None
         artist_list = album_data["subtitleLinks"]
         artist = [item["title"] for item in artist_list]
 
@@ -84,6 +88,7 @@ class AppleMusic(AbstractSite):
         pd = ResourceContent(
             metadata={
                 "title": title,
+                "brief": brief,
                 "artist": artist,
                 "genre": genre,
                 "release_date": release_date,
diff --git a/common/static/scss/_sitelabel.scss b/common/static/scss/_sitelabel.scss
index b7599f3a..ca34e79f 100644
--- a/common/static/scss/_sitelabel.scss
+++ b/common/static/scss/_sitelabel.scss
@@ -76,7 +76,12 @@
         color: white;
         border: none;
         font-weight: lighter;
-        padding-top: 2px;
+    }
+
+    .applemusic {
+        background: linear-gradient(135deg, #F64464, #F12745);
+        color: white;
+        border: none;
     }
 
     .googlebooks {