From a73b9080176168af8951474c8a51e441204afdd4 Mon Sep 17 00:00:00 2001
From: qilinz
Date: Fri, 3 Feb 2023 13:43:55 +0100
Subject: [PATCH] Complete discogs

---
 catalog/common/models.py  |   1 +
 catalog/music/tests.py    |  57 +++++++++++++
 catalog/sites/__init__.py |   2 +
 catalog/sites/discogs.py  | 161 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+)
 create mode 100644 catalog/sites/discogs.py

diff --git a/catalog/common/models.py b/catalog/common/models.py
index 65e1738c..90381641 100644
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@@ -33,6 +33,7 @@ class SiteName(models.TextChoices):
     Bangumi = "bangumi", _("Bangumi")
     # ApplePodcast = "apple_podcast", _("苹果播客")
     RSS = "rss", _("RSS")
+    Discogs = "discogs", _("Discogs")
 
 
 class IdType(models.TextChoices):
diff --git a/catalog/music/tests.py b/catalog/music/tests.py
index aed4e715..87323c19 100644
--- a/catalog/music/tests.py
+++ b/catalog/music/tests.py
@@ -84,3 +84,60 @@ class BandcampTestCase(TestCase):
         self.assertEqual(site.resource.metadata["title"], "In These Times")
         self.assertEqual(site.resource.metadata["artist"], ["Makaya McCraven"])
         self.assertIsInstance(site.resource.item, Album)
+
+
+class DiscogsReleaseTestCase(TestCase):
+    def test_parse(self):
+        t_id_type = IdType.Discogs_Release
+        t_id_value = "25746742"
+        t_url = (
+            "https://www.discogs.com/release/25746742-Phish-LP-on-LP-04-Ghost-5222000"
+        )
+        t_url_2 = "https://www.discogs.com/release/25746742"
+        site = SiteManager.get_site_by_id_type(t_id_type)
+        self.assertIsNotNone(site)
+        self.assertEqual(site.validate_url(t_url), True)
+        site = SiteManager.get_site_by_url(t_url)
+        self.assertEqual(site.url, t_url_2)
+        self.assertEqual(site.id_value, t_id_value)
+
+    @use_local_response
+    def test_scrape(self):
+        t_url = (
+            "https://www.discogs.com/release/25746742-Phish-LP-on-LP-04-Ghost-5222000"
+        )
+        site = SiteManager.get_site_by_url(t_url)
+        self.assertEqual(site.ready, False)
+        site.get_resource_ready()
+        self.assertEqual(site.ready, True)
+        self.assertEqual(
+            site.resource.metadata["title"], 'LP on LP 04: "Ghost" 5/22/2000'
+        )
+        self.assertEqual(site.resource.metadata["artist"], ["Phish"])
+        self.assertIsInstance(site.resource.item, Album)
+        self.assertEqual(site.resource.item.barcode, "850014859275")
+
+
+class DiscogsMasterTestCase(TestCase):
+    def test_parse(self):
+        t_id_type = IdType.Discogs_Master
+        t_id_value = "14772"
+        t_url = "https://www.discogs.com/master/14772-Linda-Ronstadt-Silk-Purse"
+        t_url_2 = "https://www.discogs.com/master/14772"
+        site = SiteManager.get_site_by_id_type(t_id_type)
+        self.assertIsNotNone(site)
+        self.assertEqual(site.validate_url(t_url), True)
+        site = SiteManager.get_site_by_url(t_url)
+        self.assertEqual(site.url, t_url_2)
+        self.assertEqual(site.id_value, t_id_value)
+
+    @use_local_response
+    def test_scrape(self):
+        t_url = "https://www.discogs.com/master/14772-Linda-Ronstadt-Silk-Purse"
+        site = SiteManager.get_site_by_url(t_url)
+        self.assertEqual(site.ready, False)
+        site.get_resource_ready()
+        self.assertEqual(site.ready, True)
+        self.assertEqual(site.resource.metadata["title"], "Silk Purse")
+        self.assertEqual(site.resource.metadata["artist"], ["Linda Ronstadt"])
+        self.assertIsInstance(site.resource.item, Album)
diff --git a/catalog/sites/__init__.py b/catalog/sites/__init__.py
index f8a57ab3..4d995206 100644
--- a/catalog/sites/__init__.py
+++ b/catalog/sites/__init__.py
@@ -16,3 +16,5 @@ from .igdb import IGDB
 from .steam import Steam
 from .bandcamp import Bandcamp
 from .bangumi import Bangumi
+from .discogs import DiscogsRelease
+from .discogs import DiscogsMaster
diff --git a/catalog/sites/discogs.py b/catalog/sites/discogs.py
new file mode 100644
index 00000000..fab5bca2
--- /dev/null
+++ b/catalog/sites/discogs.py
@@ -0,0 +1,161 @@
+"""
+Discogs.
+
+Site adapters that read release and master-release metadata from the
+Discogs HTTP API.
+"""
+from django.conf import settings
+from catalog.common import *
+from catalog.models import *
+from .douban import *
+import json
+import logging
+import requests
+
+
+_logger = logging.getLogger(__name__)
+
+# Seconds to wait on api.discogs.com before abandoning a request.
+_API_TIMEOUT = 30
+
+
+@SiteManager.register
+class DiscogsRelease(AbstractSite):
+    """Discogs release, scraped through the ``releases`` API endpoint."""
+
+    SITE_NAME = SiteName.Discogs
+    ID_TYPE = IdType.Discogs_Release
+    # The "-slug" suffix is optional so that id_to_url() output validates too.
+    URL_PATTERNS = [r"https://www\.discogs\.com/release/(\d+)(?:-.+)?"]
+    WIKI_PROPERTY_ID = "?"
+    DEFAULT_MODEL = Album
+
+    @classmethod
+    def id_to_url(cls, id_value):
+        """Return the slug-less Discogs page URL for release ``id_value``."""
+        return f"https://www.discogs.com/release/{id_value}"
+
+    def scrape(self):
+        """Fetch this release from the API and return it as ResourceContent.
+
+        List-valued API fields that are missing are treated as empty.
+        """
+        release = get_discogs_data("releases", self.id_value)
+        title = release.get("title")
+        artist = [a.get("name") for a in release.get("artists") or []]
+        genre = release.get("genres")
+        track_list = [t.get("title") for t in release.get("tracklist") or []]
+        company = [c.get("name") for c in release.get("companies") or []]
+
+        # Media and count are only recorded when there is exactly one format.
+        media, disc_count = None, None
+        formats = release.get("formats") or []
+        if len(formats) == 1:
+            media = formats[0].get("name")
+            disc_count = formats[0].get("qty")
+
+        barcode = None
+        for identifier in release.get("identifiers") or []:
+            if identifier.get("type") == "Barcode" and identifier.get("value"):
+                # Keep the bare code: drop spaces and dashes.
+                barcode = identifier["value"].replace(" ", "").replace("-", "")
+
+        images = release.get("images") or []
+        image_url = images[0].get("uri") if images else None
+        pd = ResourceContent(
+            metadata={
+                "title": title,
+                "artist": artist,
+                "genre": genre,
+                "track_list": track_list,
+                "release_date": None,  # only year provided by API
+                "company": company,
+                "media": media,
+                "disc_count": disc_count,
+                "cover_image_url": image_url,
+            }
+        )
+        if barcode:
+            pd.lookup_ids[IdType.GTIN] = barcode
+        _download_cover(pd, self.url)
+        return pd
+
+
+@SiteManager.register
+class DiscogsMaster(AbstractSite):
+    """Discogs master release, scraped through the ``masters`` API endpoint."""
+
+    SITE_NAME = SiteName.Discogs
+    ID_TYPE = IdType.Discogs_Master
+    # The "-slug" suffix is optional so that id_to_url() output validates too.
+    URL_PATTERNS = [r"https://www\.discogs\.com/master/(\d+)(?:-.+)?"]
+    WIKI_PROPERTY_ID = "?"
+    DEFAULT_MODEL = Album
+
+    @classmethod
+    def id_to_url(cls, id_value):
+        """Return the slug-less Discogs page URL for master ``id_value``."""
+        return f"https://www.discogs.com/master/{id_value}"
+
+    def scrape(self):
+        """Fetch this master release from the API as ResourceContent.
+
+        List-valued API fields that are missing are treated as empty.
+        """
+        master_release = get_discogs_data("masters", self.id_value)
+        title = master_release.get("title")
+        artist = [a.get("name") for a in master_release.get("artists") or []]
+        genre = master_release.get("genres")
+        track_list = [t.get("title") for t in master_release.get("tracklist") or []]
+
+        images = master_release.get("images") or []
+        image_url = images[0].get("uri") if images else None
+        pd = ResourceContent(
+            metadata={
+                "title": title,
+                "artist": artist,
+                "genre": genre,
+                "track_list": track_list,
+                "cover_image_url": image_url,
+            }
+        )
+        _download_cover(pd, self.url)
+        return pd
+
+
+def _download_cover(pd, page_url):
+    """Best-effort download of the cover in ``pd.metadata`` onto ``pd``.
+
+    Does nothing without a cover URL; a failed download is only logged.
+    """
+    cover_url = pd.metadata["cover_image_url"]
+    if not cover_url:
+        return
+    imgdl = BasicImageDownloader(cover_url, page_url)
+    try:
+        pd.cover_image = imgdl.download().content
+        pd.cover_image_extention = imgdl.extention
+    except Exception:
+        _logger.debug("failed to download cover for %s from %s", page_url, cover_url)
+
+
+def get_discogs_data(data_type: str, discogs_id):
+    """Fetch one resource from the Discogs API and return the decoded JSON.
+
+    ``data_type`` selects the API collection and must be "releases" or
+    "masters"; any other value raises ValueError. A non-2xx response raises
+    requests.HTTPError instead of returning the error payload.
+    """
+    if data_type not in ("releases", "masters"):
+        raise ValueError("data_type must be 'releases' or 'masters'")
+    headers = {
+        "User-Agent": "Neodb/0.1",
+        "Authorization": f"Discogs token={settings.DISCOGS_TOKEN}",
+    }
+    response = requests.get(
+        f"https://api.discogs.com/{data_type}/{discogs_id}",
+        headers=headers,
+        timeout=_API_TIMEOUT,
+    )
+    response.raise_for_status()
+    return response.json()