Complete discogs

This commit is contained in:
qilinz 2023-02-03 13:43:55 +01:00 committed by Henri Dickson
parent 95bc47dc5b
commit a73b908017
4 changed files with 194 additions and 0 deletions

View file

@ -33,6 +33,7 @@ class SiteName(models.TextChoices):
Bangumi = "bangumi", _("Bangumi") Bangumi = "bangumi", _("Bangumi")
# ApplePodcast = "apple_podcast", _("苹果播客") # ApplePodcast = "apple_podcast", _("苹果播客")
RSS = "rss", _("RSS") RSS = "rss", _("RSS")
Discogs = "discogs", _("Discogs")
class IdType(models.TextChoices): class IdType(models.TextChoices):

View file

@ -84,3 +84,60 @@ class BandcampTestCase(TestCase):
self.assertEqual(site.resource.metadata["title"], "In These Times") self.assertEqual(site.resource.metadata["title"], "In These Times")
self.assertEqual(site.resource.metadata["artist"], ["Makaya McCraven"]) self.assertEqual(site.resource.metadata["artist"], ["Makaya McCraven"])
self.assertIsInstance(site.resource.item, Album) self.assertIsInstance(site.resource.item, Album)
class DiscogsReleaseTestCase(TestCase):
    """Exercises URL parsing and scraping for a Discogs release page."""

    def test_parse(self):
        # Long-form URL (with the artist/title slug) and the short form that
        # the site object is expected to report back.
        expected_id = "25746742"
        slug_url = (
            "https://www.discogs.com/release/25746742-Phish-LP-on-LP-04-Ghost-5222000"
        )
        short_url = "https://www.discogs.com/release/25746742"

        # Lookup by id type must find a registered site that accepts the URL.
        registered = SiteManager.get_site_by_id_type(IdType.Discogs_Release)
        self.assertIsNotNone(registered)
        self.assertEqual(registered.validate_url(slug_url), True)

        # Lookup by URL must yield the short URL and the numeric id.
        resolved = SiteManager.get_site_by_url(slug_url)
        self.assertEqual(resolved.url, short_url)
        self.assertEqual(resolved.id_value, expected_id)

    @use_local_response
    def test_scrape(self):
        slug_url = (
            "https://www.discogs.com/release/25746742-Phish-LP-on-LP-04-Ghost-5222000"
        )
        site = SiteManager.get_site_by_url(slug_url)

        # The site only becomes ready once the resource has been fetched.
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)

        resource = site.resource
        self.assertEqual(
            resource.metadata["title"], 'LP on LP 04: "Ghost" 5/22/2000'
        )
        self.assertEqual(resource.metadata["artist"], ["Phish"])
        self.assertIsInstance(resource.item, Album)
        self.assertEqual(resource.item.barcode, "850014859275")
class DiscogsMasterTestCase(TestCase):
    """Exercises URL parsing and scraping for a Discogs master release page."""

    def test_parse(self):
        # Long-form URL (with the artist/title slug) and the short form that
        # the site object is expected to report back.
        expected_id = "14772"
        slug_url = "https://www.discogs.com/master/14772-Linda-Ronstadt-Silk-Purse"
        short_url = "https://www.discogs.com/master/14772"

        # Lookup by id type must find a registered site that accepts the URL.
        registered = SiteManager.get_site_by_id_type(IdType.Discogs_Master)
        self.assertIsNotNone(registered)
        self.assertEqual(registered.validate_url(slug_url), True)

        # Lookup by URL must yield the short URL and the numeric id.
        resolved = SiteManager.get_site_by_url(slug_url)
        self.assertEqual(resolved.url, short_url)
        self.assertEqual(resolved.id_value, expected_id)

    @use_local_response
    def test_scrape(self):
        slug_url = "https://www.discogs.com/master/14772-Linda-Ronstadt-Silk-Purse"
        site = SiteManager.get_site_by_url(slug_url)

        # The site only becomes ready once the resource has been fetched.
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)

        resource = site.resource
        self.assertEqual(resource.metadata["title"], "Silk Purse")
        self.assertEqual(resource.metadata["artist"], ["Linda Ronstadt"])
        self.assertIsInstance(resource.item, Album)

View file

@ -16,3 +16,5 @@ from .igdb import IGDB
from .steam import Steam from .steam import Steam
from .bandcamp import Bandcamp from .bandcamp import Bandcamp
from .bangumi import Bangumi from .bangumi import Bangumi
from .discogs import DiscogsRelease
from .discogs import DiscogsMaster

134
catalog/sites/discogs.py Normal file
View file

@ -0,0 +1,134 @@
"""
Discogs.
"""
from django.conf import settings
from catalog.common import *
from catalog.models import *
from .douban import *
import json
import logging
import requests
_logger = logging.getLogger(__name__)
@SiteManager.register
class DiscogsRelease(AbstractSite):
    """Site adapter for a Discogs release page.

    Release data is fetched through ``get_discogs_data("releases", ...)`` and
    mapped onto a ``ResourceContent`` whose default model is ``Album``.
    """

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Release
    # The "-artist-title" slug is optional so that the short URL built by
    # id_to_url() is accepted as well as the long form copied from a browser.
    # Group 1 is still the numeric release id.
    URL_PATTERNS = [r"https://www\.discogs\.com/release/(\d+)(?:-.+)?$"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the short Discogs URL for the release with ``id_value``."""
        return f"https://www.discogs.com/release/{id_value}"

    def scrape(self):
        """Fetch this release and build its ``ResourceContent``.

        Returns:
            A ``ResourceContent`` carrying the release metadata, a GTIN
            lookup id when a barcode identifier is listed, and the cover
            image bytes when the download succeeds.
        """
        release = get_discogs_data("releases", self.id_value)
        title = release.get("title")
        genre = release.get("genres")

        # List-valued fields are read with `or []` so that a key which is
        # missing or null yields an empty result instead of a TypeError.
        artist = [a.get("name") for a in release.get("artists") or []]
        track_list = [t.get("title") for t in release.get("tracklist") or []]
        company = [c.get("name") for c in release.get("companies") or []]

        # Media type and disc count are only taken when exactly one format
        # entry is listed; with several entries both stay None.
        media, disc_count = None, None
        formats = release.get("formats") or []
        if len(formats) == 1:
            media = formats[0].get("name")
            disc_count = formats[0].get("qty")

        # Normalise the barcode by stripping spaces and dashes. When several
        # "Barcode" identifiers are listed, the last one wins.
        barcode = None
        for identifier in release.get("identifiers") or []:
            if identifier.get("type") == "Barcode":
                raw_value = identifier.get("value") or ""
                barcode = raw_value.replace(" ", "").replace("-", "")

        # The first listed image is used as the cover.
        images = release.get("images") or []
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": track_list,
                "release_date": None,  # only year provided by API
                "company": company,
                "media": media,
                "disc_count": disc_count,
                "cover_image_url": image_url,
            }
        )
        if barcode:
            pd.lookup_ids[IdType.GTIN] = barcode
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                # Best effort: a failed cover download must not fail the scrape.
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd
@SiteManager.register
class DiscogsMaster(AbstractSite):
    """Site adapter for a Discogs master release page.

    Master data is fetched through ``get_discogs_data("masters", ...)`` and
    mapped onto a ``ResourceContent`` whose default model is ``Album``.
    """

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Master
    # The "-artist-title" slug is optional so that the short URL built by
    # id_to_url() is accepted as well as the long form copied from a browser.
    # Group 1 is still the numeric master id.
    URL_PATTERNS = [r"https://www\.discogs\.com/master/(\d+)(?:-.+)?$"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the short Discogs URL for the master release with ``id_value``."""
        return f"https://www.discogs.com/master/{id_value}"

    def scrape(self):
        """Fetch this master release and build its ``ResourceContent``.

        Returns:
            A ``ResourceContent`` carrying title, artist, genre, track list
            and cover URL, plus the cover image bytes when the download
            succeeds.
        """
        master_release = get_discogs_data("masters", self.id_value)
        title = master_release.get("title")
        genre = master_release.get("genres")

        # List-valued fields are read with `or []` so that a key which is
        # missing or null yields an empty result instead of a TypeError.
        artist = [a.get("name") for a in master_release.get("artists") or []]
        track_list = [
            t.get("title") for t in master_release.get("tracklist") or []
        ]

        # The first listed image is used as the cover.
        images = master_release.get("images") or []
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": track_list,
                "cover_image_url": image_url,
            }
        )
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                # Best effort: a failed cover download must not fail the scrape.
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd
def get_discogs_data(data_type: str, discogs_id, timeout: float = 10):
    """Fetch one resource from the Discogs API and return the decoded JSON.

    Args:
        data_type: API collection to query; must be "releases" or "masters".
        discogs_id: Identifier of the resource within that collection.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The JSON response body decoded with ``json.loads``.

    Raises:
        ValueError: If ``data_type`` is not one of the supported collections.
        requests.HTTPError: If the API answers with an error status.
    """
    if data_type not in ("releases", "masters"):
        raise ValueError("data_type can only be one of ('releases', 'masters')")
    user_agent_string = "Neodb/0.1"
    user_token = settings.DISCOGS_TOKEN
    headers = {
        "User-Agent": user_agent_string,
        "Authorization": f"Discogs token={user_token}",
    }
    # Without an explicit timeout, requests may wait on the server forever.
    response = requests.get(
        f"https://api.discogs.com/{data_type}/{discogs_id}",
        headers=headers,
        timeout=timeout,
    )
    # Fail here on an error status rather than handing an error payload to
    # callers that expect release data.
    response.raise_for_status()
    data = json.loads(response.text)
    return data