Complete discogs
This commit is contained in:
parent
95bc47dc5b
commit
a73b908017
4 changed files with 194 additions and 0 deletions
|
@ -33,6 +33,7 @@ class SiteName(models.TextChoices):
|
|||
Bangumi = "bangumi", _("Bangumi")
|
||||
# ApplePodcast = "apple_podcast", _("苹果播客")
|
||||
RSS = "rss", _("RSS")
|
||||
Discogs = "discogs", _("Discogs")
|
||||
|
||||
|
||||
class IdType(models.TextChoices):
|
||||
|
|
|
@ -84,3 +84,60 @@ class BandcampTestCase(TestCase):
|
|||
self.assertEqual(site.resource.metadata["title"], "In These Times")
|
||||
self.assertEqual(site.resource.metadata["artist"], ["Makaya McCraven"])
|
||||
self.assertIsInstance(site.resource.item, Album)
|
||||
|
||||
|
||||
class DiscogsReleaseTestCase(TestCase):
    """URL parsing and scraping checks for Discogs release pages."""

    def test_parse(self):
        """A slugged release URL resolves to the canonical URL and numeric id."""
        slug_url = (
            "https://www.discogs.com/release/25746742-Phish-LP-on-LP-04-Ghost-5222000"
        )
        canonical_url = "https://www.discogs.com/release/25746742"
        expected_id = "25746742"

        # Lookup by id type must yield a site that accepts the slugged URL.
        site_by_type = SiteManager.get_site_by_id_type(IdType.Discogs_Release)
        self.assertIsNotNone(site_by_type)
        self.assertEqual(site_by_type.validate_url(slug_url), True)

        # Lookup by URL must normalise to the slug-less form.
        site_by_url = SiteManager.get_site_by_url(slug_url)
        self.assertEqual(site_by_url.url, canonical_url)
        self.assertEqual(site_by_url.id_value, expected_id)

    @use_local_response
    def test_scrape(self):
        """Fetching the release fills in metadata, the Album item and its barcode."""
        slug_url = (
            "https://www.discogs.com/release/25746742-Phish-LP-on-LP-04-Ghost-5222000"
        )
        release_site = SiteManager.get_site_by_url(slug_url)

        # The site only becomes ready once the resource has been fetched.
        self.assertEqual(release_site.ready, False)
        release_site.get_resource_ready()
        self.assertEqual(release_site.ready, True)

        self.assertEqual(
            release_site.resource.metadata["title"],
            'LP on LP 04: "Ghost" 5/22/2000',
        )
        self.assertEqual(release_site.resource.metadata["artist"], ["Phish"])
        self.assertIsInstance(release_site.resource.item, Album)
        self.assertEqual(release_site.resource.item.barcode, "850014859275")
|
||||
|
||||
|
||||
class DiscogsMasterTestCase(TestCase):
    """URL parsing and scraping checks for Discogs master-release pages."""

    def test_parse(self):
        """A slugged master URL resolves to the canonical URL and numeric id."""
        slug_url = "https://www.discogs.com/master/14772-Linda-Ronstadt-Silk-Purse"
        canonical_url = "https://www.discogs.com/master/14772"
        expected_id = "14772"

        # Lookup by id type must yield a site that accepts the slugged URL.
        site_by_type = SiteManager.get_site_by_id_type(IdType.Discogs_Master)
        self.assertIsNotNone(site_by_type)
        self.assertEqual(site_by_type.validate_url(slug_url), True)

        # Lookup by URL must normalise to the slug-less form.
        site_by_url = SiteManager.get_site_by_url(slug_url)
        self.assertEqual(site_by_url.url, canonical_url)
        self.assertEqual(site_by_url.id_value, expected_id)

    @use_local_response
    def test_scrape(self):
        """Fetching the master release fills in metadata and yields an Album item."""
        slug_url = "https://www.discogs.com/master/14772-Linda-Ronstadt-Silk-Purse"
        master_site = SiteManager.get_site_by_url(slug_url)

        # The site only becomes ready once the resource has been fetched.
        self.assertEqual(master_site.ready, False)
        master_site.get_resource_ready()
        self.assertEqual(master_site.ready, True)

        self.assertEqual(master_site.resource.metadata["title"], "Silk Purse")
        self.assertEqual(master_site.resource.metadata["artist"], ["Linda Ronstadt"])
        self.assertIsInstance(master_site.resource.item, Album)
|
||||
|
|
|
@ -16,3 +16,5 @@ from .igdb import IGDB
|
|||
from .steam import Steam
|
||||
from .bandcamp import Bandcamp
|
||||
from .bangumi import Bangumi
|
||||
from .discogs import DiscogsRelease
|
||||
from .discogs import DiscogsMaster
|
||||
|
|
134
catalog/sites/discogs.py
Normal file
134
catalog/sites/discogs.py
Normal file
|
@ -0,0 +1,134 @@
|
|||
"""
|
||||
Discogs.
|
||||
"""
|
||||
from django.conf import settings
|
||||
from catalog.common import *
|
||||
from catalog.models import *
|
||||
from .douban import *
|
||||
import json
|
||||
import logging
|
||||
import requests
|
||||
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@SiteManager.register
class DiscogsRelease(AbstractSite):
    """Site adapter for a single Discogs release, mapped onto ``Album``.

    Metadata comes from ``get_discogs_data("releases", ...)`` rather than
    from parsing the release web page.
    """

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Release
    # Two forms are accepted: the slugged URL shown in the browser
    # ("/release/<id>-<artist>-<title>") and the slug-less canonical URL that
    # ``id_to_url`` builds, so the adapter recognises its own output.
    URL_PATTERNS = [
        r"https://www\.discogs\.com/release/(\d+)-.+",
        r"https://www\.discogs\.com/release/(\d+)$",
    ]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical (slug-less) Discogs URL for a release id."""
        return f"https://www.discogs.com/release/{id_value}"

    def scrape(self):
        """Fetch this release and build a ``ResourceContent`` from it.

        Returns:
            A ``ResourceContent`` whose metadata holds ``title``, ``artist``,
            ``genre``, ``track_list``, ``release_date`` (always ``None``),
            ``company``, ``media``, ``disc_count`` and ``cover_image_url``.
            A barcode, when present, is stored in
            ``lookup_ids[IdType.GTIN]``. The cover image is downloaded on a
            best-effort basis; a failed download is only logged.
        """
        release = get_discogs_data("releases", self.id_value)
        title = release.get("title")
        genre = release.get("genres")

        # Treat missing or null list fields as empty so a sparse payload
        # yields empty metadata instead of a TypeError.
        artist = [entry.get("name") for entry in release.get("artists") or []]
        track_list = [entry.get("title") for entry in release.get("tracklist") or []]
        company = [entry.get("name") for entry in release.get("companies") or []]

        # Media and disc count are only recorded when there is exactly one
        # format entry; with several formats they are left unset.
        media, disc_count = None, None
        formats = release.get("formats") or []
        if len(formats) == 1:
            media = formats[0].get("name")
            disc_count = formats[0].get("qty")

        # Normalise the barcode by dropping spaces and dashes. When several
        # non-empty barcodes are listed, the last one wins.
        barcode = None
        for identifier in release.get("identifiers") or []:
            value = identifier.get("value")
            if identifier.get("type") == "Barcode" and value:
                barcode = value.replace(" ", "").replace("-", "")

        # The first image in the payload is used as the cover.
        images = release.get("images") or []
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": track_list,
                "release_date": None,  # only year provided by API
                "company": company,
                "media": media,
                "disc_count": disc_count,
                "cover_image_url": image_url,
            }
        )
        if barcode:
            pd.lookup_ids[IdType.GTIN] = barcode
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                # Deliberately best-effort: a missing cover must not fail
                # the whole scrape.
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd
|
||||
|
||||
|
||||
@SiteManager.register
class DiscogsMaster(AbstractSite):
    """Site adapter for a Discogs master release, mapped onto ``Album``.

    Metadata comes from ``get_discogs_data("masters", ...)`` rather than
    from parsing the master-release web page.
    """

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Master
    # Two forms are accepted: the slugged URL shown in the browser
    # ("/master/<id>-<artist>-<title>") and the slug-less canonical URL that
    # ``id_to_url`` builds, so the adapter recognises its own output.
    URL_PATTERNS = [
        r"https://www\.discogs\.com/master/(\d+)-.+",
        r"https://www\.discogs\.com/master/(\d+)$",
    ]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical (slug-less) Discogs URL for a master id."""
        return f"https://www.discogs.com/master/{id_value}"

    def scrape(self):
        """Fetch this master release and build a ``ResourceContent`` from it.

        Returns:
            A ``ResourceContent`` whose metadata holds ``title``, ``artist``,
            ``genre``, ``track_list`` and ``cover_image_url``. The cover
            image is downloaded on a best-effort basis; a failed download is
            only logged.
        """
        master_release = get_discogs_data("masters", self.id_value)
        title = master_release.get("title")
        genre = master_release.get("genres")

        # Treat missing or null list fields as empty so a sparse payload
        # yields empty metadata instead of a TypeError.
        artist = [entry.get("name") for entry in master_release.get("artists") or []]
        track_list = [
            entry.get("title") for entry in master_release.get("tracklist") or []
        ]

        # The first image in the payload is used as the cover.
        images = master_release.get("images") or []
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": track_list,
                "cover_image_url": image_url,
            }
        )
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                # Deliberately best-effort: a missing cover must not fail
                # the whole scrape.
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd
|
||||
|
||||
|
||||
def get_discogs_data(data_type: str, discogs_id, timeout: float = 10):
    """Fetch one record from the Discogs REST API and return the decoded JSON.

    Args:
        data_type: API collection to query; must be "releases" or "masters".
        discogs_id: Discogs id of the record, interpolated into the URL.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The JSON response body decoded with ``json.loads``.

    Raises:
        ValueError: If ``data_type`` is not "releases" or "masters".
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    if data_type not in ("releases", "masters"):
        raise ValueError("data_type can only be in ('releases' or 'masters')")

    headers = {
        "User-Agent": "Neodb/0.1",
        # Token authentication; the token is read from Django settings.
        "Authorization": f"Discogs token={settings.DISCOGS_TOKEN}",
    }
    response = requests.get(
        f"https://api.discogs.com/{data_type}/{discogs_id}",
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an error status instead of handing the caller an error
    # payload that looks like an (empty) record.
    response.raise_for_status()
    return json.loads(response.text)
|
Loading…
Add table
Reference in a new issue