lib.itmens/catalog/sites/discogs.py

138 lines
4.7 KiB
Python
Raw Normal View History

2023-02-03 13:43:55 +01:00
"""
Discogs.
"""
from django.conf import settings
from catalog.common import *
from catalog.models import *
from catalog.music.utils import upc_to_gtin_13
2023-02-03 13:43:55 +01:00
from .douban import *
import json
import logging
import requests
_logger = logging.getLogger(__name__)
@SiteManager.register
class DiscogsRelease(AbstractSite):
    """Site adapter that scrapes a single Discogs release (``/release/<id>``)."""

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Release
    URL_PATTERNS = [r"https://www\.discogs\.com/release/(\d+)[^\d]*"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the Discogs web URL for the release identified by ``id_value``."""
        return f"https://www.discogs.com/release/{id_value}"

    def scrape(self):
        """Fetch the release from the Discogs API and build a ``ResourceContent``.

        Returns:
            A ``ResourceContent`` whose metadata carries title, artist, genre,
            track list, company, media, disc count and cover image URL. A
            barcode, when present, is stored under ``lookup_ids[IdType.GTIN]``.
            The cover image is downloaded on a best-effort basis.
        """
        release = get_discogs_data("releases", self.id_value)

        # ``dict.get`` yields None for an absent (or null) key; fall back to an
        # empty list so a sparse payload does not crash iteration or ``len``.
        artists = release.get("artists") or []
        tracks = release.get("tracklist") or []
        companies = release.get("companies") or []
        formats = release.get("formats") or []
        identifiers = release.get("identifiers") or []
        images = release.get("images") or []

        title = release.get("title")
        artist = [entry.get("name") for entry in artists]
        genre = release.get("genres", [])
        track_list = [track.get("title") for track in tracks]
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is stable between runs (a set's iteration order is not).
        company = list(dict.fromkeys(entry.get("name") for entry in companies))

        # Media and disc count are only recorded when exactly one format is listed.
        media, disc_count = None, None
        if len(formats) == 1:
            media = formats[0].get("name")
            disc_count = formats[0].get("qty")

        # When several barcode identifiers exist, the last one wins.
        barcode = None
        for identifier in identifiers:
            value = identifier.get("value")
            if identifier.get("type") != "Barcode" or value is None:
                continue
            barcode = upc_to_gtin_13(value.replace(" ", "").replace("-", ""))

        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": "\n".join(track_list),
                "release_date": None,  # only year provided by API
                "company": company,
                "media": media,
                "disc_count": disc_count,
                "cover_image_url": image_url,
            }
        )
        if barcode:
            pd.lookup_ids[IdType.GTIN] = barcode
        if image_url:
            imgdl = BasicImageDownloader(image_url, self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                # A missing cover must not fail the whole scrape; just note it.
                _logger.debug(
                    "failed to download cover for %s from %s", self.url, image_url
                )
        return pd
@SiteManager.register
class DiscogsMaster(AbstractSite):
    """Site adapter that scrapes a Discogs master release (``/master/<id>``)."""

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Master
    URL_PATTERNS = [r"https://www\.discogs\.com/master/(\d+)[^\d]*"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the Discogs web URL for the master identified by ``id_value``."""
        return f"https://www.discogs.com/master/{id_value}"

    def scrape(self):
        """Fetch the master release from the Discogs API and build a ``ResourceContent``.

        Returns:
            A ``ResourceContent`` whose metadata carries title, artist, genre,
            track list and cover image URL. The cover image is downloaded on a
            best-effort basis.
        """
        master_release = get_discogs_data("masters", self.id_value)

        # ``dict.get`` yields None for an absent (or null) key; fall back to an
        # empty list so a sparse payload does not crash iteration or ``len``.
        artists = master_release.get("artists") or []
        tracks = master_release.get("tracklist") or []
        images = master_release.get("images") or []

        title = master_release.get("title")
        artist = [entry.get("name") for entry in artists]
        genre = master_release.get("genres", [])
        track_list = [track.get("title") for track in tracks]
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": "\n".join(track_list),
                "cover_image_url": image_url,
            }
        )
        if image_url:
            imgdl = BasicImageDownloader(image_url, self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                # A missing cover must not fail the whole scrape; just note it.
                _logger.debug(
                    "failed to download cover for %s from %s", self.url, image_url
                )
        return pd
def get_discogs_data(data_type: str, discogs_id):
    """Fetch one resource from the Discogs REST API and return its decoded JSON.

    Args:
        data_type: API collection to query; must be ``"releases"`` or
            ``"masters"``.
        discogs_id: Identifier of the resource within that collection. It is
            interpolated into the request URL as-is.

    Returns:
        The value produced by ``BasicDownloader(...).download().json()`` for
        the API response.

    Raises:
        ValueError: If ``data_type`` is not one of the supported collections.
    """
    if data_type not in ("releases", "masters"):
        raise ValueError("data_type can only be 'releases' or 'masters'")
    headers = {
        "User-Agent": "Neodb/0.1",
        # Personal access token read from the Django settings.
        "Authorization": f"Discogs token={settings.DISCOGS_API_KEY}",
    }
    api_url = f"https://api.discogs.com/{data_type}/{discogs_id}"
    return BasicDownloader(api_url, headers=headers).download().json()