2023-02-03 13:43:55 +01:00
|
|
|
"""
|
|
|
|
Discogs.
|
|
|
|
"""
|
2024-06-02 14:50:07 -04:00
|
|
|
|
2023-08-10 11:27:31 -04:00
|
|
|
import logging
|
|
|
|
|
2023-02-03 13:43:55 +01:00
|
|
|
from django.conf import settings
|
2023-08-10 11:27:31 -04:00
|
|
|
|
2023-02-03 13:43:55 +01:00
|
|
|
from catalog.common import *
|
|
|
|
from catalog.models import *
|
2023-02-03 16:33:58 -05:00
|
|
|
from catalog.music.utils import upc_to_gtin_13
|
2023-02-03 13:43:55 +01:00
|
|
|
|
2023-08-10 11:27:31 -04:00
|
|
|
from .douban import *
|
2023-02-03 13:43:55 +01:00
|
|
|
|
|
|
|
_logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
@SiteManager.register
class DiscogsRelease(AbstractSite):
    """Site adapter for Discogs release pages (``/release/<id>``).

    Release data is fetched with ``get_discogs_data("releases", ...)`` and
    mapped onto the metadata of the default model, ``Album``.
    """

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Release
    # Plain, two-letter-locale ("/fr/") and full-locale ("/pt_BR/") URL forms.
    URL_PATTERNS = [
        r"https://www\.discogs\.com/release/(\d+)[^\d]*",
        r"https://www\.discogs\.com/[a-z]{2}/release/(\d+)[^\d]*",
        r"https://www\.discogs\.com/[a-z]{2}_[A-Z]{2}/release/(\d+)[^\d]*",
    ]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical Discogs URL for the release ``id_value``."""
        return f"https://www.discogs.com/release/{id_value}"

    def scrape(self):
        """Fetch the release identified by ``self.id_value`` and build its content.

        Returns:
            A ``ResourceContent`` whose metadata carries title, artists,
            genres, track list, companies, media, disc count and cover image
            URL. When the release has a barcode identifier, it is also
            recorded in ``lookup_ids`` under ``IdType.GTIN``.
        """
        release = get_discogs_data("releases", self.id_value)
        title = release.get("title")

        # Every list-valued field may be missing or null in the payload;
        # "or []" keeps the comprehensions from iterating over None.
        artist = [entry.get("name") for entry in release.get("artists") or []]
        genre = release.get("genres", [])
        track_list = [
            track.get("title") for track in release.get("tracklist") or []
        ]
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the stored company list is deterministic between scrapes.
        company = list(
            dict.fromkeys(
                entry.get("name") for entry in release.get("companies") or []
            )
        )

        # Media and disc count are only recorded when the release has exactly
        # one format entry.
        media, disc_count = None, None
        formats = release.get("formats") or []
        if len(formats) == 1:
            media = formats[0].get("name")
            disc_count = formats[0].get("qty")

        # If several barcode identifiers are present, the last one wins.
        barcode = None
        for identifier in release.get("identifiers") or []:
            if identifier.get("type") != "Barcode":
                continue
            raw_value = identifier.get("value")
            if raw_value is None:
                continue
            barcode = upc_to_gtin_13(raw_value.replace(" ", "").replace("-", ""))

        images = release.get("images") or []
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "localized_title": [{"lang": "en", "text": title}],
                "artist": artist,
                "genre": genre,
                "track_list": "\n".join(track_list),
                # "release_date": None, # only year provided by API
                "company": company,
                "media": media,
                "disc_count": disc_count,
                "cover_image_url": image_url,
            }
        )
        if barcode:
            pd.lookup_ids[IdType.GTIN] = barcode
        return pd
|
|
|
|
|
|
|
|
|
|
|
|
@SiteManager.register
class DiscogsMaster(AbstractSite):
    """Site adapter for Discogs master-release pages (``/master/<id>``).

    Master data is fetched with ``get_discogs_data("masters", ...)`` and
    mapped onto the metadata of the default model, ``Album``.
    """

    SITE_NAME = SiteName.Discogs
    ID_TYPE = IdType.Discogs_Master
    # Plain form plus any single path segment (e.g. a locale) before "master".
    URL_PATTERNS = [
        r"^https://www\.discogs\.com/master/(\d+)[^\d]*",
        r"^https://www\.discogs\.com/[\w\-]+/master/(\d+)[^\d]*",
    ]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical Discogs URL for the master release ``id_value``."""
        return f"https://www.discogs.com/master/{id_value}"

    def scrape(self):
        """Fetch the master release identified by ``self.id_value``.

        Returns:
            A ``ResourceContent`` whose metadata carries title, artists,
            genres, track list and cover image URL.
        """
        master_release = get_discogs_data("masters", self.id_value)
        title = master_release.get("title")

        # List-valued fields may be missing or null in the payload; "or []"
        # keeps the comprehensions from iterating over None.
        artist = [
            entry.get("name") for entry in master_release.get("artists") or []
        ]
        genre = master_release.get("genres", [])
        track_list = [
            track.get("title") for track in master_release.get("tracklist") or []
        ]

        # A master without an "images" key used to raise TypeError on
        # len(None); treat a missing or empty list as "no cover image".
        images = master_release.get("images") or []
        image_url = images[0].get("uri") if images else None

        pd = ResourceContent(
            metadata={
                "title": title,
                "localized_title": [{"lang": "en", "text": title}],
                "artist": artist,
                "genre": genre,
                "track_list": "\n".join(track_list),
                "cover_image_url": image_url,
            }
        )
        return pd
|
|
|
|
|
|
|
|
|
|
|
|
def get_discogs_data(data_type: str, discogs_id):
    """Fetch one record from the Discogs API and return its decoded JSON.

    Args:
        data_type: API collection to query; must be ``"releases"`` or
            ``"masters"``.
        discogs_id: Identifier interpolated into the request URL.

    Returns:
        The JSON-decoded body of the API response.

    Raises:
        ValueError: If ``data_type`` is not one of the supported collections.
    """
    if data_type not in ("releases", "masters"):
        raise ValueError("data_type can only be 'releases' or 'masters'")

    # The user agent and API token both come from Django settings.
    headers = {
        "User-Agent": settings.NEODB_USER_AGENT,
        "Authorization": f"Discogs token={settings.DISCOGS_API_KEY}",
    }
    api_url = f"https://api.discogs.com/{data_type}/{discogs_id}"
    return BasicDownloader(api_url, headers=headers).download().json()
|