2024-01-07 17:30:37 -05:00
|
|
|
"""
|
|
|
|
BoardGameGeek
|
|
|
|
|
|
|
|
ref: https://boardgamegeek.com/wiki/page/BGG_XML_API2
|
|
|
|
"""
|
2024-06-02 14:50:07 -04:00
|
|
|
|
2024-01-07 17:30:37 -05:00
|
|
|
import html
|
|
|
|
|
|
|
|
from langdetect import detect
|
|
|
|
from loguru import logger
|
|
|
|
|
|
|
|
from catalog.common import *
|
|
|
|
from catalog.models import *
|
2024-07-13 00:16:47 -04:00
|
|
|
from common.models.lang import detect_language
|
2024-01-09 17:23:04 -05:00
|
|
|
|
|
|
|
|
2024-01-07 17:30:37 -05:00
|
|
|
@SiteManager.register
class BoardGameGeek(AbstractSite):
    """Site adapter that builds a ``Game`` resource from BoardGameGeek data.

    Metadata is read from the BGG XML API2 ``thing`` endpoint
    (https://boardgamegeek.com/wiki/page/BGG_XML_API2), not from the HTML
    page of the game.
    """

    SITE_NAME = SiteName.BGG
    ID_TYPE = IdType.BGG
    # The single capture group is the numeric id that follows "/boardgame/".
    URL_PATTERNS = [
        r"^\w+://boardgamegeek\.com/boardgame/(\d+)",
    ]
    # NOTE(review): "?" looks like a placeholder for a not-yet-known property
    # id -- confirm against how other sites fill this attribute.
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Game

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical BoardGameGeek page URL for ``id_value``.

        The id is interpolated rather than concatenated, so an ``int`` id
        works as well as a ``str`` one.
        """
        return f"https://boardgamegeek.com/boardgame/{id_value}"

    def scrape(self):
        """Download the XML record for ``self.id_value`` and map it to metadata.

        Returns:
            A ``ResourceContent`` whose ``metadata`` dict carries the titles,
            description, credits, release year and cover image URL.

        Raises:
            ParseError: if the API response contains no ``/items/item`` node.
        """
        api_url = f"https://boardgamegeek.com/xmlapi2/thing?stats=1&type=boardgame&id={self.id_value}"
        content = BasicDownloader(api_url).download().xml()
        items = list(content.xpath("/items/item"))  # type: ignore
        if not items:
            raise ParseError(scraper=self, field="id")
        # Only the first <item> of the response is used.
        item = items[0]

        def link_values(link_type):
            # Every credit/category is a <link type="..." value="..."/> child.
            return self.query_list(item, f"link[@type='{link_type}']/@value")

        title = self.query_str(item, "name[@type='primary']/@value")
        other_title = self.query_list(item, "name[@type='alternate']/@value")
        # The primary title goes first; each title gets its own detected language.
        localized_title = [
            {"lang": detect_language(t), "text": t} for t in [title] + other_title
        ]
        cover_image_url = self.query_str(item, "image/text()")
        # The description text contains HTML entities; decode them once here.
        brief = html.unescape(self.query_str(item, "description/text()"))
        year = self.query_str(item, "yearpublished/@value")
        designer = link_values("boardgamedesigner")
        artist = link_values("boardgameartist")
        publisher = link_values("boardgamepublisher")
        developer = link_values("boardgamedeveloper")
        category = link_values("boardgamecategory")

        pd = ResourceContent(
            metadata={
                "localized_title": localized_title,
                # The description is tagged "en" unconditionally (no detection).
                "localized_description": (
                    [{"lang": "en", "text": brief}] if brief else []
                ),
                "title": title,
                "other_title": other_title,
                "genre": category,
                "developer": developer,
                "publisher": publisher,
                "designer": designer,
                "artist": artist,
                "release_year": year,
                "platform": ["Boardgame"],
                "brief": brief,
                # "official_site": official_site,
                "cover_image_url": cover_image_url,
            }
        )
        return pd
|