lib.itmens/catalog/sites/bgg.py

80 lines
2.9 KiB
Python
Raw Normal View History

2024-01-07 17:30:37 -05:00
"""
BoardGameGeek
ref: https://boardgamegeek.com/wiki/page/BGG_XML_API2
"""
2024-06-02 14:50:07 -04:00
2024-01-07 17:30:37 -05:00
import html
from langdetect import detect
from loguru import logger
from catalog.common import *
2025-01-04 23:48:08 -05:00
from catalog.game.models import GameReleaseType
2024-01-07 17:30:37 -05:00
from catalog.models import *
2024-07-13 00:16:47 -04:00
from common.models.lang import detect_language
2024-01-09 17:23:04 -05:00
2024-01-07 17:30:37 -05:00
@SiteManager.register
class BoardGameGeek(AbstractSite):
    """Site adapter for BoardGameGeek, backed by the BGG XML API2.

    Recognizes ``boardgamegeek.com/boardgame/<id>`` URLs and builds the
    metadata for a ``Game`` from the ``thing`` endpoint of the XML API.
    """

    SITE_NAME = SiteName.BGG
    ID_TYPE = IdType.BGG
    # NOTE(review): only ``/boardgame/<id>`` URLs are matched here, while
    # ``scrape`` also requests the ``boardgameexpansion`` type from the API;
    # confirm whether expansion page URLs should be recognized as well.
    URL_PATTERNS = [
        r"^\w+://boardgamegeek\.com/boardgame/(\d+)",
    ]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Game

    @classmethod
    def id_to_url(cls, id_value):
        """Return the BoardGameGeek page URL for ``id_value``.

        The id is interpolated rather than concatenated, so a numeric id
        works as well as its string form.
        """
        return f"https://boardgamegeek.com/boardgame/{id_value}"

    def scrape(self):
        """Fetch this item from the BGG XML API2 and return its metadata.

        Returns:
            A ``ResourceContent`` whose ``metadata`` dict holds titles,
            description, credits, release year/type and cover image URL.

        Raises:
            ParseError: if the API response contains no ``/items/item``
                element for ``self.id_value``.
        """
        api_url = f"https://boardgamegeek.com/xmlapi2/thing?stats=1&type=boardgame,boardgameexpansion&id={self.id_value}"
        content = BasicDownloader(api_url).download().xml()
        items = list(content.xpath("/items/item"))  # type: ignore
        if not items:
            raise ParseError(scraper=self, field="id")
        item = items[0]

        def linked(kind):
            # Values of every <link> element of the given BGG link type.
            return self.query_list(item, f"link[@type='{kind}']/@value")

        typ = self.query_str(item, "@type")
        title = self.query_str(item, "name[@type='primary']/@value")
        other_title = self.query_list(item, "name[@type='alternate']/@value")
        # The primary title comes first, followed by the alternates; each
        # entry is tagged with whatever detect_language() reports for it.
        localized_title = [
            {"lang": detect_language(t), "text": t} for t in [title] + other_title
        ]
        cover_image_url = self.query_str(item, "image/text()")
        # The description arrives HTML-escaped inside the XML payload.
        brief = html.unescape(self.query_str(item, "description/text()"))
        year = self.query_str(item, "yearpublished/@value")
        designer = linked("boardgamedesigner")
        artist = linked("boardgameartist")
        publisher = linked("boardgamepublisher")
        developer = linked("boardgamedeveloper")
        category = linked("boardgamecategory")

        if typ == "boardgameexpansion":
            release_type = GameReleaseType.EXPANSION
        else:
            release_type = GameReleaseType.GAME

        # The description is always recorded as English; no language
        # detection is run on it. An empty description yields no entry.
        localized_description = [{"lang": "en", "text": brief}] if brief else []

        return ResourceContent(
            metadata={
                "localized_title": localized_title,
                "localized_description": localized_description,
                "title": title,
                "other_title": other_title,
                "genre": category,
                "developer": developer,
                "publisher": publisher,
                "designer": designer,
                "artist": artist,
                "release_year": year,
                "release_type": release_type,
                "platform": ["Boardgame"],
                "brief": brief,
                "cover_image_url": cover_image_url,
            }
        )