From 742c58e239c25959fff3f2ca01033dc87acbdd1d Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 7 Jan 2024 17:30:37 -0500 Subject: [PATCH] support BoardGameGeek.com links --- catalog/common/downloaders.py | 9 +++- catalog/common/models.py | 2 + catalog/common/sites.py | 4 ++ catalog/game/models.py | 22 +++++++++ catalog/game/tests.py | 19 ++++++-- catalog/sites/__init__.py | 1 + catalog/sites/bgg.py | 78 ++++++++++++++++++++++++++++++ catalog/templates/_people.html | 2 +- catalog/templates/game.html | 70 +++++---------------------- common/static/scss/_sitelabel.scss | 8 +++ 10 files changed, 151 insertions(+), 64 deletions(-) create mode 100644 catalog/sites/bgg.py diff --git a/catalog/common/downloaders.py b/catalog/common/downloaders.py index 8035a06d..dde9f2b5 100644 --- a/catalog/common/downloaders.py +++ b/catalog/common/downloaders.py @@ -11,7 +11,7 @@ import filetype import requests from django.conf import settings from django.core.cache import cache -from lxml import html +from lxml import etree, html from PIL import Image from requests import Response from requests.exceptions import RequestException @@ -85,6 +85,9 @@ class MockResponse: self.content.decode("utf-8") ) + def xml(self): + return etree.fromstring(self.content, base_url=self.url) + @property def headers(self): return { @@ -93,6 +96,7 @@ class MockResponse: requests.Response.html = MockResponse.html # type:ignore +requests.Response.xml = MockResponse.xml # type:ignore class DownloaderResponse(Response): @@ -101,6 +105,9 @@ class DownloaderResponse(Response): self.content.decode("utf-8") ) + def xml(self): + return etree.fromstring(self.content, base_url=self.url) + class DownloadError(Exception): def __init__(self, downloader, msg=None): diff --git a/catalog/common/models.py b/catalog/common/models.py index b4e9c53f..8422469a 100644 --- a/catalog/common/models.py +++ b/catalog/common/models.py @@ -42,6 +42,7 @@ class SiteName(models.TextChoices): IGDB = "igdb", _("IGDB") Steam = "steam", _("Steam") Bangumi = "bangumi", _("Bangumi") + BGG = "bgg", _("BGG") # ApplePodcast = "apple_podcast", _("苹果播客") RSS = "rss", _("RSS") Discogs = "discogs", _("Discogs") @@ -87,6 +88,7 @@ class IdType(models.TextChoices): Spotify_Artist = "spotify_artist", _("Spotify艺术家") TMDB_Person = "tmdb_person", _("TMDB影人") IGDB = "igdb", _("IGDB游戏") + BGG = "bgg", _("BGG桌游") Steam = "steam", _("Steam游戏") Bangumi = "bangumi", _("Bangumi") ApplePodcast = "apple_podcast", _("苹果播客") diff --git a/catalog/common/sites.py b/catalog/common/sites.py index 7158ff28..c52c25d5 100644 --- a/catalog/common/sites.py +++ b/catalog/common/sites.py @@ -104,6 +104,10 @@ class AbstractSite: def query_str(content, query: str) -> str: return content.xpath(query)[0].strip() + @staticmethod + def query_list(content, query: str) -> list[str]: + return list(content.xpath(query)) + @classmethod def match_existing_item_for_resource( cls, resource: ExternalResource diff --git a/catalog/game/models.py b/catalog/game/models.py index 8bfe50ff..b68a3ac7 100644 --- a/catalog/game/models.py +++ b/catalog/game/models.py @@ -43,8 +43,11 @@ class Game(Item): "title", "brief", "other_title", + "designer", + "artist", "developer", "publisher", + "release_year", "release_date", "genre", "platform", @@ -59,6 +62,22 @@ class Game(Item): default=list, ) + designer = jsondata.ArrayField( + base_field=models.CharField(blank=True, default="", max_length=500), + verbose_name=_("设计者"), + null=True, + blank=True, + default=list, + ) + + artist = jsondata.ArrayField( + base_field=models.CharField(blank=True, default="", max_length=500), + verbose_name=_("艺术家"), + null=True, + blank=True, + default=list, + ) + developer = jsondata.ArrayField( base_field=models.CharField(blank=True, default="", max_length=500), verbose_name=_("开发商"), @@ -75,6 +94,8 @@ class Game(Item): default=list, ) + release_year = jsondata.IntegerField(verbose_name=_("发布年份"), null=True, blank=True) + release_date = jsondata.DateField( verbose_name=_("发布日期"), auto_now=False, @@ -106,6 +127,7 @@ class Game(Item): id_types = [ IdType.IGDB, IdType.Steam, + IdType.BGG, IdType.DoubanGame, IdType.Bangumi, ] diff --git a/catalog/game/tests.py b/catalog/game/tests.py index f46b0a98..5ed21ca9 100644 --- a/catalog/game/tests.py +++ b/catalog/game/tests.py @@ -118,10 +118,23 @@ class BangumiGameTestCase(TestCase): self.assertEqual(site.url, t_url) self.assertEqual(site.id_value, t_id_value) - @use_local_response + +class BoardGameGeekTestCase(TestCase): def test_scrape(self): - # TODO - pass + t_url = "https://boardgamegeek.com/boardgame/167791" + site = SiteManager.get_site_by_url(t_url) + self.assertIsNotNone(site) + self.assertEqual(site.ready, False) + site.get_resource_ready() + self.assertEqual(site.ready, True) + self.assertEqual(site.resource.metadata["title"], "Terraforming Mars") + self.assertIsInstance(site.resource.item, Game) + self.assertEqual(site.resource.item.id_type, IdType.BGG) + self.assertEqual(site.resource.item.id_value, "167791") + self.assertEqual(site.resource.item.platform, ["Boardgame"]) + self.assertEqual(site.resource.item.other_title[0], "殖民火星") + # self.assertEqual(site.resource.item.genre[0], ) + self.assertEqual(site.resource.item.designer, ["Jacob Fryxelius"]) class MultiGameSitesTestCase(TestCase): diff --git a/catalog/sites/__init__.py b/catalog/sites/__init__.py index 7518ebfb..71c75948 100644 --- a/catalog/sites/__init__.py +++ b/catalog/sites/__init__.py @@ -2,6 +2,7 @@ from ..common.sites import SiteManager from .apple_music import AppleMusic from .bandcamp import Bandcamp from .bangumi import Bangumi +from .bgg import BoardGameGeek from .bookstw import BooksTW from .discogs import DiscogsMaster, DiscogsRelease from .douban_book import DoubanBook diff --git a/catalog/sites/bgg.py b/catalog/sites/bgg.py new file mode 100644 index 00000000..7c2aee32 --- /dev/null +++ b/catalog/sites/bgg.py @@ -0,0 +1,78 @@ +""" +BoardGameGeek + +ref: https://boardgamegeek.com/wiki/page/BGG_XML_API2 +""" +import html + +from langdetect import detect +from loguru import logger + +from catalog.common import * +from catalog.models import * + + +@SiteManager.register +class BoardGameGeek(AbstractSite): + SITE_NAME = SiteName.BGG + ID_TYPE = IdType.BGG + URL_PATTERNS = [ + r"^\w+://boardgamegeek\.com/boardgame/(\d+)", + ] + WIKI_PROPERTY_ID = "?" + DEFAULT_MODEL = Game + + @classmethod + def id_to_url(cls, id_value): + return "https://boardgamegeek.com/boardgame/" + id_value + + def scrape(self): + api_url = f"https://boardgamegeek.com/xmlapi2/thing?stats=1&type=boardgame&id={self.id_value}" + content = BasicDownloader(api_url).download().xml() + items = list(content.xpath("/items/item")) # type: ignore + if not len(items): + raise ParseError("boardgame not found", field="id") + item = items[0] + title = self.query_str(item, "name[@type='primary']/@value") + other_title = self.query_list(item, "name[@type='alternate']/@value") + zh_title = [t for t in other_title if detect(t).startswith("zh")] + if zh_title: + for z in zh_title: + other_title.remove(z) + other_title = zh_title + other_title + + cover_image_url = self.query_str(item, "image/text()") + brief = html.unescape(self.query_str(item, "description/text()")) + year = self.query_str(item, "yearpublished/@value") + designer = self.query_list(item, "link[@type='boardgamedesigner']/@value") + artist = self.query_list(item, "link[@type='boardgameartist']/@value") + publisher = self.query_list(item, "link[@type='boardgamepublisher']/@value") + developer = self.query_list(item, "link[@type='boardgamedeveloper']/@value") + category = self.query_list(item, "link[@type='boardgamecategory']/@value") + + pd = ResourceContent( + metadata={ + "title": title, + "other_title": other_title, + "genre": category, + "developer": developer, + "publisher": publisher, + "designer": designer, + "artist": artist, + "release_year": year, + "platform": ["Boardgame"], + "brief": brief, + # "official_site": official_site, + "cover_image_url": cover_image_url, + } + ) + if pd.metadata["cover_image_url"]: + imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url) + try: + pd.cover_image = imgdl.download().content + pd.cover_image_extention = imgdl.extention + except Exception: + logger.debug( + f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}' + ) + return pd diff --git a/catalog/templates/_people.html b/catalog/templates/_people.html index 207ded86..c13abbba 100644 --- a/catalog/templates/_people.html +++ b/catalog/templates/_people.html @@ -3,7 +3,7 @@ {% if role %}{{ role }}:{% endif %} {% for p in people %} {% if forloop.counter <= max %} - {% if not forloop.first %}、{% endif %} + {% if not forloop.first %}/{% endif %} {{ p }} {% elif forloop.last %} 等 diff --git a/catalog/templates/game.html b/catalog/templates/game.html index 039730c0..b3739b63 100644 --- a/catalog/templates/game.html +++ b/catalog/templates/game.html @@ -11,73 +11,25 @@ {% load thumb %} {% block details %} -
- {% if item.other_title %} - {% trans '别名:' %} - {% for other_title in item.other_title %} - 5 %}style="display: none;"{% endif %}> - {{ other_title }} - {% if not forloop.last %}/{% endif %} - - {% endfor %} - {% if item.other_title|length > 5 %} - {% trans '更多' %} - - {% endif %} - {% endif %} -
-
- {% if item.genre %} - {% trans '类型:' %} - {% for genre in item.genre %} - {{ genre }} - {% if not forloop.last %}/{% endif %} - {% endfor %} - {% endif %} -
-
- {% if item.developer %} - {% trans '开发商:' %} - {% for developer in item.developer %} - {{ developer }} - {% if not forloop.last %}/{% endif %} - {% endfor %} - {% endif %} -
-
- {% if item.publisher %} - {% trans '发行商:' %} - {% for publisher in item.publisher %} - {{ publisher }} - {% if not forloop.last %}/{% endif %} - {% endfor %} - {% endif %} +
+ {% include '_people.html' with people=item.other_title _role='别名' max=99 %}
{% if item.release_date %} - {% trans '发行日期:' %}{{ item.release_date }} + {% trans '发行时间:' %}{{ item.release_date }} + {% elif item.release_year %} + {% trans '发行时间:' %}{{ item.release_year }} {% endif %}
+
{% include '_people.html' with people=item.platform role='平台' max=8 %}
+
{% include '_people.html' with people=item.genre role='类型' max=5 %}
+
{% include '_people.html' with people=item.designer role='设计者' max=3 %}
+
{% include '_people.html' with people=item.artist role='艺术家' max=3 %}
+
{% include '_people.html' with people=item.developer role='开发商' max=1 %}
+
{% include '_people.html' with people=item.publisher role='发行商' max=1 %}
{% if item.official_site %} {% trans '官方网站:' %}{{ item.official_site|urlizetrunc:24 }} {% endif %}
-
- {% if item.platform %} - {% trans '平台:' %} - {% for platform in item.platform %} - {{ platform }} - {% if not forloop.last %}/{% endif %} - {% endfor %} - {% endif %} -
{% endblock %} diff --git a/common/static/scss/_sitelabel.scss b/common/static/scss/_sitelabel.scss index 021d1493..989293af 100644 --- a/common/static/scss/_sitelabel.scss +++ b/common/static/scss/_sitelabel.scss @@ -45,6 +45,14 @@ font-weight: bold; } + .bgg { + background-color: #3F3A60; + color: #FFFFFF; + font-weight: bold; + //#FC3808; + border: none; + } + .steam { background: linear-gradient(30deg, #1387b8, #111d2e); color: white;