search IGDB for games

This commit is contained in:
mein Name 2024-12-08 19:33:05 +00:00 committed by Henri Dickson
parent 4a88c2ecf4
commit d80045c5ba
3 changed files with 113 additions and 59 deletions

View file

@ -8,8 +8,9 @@ import requests
from django.conf import settings
from lxml import html
from catalog.common import *
from catalog.models import *
from catalog.common import BasicDownloader, ItemCategory, SiteManager, SiteName
from catalog.search.models import ExternalSearchResultItem
from catalog.sites.igdb import IGDB as IGDB_Site
from catalog.sites.spotify import get_spotify_token
from catalog.sites.tmdb import TMDB_DEFAULT_LANG
@ -17,41 +18,6 @@ SEARCH_PAGE_SIZE = 5 # not all apis support page size
logger = logging.getLogger(__name__)
class SearchResultItem:
    """Lightweight record describing one hit returned by an external site search."""

    def __init__(
        self, category, source_site, source_url, title, subtitle, brief, cover_url
    ):
        """Store the fields of a single search hit.

        Args:
            category: category object exposing ``.label``; callers in this
                module may also pass a falsy placeholder when it is unknown.
            source_site: name of the site the hit came from.
            source_url: URL of the hit on that site.
            title: title to display.
            subtitle: secondary line to display.
            brief: description text to display.
            cover_url: URL of the cover image.
        """
        self.class_name = "base"
        self.category = category
        self.external_resources = {
            "all": [
                {
                    "url": source_url,
                    "site_name": source_site,
                    "site_label": source_site,
                }
            ]
        }
        self.source_site = source_site
        self.source_url = source_url
        self.display_title = title
        self.subtitle = subtitle
        self.display_description = brief
        self.cover_image_url = cover_url

    @property
    def verbose_category_name(self):
        """Return the category label, or "" when no category is set."""
        # A falsy category (e.g. "" or None) has no ``label`` attribute;
        # dereferencing it unconditionally would raise AttributeError.
        return self.category.label if self.category else ""

    @property
    def url(self):
        """Return the local search URL that looks up ``source_url``."""
        return f"/search?q={quote_plus(self.source_url)}"

    @property
    def scraped(self):
        """Always False for this class."""
        return False
class Goodreads:
@classmethod
def search(cls, q: str, page=1):
@ -80,14 +46,14 @@ class Goodreads:
if res:
subtitle = f"{res.metadata.get('pub_year')} {', '.join(res.metadata.get('author', []))} {', '.join(res.metadata.get('translator', []))}"
results.append(
SearchResultItem(
ExternalSearchResultItem(
ItemCategory.Book,
SiteName.Goodreads,
res.url,
res.metadata["title"],
subtitle,
res.metadata.get("brief"),
res.metadata.get("cover_image_url"),
res.metadata.get("brief", ""),
res.metadata.get("cover_image_url", ""),
)
)
else:
@ -95,15 +61,15 @@ class Goodreads:
books = h.xpath('//tr[@itemtype="http://schema.org/Book"]')
for c in books: # type:ignore
el_cover = c.xpath('.//img[@class="bookCover"]/@src')
cover = el_cover[0] if el_cover else None
cover = el_cover[0] if el_cover else ""
el_title = c.xpath('.//a[@class="bookTitle"]//text()')
title = "".join(el_title).strip() if el_title else None
title = "".join(el_title).strip() if el_title else "Unkown Title"
el_url = c.xpath('.//a[@class="bookTitle"]/@href')
url = "https://www.goodreads.com" + el_url[0] if el_url else None
url = "https://www.goodreads.com" + el_url[0] if el_url else ""
el_authors = c.xpath('.//a[@class="authorName"]//text()')
subtitle = ", ".join(el_authors) if el_authors else None
subtitle = ", ".join(el_authors) if el_authors else ""
results.append(
SearchResultItem(
ExternalSearchResultItem(
ItemCategory.Book,
SiteName.Goodreads,
url,
@ -149,10 +115,10 @@ class GoogleBooks:
cover = (
b["volumeInfo"]["imageLinks"]["thumbnail"]
if "imageLinks" in b["volumeInfo"]
else None
else ""
)
results.append(
SearchResultItem(
ExternalSearchResultItem(
category,
SiteName.GoogleBooks,
url,
@ -191,10 +157,10 @@ class TheMovieDatabase:
cover = (
f"https://image.tmdb.org/t/p/w500/{m.get('poster_path')}"
if m.get("poster_path")
else None
else ""
)
results.append(
SearchResultItem(
ExternalSearchResultItem(
cat,
SiteName.TMDB,
url,
@ -228,9 +194,9 @@ class Spotify:
for artist in a.get("artists", []):
subtitle += " " + artist.get("name", "")
url = a["external_urls"]["spotify"]
cover = a["images"][0]["url"] if a.get("images") else None
cover = a["images"][0]["url"] if a.get("images") else ""
results.append(
SearchResultItem(
ExternalSearchResultItem(
ItemCategory.Music,
SiteName.Spotify,
url,
@ -260,15 +226,15 @@ class Bandcamp:
albums = h.xpath('//li[@class="searchresult data-search"]')
for c in albums: # type:ignore
el_cover = c.xpath('.//div[@class="art"]/img/@src')
cover = el_cover[0] if el_cover else None
cover = el_cover[0] if el_cover else ""
el_title = c.xpath('.//div[@class="heading"]//text()')
title = "".join(el_title).strip() if el_title else None
title = "".join(el_title).strip() if el_title else "Unknown Title"
el_url = c.xpath('..//div[@class="itemurl"]/a/@href')
url = el_url[0] if el_url else None
url = el_url[0] if el_url else ""
el_authors = c.xpath('.//div[@class="subhead"]//text()')
subtitle = ", ".join(el_authors) if el_authors else None
subtitle = ", ".join(el_authors) if el_authors else ""
results.append(
SearchResultItem(
ExternalSearchResultItem(
ItemCategory.Music,
SiteName.Bandcamp,
url,
@ -295,7 +261,7 @@ class ApplePodcast:
for p in r["results"][(page - 1) * SEARCH_PAGE_SIZE :]:
if p.get("feedUrl"):
results.append(
SearchResultItem(
ExternalSearchResultItem(
ItemCategory.Podcast,
SiteName.RSS,
p["feedUrl"],
@ -314,6 +280,14 @@ class ApplePodcast:
return results
class IGDB:
    """Search adapter that forwards paged game queries to the IGDB site class."""

    @classmethod
    def search(cls, q, page=1):
        """Return one page of IGDB search results for ``q``.

        Args:
            q: free-text query.
            page: 1-based page number; values below 1 are treated as page 1.

        Returns:
            Whatever ``IGDB_Site.search`` returns for the requested window.
        """
        # Pages are 1-based, so page 1 must start at offset 0; multiplying
        # ``page`` directly would skip the first SEARCH_PAGE_SIZE results.
        offset = max(page - 1, 0) * SEARCH_PAGE_SIZE
        return IGDB_Site.search(q, limit=SEARCH_PAGE_SIZE, offset=offset)
class Fediverse:
@staticmethod
async def search_task(host, q, category=None):
@ -343,9 +317,9 @@ class Fediverse:
try:
cat = ItemCategory(item["category"])
except Exception:
cat = ""
cat = None
results.append(
SearchResultItem(
ExternalSearchResultItem(
cat,
host,
url,

View file

@ -1,5 +1,6 @@
# pyright: reportFunctionMemberAccess=false
import hashlib
from urllib.parse import quote_plus
import django_rq
from auditlog.context import set_actor
@ -10,6 +11,7 @@ from loguru import logger
from rq.job import Job
from catalog.common.downloaders import RESPONSE_CENSORSHIP, DownloadError
from catalog.common.models import ItemCategory, SiteName
from catalog.common.sites import SiteManager
from ..models import Item, TVSeason
@ -48,6 +50,48 @@ class DbIndexer:
pass
class ExternalSearchResultItem:
    """One search hit reported by an external site, shaped for display."""

    def __init__(
        self,
        category: ItemCategory | None,
        source_site: SiteName,
        source_url: str,
        title: str,
        subtitle: str,
        brief: str,
        cover_url: str,
    ):
        """Record the display fields and the originating site/URL of the hit."""
        # The single external resource entry points back at the source page.
        origin = {
            "url": source_url,
            "site_name": source_site,
            "site_label": source_site,
        }
        self.class_name = "base"
        self.category = category
        self.external_resources = {"all": [origin]}
        self.source_site = source_site
        self.source_url = source_url
        self.display_title = title
        self.subtitle = subtitle
        self.display_description = brief
        self.cover_image_url = cover_url

    @property
    def verbose_category_name(self):
        """Label of the category, or an empty string when there is none."""
        if not self.category:
            return ""
        return self.category.label

    @property
    def url(self):
        """Local search URL carrying the URL-encoded source URL as the query."""
        encoded = quote_plus(self.source_url)
        return "/search?q=" + encoded

    @property
    def scraped(self):
        """Always False for this class."""
        return False
# if settings.SEARCH_BACKEND == "MEILISEARCH":
#
# el

View file

@ -6,6 +6,7 @@ use (e.g. "portal-2") as id, which is different from real id in IGDB API
import datetime
import json
from urllib.parse import quote_plus
import requests
from django.conf import settings
@ -15,6 +16,7 @@ from loguru import logger
from catalog.common import *
from catalog.models import *
from catalog.search.models import ExternalSearchResultItem
_cache_key = "igdb_access_token"
@ -77,6 +79,40 @@ class IGDB(AbstractSite):
fp.write(json.dumps(r))
return r
@classmethod
def search(cls, q, limit: int, offset: int = 0):
    """Search IGDB games and wrap each hit as an ExternalSearchResultItem.

    Args:
        q: free-text query.
        limit: maximum number of games to request.
        offset: number of leading matches to skip.

    Returns:
        A list with one ExternalSearchResultItem per game in the API response.
    """
    # The API only returns the fields named in the query, and referenced
    # records (cover, platforms) need dot-notation expanders to come back
    # as objects, so ask for everything that is read below.
    fields = (
        "name, url, summary, storyline, first_release_date, "
        "cover.url, platforms.name"
    )
    # NOTE(review): quote_plus URL-encodes the term (spaces become "+"),
    # which also stops a stray double quote from closing the search
    # string early; confirm this is the matching behaviour IGDB expects.
    rs = cls.api_query(
        "games",
        f'fields {fields}; search "{quote_plus(q)}"; limit {limit}; offset {offset};',
    )
    result = []
    for r in rs:
        # Subtitle is "<release date> <platforms>", omitting missing parts.
        subtitle_parts = []
        if "first_release_date" in r:
            subtitle_parts.append(
                datetime.datetime.fromtimestamp(
                    r["first_release_date"], datetime.timezone.utc
                ).strftime("%Y-%m-%d")
            )
        if "platforms" in r:
            ps = sorted(r["platforms"], key=lambda p: p["id"])
            # Platform id 6 is displayed as "Windows" instead of its API name.
            subtitle_parts.append(
                ",".join((p["name"] if p["id"] != 6 else "Windows") for p in ps)
            )
        subtitle = " ".join(subtitle_parts)
        # Join only the texts that exist, so a storyline without a summary
        # does not start with blank lines.
        brief = "\n\n".join(
            text for text in (r.get("summary"), r.get("storyline")) if text
        )
        cover_path = (r.get("cover") or {}).get("url")
        # The scheme is prepended to the cover URL returned by the API.
        cover = "https:" + cover_path if cover_path else ""
        result.append(
            ExternalSearchResultItem(
                ItemCategory.Game,
                SiteName.IGDB,
                r["url"],
                r["name"],
                subtitle,
                brief,
                cover,
            )
        )
    return result
def scrape(self):
fields = "*, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name"
r = self.api_query("games", f'fields {fields}; where url = "{self.url}";')[0]