make external search async and configurable
parent 2826bc60dc
commit 90386bbf1a
20 changed files with 523 additions and 414 deletions
@@ -92,6 +92,8 @@ env = environ.FileAwareEnv(
     NEODB_DISCOVER_UPDATE_INTERVAL=(int, 60),
     # Disable cron jobs, * for all
     NEODB_DISABLE_CRON_JOBS=(list, []),
+    # search sites
+    NEODB_SEARCH_SITES=(list, []),
     # federated search peers
     NEODB_SEARCH_PEERS=(list, []),
     # INTEGRATED TAKAHE CONFIGURATION
@@ -282,6 +284,7 @@ DOWNLOADER_RETRIES = env("NEODB_DOWNLOADER_RETRIES")
 
 DISABLE_CRON_JOBS = env("NEODB_DISABLE_CRON_JOBS")
 SEARCH_PEERS = env("NEODB_SEARCH_PEERS")
+SEARCH_SITES = env("NEODB_SEARCH_SITES")
 
 FANOUT_LIMIT_DAYS = env("NEODB_FANOUT_LIMIT_DAYS")
 # ====== USER CONFIGUTRATION END ======
@@ -43,7 +43,7 @@ class SiteName(models.TextChoices):
     Steam = "steam", _("Steam") # type:ignore[reportCallIssue]
     Bangumi = "bangumi", _("Bangumi") # type:ignore[reportCallIssue]
     BGG = "bgg", _("BGG") # type:ignore[reportCallIssue]
-    # ApplePodcast = "apple_podcast", _("Apple Podcast") # type:ignore[reportCallIssue]
+    ApplePodcast = "apple_podcast", _("Apple Podcast") # type:ignore[reportCallIssue]
     RSS = "rss", _("RSS") # type:ignore[reportCallIssue]
     Discogs = "discogs", _("Discogs") # type:ignore[reportCallIssue]
     AppleMusic = "apple_music", _("Apple Music") # type:ignore[reportCallIssue]
@@ -14,6 +14,7 @@ from typing import Type, TypeVar
 
 import django_rq
 import requests
+from django.conf import settings
 from loguru import logger
 from validators import url as url_validate
 
@@ -91,6 +92,13 @@ class AbstractSite:
         )
         return self.resource
 
+    # add this method to subclass to enable external search
+    # @classmethod
+    # async def search_task(
+    #     cls, query: str, page: int, category: str
+    # ) -> list[ExternalSearchResultItem]:
+    #     return []
+
     def scrape(self) -> ResourceContent:
         """subclass should implement this, return ResourceContent object"""
         data = ResourceContent()
@@ -340,6 +348,17 @@ class SiteManager:
     def get_all_sites():
         return SiteManager.registry.values()
 
+    @staticmethod
+    def get_sites_for_search():
+        if settings.SEARCH_SITES == ["-"]:
+            return []
+        sites = [
+            cls for cls in SiteManager.get_all_sites() if hasattr(cls, "search_task")
+        ]
+        if settings.SEARCH_SITES == ["*"] or not settings.SEARCH_SITES:
+            return sites
+        return [s for s in sites if s.SITE_NAME.value in settings.SEARCH_SITES]
+
 
 def crawl_related_resources_task(resource_pk):
     resource = ExternalResource.objects.filter(pk=resource_pk).first()
@@ -1,16 +1,28 @@
+import time
+
 from django.contrib.contenttypes.models import ContentType
 from django.core.management.base import BaseCommand
 from django.db.models import Count, F
 from tqdm import tqdm
 
+from catalog.common.sites import SiteManager
 from catalog.models import Edition, Item, Podcast, TVSeason, TVShow
+from catalog.search.external import ExternalSources
 from common.models import detect_language, uniq
+from takahe.utils import Takahe
 
 
 class Command(BaseCommand):
     help = "catalog app utilities"
 
     def add_arguments(self, parser):
+        parser.add_argument(
+            "--extsearch",
+        )
+        parser.add_argument(
+            "--category",
+            default="all",
+        )
         parser.add_argument(
             "--verbose",
             action="store_true",
@@ -44,8 +56,26 @@ class Command(BaseCommand):
             self.integrity()
         if options["localize"]:
             self.localize()
+        if options["extsearch"]:
+            self.external_search(options["extsearch"], options["category"])
         self.stdout.write(self.style.SUCCESS("Done."))
 
+    def external_search(self, q, cat):
+        sites = SiteManager.get_sites_for_search()
+        peers = Takahe.get_neodb_peers()
+        self.stdout.write(f"Searching {cat} '{q}' ...")
+        self.stdout.write(f"Peers: {peers}")
+        self.stdout.write(f"Sites: {sites}")
+        start_time = time.time()
+        results = ExternalSources.search(q, 1, cat)
+        for r in results:
+            self.stdout.write(f"{r}")
+        self.stdout.write(
+            self.style.SUCCESS(
                f"{time.time() - start_time} seconds, {len(results)} items."
+            )
+        )
+
     def localize(self):
        c = Item.objects.all().count()
        qs = Item.objects.filter(is_deleted=False, merged_to_item__isnull=True)
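For reference, the new management-command options added above could be exercised from a shell roughly like this (a sketch assuming the standard Django `manage.py` entry point; the query and category values are placeholders):

```
python manage.py catalog --extsearch "the martian" --category book
```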
@@ -38,7 +38,7 @@ from .tv.models (
     TVShowSchema,
 )
 
-from .search.models import Indexer # isort:skip
+from .search.models import Indexer, ExternalSearchResultItem # isort:skip
 
 
 # class Exhibition(Item):
@@ -103,6 +103,7 @@ __all__ = [
     "CatalogCollection",
     "AvailableItemCategory",
     "ExternalResource",
+    "ExternalSearchResultItem",
     "IdType",
     "Item",
     "ItemCategory",
@@ -1,342 +1,26 @@
 import asyncio
 import logging
-from urllib.parse import quote_plus, urlparse
 
-import httpx
-import requests
-from django.conf import settings
-from lxml import html
-
-from catalog.common import BasicDownloader, ItemCategory, SiteManager, SiteName
+from catalog.common import SiteManager
 from catalog.search.models import ExternalSearchResultItem
-from catalog.sites.igdb import IGDB as IGDB_Site
-from catalog.sites.spotify import get_spotify_token
-from catalog.sites.tmdb import TMDB_DEFAULT_LANG
+from catalog.sites.fedi import FediverseInstance
 
 SEARCH_PAGE_SIZE = 5 # not all apis support page size
 logger = logging.getLogger(__name__)
 
 
-class Goodreads:
+class ExternalSources:
     @classmethod
-    def search(cls, q: str, page=1):
-        results = []
-        search_url = f"https://www.goodreads.com/search?page={page}&q={quote_plus(q)}"
-        try:
-            r = requests.get(
-                search_url,
-                timeout=3,
-                headers={
-                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
-                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-                    "Accept-Language": BasicDownloader.get_accept_language(),
-                    "Accept-Encoding": "gzip, deflate",
-                    "Connection": "keep-alive",
-                    "DNT": "1",
-                    "Upgrade-Insecure-Requests": "1",
-                    "Cache-Control": "no-cache",
-                },
-            )
-            if r.url.startswith("https://www.goodreads.com/book/show/"):
-                # Goodreads will 302 if only one result matches ISBN
-                site = SiteManager.get_site_by_url(r.url)
-                if site:
-                    res = site.get_resource_ready()
-                    if res:
-                        subtitle = f"{res.metadata.get('pub_year')} {', '.join(res.metadata.get('author', []))} {', '.join(res.metadata.get('translator', []))}"
-                        results.append(
-                            ExternalSearchResultItem(
-                                ItemCategory.Book,
-                                SiteName.Goodreads,
-                                res.url,
-                                res.metadata["title"],
-                                subtitle,
-                                res.metadata.get("brief", ""),
-                                res.metadata.get("cover_image_url", ""),
-                            )
-                        )
-            else:
-                h = html.fromstring(r.content.decode("utf-8"))
-                books = h.xpath('//tr[@itemtype="http://schema.org/Book"]')
-                for c in books: # type:ignore
-                    el_cover = c.xpath('.//img[@class="bookCover"]/@src')
-                    cover = el_cover[0] if el_cover else ""
-                    el_title = c.xpath('.//a[@class="bookTitle"]//text()')
-                    title = "".join(el_title).strip() if el_title else "Unkown Title"
-                    el_url = c.xpath('.//a[@class="bookTitle"]/@href')
-                    url = "https://www.goodreads.com" + el_url[0] if el_url else ""
-                    el_authors = c.xpath('.//a[@class="authorName"]//text()')
-                    subtitle = ", ".join(el_authors) if el_authors else ""
-                    results.append(
-                        ExternalSearchResultItem(
-                            ItemCategory.Book,
-                            SiteName.Goodreads,
-                            url,
-                            title,
-                            subtitle,
-                            "",
-                            cover,
-                        )
-                    )
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Search {search_url} error: {e}")
-        except Exception as e:
-            logger.error("Goodreads search error", extra={"query": q, "exception": e})
-        return results
-
-
-class GoogleBooks:
-    @classmethod
-    def search(cls, q, page=1):
-        results = []
-        api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={SEARCH_PAGE_SIZE * (page - 1)}&maxResults={SEARCH_PAGE_SIZE}&maxAllowedMaturityRating=MATURE"
-        try:
-            j = requests.get(api_url, timeout=2).json()
-            if "items" in j:
-                for b in j["items"]:
-                    if "title" not in b["volumeInfo"]:
-                        continue
-                    title = b["volumeInfo"]["title"]
-                    subtitle = ""
-                    if "publishedDate" in b["volumeInfo"]:
-                        subtitle += b["volumeInfo"]["publishedDate"] + " "
-                    if "authors" in b["volumeInfo"]:
-                        subtitle += ", ".join(b["volumeInfo"]["authors"])
-                    if "description" in b["volumeInfo"]:
-                        brief = b["volumeInfo"]["description"]
-                    elif "textSnippet" in b["volumeInfo"]:
-                        brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
-                    else:
-                        brief = ""
-                    category = ItemCategory.Book
-                    # b['volumeInfo']['infoLink'].replace('http:', 'https:')
-                    url = "https://books.google.com/books?id=" + b["id"]
-                    cover = (
-                        b["volumeInfo"]["imageLinks"]["thumbnail"]
-                        if "imageLinks" in b["volumeInfo"]
-                        else ""
-                    )
-                    results.append(
-                        ExternalSearchResultItem(
-                            category,
-                            SiteName.GoogleBooks,
-                            url,
-                            title,
-                            subtitle,
-                            brief,
-                            cover,
-                        )
-                    )
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Search {api_url} error: {e}")
-        except Exception as e:
-            logger.error("GoogleBooks search error", extra={"query": q, "exception": e})
-        return results
-
-
-class TheMovieDatabase:
-    @classmethod
-    def search(cls, q, page=1):
-        results = []
-        api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={page}&api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&include_adult=true"
-        try:
-            j = requests.get(api_url, timeout=2).json()
-            if j.get("results"):
-                for m in j["results"]:
-                    if m["media_type"] in ["tv", "movie"]:
-                        url = f"https://www.themoviedb.org/{m['media_type']}/{m['id']}"
-                        if m["media_type"] == "tv":
-                            cat = ItemCategory.TV
-                            title = m["name"]
-                            subtitle = f"{m.get('first_air_date', '')} {m.get('original_name', '')}"
-                        else:
-                            cat = ItemCategory.Movie
-                            title = m["title"]
-                            subtitle = f"{m.get('release_date', '')} {m.get('original_name', '')}"
-                        cover = (
-                            f"https://image.tmdb.org/t/p/w500/{m.get('poster_path')}"
-                            if m.get("poster_path")
-                            else ""
-                        )
-                        results.append(
-                            ExternalSearchResultItem(
-                                cat,
-                                SiteName.TMDB,
-                                url,
-                                title,
-                                subtitle,
-                                m.get("overview"),
-                                cover,
-                            )
-                        )
-            else:
-                logger.warning(f"TMDB search '{q}' no results found.")
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Search {api_url} error: {e}")
-        except Exception as e:
-            logger.error("TMDb search error", extra={"query": q, "exception": e})
-        return results
-
-
-class Spotify:
-    @classmethod
-    def search(cls, q, page=1):
-        results = []
-        api_url = f"https://api.spotify.com/v1/search?q={q}&type=album&limit={SEARCH_PAGE_SIZE}&offset={page * SEARCH_PAGE_SIZE}"
-        try:
-            headers = {"Authorization": f"Bearer {get_spotify_token()}"}
-            j = requests.get(api_url, headers=headers, timeout=2).json()
-            if j.get("albums"):
-                for a in j["albums"]["items"]:
-                    title = a["name"]
-                    subtitle = a.get("release_date", "")
-                    for artist in a.get("artists", []):
-                        subtitle += " " + artist.get("name", "")
-                    url = a["external_urls"]["spotify"]
-                    cover = a["images"][0]["url"] if a.get("images") else ""
-                    results.append(
-                        ExternalSearchResultItem(
-                            ItemCategory.Music,
-                            SiteName.Spotify,
-                            url,
-                            title,
-                            subtitle,
-                            "",
-                            cover,
-                        )
-                    )
-            else:
-                logger.warning(f"Spotify search '{q}' no results found.")
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Search {api_url} error: {e}")
-        except Exception as e:
-            logger.error("Spotify search error", extra={"query": q, "exception": e})
-        return results
-
-
-class Bandcamp:
-    @classmethod
-    def search(cls, q, page=1):
-        results = []
-        search_url = f"https://bandcamp.com/search?from=results&item_type=a&page={page}&q={quote_plus(q)}"
-        try:
-            r = requests.get(search_url, timeout=2)
-            h = html.fromstring(r.content.decode("utf-8"))
-            albums = h.xpath('//li[@class="searchresult data-search"]')
-            for c in albums: # type:ignore
-                el_cover = c.xpath('.//div[@class="art"]/img/@src')
-                cover = el_cover[0] if el_cover else ""
-                el_title = c.xpath('.//div[@class="heading"]//text()')
-                title = "".join(el_title).strip() if el_title else "Unknown Title"
-                el_url = c.xpath('..//div[@class="itemurl"]/a/@href')
-                url = el_url[0] if el_url else ""
-                el_authors = c.xpath('.//div[@class="subhead"]//text()')
-                subtitle = ", ".join(el_authors) if el_authors else ""
-                results.append(
-                    ExternalSearchResultItem(
-                        ItemCategory.Music,
-                        SiteName.Bandcamp,
-                        url,
-                        title,
-                        subtitle,
-                        "",
-                        cover,
-                    )
-                )
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Search {search_url} error: {e}")
-        except Exception as e:
-            logger.error("Bandcamp search error", extra={"query": q, "exception": e})
-        return results
-
-
-class ApplePodcast:
-    @classmethod
-    def search(cls, q, page=1):
-        results = []
-        search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * SEARCH_PAGE_SIZE}&term={quote_plus(q)}"
-        try:
-            r = requests.get(search_url, timeout=2).json()
-            for p in r["results"][(page - 1) * SEARCH_PAGE_SIZE :]:
-                if p.get("feedUrl"):
-                    results.append(
-                        ExternalSearchResultItem(
-                            ItemCategory.Podcast,
-                            SiteName.RSS,
-                            p["feedUrl"],
-                            p["trackName"],
-                            p["artistName"],
-                            "",
-                            p["artworkUrl600"],
-                        )
-                    )
-        except requests.exceptions.RequestException as e:
-            logger.warning(f"Search {search_url} error: {e}")
-        except Exception as e:
-            logger.error(
-                "ApplePodcast search error", extra={"query": q, "exception": e}
-            )
-        return results
-
-
-class IGDB:
-    @classmethod
-    def search(cls, q, page=1):
-        return IGDB_Site.search(
-            q, limit=SEARCH_PAGE_SIZE, offset=page * SEARCH_PAGE_SIZE
-        )
-
-
-class Fediverse:
-    @staticmethod
-    async def search_task(host, q, category=None):
-        api_url = f"https://{host}/api/catalog/search?query={quote_plus(q)}{'&category=' + category if category else ''}"
-        async with httpx.AsyncClient() as client:
-            results = []
-            try:
-                response = await client.get(
-                    api_url,
-                    timeout=2,
-                )
-                r = response.json()
-            except Exception as e:
-                logger.error(
-                    f"Fediverse search {host} error",
-                    extra={"url": api_url, "query": q, "exception": e},
-                )
-                return []
-            if "data" in r:
-                for item in r["data"]:
-                    if any(
-                        urlparse(res["url"]).hostname in settings.SITE_DOMAINS
-                        for res in item.get("external_resources", [])
-                    ):
-                        continue
-                    url = f"https://{host}{item['url']}" # FIXME update API and use abs urls
-                    try:
-                        cat = ItemCategory(item["category"])
-                    except Exception:
-                        cat = None
-                    results.append(
-                        ExternalSearchResultItem(
-                            cat,
-                            host,
-                            url,
-                            item["display_title"],
-                            "",
-                            item["brief"],
-                            item["cover_image_url"],
-                        )
-                    )
-            return results
-
-    @classmethod
-    def search(cls, q: str, page: int = 1, category: str | None = None):
-        from takahe.utils import Takahe
-
-        peers = Takahe.get_neodb_peers()
-        c = category if category != "movietv" else "movie,tv"
-        tasks = [Fediverse.search_task(host, q, c) for host in peers]
+    def search(
+        cls, query: str, page: int = 1, category: str | None = None
+    ) -> list[ExternalSearchResultItem]:
+        if not query or page < 1 or page > 10:
+            return []
+        if category in ["", None]:
+            category = "all"
+        tasks = FediverseInstance.search_tasks(query, page, category)
+        for site in SiteManager.get_sites_for_search():
+            tasks.append(site.search_task(query, page, category))
         # loop = asyncio.get_event_loop()
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
@@ -344,29 +28,3 @@ class Fediverse:
         for r in loop.run_until_complete(asyncio.gather(*tasks)):
             results.extend(r)
         return results
-
-
-class ExternalSources:
-    @classmethod
-    def search(cls, c, q, page=1):
-        if not q:
-            return []
-        results = []
-        results.extend(
-            Fediverse.search(q, page, category=c if c and c != "all" else None)
-        )
-        if c == "" or c is None:
-            c = "all"
-        if c == "all" or c == "movietv":
-            results.extend(TheMovieDatabase.search(q, page))
-        if c == "all" or c == "book":
-            results.extend(GoogleBooks.search(q, page))
-            results.extend(Goodreads.search(q, page))
-        if c == "all" or c == "game":
-            results.extend(IGDB.search(q, page))
-        if c == "all" or c == "music":
-            results.extend(Spotify.search(q, page))
-            results.extend(Bandcamp.search(q, page))
-        if c == "podcast":
-            results.extend(ApplePodcast.search(q, page))
-        return results
@@ -79,7 +79,7 @@ class ExternalSearchResultItem:
         self.cover_image_url = cover_url
 
     def __repr__(self):
-        return f"[{self.category}] {self.display_title} {self.url}"
+        return f"[{self.category}] {self.display_title} {self.source_url}"
 
     @property
     def verbose_category_name(self):
@@ -159,7 +159,7 @@ def external_search(request):
     category = None
     keywords = request.GET.get("q", default="").strip()
     page_number = int_(request.GET.get("page"), 1)
-    items = ExternalSources.search(category, keywords, page_number) if keywords else []
+    items = ExternalSources.search(keywords, page_number, category) if keywords else []
     cache_key = f"search_{category if category != 'movietv' else 'movie,tv'}_{keywords}"
     dedupe_urls = cache.get(cache_key, [])
     items = [i for i in items if i.source_url not in dedupe_urls]
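With the reordered signature above, `ExternalSources.search` takes the query first and drives the per-site coroutines on its own event loop, so a synchronous caller can use it directly. A minimal sketch (the query, page and category values are illustrative):

```python
from catalog.search.external import ExternalSources

# gathers Fediverse peer searches plus every site enabled via NEODB_SEARCH_SITES
items = ExternalSources.search("the martian", page=1, category="book")
for item in items:
    print(item)  # __repr__ now shows category, display title and source_url
```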
@@ -1,6 +1,7 @@
 from ..common.sites import SiteManager
 from .ao3 import ArchiveOfOurOwn
 from .apple_music import AppleMusic
+from .apple_podcast import ApplePodcast
 from .bandcamp import Bandcamp
 from .bangumi import Bangumi
 from .bgg import BoardGameGeek
@@ -24,12 +25,11 @@ from .steam import Steam
 from .tmdb import TMDB_Movie
 from .ypshuo import Ypshuo
 
-# from .apple_podcast import ApplePodcast
-
 __all__ = [
     "SiteManager",
     "ArchiveOfOurOwn",
     "AppleMusic",
+    "ApplePodcast",
     "Bandcamp",
     "Bangumi",
     "BoardGameGeek",
@@ -1,16 +1,17 @@
-import logging
+from urllib.parse import quote_plus
 
+import httpx
+from loguru import logger
+
 from catalog.common import *
 from catalog.models import *
 
 from .rss import RSS
 
-_logger = logging.getLogger(__name__)
-
 
 @SiteManager.register
 class ApplePodcast(AbstractSite):
-    # SITE_NAME = SiteName.ApplePodcast
+    SITE_NAME = SiteName.ApplePodcast
     ID_TYPE = IdType.ApplePodcast
     URL_PATTERNS = [r"https://[^.]+.apple.com/\w+/podcast/*[^/?]*/id(\d+)"]
     WIKI_PROPERTY_ID = "P5842"
@@ -38,3 +39,35 @@ class ApplePodcast(AbstractSite):
         )
         pd.lookup_ids[IdType.RSS] = RSS.url_to_id(feed_url)
         return pd
+
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category != "podcast":
+            return []
+        SEARCH_PAGE_SIZE = 5 if category == "all" else 10
+        results = []
+        search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * SEARCH_PAGE_SIZE}&term={quote_plus(q)}"
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.get(search_url, timeout=2)
+                r = response.json()
+                for p in r["results"][(page - 1) * SEARCH_PAGE_SIZE :]:
+                    if p.get("feedUrl"):
+                        results.append(
+                            ExternalSearchResultItem(
+                                ItemCategory.Podcast,
+                                SiteName.RSS,
+                                p["feedUrl"],
+                                p["trackName"],
+                                p["artistName"],
+                                "",
+                                p["artworkUrl600"],
+                            )
+                        )
+            except Exception as e:
+                logger.error(
+                    "ApplePodcast search error", extra={"query": q, "exception": e}
+                )
+            return results
@@ -5,6 +5,9 @@ import urllib.parse
 
 import dateparser
 import dns.resolver
+import httpx
+from loguru import logger
+from lxml import html
 
 from catalog.common import *
 from catalog.models import *
@@ -103,3 +106,45 @@ class Bandcamp(AbstractSite):
         }
         pd = ResourceContent(metadata=data)
         return pd
+
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category != "music":
+            return []
+        SEARCH_PAGE_SIZE = 5
+        p = (page - 1) * SEARCH_PAGE_SIZE // 18 + 1
+        offset = (page - 1) * SEARCH_PAGE_SIZE % 18
+        results = []
+        search_url = f"https://bandcamp.com/search?from=results&item_type=a&page={p}&q={urllib.parse.quote_plus(q)}"
+        async with httpx.AsyncClient() as client:
+            try:
+                r = await client.get(search_url, timeout=2)
+                h = html.fromstring(r.content.decode("utf-8"))
+                albums = h.xpath('//li[@class="searchresult data-search"]')
+                for c in albums: # type:ignore
+                    el_cover = c.xpath('.//div[@class="art"]/img/@src')
+                    cover = el_cover[0] if el_cover else ""
+                    el_title = c.xpath('.//div[@class="heading"]//text()')
+                    title = "".join(el_title).strip() if el_title else "Unknown Title"
+                    el_url = c.xpath('..//div[@class="itemurl"]/a/@href')
+                    url = el_url[0] if el_url else ""
+                    el_authors = c.xpath('.//div[@class="subhead"]//text()')
+                    subtitle = ", ".join(el_authors) if el_authors else ""
+                    results.append(
+                        ExternalSearchResultItem(
+                            ItemCategory.Music,
+                            SiteName.Bandcamp,
+                            url,
+                            title,
+                            subtitle,
+                            "",
+                            cover,
+                        )
+                    )
+            except Exception as e:
+                logger.error(
+                    "Bandcamp search error", extra={"query": q, "exception": e}
+                )
+            return results[offset : offset + SEARCH_PAGE_SIZE]
@@ -1,9 +1,33 @@
+from urllib.parse import quote_plus, urlparse
+
+import httpx
 from django.conf import settings
 from django.core.validators import URLValidator
 from loguru import logger
 
-from catalog.common import *
-from catalog.models import *
+from catalog.common import (
+    AbstractSite,
+    BasicImageDownloader,
+    CachedDownloader,
+    IdType,
+    ItemCategory,
+    ResourceContent,
+    SiteManager,
+    SiteName,
+)
+from catalog.models import (
+    Album,
+    Edition,
+    ExternalSearchResultItem,
+    Game,
+    Movie,
+    Performance,
+    PerformanceProduction,
+    Podcast,
+    TVEpisode,
+    TVSeason,
+    TVShow,
+)
 
 
 @SiteManager.register
@@ -99,3 +123,56 @@ class FediverseInstance(AbstractSite):
             lookup_ids=ids,
         )
         return d
+
+    @classmethod
+    async def peer_search_task(cls, host, q, page, category=None):
+        SEARCH_PAGE_SIZE = 5
+        p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1
+        offset = (page - 1) * SEARCH_PAGE_SIZE % 20
+        api_url = f"https://{host}/api/catalog/search?query={quote_plus(q)}&page={p}{'&category=' + category if category and category != 'all' else ''}"
+        async with httpx.AsyncClient() as client:
+            results = []
+            try:
+                response = await client.get(
+                    api_url,
+                    timeout=2,
+                )
+                r = response.json()
+            except Exception as e:
+                logger.error(
+                    f"Fediverse search {host} error",
+                    extra={"url": api_url, "query": q, "exception": e},
+                )
+                return []
+            if "data" in r:
+                for item in r["data"]:
+                    if any(
+                        urlparse(res["url"]).hostname in settings.SITE_DOMAINS
+                        for res in item.get("external_resources", [])
+                    ):
+                        continue
+                    url = f"https://{host}{item['url']}" # FIXME update API and use abs urls
+                    try:
+                        cat = ItemCategory(item["category"])
+                    except Exception:
+                        cat = None
+                    results.append(
+                        ExternalSearchResultItem(
+                            cat,
+                            host,
+                            url,
+                            item["display_title"],
+                            "",
+                            item["brief"],
+                            item["cover_image_url"],
+                        )
+                    )
+            return results[offset : offset + SEARCH_PAGE_SIZE]
+
+    @classmethod
+    def search_tasks(cls, q: str, page: int = 1, category: str | None = None):
+        from takahe.utils import Takahe
+
+        peers = Takahe.get_neodb_peers()
+        c = category if category != "movietv" else "movie,tv"
+        return [cls.peer_search_task(host, q, page, c) for host in peers]
@@ -1,18 +1,18 @@
 import json
-import logging
 from datetime import datetime
+from urllib.parse import quote_plus
 
+import httpx
 from django.utils.timezone import make_aware
+from loguru import logger
 from lxml import html
 
-from catalog.book.models import Edition, Work
 from catalog.book.utils import binding_to_format, detect_isbn_asin
 from catalog.common import *
-from common.models.lang import detect_language
+from catalog.models import Edition, ExternalSearchResultItem, Work
+from common.models import detect_language
 from journal.models.renderers import html_to_text
 
-_logger = logging.getLogger(__name__)
-
 
 class GoodreadsDownloader(RetryDownloader):
     def validate_response(self, response):
@@ -121,6 +121,82 @@ class Goodreads(AbstractSite):
         pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
         return pd
 
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category not in ["all", "book"]:
+            return []
+        SEARCH_PAGE_SIZE = 5
+        p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1
+        offset = (page - 1) * SEARCH_PAGE_SIZE % 20
+        results = []
+        search_url = f"https://www.goodreads.com/search?page={p}&q={quote_plus(q)}"
+        async with httpx.AsyncClient() as client:
+            try:
+                r = await client.get(
+                    search_url,
+                    timeout=3,
+                    headers={
+                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
+                        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                        "Accept-Language": BasicDownloader.get_accept_language(),
+                        "Accept-Encoding": "gzip, deflate",
+                        "Connection": "keep-alive",
+                        "DNT": "1",
+                        "Upgrade-Insecure-Requests": "1",
+                        "Cache-Control": "no-cache",
+                    },
+                )
+                if r.url.path.startswith("/book/show/"):
+                    # Goodreads will 302 if only one result matches ISBN
+                    site = SiteManager.get_site_by_url(str(r.url))
+                    if site:
+                        res = site.get_resource_ready()
+                        if res:
+                            subtitle = f"{res.metadata.get('pub_year')} {', '.join(res.metadata.get('author', []))} {', '.join(res.metadata.get('translator', []))}"
+                            results.append(
+                                ExternalSearchResultItem(
+                                    ItemCategory.Book,
+                                    SiteName.Goodreads,
+                                    res.url,
+                                    res.metadata["title"],
+                                    subtitle,
+                                    res.metadata.get("brief", ""),
+                                    res.metadata.get("cover_image_url", ""),
+                                )
+                            )
+                else:
+                    h = html.fromstring(r.content.decode("utf-8"))
+                    books = h.xpath('//tr[@itemtype="http://schema.org/Book"]')
+                    for c in books: # type:ignore
+                        el_cover = c.xpath('.//img[@class="bookCover"]/@src')
+                        cover = el_cover[0] if el_cover else ""
+                        el_title = c.xpath('.//a[@class="bookTitle"]//text()')
+                        title = (
+                            "".join(el_title).strip() if el_title else "Unkown Title"
+                        )
+                        el_url = c.xpath('.//a[@class="bookTitle"]/@href')
+                        url = "https://www.goodreads.com" + el_url[0] if el_url else ""
+                        el_authors = c.xpath('.//a[@class="authorName"]//text()')
+                        subtitle = ", ".join(el_authors) if el_authors else ""
+                        results.append(
+                            ExternalSearchResultItem(
+                                ItemCategory.Book,
+                                SiteName.Goodreads,
+                                url,
+                                title,
+                                subtitle,
+                                "",
+                                cover,
+                            )
+                        )
+            except Exception as e:
+                logger.error(
+                    "Goodreads search error", extra={"query": q, "exception": e}
+                )
+            return results[offset : offset + SEARCH_PAGE_SIZE]
+
 
 @SiteManager.register
 class Goodreads_Work(AbstractSite):
@@ -1,7 +1,10 @@
 import logging
 import re
+from urllib.parse import quote_plus
 
+import httpx
 from django.conf import settings
+from loguru import logger
 
 from catalog.book.utils import isbn_10_to_13
 from catalog.common import *
@@ -116,3 +119,57 @@ class GoogleBooks(AbstractSite):
             cover_image_extention=ext,
             lookup_ids={IdType.ISBN: isbn13},
         )
+
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category not in ["all", "book"]:
+            return []
+        SEARCH_PAGE_SIZE = 5
+        results = []
+        api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={SEARCH_PAGE_SIZE * (page - 1)}&maxResults={SEARCH_PAGE_SIZE}&maxAllowedMaturityRating=MATURE"
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.get(api_url, timeout=2)
+                j = response.json()
+                if "items" in j:
+                    for b in j["items"]:
+                        if "title" not in b["volumeInfo"]:
+                            continue
+                        title = b["volumeInfo"]["title"]
+                        subtitle = ""
+                        if "publishedDate" in b["volumeInfo"]:
+                            subtitle += b["volumeInfo"]["publishedDate"] + " "
+                        if "authors" in b["volumeInfo"]:
+                            subtitle += ", ".join(b["volumeInfo"]["authors"])
+                        if "description" in b["volumeInfo"]:
+                            brief = b["volumeInfo"]["description"]
+                        elif "textSnippet" in b["volumeInfo"]:
+                            brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
+                        else:
+                            brief = ""
+                        category = ItemCategory.Book
+                        # b['volumeInfo']['infoLink'].replace('http:', 'https:')
+                        url = "https://books.google.com/books?id=" + b["id"]
+                        cover = (
+                            b["volumeInfo"]["imageLinks"]["thumbnail"]
+                            if "imageLinks" in b["volumeInfo"]
+                            else ""
+                        )
+                        results.append(
+                            ExternalSearchResultItem(
+                                category,
+                                SiteName.GoogleBooks,
+                                url,
+                                title,
+                                subtitle,
+                                brief,
+                                cover,
+                            )
+                        )
+            except Exception as e:
+                logger.error(
+                    "GoogleBooks search error", extra={"query": q, "exception": e}
+                )
+            return results
@@ -8,6 +8,7 @@ import datetime
 import json
 from urllib.parse import quote_plus
 
+import httpx
 import requests
 from django.conf import settings
 from django.core.cache import cache
@@ -83,44 +84,6 @@ class IGDB(AbstractSite):
             fp.write(json.dumps(r))
         return r
 
-    @classmethod
-    def search(cls, q, limit: int, offset: int = 0):
-        rs = cls.api_query(
-            "games",
-            f'fields *, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name; search "{quote_plus(q)}"; limit {limit}; offset {offset};',
-        )
-        result = []
-        for r in rs:
-            subtitle = ""
-            if "first_release_date" in r:
-                subtitle = datetime.datetime.fromtimestamp(
-                    r["first_release_date"], datetime.timezone.utc
-                ).strftime("%Y-%m-%d ")
-            if "platforms" in r:
-                ps = sorted(r["platforms"], key=lambda p: p["id"])
-                subtitle += ",".join(
-                    [(p["name"] if p["id"] != 6 else "Windows") for p in ps]
-                )
-            brief = r["summary"] if "summary" in r else ""
-            brief += "\n\n" + r["storyline"] if "storyline" in r else ""
-            cover = (
-                "https:" + r["cover"]["url"].replace("t_thumb", "t_cover_big")
-                if r.get("cover")
-                else ""
-            )
-            result.append(
-                ExternalSearchResultItem(
-                    ItemCategory.Game,
-                    SiteName.IGDB,
-                    r["url"],
-                    r["name"],
-                    subtitle,
-                    brief,
-                    cover,
-                )
-            )
-        return result
-
     def scrape(self):
         fields = "*, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name"
         r = self.api_query("games", f'fields {fields}; where url = "{self.url}";')
@@ -200,3 +163,55 @@ class IGDB(AbstractSite):
                 IdType.Steam
             ).url_to_id(steam_url)
         return pd
+
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category != "game":
+            return []
+        SEARCH_PAGE_SIZE = 5 if category == "all" else 10
+        limit = SEARCH_PAGE_SIZE
+        offset = (page - 1) * limit
+        q = f'fields *, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name; search "{quote_plus(q)}"; limit {limit}; offset {offset};'
+        _wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, _igdb_access_token())
+        async with httpx.AsyncClient() as client:
+            try:
+                url = IGDBWrapper._build_url("games")
+                params = _wrapper._compose_request(q)
+                response = await client.post(url, **params)
+                rs = json.loads(response.content)
+            except requests.HTTPError as e:
+                logger.error(f"IGDB API: {e}", extra={"exception": e})
+                rs = []
+        result = []
+        for r in rs:
+            subtitle = ""
+            if "first_release_date" in r:
+                subtitle = datetime.datetime.fromtimestamp(
+                    r["first_release_date"], datetime.timezone.utc
+                ).strftime("%Y-%m-%d ")
+            if "platforms" in r:
+                ps = sorted(r["platforms"], key=lambda p: p["id"])
+                subtitle += ",".join(
+                    [(p["name"] if p["id"] != 6 else "Windows") for p in ps]
+                )
+            brief = r["summary"] if "summary" in r else ""
+            brief += "\n\n" + r["storyline"] if "storyline" in r else ""
+            cover = (
+                "https:" + r["cover"]["url"].replace("t_thumb", "t_cover_big")
+                if r.get("cover")
+                else ""
+            )
+            result.append(
+                ExternalSearchResultItem(
+                    ItemCategory.Game,
+                    SiteName.IGDB,
+                    r["url"],
                    r["name"],
+                    subtitle,
+                    brief,
+                    cover,
+                )
+            )
+        return result
@@ -6,8 +6,10 @@ import logging
 import time
 
 import dateparser
+import httpx
 import requests
 from django.conf import settings
+from loguru import logger
 
 from catalog.common import *
 from catalog.models import *
@@ -107,6 +109,45 @@ class Spotify(AbstractSite):
         pd.lookup_ids[IdType.ISRC] = isrc
         return pd
 
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category not in ["music", "all"]:
+            return []
+        SEARCH_PAGE_SIZE = 5
+        results = []
+        api_url = f"https://api.spotify.com/v1/search?q={q}&type=album&limit={SEARCH_PAGE_SIZE}&offset={page * SEARCH_PAGE_SIZE}"
+        async with httpx.AsyncClient() as client:
+            try:
+                headers = {"Authorization": f"Bearer {get_spotify_token()}"}
+                response = await client.get(api_url, headers=headers, timeout=2)
+                j = response.json()
+                if j.get("albums"):
+                    for a in j["albums"]["items"]:
+                        title = a["name"]
+                        subtitle = a.get("release_date", "")
+                        for artist in a.get("artists", []):
+                            subtitle += " " + artist.get("name", "")
+                        url = a["external_urls"]["spotify"]
+                        cover = a["images"][0]["url"] if a.get("images") else ""
+                        results.append(
+                            ExternalSearchResultItem(
+                                ItemCategory.Music,
+                                SiteName.Spotify,
+                                url,
+                                title,
+                                subtitle,
+                                "",
+                                cover,
+                            )
+                        )
+                else:
+                    logger.warning(f"Spotify search '{q}' no results found.")
+            except Exception as e:
+                logger.error("Spotify search error", extra={"query": q, "exception": e})
+            return results
+
 
 def get_spotify_token():
     global spotify_token, spotify_token_expire_time
@@ -12,8 +12,11 @@ these language code from TMDB are not in currently iso-639-1
 
 import logging
 import re
+from urllib.parse import quote_plus
 
+import httpx
 from django.conf import settings
+from loguru import logger
 
 from catalog.common import *
 from catalog.movie.models import *
@@ -175,6 +178,55 @@ class TMDB_Movie(AbstractSite):
         pd.lookup_ids[IdType.IMDB] = imdb_code
         return pd
 
+    @classmethod
+    async def search_task(
+        cls, q: str, page: int, category: str
+    ) -> list[ExternalSearchResultItem]:
+        if category not in ["movietv", "all", "movie", "tv"]:
+            return []
+        SEARCH_PAGE_SIZE = 5 if category == "all" else 10
+        p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1
+        offset = (page - 1) * SEARCH_PAGE_SIZE % 20
+        results = []
+        api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={p}&api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&include_adult=true"
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.get(api_url, timeout=2)
+                j = response.json()
+                if j.get("results"):
+                    for m in j["results"]:
+                        if m["media_type"] in ["tv", "movie"]:
+                            url = f"https://www.themoviedb.org/{m['media_type']}/{m['id']}"
+                            if m["media_type"] == "tv":
+                                cat = ItemCategory.TV
+                                title = m["name"]
+                                subtitle = f"{m.get('first_air_date', '')} {m.get('original_name', '')}"
+                            else:
+                                cat = ItemCategory.Movie
+                                title = m["title"]
+                                subtitle = f"{m.get('release_date', '')} {m.get('original_name', '')}"
+                            cover = (
+                                f"https://image.tmdb.org/t/p/w500/{m.get('poster_path')}"
+                                if m.get("poster_path")
+                                else ""
+                            )
+                            results.append(
+                                ExternalSearchResultItem(
+                                    cat,
+                                    SiteName.TMDB,
+                                    url,
+                                    title,
+                                    subtitle,
+                                    m.get("overview"),
+                                    cover,
+                                )
+                            )
+                else:
+                    logger.warning(f"TMDB search '{q}' no results found.")
+            except Exception as e:
+                logger.error("TMDb search error", extra={"query": q, "exception": e})
+            return results[offset : offset + SEARCH_PAGE_SIZE]
+
 
 @SiteManager.register
 class TMDB_TV(AbstractSite):
@@ -33,6 +33,7 @@ x-shared:
     NEODB_DISABLE_DEFAULT_RELAY:
     NEODB_DISABLE_CRON_JOBS:
     NEODB_SEARCH_PEERS:
+    NEODB_SEARCH_SITES:
     NEODB_MIN_MARKS_FOR_DISCOVER:
     NEODB_DISCOVER_UPDATE_INTERVAL:
     NEODB_DISCOVER_FILTER_LANGUAGE:
@@ -57,6 +57,7 @@ if you are doing debug or development:
 - `GOOGLE_API_KEY` - API key for [Google Books](https://developers.google.com/books/docs/v1/using)
 - `DISCOGS_API_KEY` - personal access token from [Discogs](https://www.discogs.com/settings/developers)
 - `IGDB_API_CLIENT_ID`, `IGDB_API_CLIENT_SECRET` - IGDB [keys](https://api-docs.igdb.com/)
+- `NEODB_SEARCH_SITES` is empty by default, which means NeoDB will search all available sources. This can be set to a comma-separated list of site names (e.g. `goodreads,googlebooks,spotify,tmdb,igdb,bandcamp,apple_podcast`), so that NeoDB will only search those sites; or not search any of them if set to just `-`.
 
 
 ## Other maintenance tasks
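A minimal sketch of the new setting in an `.env`-style configuration (values are illustrative; leaving `NEODB_SEARCH_SITES` empty keeps the default of searching every available source, and `NEODB_SEARCH_PEERS` is shown only for context):

```
NEODB_SEARCH_SITES=goodreads,tmdb,spotify
NEODB_SEARCH_PEERS=
```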
@@ -169,7 +169,7 @@ mkdocs==1.6.1
     # via mkdocs-material
 mkdocs-get-deps==0.2.0
     # via mkdocs
-mkdocs-material==9.5.49
+mkdocs-material==9.5.50
 mkdocs-material-extensions==1.3.1
     # via mkdocs-material
 multidict==6.1.0
@@ -213,7 +213,7 @@ pygments==2.19.1
     # via mkdocs-material
 pymdown-extensions==10.14
     # via mkdocs-material
-pyright==1.1.391
+pyright==1.1.392.post0
 python-dateutil==2.9.0.post0
     # via dateparser
     # via django-auditlog
@@ -251,7 +251,7 @@ rjsmin==1.2.2
     # via django-compressor
 rq==2.1.0
     # via django-rq
-ruff==0.9.1
+ruff==0.9.2
 sentry-sdk==2.20.0
 setproctitle==1.3.4
 six==1.17.0
@@ -292,7 +292,7 @@ urllib3==2.3.0
     # via sentry-sdk
 urlman==2.0.2
 validators==0.34.0
-virtualenv==20.28.1
+virtualenv==20.29.1
     # via pre-commit
 watchdog==6.0.0
     # via mkdocs