make external search async and configurable
parent 2826bc60dc
commit 90386bbf1a
20 changed files with 523 additions and 414 deletions

@@ -92,6 +92,8 @@ env = environ.FileAwareEnv(
    NEODB_DISCOVER_UPDATE_INTERVAL=(int, 60),
    # Disable cron jobs, * for all
    NEODB_DISABLE_CRON_JOBS=(list, []),
    # search sites
    NEODB_SEARCH_SITES=(list, []),
    # federated search peers
    NEODB_SEARCH_PEERS=(list, []),
    # INTEGRATED TAKAHE CONFIGURATION

@@ -282,6 +284,7 @@ DOWNLOADER_RETRIES = env("NEODB_DOWNLOADER_RETRIES")

DISABLE_CRON_JOBS = env("NEODB_DISABLE_CRON_JOBS")
SEARCH_PEERS = env("NEODB_SEARCH_PEERS")
SEARCH_SITES = env("NEODB_SEARCH_SITES")

FANOUT_LIMIT_DAYS = env("NEODB_FANOUT_LIMIT_DAYS")
# ====== USER CONFIGUTRATION END ======

@@ -43,7 +43,7 @@ class SiteName(models.TextChoices):
    Steam = "steam", _("Steam")  # type:ignore[reportCallIssue]
    Bangumi = "bangumi", _("Bangumi")  # type:ignore[reportCallIssue]
    BGG = "bgg", _("BGG")  # type:ignore[reportCallIssue]
    # ApplePodcast = "apple_podcast", _("Apple Podcast")  # type:ignore[reportCallIssue]
    ApplePodcast = "apple_podcast", _("Apple Podcast")  # type:ignore[reportCallIssue]
    RSS = "rss", _("RSS")  # type:ignore[reportCallIssue]
    Discogs = "discogs", _("Discogs")  # type:ignore[reportCallIssue]
    AppleMusic = "apple_music", _("Apple Music")  # type:ignore[reportCallIssue]

@@ -14,6 +14,7 @@ from typing import Type, TypeVar

import django_rq
import requests
from django.conf import settings
from loguru import logger
from validators import url as url_validate

@@ -91,6 +92,13 @@ class AbstractSite:
        )
        return self.resource

    # add this method to subclass to enable external search
    # @classmethod
    # async def search_task(
    #     cls, query: str, page: int, category: str
    # ) -> list[ExternalSearchResultItem]:
    #     return []

    def scrape(self) -> ResourceContent:
        """subclass should implement this, return ResourceContent object"""
        data = ResourceContent()
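
The commented stub above documents the opt-in hook: any registered site class that defines an async `search_task` classmethod gets picked up by `SiteManager.get_sites_for_search()` in the next hunk. A minimal hypothetical subclass, only to illustrate the shape (the site name and category filter are placeholders, not part of this commit):

    @SiteManager.register
    class ExampleSite(AbstractSite):
        SITE_NAME = SiteName.RSS  # placeholder; a real site uses its own SiteName member

        @classmethod
        async def search_task(
            cls, query: str, page: int, category: str
        ) -> list[ExternalSearchResultItem]:
            if category not in ["all", "book"]:  # placeholder category filter
                return []
            results: list[ExternalSearchResultItem] = []
            # fetch and parse a remote API with httpx here, as the concrete
            # sites further down do, appending ExternalSearchResultItem objects
            return results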

@@ -340,6 +348,17 @@ class SiteManager:
    def get_all_sites():
        return SiteManager.registry.values()

    @staticmethod
    def get_sites_for_search():
        if settings.SEARCH_SITES == ["-"]:
            return []
        sites = [
            cls for cls in SiteManager.get_all_sites() if hasattr(cls, "search_task")
        ]
        if settings.SEARCH_SITES == ["*"] or not settings.SEARCH_SITES:
            return sites
        return [s for s in sites if s.SITE_NAME.value in settings.SEARCH_SITES]


def crawl_related_resources_task(resource_pk):
    resource = ExternalResource.objects.filter(pk=resource_pk).first()
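
In plain terms, the selection logic above maps the new setting onto the site registry; a short sketch of the expected behaviour (site names purely illustrative):

    # NEODB_SEARCH_SITES="-"         -> get_sites_for_search() returns []
    # NEODB_SEARCH_SITES="" or "*"   -> every registered site class that defines search_task
    # NEODB_SEARCH_SITES="tmdb,igdb" -> only sites whose SITE_NAME.value is listed
    sites = SiteManager.get_sites_for_search()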

@@ -1,16 +1,28 @@
import time

from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand
from django.db.models import Count, F
from tqdm import tqdm

from catalog.common.sites import SiteManager
from catalog.models import Edition, Item, Podcast, TVSeason, TVShow
from catalog.search.external import ExternalSources
from common.models import detect_language, uniq
from takahe.utils import Takahe


class Command(BaseCommand):
    help = "catalog app utilities"

    def add_arguments(self, parser):
        parser.add_argument(
            "--extsearch",
        )
        parser.add_argument(
            "--category",
            default="all",
        )
        parser.add_argument(
            "--verbose",
            action="store_true",

@@ -44,8 +56,26 @@ class Command(BaseCommand):
            self.integrity()
        if options["localize"]:
            self.localize()
        if options["extsearch"]:
            self.external_search(options["extsearch"], options["category"])
        self.stdout.write(self.style.SUCCESS("Done."))

    def external_search(self, q, cat):
        sites = SiteManager.get_sites_for_search()
        peers = Takahe.get_neodb_peers()
        self.stdout.write(f"Searching {cat} '{q}' ...")
        self.stdout.write(f"Peers: {peers}")
        self.stdout.write(f"Sites: {sites}")
        start_time = time.time()
        results = ExternalSources.search(q, 1, cat)
        for r in results:
            self.stdout.write(f"{r}")
        self.stdout.write(
            self.style.SUCCESS(
                f"{time.time() - start_time} seconds, {len(results)} items."
            )
        )

    def localize(self):
        c = Item.objects.all().count()
        qs = Item.objects.filter(is_deleted=False, merged_to_item__isnull=True)
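
For a quick smoke test of the new code path from the shell (the management command name `catalog` is assumed from the `help` string above; adjust if the command is registered under another name):

    # search external sources for a book and print each result plus timing
    python manage.py catalog --extsearch "the lord of the rings" --category book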

@@ -38,7 +38,7 @@ from .tv.models import (
    TVShowSchema,
)

from .search.models import Indexer  # isort:skip
from .search.models import Indexer, ExternalSearchResultItem  # isort:skip


# class Exhibition(Item):

@@ -103,6 +103,7 @@ __all__ = [
    "CatalogCollection",
    "AvailableItemCategory",
    "ExternalResource",
    "ExternalSearchResultItem",
    "IdType",
    "Item",
    "ItemCategory",

@@ -1,342 +1,26 @@
import asyncio
import logging
from urllib.parse import quote_plus, urlparse

import httpx
import requests
from django.conf import settings
from lxml import html

from catalog.common import BasicDownloader, ItemCategory, SiteManager, SiteName
from catalog.common import SiteManager
from catalog.search.models import ExternalSearchResultItem
from catalog.sites.igdb import IGDB as IGDB_Site
from catalog.sites.spotify import get_spotify_token
from catalog.sites.tmdb import TMDB_DEFAULT_LANG
from catalog.sites.fedi import FediverseInstance

SEARCH_PAGE_SIZE = 5  # not all apis support page size
logger = logging.getLogger(__name__)


class Goodreads:
class ExternalSources:
    @classmethod
    def search(cls, q: str, page=1):
        results = []
        search_url = f"https://www.goodreads.com/search?page={page}&q={quote_plus(q)}"
        try:
            r = requests.get(
                search_url,
                timeout=3,
                headers={
                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                    "Accept-Language": BasicDownloader.get_accept_language(),
                    "Accept-Encoding": "gzip, deflate",
                    "Connection": "keep-alive",
                    "DNT": "1",
                    "Upgrade-Insecure-Requests": "1",
                    "Cache-Control": "no-cache",
                },
            )
            if r.url.startswith("https://www.goodreads.com/book/show/"):
                # Goodreads will 302 if only one result matches ISBN
                site = SiteManager.get_site_by_url(r.url)
                if site:
                    res = site.get_resource_ready()
                    if res:
                        subtitle = f"{res.metadata.get('pub_year')} {', '.join(res.metadata.get('author', []))} {', '.join(res.metadata.get('translator', []))}"
                        results.append(
                            ExternalSearchResultItem(
                                ItemCategory.Book,
                                SiteName.Goodreads,
                                res.url,
                                res.metadata["title"],
                                subtitle,
                                res.metadata.get("brief", ""),
                                res.metadata.get("cover_image_url", ""),
                            )
                        )
            else:
                h = html.fromstring(r.content.decode("utf-8"))
                books = h.xpath('//tr[@itemtype="http://schema.org/Book"]')
                for c in books:  # type:ignore
                    el_cover = c.xpath('.//img[@class="bookCover"]/@src')
                    cover = el_cover[0] if el_cover else ""
                    el_title = c.xpath('.//a[@class="bookTitle"]//text()')
                    title = "".join(el_title).strip() if el_title else "Unkown Title"
                    el_url = c.xpath('.//a[@class="bookTitle"]/@href')
                    url = "https://www.goodreads.com" + el_url[0] if el_url else ""
                    el_authors = c.xpath('.//a[@class="authorName"]//text()')
                    subtitle = ", ".join(el_authors) if el_authors else ""
                    results.append(
                        ExternalSearchResultItem(
                            ItemCategory.Book,
                            SiteName.Goodreads,
                            url,
                            title,
                            subtitle,
                            "",
                            cover,
                        )
                    )
        except requests.exceptions.RequestException as e:
            logger.warning(f"Search {search_url} error: {e}")
        except Exception as e:
            logger.error("Goodreads search error", extra={"query": q, "exception": e})
        return results


class GoogleBooks:
    @classmethod
    def search(cls, q, page=1):
        results = []
        api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={SEARCH_PAGE_SIZE * (page - 1)}&maxResults={SEARCH_PAGE_SIZE}&maxAllowedMaturityRating=MATURE"
        try:
            j = requests.get(api_url, timeout=2).json()
            if "items" in j:
                for b in j["items"]:
                    if "title" not in b["volumeInfo"]:
                        continue
                    title = b["volumeInfo"]["title"]
                    subtitle = ""
                    if "publishedDate" in b["volumeInfo"]:
                        subtitle += b["volumeInfo"]["publishedDate"] + " "
                    if "authors" in b["volumeInfo"]:
                        subtitle += ", ".join(b["volumeInfo"]["authors"])
                    if "description" in b["volumeInfo"]:
                        brief = b["volumeInfo"]["description"]
                    elif "textSnippet" in b["volumeInfo"]:
                        brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
                    else:
                        brief = ""
                    category = ItemCategory.Book
                    # b['volumeInfo']['infoLink'].replace('http:', 'https:')
                    url = "https://books.google.com/books?id=" + b["id"]
                    cover = (
                        b["volumeInfo"]["imageLinks"]["thumbnail"]
                        if "imageLinks" in b["volumeInfo"]
                        else ""
                    )
                    results.append(
                        ExternalSearchResultItem(
                            category,
                            SiteName.GoogleBooks,
                            url,
                            title,
                            subtitle,
                            brief,
                            cover,
                        )
                    )
        except requests.exceptions.RequestException as e:
            logger.warning(f"Search {api_url} error: {e}")
        except Exception as e:
            logger.error("GoogleBooks search error", extra={"query": q, "exception": e})
        return results


class TheMovieDatabase:
    @classmethod
    def search(cls, q, page=1):
        results = []
        api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={page}&api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&include_adult=true"
        try:
            j = requests.get(api_url, timeout=2).json()
            if j.get("results"):
                for m in j["results"]:
                    if m["media_type"] in ["tv", "movie"]:
                        url = f"https://www.themoviedb.org/{m['media_type']}/{m['id']}"
                        if m["media_type"] == "tv":
                            cat = ItemCategory.TV
                            title = m["name"]
                            subtitle = f"{m.get('first_air_date', '')} {m.get('original_name', '')}"
                        else:
                            cat = ItemCategory.Movie
                            title = m["title"]
                            subtitle = f"{m.get('release_date', '')} {m.get('original_name', '')}"
                        cover = (
                            f"https://image.tmdb.org/t/p/w500/{m.get('poster_path')}"
                            if m.get("poster_path")
                            else ""
                        )
                        results.append(
                            ExternalSearchResultItem(
                                cat,
                                SiteName.TMDB,
                                url,
                                title,
                                subtitle,
                                m.get("overview"),
                                cover,
                            )
                        )
            else:
                logger.warning(f"TMDB search '{q}' no results found.")
        except requests.exceptions.RequestException as e:
            logger.warning(f"Search {api_url} error: {e}")
        except Exception as e:
            logger.error("TMDb search error", extra={"query": q, "exception": e})
        return results


class Spotify:
    @classmethod
    def search(cls, q, page=1):
        results = []
        api_url = f"https://api.spotify.com/v1/search?q={q}&type=album&limit={SEARCH_PAGE_SIZE}&offset={page * SEARCH_PAGE_SIZE}"
        try:
            headers = {"Authorization": f"Bearer {get_spotify_token()}"}
            j = requests.get(api_url, headers=headers, timeout=2).json()
            if j.get("albums"):
                for a in j["albums"]["items"]:
                    title = a["name"]
                    subtitle = a.get("release_date", "")
                    for artist in a.get("artists", []):
                        subtitle += " " + artist.get("name", "")
                    url = a["external_urls"]["spotify"]
                    cover = a["images"][0]["url"] if a.get("images") else ""
                    results.append(
                        ExternalSearchResultItem(
                            ItemCategory.Music,
                            SiteName.Spotify,
                            url,
                            title,
                            subtitle,
                            "",
                            cover,
                        )
                    )
            else:
                logger.warning(f"Spotify search '{q}' no results found.")
        except requests.exceptions.RequestException as e:
            logger.warning(f"Search {api_url} error: {e}")
        except Exception as e:
            logger.error("Spotify search error", extra={"query": q, "exception": e})
        return results


class Bandcamp:
    @classmethod
    def search(cls, q, page=1):
        results = []
        search_url = f"https://bandcamp.com/search?from=results&item_type=a&page={page}&q={quote_plus(q)}"
        try:
            r = requests.get(search_url, timeout=2)
            h = html.fromstring(r.content.decode("utf-8"))
            albums = h.xpath('//li[@class="searchresult data-search"]')
            for c in albums:  # type:ignore
                el_cover = c.xpath('.//div[@class="art"]/img/@src')
                cover = el_cover[0] if el_cover else ""
                el_title = c.xpath('.//div[@class="heading"]//text()')
                title = "".join(el_title).strip() if el_title else "Unknown Title"
                el_url = c.xpath('..//div[@class="itemurl"]/a/@href')
                url = el_url[0] if el_url else ""
                el_authors = c.xpath('.//div[@class="subhead"]//text()')
                subtitle = ", ".join(el_authors) if el_authors else ""
                results.append(
                    ExternalSearchResultItem(
                        ItemCategory.Music,
                        SiteName.Bandcamp,
                        url,
                        title,
                        subtitle,
                        "",
                        cover,
                    )
                )
        except requests.exceptions.RequestException as e:
            logger.warning(f"Search {search_url} error: {e}")
        except Exception as e:
            logger.error("Bandcamp search error", extra={"query": q, "exception": e})
        return results


class ApplePodcast:
    @classmethod
    def search(cls, q, page=1):
        results = []
        search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * SEARCH_PAGE_SIZE}&term={quote_plus(q)}"
        try:
            r = requests.get(search_url, timeout=2).json()
            for p in r["results"][(page - 1) * SEARCH_PAGE_SIZE :]:
                if p.get("feedUrl"):
                    results.append(
                        ExternalSearchResultItem(
                            ItemCategory.Podcast,
                            SiteName.RSS,
                            p["feedUrl"],
                            p["trackName"],
                            p["artistName"],
                            "",
                            p["artworkUrl600"],
                        )
                    )
        except requests.exceptions.RequestException as e:
            logger.warning(f"Search {search_url} error: {e}")
        except Exception as e:
            logger.error(
                "ApplePodcast search error", extra={"query": q, "exception": e}
            )
        return results


class IGDB:
    @classmethod
    def search(cls, q, page=1):
        return IGDB_Site.search(
            q, limit=SEARCH_PAGE_SIZE, offset=page * SEARCH_PAGE_SIZE
        )


class Fediverse:
    @staticmethod
    async def search_task(host, q, category=None):
        api_url = f"https://{host}/api/catalog/search?query={quote_plus(q)}{'&category=' + category if category else ''}"
        async with httpx.AsyncClient() as client:
            results = []
            try:
                response = await client.get(
                    api_url,
                    timeout=2,
                )
                r = response.json()
            except Exception as e:
                logger.error(
                    f"Fediverse search {host} error",
                    extra={"url": api_url, "query": q, "exception": e},
                )
                return []
            if "data" in r:
                for item in r["data"]:
                    if any(
                        urlparse(res["url"]).hostname in settings.SITE_DOMAINS
                        for res in item.get("external_resources", [])
                    ):
                        continue
                    url = f"https://{host}{item['url']}"  # FIXME update API and use abs urls
                    try:
                        cat = ItemCategory(item["category"])
                    except Exception:
                        cat = None
                    results.append(
                        ExternalSearchResultItem(
                            cat,
                            host,
                            url,
                            item["display_title"],
                            "",
                            item["brief"],
                            item["cover_image_url"],
                        )
                    )
        return results

    @classmethod
    def search(cls, q: str, page: int = 1, category: str | None = None):
        from takahe.utils import Takahe

        peers = Takahe.get_neodb_peers()
        c = category if category != "movietv" else "movie,tv"
        tasks = [Fediverse.search_task(host, q, c) for host in peers]
    def search(
        cls, query: str, page: int = 1, category: str | None = None
    ) -> list[ExternalSearchResultItem]:
        if not query or page < 1 or page > 10:
            return []
        if category in ["", None]:
            category = "all"
        tasks = FediverseInstance.search_tasks(query, page, category)
        for site in SiteManager.get_sites_for_search():
            tasks.append(site.search_task(query, page, category))
        # loop = asyncio.get_event_loop()
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

@@ -344,29 +28,3 @@ class Fediverse:
        for r in loop.run_until_complete(asyncio.gather(*tasks)):
            results.extend(r)
        return results


class ExternalSources:
    @classmethod
    def search(cls, c, q, page=1):
        if not q:
            return []
        results = []
        results.extend(
            Fediverse.search(q, page, category=c if c and c != "all" else None)
        )
        if c == "" or c is None:
            c = "all"
        if c == "all" or c == "movietv":
            results.extend(TheMovieDatabase.search(q, page))
        if c == "all" or c == "book":
            results.extend(GoogleBooks.search(q, page))
            results.extend(Goodreads.search(q, page))
        if c == "all" or c == "game":
            results.extend(IGDB.search(q, page))
        if c == "all" or c == "music":
            results.extend(Spotify.search(q, page))
            results.extend(Bandcamp.search(q, page))
        if c == "podcast":
            results.extend(ApplePodcast.search(q, page))
        return results
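
Taken together, these two hunks change the call contract from `ExternalSources.search(category, query, page)` to `ExternalSources.search(query, page, category)` and replace the per-source synchronous calls with async `search_task` coroutines gathered on a fresh event loop. A hedged usage sketch (query and category values are arbitrary examples):

    from catalog.search.external import ExternalSources

    # fans out to federated peers plus every site enabled via NEODB_SEARCH_SITES;
    # blocks until all tasks finish and returns ExternalSearchResultItem objects
    items = ExternalSources.search("dune", page=1, category="book")
    for item in items:
        print(item)  # __repr__ -> "[category] display_title source_url"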

@@ -79,7 +79,7 @@ class ExternalSearchResultItem:
        self.cover_image_url = cover_url

    def __repr__(self):
        return f"[{self.category}] {self.display_title} {self.url}"
        return f"[{self.category}] {self.display_title} {self.source_url}"

    @property
    def verbose_category_name(self):

@@ -159,7 +159,7 @@ def external_search(request):
        category = None
    keywords = request.GET.get("q", default="").strip()
    page_number = int_(request.GET.get("page"), 1)
    items = ExternalSources.search(category, keywords, page_number) if keywords else []
    items = ExternalSources.search(keywords, page_number, category) if keywords else []
    cache_key = f"search_{category if category != 'movietv' else 'movie,tv'}_{keywords}"
    dedupe_urls = cache.get(cache_key, [])
    items = [i for i in items if i.source_url not in dedupe_urls]

@@ -1,6 +1,7 @@
from ..common.sites import SiteManager
from .ao3 import ArchiveOfOurOwn
from .apple_music import AppleMusic
from .apple_podcast import ApplePodcast
from .bandcamp import Bandcamp
from .bangumi import Bangumi
from .bgg import BoardGameGeek

@@ -24,12 +25,11 @@ from .steam import Steam
from .tmdb import TMDB_Movie
from .ypshuo import Ypshuo

# from .apple_podcast import ApplePodcast

__all__ = [
    "SiteManager",
    "ArchiveOfOurOwn",
    "AppleMusic",
    "ApplePodcast",
    "Bandcamp",
    "Bangumi",
    "BoardGameGeek",

@@ -1,16 +1,17 @@
import logging
from urllib.parse import quote_plus

import httpx
from loguru import logger

from catalog.common import *
from catalog.models import *

from .rss import RSS

_logger = logging.getLogger(__name__)


@SiteManager.register
class ApplePodcast(AbstractSite):
    # SITE_NAME = SiteName.ApplePodcast
    SITE_NAME = SiteName.ApplePodcast
    ID_TYPE = IdType.ApplePodcast
    URL_PATTERNS = [r"https://[^.]+.apple.com/\w+/podcast/*[^/?]*/id(\d+)"]
    WIKI_PROPERTY_ID = "P5842"

@@ -38,3 +39,35 @@ class ApplePodcast(AbstractSite):
        )
        pd.lookup_ids[IdType.RSS] = RSS.url_to_id(feed_url)
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category != "podcast":
            return []
        SEARCH_PAGE_SIZE = 5 if category == "all" else 10
        results = []
        search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * SEARCH_PAGE_SIZE}&term={quote_plus(q)}"
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(search_url, timeout=2)
                r = response.json()
                for p in r["results"][(page - 1) * SEARCH_PAGE_SIZE :]:
                    if p.get("feedUrl"):
                        results.append(
                            ExternalSearchResultItem(
                                ItemCategory.Podcast,
                                SiteName.RSS,
                                p["feedUrl"],
                                p["trackName"],
                                p["artistName"],
                                "",
                                p["artworkUrl600"],
                            )
                        )
            except Exception as e:
                logger.error(
                    "ApplePodcast search error", extra={"query": q, "exception": e}
                )
        return results

@@ -5,6 +5,9 @@ import urllib.parse

import dateparser
import dns.resolver
import httpx
from loguru import logger
from lxml import html

from catalog.common import *
from catalog.models import *

@@ -103,3 +106,45 @@ class Bandcamp(AbstractSite):
        }
        pd = ResourceContent(metadata=data)
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category != "music":
            return []
        SEARCH_PAGE_SIZE = 5
        p = (page - 1) * SEARCH_PAGE_SIZE // 18 + 1
        offset = (page - 1) * SEARCH_PAGE_SIZE % 18
        results = []
        search_url = f"https://bandcamp.com/search?from=results&item_type=a&page={p}&q={urllib.parse.quote_plus(q)}"
        async with httpx.AsyncClient() as client:
            try:
                r = await client.get(search_url, timeout=2)
                h = html.fromstring(r.content.decode("utf-8"))
                albums = h.xpath('//li[@class="searchresult data-search"]')
                for c in albums:  # type:ignore
                    el_cover = c.xpath('.//div[@class="art"]/img/@src')
                    cover = el_cover[0] if el_cover else ""
                    el_title = c.xpath('.//div[@class="heading"]//text()')
                    title = "".join(el_title).strip() if el_title else "Unknown Title"
                    el_url = c.xpath('..//div[@class="itemurl"]/a/@href')
                    url = el_url[0] if el_url else ""
                    el_authors = c.xpath('.//div[@class="subhead"]//text()')
                    subtitle = ", ".join(el_authors) if el_authors else ""
                    results.append(
                        ExternalSearchResultItem(
                            ItemCategory.Music,
                            SiteName.Bandcamp,
                            url,
                            title,
                            subtitle,
                            "",
                            cover,
                        )
                    )
            except Exception as e:
                logger.error(
                    "Bandcamp search error", extra={"query": q, "exception": e}
                )
        return results[offset : offset + SEARCH_PAGE_SIZE]

@@ -1,9 +1,33 @@
from urllib.parse import quote_plus, urlparse

import httpx
from django.conf import settings
from django.core.validators import URLValidator
from loguru import logger

from catalog.common import *
from catalog.models import *
from catalog.common import (
    AbstractSite,
    BasicImageDownloader,
    CachedDownloader,
    IdType,
    ItemCategory,
    ResourceContent,
    SiteManager,
    SiteName,
)
from catalog.models import (
    Album,
    Edition,
    ExternalSearchResultItem,
    Game,
    Movie,
    Performance,
    PerformanceProduction,
    Podcast,
    TVEpisode,
    TVSeason,
    TVShow,
)


@SiteManager.register

@@ -99,3 +123,56 @@ class FediverseInstance(AbstractSite):
            lookup_ids=ids,
        )
        return d

    @classmethod
    async def peer_search_task(cls, host, q, page, category=None):
        SEARCH_PAGE_SIZE = 5
        p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1
        offset = (page - 1) * SEARCH_PAGE_SIZE % 20
        api_url = f"https://{host}/api/catalog/search?query={quote_plus(q)}&page={p}{'&category=' + category if category and category != 'all' else ''}"
        async with httpx.AsyncClient() as client:
            results = []
            try:
                response = await client.get(
                    api_url,
                    timeout=2,
                )
                r = response.json()
            except Exception as e:
                logger.error(
                    f"Fediverse search {host} error",
                    extra={"url": api_url, "query": q, "exception": e},
                )
                return []
            if "data" in r:
                for item in r["data"]:
                    if any(
                        urlparse(res["url"]).hostname in settings.SITE_DOMAINS
                        for res in item.get("external_resources", [])
                    ):
                        continue
                    url = f"https://{host}{item['url']}"  # FIXME update API and use abs urls
                    try:
                        cat = ItemCategory(item["category"])
                    except Exception:
                        cat = None
                    results.append(
                        ExternalSearchResultItem(
                            cat,
                            host,
                            url,
                            item["display_title"],
                            "",
                            item["brief"],
                            item["cover_image_url"],
                        )
                    )
        return results[offset : offset + SEARCH_PAGE_SIZE]

    @classmethod
    def search_tasks(cls, q: str, page: int = 1, category: str | None = None):
        from takahe.utils import Takahe

        peers = Takahe.get_neodb_peers()
        c = category if category != "movietv" else "movie,tv"
        return [cls.peer_search_task(host, q, page, c) for host in peers]

@@ -1,18 +1,18 @@
import json
import logging
from datetime import datetime
from urllib.parse import quote_plus

import httpx
from django.utils.timezone import make_aware
from loguru import logger
from lxml import html

from catalog.book.models import Edition, Work
from catalog.book.utils import binding_to_format, detect_isbn_asin
from catalog.common import *
from common.models.lang import detect_language
from catalog.models import Edition, ExternalSearchResultItem, Work
from common.models import detect_language
from journal.models.renderers import html_to_text

_logger = logging.getLogger(__name__)


class GoodreadsDownloader(RetryDownloader):
    def validate_response(self, response):

@@ -121,6 +121,82 @@ class Goodreads(AbstractSite):
        pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category not in ["all", "book"]:
            return []
        SEARCH_PAGE_SIZE = 5
        p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1
        offset = (page - 1) * SEARCH_PAGE_SIZE % 20
        results = []
        search_url = f"https://www.goodreads.com/search?page={p}&q={quote_plus(q)}"
        async with httpx.AsyncClient() as client:
            try:
                r = await client.get(
                    search_url,
                    timeout=3,
                    headers={
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
                        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                        "Accept-Language": BasicDownloader.get_accept_language(),
                        "Accept-Encoding": "gzip, deflate",
                        "Connection": "keep-alive",
                        "DNT": "1",
                        "Upgrade-Insecure-Requests": "1",
                        "Cache-Control": "no-cache",
                    },
                )
                if r.url.path.startswith("/book/show/"):
                    # Goodreads will 302 if only one result matches ISBN
                    site = SiteManager.get_site_by_url(str(r.url))
                    if site:
                        res = site.get_resource_ready()
                        if res:
                            subtitle = f"{res.metadata.get('pub_year')} {', '.join(res.metadata.get('author', []))} {', '.join(res.metadata.get('translator', []))}"
                            results.append(
                                ExternalSearchResultItem(
                                    ItemCategory.Book,
                                    SiteName.Goodreads,
                                    res.url,
                                    res.metadata["title"],
                                    subtitle,
                                    res.metadata.get("brief", ""),
                                    res.metadata.get("cover_image_url", ""),
                                )
                            )
                else:
                    h = html.fromstring(r.content.decode("utf-8"))
                    books = h.xpath('//tr[@itemtype="http://schema.org/Book"]')
                    for c in books:  # type:ignore
                        el_cover = c.xpath('.//img[@class="bookCover"]/@src')
                        cover = el_cover[0] if el_cover else ""
                        el_title = c.xpath('.//a[@class="bookTitle"]//text()')
                        title = (
                            "".join(el_title).strip() if el_title else "Unkown Title"
                        )
                        el_url = c.xpath('.//a[@class="bookTitle"]/@href')
                        url = "https://www.goodreads.com" + el_url[0] if el_url else ""
                        el_authors = c.xpath('.//a[@class="authorName"]//text()')
                        subtitle = ", ".join(el_authors) if el_authors else ""
                        results.append(
                            ExternalSearchResultItem(
                                ItemCategory.Book,
                                SiteName.Goodreads,
                                url,
                                title,
                                subtitle,
                                "",
                                cover,
                            )
                        )
            except Exception as e:
                logger.error(
                    "Goodreads search error", extra={"query": q, "exception": e}
                )
        return results[offset : offset + SEARCH_PAGE_SIZE]


@SiteManager.register
class Goodreads_Work(AbstractSite):

@@ -1,7 +1,10 @@
import logging
import re
from urllib.parse import quote_plus

import httpx
from django.conf import settings
from loguru import logger

from catalog.book.utils import isbn_10_to_13
from catalog.common import *

@@ -116,3 +119,57 @@ class GoogleBooks(AbstractSite):
            cover_image_extention=ext,
            lookup_ids={IdType.ISBN: isbn13},
        )

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category not in ["all", "book"]:
            return []
        SEARCH_PAGE_SIZE = 5
        results = []
        api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={SEARCH_PAGE_SIZE * (page - 1)}&maxResults={SEARCH_PAGE_SIZE}&maxAllowedMaturityRating=MATURE"
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(api_url, timeout=2)
                j = response.json()
                if "items" in j:
                    for b in j["items"]:
                        if "title" not in b["volumeInfo"]:
                            continue
                        title = b["volumeInfo"]["title"]
                        subtitle = ""
                        if "publishedDate" in b["volumeInfo"]:
                            subtitle += b["volumeInfo"]["publishedDate"] + " "
                        if "authors" in b["volumeInfo"]:
                            subtitle += ", ".join(b["volumeInfo"]["authors"])
                        if "description" in b["volumeInfo"]:
                            brief = b["volumeInfo"]["description"]
                        elif "textSnippet" in b["volumeInfo"]:
                            brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
                        else:
                            brief = ""
                        category = ItemCategory.Book
                        # b['volumeInfo']['infoLink'].replace('http:', 'https:')
                        url = "https://books.google.com/books?id=" + b["id"]
                        cover = (
                            b["volumeInfo"]["imageLinks"]["thumbnail"]
                            if "imageLinks" in b["volumeInfo"]
                            else ""
                        )
                        results.append(
                            ExternalSearchResultItem(
                                category,
                                SiteName.GoogleBooks,
                                url,
                                title,
                                subtitle,
                                brief,
                                cover,
                            )
                        )
            except Exception as e:
                logger.error(
                    "GoogleBooks search error", extra={"query": q, "exception": e}
                )
        return results

@@ -8,6 +8,7 @@ import datetime
import json
from urllib.parse import quote_plus

import httpx
import requests
from django.conf import settings
from django.core.cache import cache

@@ -83,44 +84,6 @@ class IGDB(AbstractSite):
            fp.write(json.dumps(r))
        return r

    @classmethod
    def search(cls, q, limit: int, offset: int = 0):
        rs = cls.api_query(
            "games",
            f'fields *, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name; search "{quote_plus(q)}"; limit {limit}; offset {offset};',
        )
        result = []
        for r in rs:
            subtitle = ""
            if "first_release_date" in r:
                subtitle = datetime.datetime.fromtimestamp(
                    r["first_release_date"], datetime.timezone.utc
                ).strftime("%Y-%m-%d ")
            if "platforms" in r:
                ps = sorted(r["platforms"], key=lambda p: p["id"])
                subtitle += ",".join(
                    [(p["name"] if p["id"] != 6 else "Windows") for p in ps]
                )
            brief = r["summary"] if "summary" in r else ""
            brief += "\n\n" + r["storyline"] if "storyline" in r else ""
            cover = (
                "https:" + r["cover"]["url"].replace("t_thumb", "t_cover_big")
                if r.get("cover")
                else ""
            )
            result.append(
                ExternalSearchResultItem(
                    ItemCategory.Game,
                    SiteName.IGDB,
                    r["url"],
                    r["name"],
                    subtitle,
                    brief,
                    cover,
                )
            )
        return result

    def scrape(self):
        fields = "*, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name"
        r = self.api_query("games", f'fields {fields}; where url = "{self.url}";')

@@ -200,3 +163,55 @@ class IGDB(AbstractSite):
                IdType.Steam
            ).url_to_id(steam_url)
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category != "game":
            return []
        SEARCH_PAGE_SIZE = 5 if category == "all" else 10
        limit = SEARCH_PAGE_SIZE
        offset = (page - 1) * limit
        q = f'fields *, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name; search "{quote_plus(q)}"; limit {limit}; offset {offset};'
        _wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, _igdb_access_token())
        async with httpx.AsyncClient() as client:
            try:
                url = IGDBWrapper._build_url("games")
                params = _wrapper._compose_request(q)
                response = await client.post(url, **params)
                rs = json.loads(response.content)
            except requests.HTTPError as e:
                logger.error(f"IGDB API: {e}", extra={"exception": e})
                rs = []
        result = []
        for r in rs:
            subtitle = ""
            if "first_release_date" in r:
                subtitle = datetime.datetime.fromtimestamp(
                    r["first_release_date"], datetime.timezone.utc
                ).strftime("%Y-%m-%d ")
            if "platforms" in r:
                ps = sorted(r["platforms"], key=lambda p: p["id"])
                subtitle += ",".join(
                    [(p["name"] if p["id"] != 6 else "Windows") for p in ps]
                )
            brief = r["summary"] if "summary" in r else ""
            brief += "\n\n" + r["storyline"] if "storyline" in r else ""
            cover = (
                "https:" + r["cover"]["url"].replace("t_thumb", "t_cover_big")
                if r.get("cover")
                else ""
            )
            result.append(
                ExternalSearchResultItem(
                    ItemCategory.Game,
                    SiteName.IGDB,
                    r["url"],
                    r["name"],
                    subtitle,
                    brief,
                    cover,
                )
            )
        return result

@@ -6,8 +6,10 @@ import logging
import time

import dateparser
import httpx
import requests
from django.conf import settings
from loguru import logger

from catalog.common import *
from catalog.models import *

@@ -107,6 +109,45 @@ class Spotify(AbstractSite):
        pd.lookup_ids[IdType.ISRC] = isrc
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category not in ["music", "all"]:
            return []
        SEARCH_PAGE_SIZE = 5
        results = []
        api_url = f"https://api.spotify.com/v1/search?q={q}&type=album&limit={SEARCH_PAGE_SIZE}&offset={page * SEARCH_PAGE_SIZE}"
        async with httpx.AsyncClient() as client:
            try:
                headers = {"Authorization": f"Bearer {get_spotify_token()}"}
                response = await client.get(api_url, headers=headers, timeout=2)
                j = response.json()
                if j.get("albums"):
                    for a in j["albums"]["items"]:
                        title = a["name"]
                        subtitle = a.get("release_date", "")
                        for artist in a.get("artists", []):
                            subtitle += " " + artist.get("name", "")
                        url = a["external_urls"]["spotify"]
                        cover = a["images"][0]["url"] if a.get("images") else ""
                        results.append(
                            ExternalSearchResultItem(
                                ItemCategory.Music,
                                SiteName.Spotify,
                                url,
                                title,
                                subtitle,
                                "",
                                cover,
                            )
                        )
                else:
                    logger.warning(f"Spotify search '{q}' no results found.")
            except Exception as e:
                logger.error("Spotify search error", extra={"query": q, "exception": e})
        return results


def get_spotify_token():
    global spotify_token, spotify_token_expire_time

@@ -12,8 +12,11 @@ these language code from TMDB are not in currently iso-639-1

import logging
import re
from urllib.parse import quote_plus

import httpx
from django.conf import settings
from loguru import logger

from catalog.common import *
from catalog.movie.models import *

@@ -175,6 +178,55 @@ class TMDB_Movie(AbstractSite):
        pd.lookup_ids[IdType.IMDB] = imdb_code
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str
    ) -> list[ExternalSearchResultItem]:
        if category not in ["movietv", "all", "movie", "tv"]:
            return []
        SEARCH_PAGE_SIZE = 5 if category == "all" else 10
        p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1
        offset = (page - 1) * SEARCH_PAGE_SIZE % 20
        results = []
        api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={p}&api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&include_adult=true"
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(api_url, timeout=2)
                j = response.json()
                if j.get("results"):
                    for m in j["results"]:
                        if m["media_type"] in ["tv", "movie"]:
                            url = f"https://www.themoviedb.org/{m['media_type']}/{m['id']}"
                            if m["media_type"] == "tv":
                                cat = ItemCategory.TV
                                title = m["name"]
                                subtitle = f"{m.get('first_air_date', '')} {m.get('original_name', '')}"
                            else:
                                cat = ItemCategory.Movie
                                title = m["title"]
                                subtitle = f"{m.get('release_date', '')} {m.get('original_name', '')}"
                            cover = (
                                f"https://image.tmdb.org/t/p/w500/{m.get('poster_path')}"
                                if m.get("poster_path")
                                else ""
                            )
                            results.append(
                                ExternalSearchResultItem(
                                    cat,
                                    SiteName.TMDB,
                                    url,
                                    title,
                                    subtitle,
                                    m.get("overview"),
                                    cover,
                                )
                            )
                else:
                    logger.warning(f"TMDB search '{q}' no results found.")
            except Exception as e:
                logger.error("TMDb search error", extra={"query": q, "exception": e})
        return results[offset : offset + SEARCH_PAGE_SIZE]


@SiteManager.register
class TMDB_TV(AbstractSite):

@@ -33,6 +33,7 @@ x-shared:
      NEODB_DISABLE_DEFAULT_RELAY:
      NEODB_DISABLE_CRON_JOBS:
      NEODB_SEARCH_PEERS:
      NEODB_SEARCH_SITES:
      NEODB_MIN_MARKS_FOR_DISCOVER:
      NEODB_DISCOVER_UPDATE_INTERVAL:
      NEODB_DISCOVER_FILTER_LANGUAGE:

@@ -57,6 +57,7 @@ if you are doing debug or development:
- `GOOGLE_API_KEY` - API key for [Google Books](https://developers.google.com/books/docs/v1/using)
- `DISCOGS_API_KEY` - personal access token from [Discogs](https://www.discogs.com/settings/developers)
- `IGDB_API_CLIENT_ID`, `IGDB_API_CLIENT_SECRET` - IGDB [keys](https://api-docs.igdb.com/)
- `NEODB_SEARCH_SITES` is empty by default, which means NeoDB will search all available sources. This can be set to a comma-separated list of site names (e.g. `goodreads,googlebooks,spotify,tmdb,igdb,bandcamp,apple_podcast`), so that NeoDB will only search those sites; or not search any of them if set to just `-`.


## Other maintenance tasks
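
For example, an operator who only wants two of the documented sources could set, in the environment consumed by the compose stack (values taken from the example list above; `-` disables external site search entirely):

    NEODB_SEARCH_SITES=goodreads,tmdb
    # or, to turn site search off while keeping federated peer search:
    # NEODB_SEARCH_SITES=-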

@@ -169,7 +169,7 @@ mkdocs==1.6.1
    # via mkdocs-material
mkdocs-get-deps==0.2.0
    # via mkdocs
mkdocs-material==9.5.49
mkdocs-material==9.5.50
mkdocs-material-extensions==1.3.1
    # via mkdocs-material
multidict==6.1.0

@@ -213,7 +213,7 @@ pygments==2.19.1
    # via mkdocs-material
pymdown-extensions==10.14
    # via mkdocs-material
pyright==1.1.391
pyright==1.1.392.post0
python-dateutil==2.9.0.post0
    # via dateparser
    # via django-auditlog

@@ -251,7 +251,7 @@ rjsmin==1.2.2
    # via django-compressor
rq==2.1.0
    # via django-rq
ruff==0.9.1
ruff==0.9.2
sentry-sdk==2.20.0
setproctitle==1.3.4
six==1.17.0

@@ -292,7 +292,7 @@ urllib3==2.3.0
    # via sentry-sdk
urlman==2.0.2
validators==0.34.0
virtualenv==20.28.1
virtualenv==20.29.1
    # via pre-commit
watchdog==6.0.0
    # via mkdocs