lib.itmens/catalog/sites/apple_podcast.py

75 lines
2.5 KiB
Python
Raw Normal View History

from urllib.parse import quote_plus
import httpx
from loguru import logger
from catalog.common import *
2022-12-08 16:59:03 +00:00
from catalog.models import *
2023-01-29 20:05:30 -05:00
from .rss import RSS
2022-12-15 17:29:35 -05:00
@SiteManager.register
class ApplePodcast(AbstractSite):
    """Site adapter for Apple Podcasts show pages.

    Show metadata is read from the iTunes lookup API and linked to the show's
    RSS feed; searching goes through the iTunes search API.
    """

    SITE_NAME = SiteName.ApplePodcast
    ID_TYPE = IdType.ApplePodcast
    # The numeric show id is the only capture group. The dots in the host are
    # escaped so that only genuine ``<sub>.apple.com`` hosts match.
    URL_PATTERNS = [r"https://[^.]+\.apple\.com/\w+/podcast/*[^/?]*/id(\d+)"]
    WIKI_PROPERTY_ID = "P5842"
    DEFAULT_MODEL = Podcast

    @classmethod
    def id_to_url(cls, id_value):
        """Return the US-storefront Apple Podcasts URL for the show ``id_value``."""
        return "https://podcasts.apple.com/us/podcast/id" + id_value

    def scrape(self):
        """Fetch this show's metadata from the iTunes lookup API.

        Returns:
            A ``ResourceContent`` whose metadata carries the title, feed URL,
            host, genres and cover image URL taken from the first lookup
            result, and whose ``lookup_ids`` maps ``IdType.RSS`` to the id
            derived from the feed URL.

        Raises:
            IndexError: If the lookup response contains no results.
            KeyError: If the response or its first result lacks an expected
                field (for example ``feedUrl``).
        """
        api_url = f"https://itunes.apple.com/lookup?id={self.id_value}"
        dl = BasicDownloader(api_url)
        resp = dl.download()
        r = resp.json()["results"][0]
        feed_url = r["feedUrl"]
        title = r["trackName"]
        pd = ResourceContent(
            metadata={
                "title": title,
                "feed_url": feed_url,
                "host": [r["artistName"]],
                "genres": r["genres"],
                "cover_image_url": r["artworkUrl600"],
            }
        )
        # Tie the resource to its RSS feed via the feed-derived id.
        pd.lookup_ids[IdType.RSS] = RSS.url_to_id(feed_url)
        return pd

    @classmethod
    async def search_task(
        cls, q: str, page: int, category: str, page_size: int
    ) -> list[ExternalSearchResultItem]:
        """Search the iTunes API for podcasts matching ``q``.

        The request asks for the first ``page * page_size`` hits and the
        entries belonging to earlier pages are sliced off locally. Entries
        without a feed URL are skipped; the rest are reported under
        ``SiteName.RSS`` with the feed URL as their source URL.

        Args:
            q: Free-text query; URL-encoded before being sent.
            page: 1-based page number.
            category: Search category; anything other than ``"podcast"``
                yields an empty list without issuing a request.
            page_size: Number of results per page.

        Returns:
            The matching items. On a timeout or any other failure the problem
            is logged and whatever was collected so far is returned.
        """
        if category != "podcast":
            return []
        results = []
        search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * page_size}&term={quote_plus(q)}"
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(search_url, timeout=2)
                r = response.json()
                for p in r["results"][(page - 1) * page_size :]:
                    feed_url = p.get("feedUrl")
                    if not feed_url:
                        continue
                    # Display fields fall back to "" so that one incomplete
                    # entry cannot raise KeyError and discard the remaining
                    # results of the page.
                    results.append(
                        ExternalSearchResultItem(
                            ItemCategory.Podcast,
                            SiteName.RSS,
                            feed_url,
                            p.get("trackName", ""),
                            p.get("artistName", ""),
                            "",
                            p.get("artworkUrl600", ""),
                        )
                    )
            except httpx.TimeoutException:
                # Base class of ReadTimeout/ConnectTimeout/WriteTimeout/
                # PoolTimeout: every kind of timeout is a warning, not an error.
                logger.warning("ApplePodcast search timeout", extra={"query": q})
            except Exception as e:
                logger.error(
                    "ApplePodcast search error", extra={"query": q, "exception": e}
                )
        return results