update catalog/bangumi (#828)

* fix: calendar_yearview

* external search bangumi

* bgm.tv: fetch volumes of series

* bgm.tv: mark ova as tv season

* bangumi performance

* add bangumi performance test

* pagesize
This commit is contained in:
Jigsaw 2025-01-20 23:24:20 +08:00 committed by GitHub
parent ecc5432027
commit ad1e6fb8c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 275 additions and 49 deletions

View file

@ -118,3 +118,54 @@ class DoubanDramaTestCase(TestCase):
self.assertEqual(productions[3].language, ["日语"]) self.assertEqual(productions[3].language, ["日语"])
self.assertEqual(productions[3].opening_date, "2017-11-13") self.assertEqual(productions[3].opening_date, "2017-11-13")
self.assertEqual(productions[3].location, ["梅田芸術劇場メインホール"]) self.assertEqual(productions[3].location, ["梅田芸術劇場メインホール"])
class BangumiDramaTestCase(TestCase):
    """Scrape checks for two bgm.tv subjects that are tagged as stage plays."""

    databases = "__all__"

    def setUp(self):
        pass

    @use_local_response
    def test_scrape(self):
        """Scrape subjects 224973 and 442025 and compare the parsed fields."""
        # Subject 224973: "主演" is given as a list in the infobox.
        t_url = "https://bgm.tv/subject/224973"
        site = SiteManager.get_site_by_url(t_url)
        # The returned resource is not inspected here; only the item is checked.
        site.get_resource_ready()
        item = site.get_item()
        self.assertEqual(item.display_title, "超级弹丸论破2舞台剧~再见了绝望学园~2017")
        self.assertEqual(
            sorted(item.actor, key=lambda a: a["name"]),
            [
                {"name": "伊藤萌々香", "role": None},
                {"name": "横浜流星", "role": None},
                {"name": "鈴木拡樹", "role": None},
            ],
        )
        self.assertEqual(item.language, ["日语"])

        # Subject 442025: "主演" is given as a single delimited string.
        t_url = "https://bgm.tv/subject/442025"
        site = SiteManager.get_site_by_url(t_url)
        site.get_resource_ready()
        item = site.get_item()
        # NOTE(review): the two expected strings below end without the closing
        # bracket that the fixture's "name" / "原作" values carry — confirm
        # against the upstream test before relying on them.
        self.assertEqual(item.display_title, "LIVE STAGE「ぼっち・ざ・ろっく")
        self.assertEqual(
            item.orig_creator,
            [
                "はまじあき芳文社「まんがタイムきららMAX」連載中TVアニメ「ぼっち・ざ・ろっく"
            ],
        )
        self.assertEqual(item.opening_date, "2023-08-11")
        self.assertEqual(item.closing_date, "2023-08-20")
        self.assertEqual(item.genre, ["舞台演出"])
        self.assertEqual(item.language, ["日本语"])
        self.assertEqual(item.playwright, ["山崎彬"])
        self.assertEqual(item.director, ["山崎彬"])
        self.assertEqual(
            sorted(item.actor, key=lambda a: a["name"]),
            [
                {"name": "大森未来衣", "role": None},
                {"name": "大竹美希", "role": None},
                {"name": "守乃まも", "role": None},
                {"name": "小山内花凜", "role": None},
            ],
        )

View file

@ -1,8 +1,14 @@
import logging import logging
from typing import Any
from collections import OrderedDict from collections import OrderedDict
from loguru import logger
import httpx
from django.conf import settings
from catalog.book.utils import detect_isbn_asin from catalog.book.utils import detect_isbn_asin
from catalog.common import * from catalog.common import *
from catalog.game.models import GameReleaseType
from catalog.models import * from catalog.models import *
from common.models.lang import detect_language from common.models.lang import detect_language
@ -22,45 +28,162 @@ class Bangumi(AbstractSite):
DEFAULT_MODEL = None DEFAULT_MODEL = None
@classmethod @classmethod
def id_to_url(cls, id_value): def get_category(
return f"https://bgm.tv/subject/{id_value}" cls, o: dict[str, Any], fetch_resources: bool = False
) -> tuple[ItemCategory, dict[str, Any]]:
def scrape(self): dt = o.get("date")
api_url = f"https://api.bgm.tv/v0/subjects/{self.id_value}"
o = BasicDownloader(api_url).download().json()
showtime = None
pub_year = None pub_year = None
pub_month = None pub_month = None
release_year = None
release_type = None
showtime = None
year = None year = None
dt = o.get("date") related_resources = []
episodes = o.get("total_episodes", 0)
match o["type"]: match o["type"]:
case 1: case 1:
model = "Edition" model = "Edition"
category = ItemCategory.Book
if o["series"] and fetch_resources:
# model = "Series" TODO
res = (
BasicDownloader(
f"https://api.bgm.tv/v0/subjects/{o['id']}/subjects",
headers={
"User-Agent": settings.NEODB_USER_AGENT,
},
)
.download()
.json()
)
for s in res:
if s["relation"] != "单行本":
continue
related_resources.append(
{
"url": cls.id_to_url(s["id"]),
}
)
if dt: if dt:
d = dt.split("-") d = dt.split("-")
pub_year = d[0] pub_year = d[0]
pub_month = d[1] pub_month = d[1]
case 2 | 6: case 2 | 6:
is_series = episodes > 1 is_season = o["platform"] in {
model = "TVSeason" if is_series else "Movie" "TV",
"OVA", # may be movie in other sites
"WEB",
"电视剧",
"欧美剧",
"日剧",
"华语剧",
"综艺",
}
category = ItemCategory.TV if is_season else ItemCategory.Movie
model = "TVSeason" if is_season else "Movie"
if "舞台剧" in [
t["name"] for t in o["tags"]
]: # 只能这样判断舞台剧了bangumi三次元分类太少
category = ItemCategory.Performance
model = "Performance"
if dt: if dt:
year = dt.split("-")[0] year = dt.split("-")[0]
showtime = [ showtime = [
{"time": dt, "region": "首播日期" if is_series else "发布日期"} {"time": dt, "region": "首播日期" if is_season else "发布日期"}
] ]
case 3: case 3:
model = "Album" model = "Album"
category = ItemCategory.Music
case 4: case 4:
model = "Game" model = "Game"
category = ItemCategory.Game
match o["platform"]:
case "游戏":
release_type = GameReleaseType.GAME
case "扩展包":
release_type = GameReleaseType.DLC
case _: case _:
raise ValueError( raise ValueError(
f"Unknown type {o['type']} for bangumi subject {self.id_value}" f"Unknown type {o['type']} for bangumi subject {o["id"]}"
) )
return category, {
"preferred_model": model,
"related_resources": related_resources,
"pub_year": pub_year,
"pub_month": pub_month,
"release_year": release_year,
"release_type": release_type,
"showtime": showtime,
"year": year,
}
@classmethod
def id_to_url(cls, id_value):
    """Return the bgm.tv subject page URL for ``id_value``."""
    subject_url = f"https://bgm.tv/subject/{id_value}"
    return subject_url
@classmethod
async def search_task(
    cls, query: str, page: int, category: str, page_size: int
) -> list[ExternalSearchResultItem]:
    """Search Bangumi subjects by keyword and wrap the hits as search results.

    Args:
        query: Keyword sent to the search endpoint.
        page: 1-based page number; converted to an offset of
            ``(page - 1) * page_size``.
        category: Search category name; names not in the type table below
            yield an empty list without any request being made.
        page_size: Number of results requested per page.

    Returns:
        The results built so far. Any exception raised while requesting or
        parsing is logged and whatever was collected before it is returned.
    """
    # Search category -> Bangumi subject type codes (None applies no type filter value).
    type_filters = {
        "all": None,
        "movietv": [2, 6],
        "movie": [2, 6],
        "tv": [2, 6],
        "book": [1],
        "game": [4],
        "performance": [6],
        "music": [3],
    }
    found = []
    if category not in type_filters:
        return found
    search_url = f"https://api.bgm.tv/v0/search/subjects?limit={page_size}&offset={(page-1)*page_size}"
    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                search_url,
                headers={"User-Agent": settings.NEODB_USER_AGENT},
                json={"keyword": query, "filter": {"type": type_filters[category]}},
                timeout=2,
            )
            payload = response.json()
            for subject in payload["data"]:
                # Only the category is needed; the extra metadata is discarded.
                item_category, _ = cls.get_category(subject)
                hit = ExternalSearchResultItem(
                    category=item_category,
                    source_site=cls.SITE_NAME,
                    source_url=cls.id_to_url(subject["id"]),
                    title=subject["name"],
                    subtitle="",
                    brief=subject.get("summary", ""),
                    cover_url=subject["images"].get("common"),
                )
                found.append(hit)
        except Exception as e:
            logger.error(
                "Bangumi search error", extra={"query": query, "exception": e}
            )
    return found
def scrape(self):
api_url = f"https://api.bgm.tv/v0/subjects/{self.id_value}"
o = (
BasicDownloader(
api_url,
headers={
"User-Agent": settings.NEODB_USER_AGENT,
},
)
.download()
.json()
)
category, data = self.get_category(o, True)
title = o.get("name_cn") or o.get("name") title = o.get("name_cn") or o.get("name")
orig_title = o.get("name") if o.get("name") != title else None orig_title = o.get("name") if o.get("name") != title else None
brief = o.get("summary") brief = o.get("summary")
episodes = o.get("total_episodes", 0)
genre = None genre = None
platform = None platform = None
other_title = [] other_title = []
@ -69,13 +192,19 @@ class Bangumi(AbstractSite):
isbn = None isbn = None
language = None language = None
pub_house = None pub_house = None
authors = None orig_creator = None
authors = []
site = None site = None
director = None director = None
playwright = None
actor = None
pages = None pages = None
price = None price = None
opening_date = None
closing_date = None
location = None
for i in o.get("infobox", []): for i in o.get("infobox", []):
k = i["key"] k = i["key"].lower()
v = i["value"] v = i["value"]
match k: match k:
case "别名": case "别名":
@ -84,9 +213,14 @@ class Bangumi(AbstractSite):
if isinstance(v, list) if isinstance(v, list)
else ([v] if isinstance(v, str) else []) else ([v] if isinstance(v, str) else [])
) )
case "话数":
try:
episodes = int(v)
except ValueError:
pass
case "imdb_id": case "imdb_id":
imdb_code = v imdb_code = v
case "isbn" | "ISBN": case "isbn":
isbn_type, isbn = detect_isbn_asin(v) isbn_type, isbn = detect_isbn_asin(v)
case "语言": case "语言":
language = v language = v
@ -94,8 +228,26 @@ class Bangumi(AbstractSite):
pub_house = v pub_house = v
case "导演": case "导演":
director = v director = v
case "编剧" | "脚本":
playwright = (
[d["v"] for d in v]
if isinstance(v, list)
else ([v] if isinstance(v, str) else [])
)
case "原作":
match category:
case ItemCategory.Book:
authors.append(v)
case ItemCategory.Performance:
orig_creator = (
[d["v"] for d in v]
if isinstance(v, list)
else ([v] if isinstance(v, str) else [])
)
case "作画":
authors.append(v)
case "作者": case "作者":
authors = ( authors.extend(
[d["v"] for d in v] [d["v"] for d in v]
if isinstance(v, list) if isinstance(v, list)
else ([v] if isinstance(v, str) else []) else ([v] if isinstance(v, str) else [])
@ -106,7 +258,7 @@ class Bangumi(AbstractSite):
if isinstance(v, list) if isinstance(v, list)
else ([v] if isinstance(v, str) else []) else ([v] if isinstance(v, str) else [])
) )
case "游戏类型": case "游戏类型" | "类型":
genre = ( genre = (
[d["v"] for d in v] [d["v"] for d in v]
if isinstance(v, list) if isinstance(v, list)
@ -118,6 +270,25 @@ class Bangumi(AbstractSite):
pages = v pages = v
case "价格": case "价格":
price = v price = v
case "开始":
opening_date = v
case "结束":
closing_date = v
case "演出":
if category == ItemCategory.Performance:
director = v
case "主演":
actor = (
[{"name": d["v"], "role": None} for d in v]
if isinstance(v, list)
else (
[{"name": w, "role": None} for w in v.split("、")]
if isinstance(v, str)
else []
)
)
case "会场" | "演出地点":
location = v
img_url = o["images"].get("large") or o["images"].get("common") img_url = o["images"].get("large") or o["images"].get("common")
raw_img = None raw_img = None
@ -138,36 +309,38 @@ class Bangumi(AbstractSite):
localized_desc = ( localized_desc = (
[{"lang": detect_language(brief), "text": brief}] if brief else [] [{"lang": detect_language(brief), "text": brief}] if brief else []
) )
data = { data.update(
"localized_title": localized_title, {
"localized_description": localized_desc, "localized_title": localized_title,
"preferred_model": model, "localized_description": localized_desc,
"title": title, "title": title,
"orig_title": orig_title, "orig_title": orig_title,
"other_title": other_title or None, "other_title": other_title or None,
"author": authors, "orig_creator": orig_creator,
"genre": genre, "author": authors,
"translator": None, "genre": genre,
"director": director, "translator": None,
"language": language, "director": director,
"platform": platform, "playwright": playwright,
"year": year, "actor": actor,
"showtime": showtime, "language": language,
"imdb_code": imdb_code, "platform": platform,
"pub_house": pub_house, "imdb_code": imdb_code,
"pub_year": pub_year, "pub_house": pub_house,
"pub_month": pub_month, "binding": None,
"binding": None, "episode_count": episodes or None,
"episode_count": episodes or None, "official_site": site,
"official_site": site, "site": site,
"site": site, "isbn": isbn,
"isbn": isbn, "brief": brief,
"brief": brief, "cover_image_url": img_url,
"cover_image_url": img_url, "pages": pages,
"release_date": dt, "price": price,
"pages": pages, "opening_date": opening_date,
"price": price, "closing_date": closing_date,
} "location": location,
}
)
lookup_ids = {} lookup_ids = {}
if isbn: if isbn:
lookup_ids[isbn_type] = isbn lookup_ids[isbn_type] = isbn

View file

@ -0,0 +1 @@
{"date":"2017-03-16","platform":"其他","images":{"small":"https://lain.bgm.tv/r/200/pic/cover/l/71/a4/224973_bORmF.jpg","grid":"https://lain.bgm.tv/r/100/pic/cover/l/71/a4/224973_bORmF.jpg","large":"https://lain.bgm.tv/pic/cover/l/71/a4/224973_bORmF.jpg","medium":"https://lain.bgm.tv/r/800/pic/cover/l/71/a4/224973_bORmF.jpg","common":"https://lain.bgm.tv/r/400/pic/cover/l/71/a4/224973_bORmF.jpg"},"summary":"继2015年弹丸论破2第一次被搬上舞的两年后弹丸论破2再一次登上舞台。\r\n原班人马七海由フェアリーズ的伊藤萌々香饰演新舞台新手法新感动。\r\n\r\n青い空、白い雲・・・煌めく海、広がる砂浜。\r\nリゾート地として有名な南の島・ジャバウォック島に修学旅行で訪れた希望ヶ峰学園の生徒達だったが、学園長の悪だくみによって、島に閉じ込められてしまう。\r\n島の脱出を条件に、生徒達はコロシアイ、そして犯人を探し出す学級裁判を強いられる。\r\nハイスピードでテンポよく展開する学級裁判は、捜査パートで集めた証言や、証拠を弾丸としてトリガーにセットし、相手の主張の矛盾を打ち抜くことで進行していく。\r\n渦巻く疑心。見えない狂気。極限状態のなか、進化した学級裁判が始まる。","name":"スーパーダンガンロンパ2 THE STAGE 〜さよなら絶望学園〜2017","name_cn":"超级弹丸论破2舞台剧~再见了绝望学园~2017","tags":[{"name":"舞台剧","count":5,"total_cont":0},{"name":"弹丸论破","count":3,"total_cont":0},{"name":"2017","count":2,"total_cont":0},{"name":"推理","count":1,"total_cont":0},{"name":"日本","count":1,"total_cont":0}],"infobox":[{"key":"中文名","value":"超级弹丸论破2舞台剧~再见了绝望学园~2017"},{"key":"集数","value":"1"},{"key":"开始","value":"2017年3月16日-3月26日 (东京)"},{"key":"结束","value":"2017年3月30日-4月2日大阪"},{"key":"国家/地区","value":"日本"},{"key":"语言","value":"日语"},{"key":"官方网站","value":"http://www.cornflakes.jp/dangan/2017/"},{"key":"时长","value":"185分钟"},{"key":"公演剧场","value":[{"v":"Zeppブルーシアター六本木东京"},{"v":"森ノ宮ピロティホール(大阪)"}]},{"key":"主演","value":[{"v":"横浜流星"},{"v":"伊藤萌々香"},{"v":"鈴木拡樹"}]},{"key":"脚本·演出","value":"山本タク"},{"key":"舞台导演","value":"田中翼"},{"key":"演出助手","value":"陶山浩乃"},{"key":"舞美","value":"ENcounter 
ENgravers"},{"key":"美术","value":"泉真"},{"key":"照明","value":"桥本刚"},{"key":"音响","value":"田上笃志"},{"key":"服装","value":"小泉美都"},{"key":"化妆","value":"松前詠美子"},{"key":"宣传照","value":"渡边慎一"},{"key":"视觉设计","value":"清水みちる"},{"key":"现场摄像","value":"ふじもと光明"},{"key":"视频","value":"高野正也"},{"key":"执行","value":"竹内舞"},{"key":"宣传","value":"ニキータプラス"},{"key":"运营","value":"东京音协"},{"key":"制作","value":"CORNFLAKES"},{"key":"助理制片人","value":"中村奈々、麻生かほり"},{"key":"综合制片人","value":"堀江庆、中崎裕介"},{"key":"高级制片人","value":"吉田正大、染谷誓一"},{"key":"后援","value":"beachwalkers.、TOKYO MX"}],"rating":{"rank":0,"total":12,"count":{"1":0,"2":0,"3":0,"4":0,"5":0,"6":0,"7":7,"8":5,"9":0,"10":0},"score":7.4},"total_episodes":1,"collection":{"on_hold":3,"dropped":0,"wish":5,"collect":21,"doing":0},"id":224973,"eps":1,"meta_tags":["日本"],"volumes":0,"series":false,"locked":false,"nsfw":false,"type":6}

View file

@ -0,0 +1 @@
{"date":"2023-08-11","platform":"演出","images":{"small":"https://lain.bgm.tv/r/200/pic/cover/l/75/09/442025_fuuE0.jpg","grid":"https://lain.bgm.tv/r/100/pic/cover/l/75/09/442025_fuuE0.jpg","large":"https://lain.bgm.tv/pic/cover/l/75/09/442025_fuuE0.jpg","medium":"https://lain.bgm.tv/r/800/pic/cover/l/75/09/442025_fuuE0.jpg","common":"https://lain.bgm.tv/r/400/pic/cover/l/75/09/442025_fuuE0.jpg"},"summary":"「ぼっち・ざ・ろっく」がこの度、2023年8月に舞台化決定 \r\nLIVE STAGE「ぼっち・ざ・ろっく」と題した本作は、2023年8月11日(金)8月20日(日)、THEATER MILANO-Zaにて上演いたします。舞台ならではの多彩な演出で紡がれるストーリーと、キャストによる生演奏を織り交ぜた迫力のパフォーマンスで作品世界を再現 \r\nオーディションで選ばれた演奏力・演技力を兼ね備えたLIVE STAGEは、\r\n作品世界観を大切にした舞台演出とキャストによるライブシーンでの生演奏でキャラクターが実在するかのような臨場感を感じていただけること間違いありません。\r\n続報にご期待ください\r\n\r\n\r\n公演概要【タイトル】THEATER MILANO-Zaオープニングシリーズ LIVE STAGE「ぼっち・ざ・ろっく」\r\n【原作】はまじあき芳文社「まんがタイムきららMAX」連載中TVアニメ「ぼっち・ざ・ろっく」 \r\n\r\n【公演日程】2023年8月11日8月20日\r\n【会場】THEATER MILANO-Za\r\n〒160-0021東京都新宿区歌舞伎町一丁目29番1号 東急歌舞伎町タワー6階","name":"LIVE STAGE「ぼっち・ざ・ろっく」","name_cn":"","tags":[{"name":"舞台剧","count":27,"total_cont":0},{"name":"孤独摇滚","count":16,"total_cont":0},{"name":"2023","count":13,"total_cont":0},{"name":"live","count":12,"total_cont":0},{"name":"STAGE「ぼっち・ざ・ろっく」舞台剧","count":8,"total_cont":0},{"name":"改编","count":7,"total_cont":0},{"name":"抽象","count":4,"total_cont":0},{"name":"演出","count":2,"total_cont":0},{"name":"日本","count":1,"total_cont":0},{"name":"山崎彬","count":1,"total_cont":0}],"infobox":[{"key":"集数","value":"1"},{"key":"开始","value":"2023-08-11"},{"key":"结束","value":"2023-08-20"},{"key":"类型","value":"舞台演出"},{"key":"国家/地区","value":"日本"},{"key":"语言","value":"日本语"},{"key":"官方网站","value":"https://bocchi.rocks/stage/"},{"key":"原作","value":"はまじあき芳文社「まんがタイムきららMAX」連載中TVアニメ「ぼっち・ざ・ろっく」"},{"key":"编剧","value":"山崎彬"},{"key":"演出","value":"山崎彬"},{"key":"主演","value":"守乃まも、大竹美希、小山内花凜、大森未来衣"},{"key":"配角","value":"河内美里、月川玲、岡菜々美、津久井有咲、堀春菜、澤田美紀、斉藤瑞季、ピーターピーター、やじりまおん、園田光"},{"key":"舞台导演","value":"中西輝彦、仲里良"},{"key":"主办","value":"LIVE 
STAGE「ぼっち・ざ・ろっく」製作委員会"}],"rating":{"rank":113,"total":70,"count":{"1":0,"2":0,"3":0,"4":0,"5":0,"6":1,"7":2,"8":27,"9":32,"10":8},"score":8.6},"total_episodes":1,"collection":{"on_hold":5,"dropped":3,"wish":68,"collect":96,"doing":12},"id":442025,"eps":1,"meta_tags":["日本","演出"],"volumes":0,"series":false,"locked":false,"nsfw":false,"type":6}