diff --git a/catalog/performance/tests.py b/catalog/performance/tests.py index 9b93c245..7d9a848d 100644 --- a/catalog/performance/tests.py +++ b/catalog/performance/tests.py @@ -118,3 +118,54 @@ class DoubanDramaTestCase(TestCase): self.assertEqual(productions[3].language, ["日语"]) self.assertEqual(productions[3].opening_date, "2017-11-13") self.assertEqual(productions[3].location, ["梅田芸術劇場メインホール"]) + + +class BangumiDramaTestCase(TestCase): + databases = "__all__" + + def setUp(self): + pass + + @use_local_response + def test_scrape(self): + t_url = "https://bgm.tv/subject/224973" + site = SiteManager.get_site_by_url(t_url) + resource = site.get_resource_ready() + item = site.get_item() + self.assertEqual(item.display_title, "超级弹丸论破2舞台剧~再见了绝望学园~2017") + self.assertEqual( + sorted(item.actor, key=lambda a: a["name"]), + [ + {"name": "伊藤萌々香", "role": None}, + {"name": "横浜流星", "role": None}, + {"name": "鈴木拡樹", "role": None}, + ], + ) + self.assertEqual(item.language, ["日语"]) + + t_url = "https://bgm.tv/subject/442025" + site = SiteManager.get_site_by_url(t_url) + resource = site.get_resource_ready() + item = site.get_item() + self.assertEqual(item.display_title, "LIVE STAGE「ぼっち・ざ・ろっく!」") + self.assertEqual( + item.orig_creator, + [ + "はまじあき(芳文社「まんがタイムきららMAX」連載中)/TVアニメ「ぼっち・ざ・ろっく!」" + ], + ) + self.assertEqual(item.opening_date, "2023-08-11") + self.assertEqual(item.closing_date, "2023-08-20") + self.assertEqual(item.genre, ["舞台演出"]) + self.assertEqual(item.language, ["日本语"]) + self.assertEqual(item.playwright, ["山崎彬"]) + self.assertEqual(item.director, ["山崎彬"]) + self.assertEqual( + sorted(item.actor, key=lambda a: a["name"]), + [ + {"name": "大森未来衣", "role": None}, + {"name": "大竹美希", "role": None}, + {"name": "守乃まも", "role": None}, + {"name": "小山内花凜", "role": None}, + ], + ) diff --git a/catalog/sites/bangumi.py b/catalog/sites/bangumi.py index 99c3d58d..af801906 100644 --- a/catalog/sites/bangumi.py +++ b/catalog/sites/bangumi.py @@ -1,8 +1,14 @@ import logging +from typing import Any from collections import OrderedDict +from loguru import logger +import httpx + +from django.conf import settings from catalog.book.utils import detect_isbn_asin from catalog.common import * +from catalog.game.models import GameReleaseType from catalog.models import * from common.models.lang import detect_language @@ -22,45 +28,162 @@ class Bangumi(AbstractSite): DEFAULT_MODEL = None @classmethod - def id_to_url(cls, id_value): - return f"https://bgm.tv/subject/{id_value}" - - def scrape(self): - api_url = f"https://api.bgm.tv/v0/subjects/{self.id_value}" - o = BasicDownloader(api_url).download().json() - showtime = None + def get_category( + cls, o: dict[str, Any], fetch_resources: bool = False + ) -> tuple[ItemCategory, dict[str, Any]]: + dt = o.get("date") pub_year = None pub_month = None + release_year = None + release_type = None + showtime = None year = None - dt = o.get("date") - episodes = o.get("total_episodes", 0) + related_resources = [] match o["type"]: case 1: model = "Edition" + category = ItemCategory.Book + if o["series"] and fetch_resources: + # model = "Series" TODO + res = ( + BasicDownloader( + f"https://api.bgm.tv/v0/subjects/{o['id']}/subjects", + headers={ + "User-Agent": settings.NEODB_USER_AGENT, + }, + ) + .download() + .json() + ) + + for s in res: + if s["relation"] != "单行本": + continue + related_resources.append( + { + "url": cls.id_to_url(s["id"]), + } + ) if dt: d = dt.split("-") pub_year = d[0] pub_month = d[1] case 2 | 6: - is_series = episodes > 1 - model = "TVSeason" if is_series else "Movie" + is_season = o["platform"] in { + "TV", + "OVA", # may be movie in other sites + "WEB", + "电视剧", + "欧美剧", + "日剧", + "华语剧", + "综艺", + } + category = ItemCategory.TV if is_season else ItemCategory.Movie + model = "TVSeason" if is_season else "Movie" + if "舞台剧" in [ + t["name"] for t in o["tags"] + ]: # 只能这样判断舞台剧了,bangumi三次元分类太少 + category = ItemCategory.Performance + model = "Performance" if dt: year = dt.split("-")[0] showtime = [ - {"time": dt, "region": "首播日期" if is_series else "发布日期"} + {"time": dt, "region": "首播日期" if is_season else "发布日期"} ] case 3: model = "Album" + category = ItemCategory.Music case 4: model = "Game" + category = ItemCategory.Game + match o["platform"]: + case "游戏": + release_type = GameReleaseType.GAME + case "扩展包": + release_type = GameReleaseType.DLC case _: raise ValueError( - f"Unknown type {o['type']} for bangumi subject {self.id_value}" + f"Unknown type {o['type']} for bangumi subject {o["id"]}" ) + return category, { + "preferred_model": model, + "related_resources": related_resources, + "pub_year": pub_year, + "pub_month": pub_month, + "release_year": release_year, + "release_type": release_type, + "showtime": showtime, + "year": year, + } + + @classmethod + def id_to_url(cls, id_value): + return f"https://bgm.tv/subject/{id_value}" + + @classmethod + async def search_task( + cls, query: str, page: int, category: str, page_size: int + ) -> list[ExternalSearchResultItem]: + results = [] + bgm_type = { + "all": None, + "movietv": [2, 6], + "movie": [2, 6], + "tv": [2, 6], + "book": [1], + "game": [4], + "performance": [6], + "music": [3], + } + if category not in bgm_type: + return results + search_url = f"https://api.bgm.tv/v0/search/subjects?limit={page_size}&offset={(page-1)*page_size}" + async with httpx.AsyncClient() as client: + try: + response = await client.post( + search_url, + headers={"User-Agent": settings.NEODB_USER_AGENT}, + json={"keyword": query, "filter": {"type": bgm_type[category]}}, + timeout=2, + ) + r = response.json() + for s in r["data"]: + cat, _ = cls.get_category(s) + results.append( + ExternalSearchResultItem( + category=cat, + source_site=cls.SITE_NAME, + source_url=cls.id_to_url(s["id"]), + title=s["name"], + subtitle="", + brief=s.get("summary", ""), + cover_url=s["images"].get("common"), + ) + ) + except Exception as e: + logger.error( + "Bangumi search error", extra={"query": query, "exception": e} + ) + return results + + def scrape(self): + api_url = f"https://api.bgm.tv/v0/subjects/{self.id_value}" + o = ( + BasicDownloader( + api_url, + headers={ + "User-Agent": settings.NEODB_USER_AGENT, + }, + ) + .download() + .json() + ) + category, data = self.get_category(o, True) title = o.get("name_cn") or o.get("name") orig_title = o.get("name") if o.get("name") != title else None brief = o.get("summary") - + episodes = o.get("total_episodes", 0) genre = None platform = None other_title = [] @@ -69,13 +192,19 @@ class Bangumi(AbstractSite): isbn = None language = None pub_house = None - authors = None + orig_creator = None + authors = [] site = None director = None + playwright = None + actor = None pages = None price = None + opening_date = None + closing_date = None + location = None for i in o.get("infobox", []): - k = i["key"] + k = i["key"].lower() v = i["value"] match k: case "别名": @@ -84,9 +213,14 @@ class Bangumi(AbstractSite): if isinstance(v, list) else ([v] if isinstance(v, str) else []) ) + case "话数": + try: + episodes = int(v) + except ValueError: + pass case "imdb_id": imdb_code = v - case "isbn" | "ISBN": + case "isbn": isbn_type, isbn = detect_isbn_asin(v) case "语言": language = v @@ -94,8 +228,26 @@ class Bangumi(AbstractSite): pub_house = v case "导演": director = v + case "编剧" | "脚本": + playwright = ( + [d["v"] for d in v] + if isinstance(v, list) + else ([v] if isinstance(v, str) else []) + ) + case "原作": + match category: + case ItemCategory.Book: + authors.append(v) + case ItemCategory.Performance: + orig_creator = ( + [d["v"] for d in v] + if isinstance(v, list) + else ([v] if isinstance(v, str) else []) + ) + case "作画": + authors.append(v) case "作者": - authors = ( + authors.extend( [d["v"] for d in v] if isinstance(v, list) else ([v] if isinstance(v, str) else []) @@ -106,7 +258,7 @@ class Bangumi(AbstractSite): if isinstance(v, list) else ([v] if isinstance(v, str) else []) ) - case "游戏类型": + case "游戏类型" | "类型": genre = ( [d["v"] for d in v] if isinstance(v, list) @@ -118,6 +270,25 @@ class Bangumi(AbstractSite): pages = v case "价格": price = v + case "开始": + opening_date = v + case "结束": + closing_date = v + case "演出": + if category == ItemCategory.Performance: + director = v + case "主演": + actor = ( + [{"name": d["v"], "role": None} for d in v] + if isinstance(v, list) + else ( + [{"name": w, "role": None} for w in v.split("、")] + if isinstance(v, str) + else [] + ) + ) + case "会场" | "演出地点": + location = v img_url = o["images"].get("large") or o["images"].get("common") raw_img = None @@ -138,36 +309,38 @@ class Bangumi(AbstractSite): localized_desc = ( [{"lang": detect_language(brief), "text": brief}] if brief else [] ) - data = { - "localized_title": localized_title, - "localized_description": localized_desc, - "preferred_model": model, - "title": title, - "orig_title": orig_title, - "other_title": other_title or None, - "author": authors, - "genre": genre, - "translator": None, - "director": director, - "language": language, - "platform": platform, - "year": year, - "showtime": showtime, - "imdb_code": imdb_code, - "pub_house": pub_house, - "pub_year": pub_year, - "pub_month": pub_month, - "binding": None, - "episode_count": episodes or None, - "official_site": site, - "site": site, - "isbn": isbn, - "brief": brief, - "cover_image_url": img_url, - "release_date": dt, - "pages": pages, - "price": price, - } + data.update( + { + "localized_title": localized_title, + "localized_description": localized_desc, + "title": title, + "orig_title": orig_title, + "other_title": other_title or None, + "orig_creator": orig_creator, + "author": authors, + "genre": genre, + "translator": None, + "director": director, + "playwright": playwright, + "actor": actor, + "language": language, + "platform": platform, + "imdb_code": imdb_code, + "pub_house": pub_house, + "binding": None, + "episode_count": episodes or None, + "official_site": site, + "site": site, + "isbn": isbn, + "brief": brief, + "cover_image_url": img_url, + "pages": pages, + "price": price, + "opening_date": opening_date, + "closing_date": closing_date, + "location": location, + } + ) lookup_ids = {} if isbn: lookup_ids[isbn_type] = isbn diff --git a/test_data/https___api_bgm_tv_v0_subjects_224973 b/test_data/https___api_bgm_tv_v0_subjects_224973 new file mode 100644 index 00000000..a4d5a45a --- /dev/null +++ b/test_data/https___api_bgm_tv_v0_subjects_224973 @@ -0,0 +1 @@ +{"date":"2017-03-16","platform":"其他","images":{"small":"https://lain.bgm.tv/r/200/pic/cover/l/71/a4/224973_bORmF.jpg","grid":"https://lain.bgm.tv/r/100/pic/cover/l/71/a4/224973_bORmF.jpg","large":"https://lain.bgm.tv/pic/cover/l/71/a4/224973_bORmF.jpg","medium":"https://lain.bgm.tv/r/800/pic/cover/l/71/a4/224973_bORmF.jpg","common":"https://lain.bgm.tv/r/400/pic/cover/l/71/a4/224973_bORmF.jpg"},"summary":"继2015年弹丸论破2第一次被搬上舞的两年后,弹丸论破2再一次登上舞台。\r\n原班人马(七海由フェアリーズ的伊藤萌々香饰演),新舞台,新手法,新感动。\r\n\r\n青い空、白い雲・・・煌めく海、広がる砂浜。\r\nリゾート地として有名な南の島・ジャバウォック島に修学旅行で訪れた希望ヶ峰学園の生徒達だったが、学園長の悪だくみによって、島に閉じ込められてしまう。\r\n島の脱出を条件に、生徒達はコロシアイ、そして犯人を探し出す学級裁判を強いられる。\r\nハイスピードでテンポよく展開する学級裁判は、捜査パートで集めた証言や、証拠を弾丸としてトリガーにセットし、相手の主張の矛盾を打ち抜くことで進行していく。\r\n渦巻く疑心。見えない狂気。極限状態のなか、進化した学級裁判が始まる。","name":"スーパーダンガンロンパ2 THE STAGE 〜さよなら絶望学園〜2017","name_cn":"超级弹丸论破2舞台剧~再见了绝望学园~2017","tags":[{"name":"舞台剧","count":5,"total_cont":0},{"name":"弹丸论破","count":3,"total_cont":0},{"name":"2017","count":2,"total_cont":0},{"name":"推理","count":1,"total_cont":0},{"name":"日本","count":1,"total_cont":0}],"infobox":[{"key":"中文名","value":"超级弹丸论破2舞台剧~再见了绝望学园~2017"},{"key":"集数","value":"1"},{"key":"开始","value":"2017年3月16日-3月26日 (东京)"},{"key":"结束","value":"2017年3月30日-4月2日(大阪)"},{"key":"国家/地区","value":"日本"},{"key":"语言","value":"日语"},{"key":"官方网站","value":"http://www.cornflakes.jp/dangan/2017/"},{"key":"时长","value":"185分钟"},{"key":"公演剧场","value":[{"v":"Zeppブルーシアター六本木(东京)"},{"v":"森ノ宮ピロティホール(大阪)"}]},{"key":"主演","value":[{"v":"横浜流星"},{"v":"伊藤萌々香"},{"v":"鈴木拡樹"}]},{"key":"脚本·演出","value":"山本タク"},{"key":"舞台导演","value":"田中翼"},{"key":"演出助手","value":"陶山浩乃"},{"key":"舞美","value":"ENcounter ENgravers"},{"key":"美术","value":"泉真"},{"key":"照明","value":"桥本刚"},{"key":"音响","value":"田上笃志"},{"key":"服装","value":"小泉美都"},{"key":"化妆","value":"松前詠美子"},{"key":"宣传照","value":"渡边慎一"},{"key":"视觉设计","value":"清水みちる"},{"key":"现场摄像","value":"ふじもと光明"},{"key":"视频","value":"高野正也"},{"key":"执行","value":"竹内舞"},{"key":"宣传","value":"ニキータプラス"},{"key":"运营","value":"东京音协"},{"key":"制作","value":"CORNFLAKES"},{"key":"助理制片人","value":"中村奈々、麻生かほり"},{"key":"综合制片人","value":"堀江庆、中崎裕介"},{"key":"高级制片人","value":"吉田正大、染谷誓一"},{"key":"后援","value":"beachwalkers.、TOKYO MX"}],"rating":{"rank":0,"total":12,"count":{"1":0,"2":0,"3":0,"4":0,"5":0,"6":0,"7":7,"8":5,"9":0,"10":0},"score":7.4},"total_episodes":1,"collection":{"on_hold":3,"dropped":0,"wish":5,"collect":21,"doing":0},"id":224973,"eps":1,"meta_tags":["日本"],"volumes":0,"series":false,"locked":false,"nsfw":false,"type":6} \ No newline at end of file diff --git a/test_data/https___api_bgm_tv_v0_subjects_442025 b/test_data/https___api_bgm_tv_v0_subjects_442025 new file mode 100644 index 00000000..584b2500 --- /dev/null +++ b/test_data/https___api_bgm_tv_v0_subjects_442025 @@ -0,0 +1 @@ +{"date":"2023-08-11","platform":"演出","images":{"small":"https://lain.bgm.tv/r/200/pic/cover/l/75/09/442025_fuuE0.jpg","grid":"https://lain.bgm.tv/r/100/pic/cover/l/75/09/442025_fuuE0.jpg","large":"https://lain.bgm.tv/pic/cover/l/75/09/442025_fuuE0.jpg","medium":"https://lain.bgm.tv/r/800/pic/cover/l/75/09/442025_fuuE0.jpg","common":"https://lain.bgm.tv/r/400/pic/cover/l/75/09/442025_fuuE0.jpg"},"summary":"「ぼっち・ざ・ろっく!」がこの度、2023年8月に舞台化決定! \r\nLIVE STAGE「ぼっち・ざ・ろっく!」と題した本作は、2023年8月11日(金)~8月20日(日)、THEATER MILANO-Zaにて上演いたします。舞台ならではの多彩な演出で紡がれるストーリーと、キャストによる生演奏を織り交ぜた迫力のパフォーマンスで作品世界を再現! \r\nオーディションで選ばれた演奏力・演技力を兼ね備えたLIVE STAGEは、\r\n作品世界観を大切にした舞台演出とキャストによるライブシーンでの生演奏でキャラクターが実在するかのような臨場感を感じていただけること間違いありません。\r\n続報にご期待ください!\r\n\r\n\r\n<公演概要>【タイトル】THEATER MILANO-Zaオープニングシリーズ LIVE STAGE「ぼっち・ざ・ろっく!」\r\n【原作】はまじあき(芳文社「まんがタイムきららMAX」連載中)/TVアニメ「ぼっち・ざ・ろっく!」 \r\n\r\n【公演日程】2023年8月11日(金)~8月20日(日)\r\n【会場】THEATER MILANO-Za\r\n(〒160-0021東京都新宿区歌舞伎町一丁目29番1号 東急歌舞伎町タワー6階)","name":"LIVE STAGE「ぼっち・ざ・ろっく!」","name_cn":"","tags":[{"name":"舞台剧","count":27,"total_cont":0},{"name":"孤独摇滚","count":16,"total_cont":0},{"name":"2023","count":13,"total_cont":0},{"name":"live","count":12,"total_cont":0},{"name":"STAGE「ぼっち・ざ・ろっく!」舞台剧","count":8,"total_cont":0},{"name":"改编","count":7,"total_cont":0},{"name":"抽象","count":4,"total_cont":0},{"name":"演出","count":2,"total_cont":0},{"name":"日本","count":1,"total_cont":0},{"name":"山崎彬","count":1,"total_cont":0}],"infobox":[{"key":"集数","value":"1"},{"key":"开始","value":"2023-08-11"},{"key":"结束","value":"2023-08-20"},{"key":"类型","value":"舞台演出"},{"key":"国家/地区","value":"日本"},{"key":"语言","value":"日本语"},{"key":"官方网站","value":"https://bocchi.rocks/stage/"},{"key":"原作","value":"はまじあき(芳文社「まんがタイムきららMAX」連載中)/TVアニメ「ぼっち・ざ・ろっく!」"},{"key":"编剧","value":"山崎彬"},{"key":"演出","value":"山崎彬"},{"key":"主演","value":"守乃まも、大竹美希、小山内花凜、大森未来衣"},{"key":"配角","value":"河内美里、月川玲、岡菜々美、津久井有咲、堀春菜、澤田美紀、斉藤瑞季、ピーターピーター、やじりまおん、園田光"},{"key":"舞台导演","value":"中西輝彦、仲里良"},{"key":"主办","value":"LIVE STAGE「ぼっち・ざ・ろっく!」製作委員会"}],"rating":{"rank":113,"total":70,"count":{"1":0,"2":0,"3":0,"4":0,"5":0,"6":1,"7":2,"8":27,"9":32,"10":8},"score":8.6},"total_episodes":1,"collection":{"on_hold":5,"dropped":3,"wish":68,"collect":96,"doing":12},"id":442025,"eps":1,"meta_tags":["日本","演出"],"volumes":0,"series":false,"locked":false,"nsfw":false,"type":6} \ No newline at end of file