diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py index d3ffff05..860e762e 100644 --- a/catalog/sites/tmdb.py +++ b/catalog/sites/tmdb.py @@ -39,7 +39,7 @@ def _copy_dict(s, key_map): class TMDB_Movie(AbstractSite): SITE_NAME = SiteName.TMDB ID_TYPE = IdType.TMDB_Movie - URL_PATTERNS = [r"\w+://www.themoviedb.org/movie/(\d+)"] + URL_PATTERNS = [r"^\w+://www.themoviedb.org/movie/(\d+)"] WIKI_PROPERTY_ID = "?" DEFAULT_MODEL = Movie @@ -171,8 +171,8 @@ class TMDB_TV(AbstractSite): SITE_NAME = SiteName.TMDB ID_TYPE = IdType.TMDB_TV URL_PATTERNS = [ - r"\w+://www.themoviedb.org/tv/(\d+)[^/]*$", - r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons", + r"^\w+://www.themoviedb.org/tv/(\d+)[^/]*/?$", + r"^\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons$", ] WIKI_PROPERTY_ID = "?" DEFAULT_MODEL = TVShow @@ -320,7 +320,7 @@ class TMDB_TV(AbstractSite): class TMDB_TVSeason(AbstractSite): SITE_NAME = SiteName.TMDB ID_TYPE = IdType.TMDB_TVSeason - URL_PATTERNS = [r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$"] + URL_PATTERNS = [r"^\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*/?$"] WIKI_PROPERTY_ID = "?" DEFAULT_MODEL = TVSeason ID_PATTERN = r"^(\d+)-(\d+)$" diff --git a/journal/importers/letterboxd.py b/journal/importers/letterboxd.py index 35adfd67..a47591ef 100644 --- a/journal/importers/letterboxd.py +++ b/journal/importers/letterboxd.py @@ -5,6 +5,7 @@ import zipfile import pytz from django.utils.dateparse import parse_datetime from loguru import logger +from markdownify import markdownify as md from catalog.common import * from catalog.common.downloaders import * @@ -25,6 +26,7 @@ class LetterboxdImporter(Task): "imported": 0, "failed": 0, "visibility": 0, + "failed_urls": [], "file": None, } @@ -34,28 +36,33 @@ class LetterboxdImporter(Task): def get_item_by_url(self, url): try: h = BasicDownloader(url).download().html() - if not h.xpath("//body/@data-tmdb-type"): + tu = h.xpath("//a[@data-track-action='TMDb']/@href") + if not tu: i = h.xpath('//span[@class="film-title-wrapper"]/a/@href') u2 = "https://letterboxd.com" + i[0] # type:ignore h = BasicDownloader(u2).download().html() - tt = h.xpath("//body/@data-tmdb-type")[0].strip() # type:ignore - ti = str(h.xpath("//body/@data-tmdb-id")[0].strip()) # type:ignore - if tt != "movie" or not ti: - logger.error(f"Unknown TMDB ({tt}/{ti}) for {url}") + tu = h.xpath("//a[@data-track-action='TMDb']/@href") + if not tu: + logger.error(f"Unknown TMDB for {url}") return None - site = SiteManager.get_site_by_id(IdType.TMDB_Movie, ti) + site = SiteManager.get_site_by_url(tu[0]) # type:ignore if not site: return None + if site.ID_TYPE == IdType.TMDB_TV: + site = SiteManager.get_site_by_url(f"{site.url}/season/1") + if not site: + return None site.get_resource_ready() - return site.get_item() + item = site.get_item() + return item except Exception as e: logger.error(f"Unable to parse {url} {e}") - def mark(self, url, shelf_type, date, rating=None, review=None, tags=None): + def mark(self, url, shelf_type, date, rating=None, text=None, tags=None): item = self.get_item_by_url(url) if not item: logger.error(f"Unable to get item for {url}") - self.progress(-1) + self.progress(-1, url) return owner = self.user.identity mark = Mark(owner, item) @@ -68,7 +75,7 @@ class LetterboxdImporter(Task): ) ): self.progress(0) - return 0 + return visibility = self.metadata["visibility"] shelf_time_offset = { ShelfType.WISHLIST: " 20:00:00", @@ -78,10 +85,19 @@ class LetterboxdImporter(Task): dt = parse_datetime(date + shelf_time_offset[shelf_type]) if dt: dt = dt.replace(tzinfo=_tz_sh) + rating_grade = round(float(rating) * 2) if rating else None + comment = None + if text: + text = md(text) + if len(text) < 360: + comment = text + else: + title = f"评《{item.title}》" + Review.update_item_review(item, owner, title, text, visibility, dt) mark.update( shelf_type, - comment_text=review or None, - rating_grade=round(float(rating) * 2) if rating else None, + comment_text=comment, + rating_grade=rating_grade, visibility=visibility, created_time=dt, ) @@ -90,7 +106,7 @@ class LetterboxdImporter(Task): TagManager.tag_item(item, owner, tag_titles, visibility) self.progress(1) - def progress(self, mark_state: int): + def progress(self, mark_state: int, url=None): self.metadata["total"] += 1 self.metadata["processed"] += 1 match mark_state: @@ -100,6 +116,8 @@ class LetterboxdImporter(Task): self.metadata["skipped"] += 1 case _: self.metadata["failed"] += 1 + if url: + self.metadata["failed_urls"].append(url) self.message = f"{self.metadata['imported']} imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed" self.save(update_fields=["metadata", "message"]) diff --git a/users/templates/users/data.html b/users/templates/users/data.html index f0b02a5f..a65f6fcc 100644 --- a/users/templates/users/data.html +++ b/users/templates/users/data.html @@ -113,7 +113,7 @@ {% csrf_token %} 在Letterboxd网站的Settings页面中选择DATA,或在其app的Settings菜单中选择Advanced Settings,点击 EXPORT YOUR DATA,即可下载导出名称类似letterboxd-username-2018-03-11-07-52-utc.zip的文件,勿需解压。 + rel="noopener">Settings页面中选择DATA,或在其app的Settings菜单中选择Advanced Settings,点击 EXPORT YOUR DATA,即可下载导出名称类似letterboxd-username-2018-03-11-07-52-utc.zip的文件,不必解压。

@@ -146,9 +146,18 @@

- {% if letterboxd_task %} - 最近导入于{{ letterboxd_task.created_time }},状态:{{ letterboxd_task.get_state_display }}。 {{ letterboxd_task.message }} - {% endif %} + + 导入时仅更新正向变化(未标->想看->已看)标记;不足360字符的评论会作为短评。 + {% if letterboxd_task %} +
+ 最近导入于{{ letterboxd_task.created_time }},状态:{{ letterboxd_task.get_state_display }}。 +
+ {{ letterboxd_task.message }} + {% if letterboxd_task.metadata.failed_urls %} + + {% endif %} + {% endif %} +