diff --git a/journal/importers/letterboxd.py b/journal/importers/letterboxd.py new file mode 100644 index 00000000..669e1c7c --- /dev/null +++ b/journal/importers/letterboxd.py @@ -0,0 +1,135 @@ +import csv +import re +import tempfile +import zipfile +from datetime import datetime + +from django.utils import timezone +from django.utils.dateparse import parse_datetime +from django.utils.timezone import make_aware +from loguru import logger +from user_messages import api as msg + +from catalog.common import * +from catalog.common.downloaders import * +from catalog.models import * +from journal.models import * +from users.models import * + + +class LetterboxdImporter(Task): + TaskQueue = "import" + TaskType = "import.letterboxd" + DefaultMetadata = { + "total": 0, + "processed": 0, + "skipped": 0, + "imported": 0, + "failed": 0, + "visibility": 0, + "file": None, + } + + def get_item_by_url(self, url): + try: + h = BasicDownloader(url).html() # type:ignore + tt = h.xpath("//body/@data-tmdb-type")[0].strip() + ti = h.xpath("//body/@data-tmdb-type")[0].strip() + if tt != "movie": + logger.error(f"Unknown TMDB type {tt} / {ti}") + return None + site = SiteManager.get_site_by_id(IdType.TMDB_Movie, ti) + if not site: + return None + site.get_resource_ready() + return site.get_item() + except Exception as e: + logger.error(f"Unable to parse {url} {e}") + + def mark(self, url, shelf_type, date, rating=None, review=None, tags=None): + item = self.get_item_by_url(url) + if not item: + logger.error(f"Unable to get item for {url}") + return + owner = self.user.identity + mark = Mark(owner, item) + if ( + mark.shelf_type == shelf_type + or mark.shelf_type == ShelfType.COMPLETE + or ( + mark.shelf_type == ShelfType.PROGRESS + and shelf_type == ShelfType.WISHLIST + ) + ): + return + visibility = self.metadata["visibility"] + shelf_time_offset = { + ShelfType.WISHLIST: " 20:00:00", + ShelfType.PROGRESS: " 21:00:00", + ShelfType.COMPLETE: " 22:00:00", + } + dt = parse_datetime(date + shelf_time_offset[shelf_type]) + mark.update( + shelf_type, + comment_text=review or None, + rating_grade=round(rating * 2) if rating else None, + visibility=visibility, + created_time=dt, + ) + if tags: + tag_titles = [s.strip() for s in tags.split(",")] + TagManager.tag_item(item, owner, tag_titles, visibility) + + def run(self): + uris = set() + filename = self.metadata["file"] + with zipfile.ZipFile(filename, "r") as zipref: + with tempfile.TemporaryDirectory() as tmpdirname: + logger.debug(f"Extracting {filename} to {tmpdirname}") + zipref.extractall(tmpdirname) + with open(tmpdirname + "/reviews.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.COMPLETE, + row["Watched Date"], + row["Review"], + row["Rating"], + row["Tags"], + ) + with open(tmpdirname + "/ratings.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + if row["Letterboxd URI"] in uris: + continue + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.COMPLETE, + row["Date"], + row["Rating"], + ) + with open(tmpdirname + "/watched.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + if row["Letterboxd URI"] in uris: + continue + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.COMPLETE, + row["Date"], + ) + with open(tmpdirname + "/watchlist.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + if row["Letterboxd URI"] in uris: + continue + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.WISHLIST, + row["Date"], + ) diff --git a/users/migrations/0019_task.py b/users/migrations/0019_task.py new file mode 100644 index 00000000..6c8b4b31 --- /dev/null +++ b/users/migrations/0019_task.py @@ -0,0 +1,59 @@ +# Generated by Django 4.2.8 on 2024-01-08 03:37 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("users", "0018_apidentity_anonymous_viewable"), + ] + + operations = [ + migrations.CreateModel( + name="Task", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("type", models.CharField(max_length=20)), + ( + "state", + models.IntegerField( + choices=[ + (0, "Pending"), + (1, "Started"), + (2, "Complete"), + (3, "Failed"), + ], + default=0, + ), + ), + ("metadata", models.JSONField(default=dict)), + ("message", models.TextField(default="")), + ("created_time", models.DateTimeField(auto_now_add=True)), + ("edited_time", models.DateTimeField(auto_now=True)), + ( + "user", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "indexes": [ + models.Index( + fields=["user", "type"], name="users_task_user_id_e29f34_idx" + ) + ], + }, + ), + ] diff --git a/users/models/__init__.py b/users/models/__init__.py index 14d42a2e..94a8afef 100644 --- a/users/models/__init__.py +++ b/users/models/__init__.py @@ -1,4 +1,5 @@ from .apidentity import APIdentity from .preference import Preference from .report import Report +from .task import Task from .user import User diff --git a/users/models/task.py b/users/models/task.py new file mode 100644 index 00000000..c69e4b2c --- /dev/null +++ b/users/models/task.py @@ -0,0 +1,85 @@ +import hashlib +import re +from functools import cached_property +from operator import index + +from auditlog.context import set_actor +from django.conf import settings +from django.contrib.auth.models import AbstractUser +from django.core import validators +from django.core.exceptions import ValidationError +from django.core.serializers.json import DjangoJSONEncoder +from django.db import models +from django.db.models import F, Q, Value +from django.db.models.functions import Concat, Lower +from django.templatetags.static import static +from django.urls import reverse +from django.utils import timezone +from django.utils.deconstruct import deconstructible +from django.utils.translation import gettext_lazy as _ +from loguru import logger + +from management.models import Announcement +from mastodon.api import * +from takahe.utils import Takahe + +from .user import User + + +class Task(models.Model): + TaskQueue = "default" + TaskType = "unknown" + DefaultMetadata = {} + + class States(models.IntegerChoices): + pending = 0, "Pending" + started = 1, "Started" + complete = 2, "Complete" + failed = 3, "Failed" + + user = models.ForeignKey(User, models.CASCADE, null=False) + type = models.CharField(max_length=20, null=False) + state = models.IntegerField(choices=States.choices, default=States.pending) + metadata = models.JSONField(null=False, default=dict) + message = models.TextField(default="") + created_time = models.DateTimeField(auto_now_add=True) + edited_time = models.DateTimeField(auto_now=True) + + class Meta: + indexes = [models.Index(fields=["user", "type"])] + + @property + def job_id(self): + if not self.pk: + raise ValueError("task not saved yet") + return f"{self.type}-{self.user}-{self.pk}" + + def __str__(self): + return self.job_id + + @classmethod + def enqueue(cls, user: User, **kwargs) -> "Task": + d = cls.DefaultMetadata.copy() + d.update(kwargs) + t = cls.objects.create(user=user, type=cls.TaskType, metadata=d) + django_rq.get_queue(cls.TaskQueue).enqueue(cls._run, t.pk, job_id=t.job_id) + return t + + @classmethod + def _run(cls, task_id: int): + task = cls.objects.get(pk=task_id) + try: + task.state = cls.States.started + task.save(update_fields=["state"]) + with set_actor(task.user): + task.run() + task.state = cls.States.complete + task.save(update_fields=["state"]) + except Exception as e: + logger.error(f"Task {task} Exception {e}") + task.message = "Error occured." + task.state = cls.States.failed + task.save(update_fields=["state", "message"]) + + def run(self) -> None: + raise NotImplemented