import letterboxd

This commit is contained in:
Your Name 2024-01-07 22:39:30 -05:00 committed by Henri Dickson
parent 3a47eb1831
commit ae9fb6f3c6
4 changed files with 280 additions and 0 deletions

View file

@ -0,0 +1,135 @@
import csv
import re
import tempfile
import zipfile
from datetime import datetime
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from django.utils.timezone import make_aware
from loguru import logger
from user_messages import api as msg
from catalog.common import *
from catalog.common.downloaders import *
from catalog.models import *
from journal.models import *
from users.models import *
class LetterboxdImporter(Task):
TaskQueue = "import"
TaskType = "import.letterboxd"
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"visibility": 0,
"file": None,
}
def get_item_by_url(self, url):
try:
h = BasicDownloader(url).html() # type:ignore
tt = h.xpath("//body/@data-tmdb-type")[0].strip()
ti = h.xpath("//body/@data-tmdb-type")[0].strip()
if tt != "movie":
logger.error(f"Unknown TMDB type {tt} / {ti}")
return None
site = SiteManager.get_site_by_id(IdType.TMDB_Movie, ti)
if not site:
return None
site.get_resource_ready()
return site.get_item()
except Exception as e:
logger.error(f"Unable to parse {url} {e}")
def mark(self, url, shelf_type, date, rating=None, review=None, tags=None):
item = self.get_item_by_url(url)
if not item:
logger.error(f"Unable to get item for {url}")
return
owner = self.user.identity
mark = Mark(owner, item)
if (
mark.shelf_type == shelf_type
or mark.shelf_type == ShelfType.COMPLETE
or (
mark.shelf_type == ShelfType.PROGRESS
and shelf_type == ShelfType.WISHLIST
)
):
return
visibility = self.metadata["visibility"]
shelf_time_offset = {
ShelfType.WISHLIST: " 20:00:00",
ShelfType.PROGRESS: " 21:00:00",
ShelfType.COMPLETE: " 22:00:00",
}
dt = parse_datetime(date + shelf_time_offset[shelf_type])
mark.update(
shelf_type,
comment_text=review or None,
rating_grade=round(rating * 2) if rating else None,
visibility=visibility,
created_time=dt,
)
if tags:
tag_titles = [s.strip() for s in tags.split(",")]
TagManager.tag_item(item, owner, tag_titles, visibility)
def run(self):
uris = set()
filename = self.metadata["file"]
with zipfile.ZipFile(filename, "r") as zipref:
with tempfile.TemporaryDirectory() as tmpdirname:
logger.debug(f"Extracting {filename} to {tmpdirname}")
zipref.extractall(tmpdirname)
with open(tmpdirname + "/reviews.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Watched Date"],
row["Review"],
row["Rating"],
row["Tags"],
)
with open(tmpdirname + "/ratings.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
row["Rating"],
)
with open(tmpdirname + "/watched.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
)
with open(tmpdirname + "/watchlist.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.WISHLIST,
row["Date"],
)

View file

@ -0,0 +1,59 @@
# Generated by Django 4.2.8 on 2024-01-08 03:37
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("users", "0018_apidentity_anonymous_viewable"),
]
operations = [
migrations.CreateModel(
name="Task",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("type", models.CharField(max_length=20)),
(
"state",
models.IntegerField(
choices=[
(0, "Pending"),
(1, "Started"),
(2, "Complete"),
(3, "Failed"),
],
default=0,
),
),
("metadata", models.JSONField(default=dict)),
("message", models.TextField(default="")),
("created_time", models.DateTimeField(auto_now_add=True)),
("edited_time", models.DateTimeField(auto_now=True)),
(
"user",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to=settings.AUTH_USER_MODEL,
),
),
],
options={
"indexes": [
models.Index(
fields=["user", "type"], name="users_task_user_id_e29f34_idx"
)
],
},
),
]

View file

@ -1,4 +1,5 @@
from .apidentity import APIdentity
from .preference import Preference
from .report import Report
from .task import Task
from .user import User

85
users/models/task.py Normal file
View file

@ -0,0 +1,85 @@
import hashlib
import re
from functools import cached_property
from operator import index
from auditlog.context import set_actor
from django.conf import settings
from django.contrib.auth.models import AbstractUser
from django.core import validators
from django.core.exceptions import ValidationError
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models
from django.db.models import F, Q, Value
from django.db.models.functions import Concat, Lower
from django.templatetags.static import static
from django.urls import reverse
from django.utils import timezone
from django.utils.deconstruct import deconstructible
from django.utils.translation import gettext_lazy as _
from loguru import logger
from management.models import Announcement
from mastodon.api import *
from takahe.utils import Takahe
from .user import User
class Task(models.Model):
TaskQueue = "default"
TaskType = "unknown"
DefaultMetadata = {}
class States(models.IntegerChoices):
pending = 0, "Pending"
started = 1, "Started"
complete = 2, "Complete"
failed = 3, "Failed"
user = models.ForeignKey(User, models.CASCADE, null=False)
type = models.CharField(max_length=20, null=False)
state = models.IntegerField(choices=States.choices, default=States.pending)
metadata = models.JSONField(null=False, default=dict)
message = models.TextField(default="")
created_time = models.DateTimeField(auto_now_add=True)
edited_time = models.DateTimeField(auto_now=True)
class Meta:
indexes = [models.Index(fields=["user", "type"])]
@property
def job_id(self):
if not self.pk:
raise ValueError("task not saved yet")
return f"{self.type}-{self.user}-{self.pk}"
def __str__(self):
return self.job_id
@classmethod
def enqueue(cls, user: User, **kwargs) -> "Task":
d = cls.DefaultMetadata.copy()
d.update(kwargs)
t = cls.objects.create(user=user, type=cls.TaskType, metadata=d)
django_rq.get_queue(cls.TaskQueue).enqueue(cls._run, t.pk, job_id=t.job_id)
return t
@classmethod
def _run(cls, task_id: int):
task = cls.objects.get(pk=task_id)
try:
task.state = cls.States.started
task.save(update_fields=["state"])
with set_actor(task.user):
task.run()
task.state = cls.States.complete
task.save(update_fields=["state"])
except Exception as e:
logger.error(f"Task {task} Exception {e}")
task.message = "Error occured."
task.state = cls.States.failed
task.save(update_fields=["state", "message"])
def run(self) -> None:
raise NotImplemented