import letterboxd
This commit is contained in:
parent
3a47eb1831
commit
ae9fb6f3c6
4 changed files with 280 additions and 0 deletions
135
journal/importers/letterboxd.py
Normal file
135
journal/importers/letterboxd.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
import csv
|
||||
import re
|
||||
import tempfile
|
||||
import zipfile
|
||||
from datetime import datetime
|
||||
|
||||
from django.utils import timezone
|
||||
from django.utils.dateparse import parse_datetime
|
||||
from django.utils.timezone import make_aware
|
||||
from loguru import logger
|
||||
from user_messages import api as msg
|
||||
|
||||
from catalog.common import *
|
||||
from catalog.common.downloaders import *
|
||||
from catalog.models import *
|
||||
from journal.models import *
|
||||
from users.models import *
|
||||
|
||||
|
||||
class LetterboxdImporter(Task):
    """Import a Letterboxd data-export zip as marks for the task's user.

    The zip (letterboxd.com Settings -> Import & Export) contains
    reviews.csv, ratings.csv, watched.csv and watchlist.csv; each row is
    resolved to a catalog movie via the TMDB id embedded in its
    Letterboxd page, then recorded as a mark. Files are processed in
    that order and each URI is imported at most once.
    """

    TaskQueue = "import"
    TaskType = "import.letterboxd"
    DefaultMetadata = {
        "total": 0,
        "processed": 0,
        "skipped": 0,
        "imported": 0,
        "failed": 0,
        "visibility": 0,  # visibility applied to every imported mark/tag
        "file": None,  # path of the uploaded zip archive
    }

    def get_item_by_url(self, url):
        """Resolve a Letterboxd film URL to a catalog item.

        Returns the catalog item, or None when the page cannot be
        fetched/parsed, is not a movie, or no site handler matches.
        """
        try:
            h = BasicDownloader(url).html()  # type:ignore
            tt = h.xpath("//body/@data-tmdb-type")[0].strip()
            # BUGFIX: the id must come from data-tmdb-id; the original
            # read data-tmdb-type twice, so `ti` held the type string.
            ti = h.xpath("//body/@data-tmdb-id")[0].strip()
            if tt != "movie":
                logger.error(f"Unknown TMDB type {tt} / {ti}")
                return None
            site = SiteManager.get_site_by_id(IdType.TMDB_Movie, ti)
            if not site:
                return None
            site.get_resource_ready()
            return site.get_item()
        except Exception as e:
            logger.error(f"Unable to parse {url} {e}")

    def mark(self, url, shelf_type, date, rating=None, review=None, tags=None):
        """Create or update a mark for the movie behind `url`.

        date: "YYYY-MM-DD" string from the CSV; a per-shelf time-of-day
        offset keeps wishlist/progress/complete events ordered.
        rating: Letterboxd 0.5-5 star value (string or number); scaled
        to the 1-10 grade used internally.
        Existing marks are never downgraded (e.g. COMPLETE is not
        overwritten by a later WISHLIST row).
        """
        item = self.get_item_by_url(url)
        if not item:
            logger.error(f"Unable to get item for {url}")
            return
        owner = self.user.identity
        mark = Mark(owner, item)
        if (
            mark.shelf_type == shelf_type
            or mark.shelf_type == ShelfType.COMPLETE
            or (
                mark.shelf_type == ShelfType.PROGRESS
                and shelf_type == ShelfType.WISHLIST
            )
        ):
            return
        visibility = self.metadata["visibility"]
        shelf_time_offset = {
            ShelfType.WISHLIST: " 20:00:00",
            ShelfType.PROGRESS: " 21:00:00",
            ShelfType.COMPLETE: " 22:00:00",
        }
        dt = parse_datetime(date + shelf_time_offset[shelf_type])
        mark.update(
            shelf_type,
            comment_text=review or None,
            # BUGFIX: CSV values are strings; round("3.5" * 2) raises.
            rating_grade=round(float(rating) * 2) if rating else None,
            visibility=visibility,
            created_time=dt,
        )
        if tags:
            tag_titles = [s.strip() for s in tags.split(",")]
            TagManager.tag_item(item, owner, tag_titles, visibility)

    def run(self):
        """Extract the zip and import reviews, ratings, watched, watchlist."""
        uris = set()
        filename = self.metadata["file"]
        with zipfile.ZipFile(filename, "r") as zipref:
            with tempfile.TemporaryDirectory() as tmpdirname:
                # BUGFIX: log the actual archive path (placeholder was lost).
                logger.debug(f"Extracting {filename} to {tmpdirname}")
                zipref.extractall(tmpdirname)
                with open(tmpdirname + "/reviews.csv") as f:
                    reader = csv.DictReader(f, delimiter=",")
                    for row in reader:
                        uris.add(row["Letterboxd URI"])
                        # BUGFIX: Review/Rating were passed positionally in
                        # swapped order (review landed in the rating slot).
                        self.mark(
                            row["Letterboxd URI"],
                            ShelfType.COMPLETE,
                            row["Watched Date"],
                            rating=row["Rating"],
                            review=row["Review"],
                            tags=row["Tags"],
                        )
                with open(tmpdirname + "/ratings.csv") as f:
                    reader = csv.DictReader(f, delimiter=",")
                    for row in reader:
                        if row["Letterboxd URI"] in uris:
                            continue
                        uris.add(row["Letterboxd URI"])
                        self.mark(
                            row["Letterboxd URI"],
                            ShelfType.COMPLETE,
                            row["Date"],
                            rating=row["Rating"],
                        )
                with open(tmpdirname + "/watched.csv") as f:
                    reader = csv.DictReader(f, delimiter=",")
                    for row in reader:
                        if row["Letterboxd URI"] in uris:
                            continue
                        uris.add(row["Letterboxd URI"])
                        self.mark(
                            row["Letterboxd URI"],
                            ShelfType.COMPLETE,
                            row["Date"],
                        )
                with open(tmpdirname + "/watchlist.csv") as f:
                    reader = csv.DictReader(f, delimiter=",")
                    for row in reader:
                        if row["Letterboxd URI"] in uris:
                            continue
                        uris.add(row["Letterboxd URI"])
                        self.mark(
                            row["Letterboxd URI"],
                            ShelfType.WISHLIST,
                            row["Date"],
                        )
|
59
users/migrations/0019_task.py
Normal file
59
users/migrations/0019_task.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Generated by Django 4.2.8 on 2024-01-08 03:37
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Auto-generated migration: creates the users.Task table that backs
    # the generic background-task framework (users/models/task.py).
    # Field definitions and the index name must stay byte-exact so the
    # schema matches the model's autodetected state.

    dependencies = [
        ("users", "0018_apidentity_anonymous_viewable"),
    ]

    operations = [
        migrations.CreateModel(
            name="Task",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Task subclass identifier (Task.TaskType).
                ("type", models.CharField(max_length=20)),
                (
                    # Lifecycle state; mirrors Task.States IntegerChoices.
                    "state",
                    models.IntegerField(
                        choices=[
                            (0, "Pending"),
                            (1, "Started"),
                            (2, "Complete"),
                            (3, "Failed"),
                        ],
                        default=0,
                    ),
                ),
                # Free-form per-task parameters/progress counters.
                ("metadata", models.JSONField(default=dict)),
                ("message", models.TextField(default="")),
                ("created_time", models.DateTimeField(auto_now_add=True)),
                ("edited_time", models.DateTimeField(auto_now=True)),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                # Supports lookups of a user's tasks of a given type.
                "indexes": [
                    models.Index(
                        fields=["user", "type"], name="users_task_user_id_e29f34_idx"
                    )
                ],
            },
        ),
    ]
|
|
@ -1,4 +1,5 @@
|
|||
from .apidentity import APIdentity
|
||||
from .preference import Preference
|
||||
from .report import Report
|
||||
from .task import Task
|
||||
from .user import User
|
||||
|
|
85
users/models/task.py
Normal file
85
users/models/task.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
import hashlib
|
||||
import re
|
||||
from functools import cached_property
|
||||
from operator import index
|
||||
|
||||
from auditlog.context import set_actor
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
from django.core import validators
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.core.serializers.json import DjangoJSONEncoder
|
||||
from django.db import models
|
||||
from django.db.models import F, Q, Value
|
||||
from django.db.models.functions import Concat, Lower
|
||||
from django.templatetags.static import static
|
||||
from django.urls import reverse
|
||||
from django.utils import timezone
|
||||
from django.utils.deconstruct import deconstructible
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from loguru import logger
|
||||
|
||||
from management.models import Announcement
|
||||
from mastodon.api import *
|
||||
from takahe.utils import Takahe
|
||||
|
||||
from .user import User
|
||||
|
||||
|
||||
class Task(models.Model):
    """Base model for queued background jobs.

    Subclasses override TaskQueue / TaskType / DefaultMetadata and
    implement run(); enqueue() persists a row and schedules _run() on
    the configured rq queue, which tracks state transitions
    (pending -> started -> complete/failed).
    """

    TaskQueue = "default"  # rq queue name; subclasses may override
    TaskType = "unknown"  # stored in `type` to identify the subclass
    DefaultMetadata = {}  # template dict merged with enqueue() kwargs

    class States(models.IntegerChoices):
        pending = 0, "Pending"
        started = 1, "Started"
        complete = 2, "Complete"
        failed = 3, "Failed"

    user = models.ForeignKey(User, models.CASCADE, null=False)
    type = models.CharField(max_length=20, null=False)
    state = models.IntegerField(choices=States.choices, default=States.pending)
    metadata = models.JSONField(null=False, default=dict)
    message = models.TextField(default="")
    created_time = models.DateTimeField(auto_now_add=True)
    edited_time = models.DateTimeField(auto_now=True)

    class Meta:
        indexes = [models.Index(fields=["user", "type"])]

    @property
    def job_id(self):
        """Unique rq job id; requires the row to be saved first."""
        if not self.pk:
            raise ValueError("task not saved yet")
        return f"{self.type}-{self.user}-{self.pk}"

    def __str__(self):
        return self.job_id

    @classmethod
    def enqueue(cls, user: User, **kwargs) -> "Task":
        """Create a task row for `user` and schedule it on the task queue.

        kwargs override entries of DefaultMetadata; the merged dict is
        stored as the task's metadata.
        """
        d = cls.DefaultMetadata.copy()
        d.update(kwargs)
        t = cls.objects.create(user=user, type=cls.TaskType, metadata=d)
        # NOTE(review): django_rq is not in this file's visible imports —
        # presumably provided by a wildcard import above; confirm.
        django_rq.get_queue(cls.TaskQueue).enqueue(cls._run, t.pk, job_id=t.job_id)
        return t

    @classmethod
    def _run(cls, task_id: int):
        """Worker entry point: execute the task and record its final state."""
        task = cls.objects.get(pk=task_id)
        try:
            task.state = cls.States.started
            task.save(update_fields=["state"])
            # Attribute audit-log entries made during run() to the owner.
            with set_actor(task.user):
                task.run()
            task.state = cls.States.complete
            task.save(update_fields=["state"])
        except Exception as e:
            logger.error(f"Task {task} Exception {e}")
            # BUGFIX: spelling — "occured" -> "occurred".
            task.message = "Error occurred."
            task.state = cls.States.failed
            task.save(update_fields=["state", "message"])

    def run(self) -> None:
        """Subclasses must implement the actual work here."""
        # BUGFIX: `raise NotImplemented` raises a TypeError at runtime,
        # because NotImplemented is a sentinel value, not an exception.
        raise NotImplementedError()
|
Loading…
Add table
Reference in a new issue