From 80874804c88e888696ece6c1f75a93975cef238a Mon Sep 17 00:00:00 2001
From: Her Email
Date: Sun, 26 Nov 2023 17:23:53 -0500
Subject: [PATCH] streamline migration from 0.8.x #397

---
 boofilsic/settings.py                         |  4 +
 catalog/sites/discogs.py                      |  2 +-
 catalog/sites/fedi.py                         |  5 +-
 catalog/sites/rss.py                          |  2 +-
 common/management/commands/cron.py            |  6 +-
 common/management/commands/setup.py           |  1 +
 common/models.py                              | 10 +--
 common/setup.py                               | 16 ++--
 compose.yml                                   |  6 +-
 ...20_shelflogentry_unique_shelf_log_entry.py | 14 +++
 mastodon/api.py                               |  2 +-
 takahe/management/commands/backfill_takahe.py | 88 +++++++++++++++++++
 takahe/models.py                              | 17 ++--
 takahe/utils.py                               |  8 +-
 .../management/commands/generate_usernames.py | 77 ++++++++++++++++
 users/migrations/0013_init_identity.py        | 37 ++++----
 ...nce_mastodon_skip_relationship_and_more.py | 19 ++--
 users/models/user.py                          | 33 ++++---
 18 files changed, 279 insertions(+), 68 deletions(-)
 create mode 100644 takahe/management/commands/backfill_takahe.py
 create mode 100644 users/management/commands/generate_usernames.py

diff --git a/boofilsic/settings.py b/boofilsic/settings.py
index 3961f894..74efda65 100644
--- a/boofilsic/settings.py
+++ b/boofilsic/settings.py
@@ -192,6 +192,10 @@ REDIRECT_URIS = env(
 
 # Timeout of requests to Mastodon, in seconds
 MASTODON_TIMEOUT = env("NEODB_LOGIN_MASTODON_TIMEOUT", default=10)  # type: ignore
+TAKAHE_REMOTE_TIMEOUT = MASTODON_TIMEOUT
+
+NEODB_USER_AGENT = f"NeoDB/{NEODB_VERSION} (+{SITE_INFO.get('site_url', 'undefined')})"
+TAKAHE_USER_AGENT = NEODB_USER_AGENT
 
 # Scope when creating Mastodon apps
 # Alternatively, use "read write follow" to avoid re-authorize when migrating to a future version with more features
diff --git a/catalog/sites/discogs.py b/catalog/sites/discogs.py
index 1f1a6c88..7593c531 100644
--- a/catalog/sites/discogs.py
+++ b/catalog/sites/discogs.py
@@ -135,7 +135,7 @@ class DiscogsMaster(AbstractSite):
 def get_discogs_data(data_type: str, discogs_id):
     if data_type not in ("releases", "masters"):
         raise ValueError("data_type can only be in ('releases' or masters')")
-    user_agent_string = "Neodb/0.1"
+    user_agent_string = settings.NEODB_USER_AGENT
     user_token = settings.DISCOGS_API_KEY
     headers = {
         "User-Agent": user_agent_string,
diff --git a/catalog/sites/fedi.py b/catalog/sites/fedi.py
index a2ee5632..77c99850 100644
--- a/catalog/sites/fedi.py
+++ b/catalog/sites/fedi.py
@@ -32,7 +32,10 @@ class FediverseInstance(AbstractSite):
         "Performance": Performance,
         "PerformanceProduction": PerformanceProduction,
     }
-    request_header = {"User-Agent": "NeoDB/0.5", "Accept": "application/activity+json"}
+    request_header = {
+        "User-Agent": settings.NEODB_USER_AGENT,
+        "Accept": "application/activity+json",
+    }
 
     @classmethod
     def id_to_url(cls, id_value):
diff --git a/catalog/sites/rss.py b/catalog/sites/rss.py
index 11dba3c3..4933dac8 100644
--- a/catalog/sites/rss.py
+++ b/catalog/sites/rss.py
@@ -41,7 +41,7 @@ class RSS(AbstractSite):
             feed = pickle.load(open(_local_response_path + get_mock_file(url), "rb"))
         else:
             req = urllib.request.Request(url)
-            req.add_header("User-Agent", "NeoDB/0.5")
+            req.add_header("User-Agent", settings.NEODB_USER_AGENT)
             try:
                 feed = podcastparser.parse(url, urllib.request.urlopen(req, timeout=3))
             except:
diff --git a/common/management/commands/cron.py b/common/management/commands/cron.py
index d84f9828..88a1a7bb 100644
--- a/common/management/commands/cron.py
+++ b/common/management/commands/cron.py
@@ -41,10 +41,10 @@ class Command(BaseCommand):
 
     def handle(self, *args, **options):
         if options["cancel"]:
-            JobManager.cancel()
+            JobManager.cancel_all()
         if options["schedule"]:
-            JobManager.cancel()  # cancel previously scheduled jobs if any
-            JobManager.schedule()
+            JobManager.cancel_all()  # cancel previously scheduled jobs if any
+            JobManager.schedule_all()
         if options["runonce"]:
             for job_id in options["runonce"]:
                 run = JobManager.run(job_id)
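
The cron command is now a thin wrapper over the renamed JobManager entry points: schedule/cancel become schedule_all/cancel_all, and the cancel-then-schedule sequence it used to spell out moves into reschedule_all (see common/models.py below). A minimal sketch of driving the command programmatically, assuming --cancel, --schedule and --runonce are the flags behind options[...] in handle():

    from django.core.management import call_command

    call_command("cron", schedule=True)         # cancel any stale jobs, then schedule all
    call_command("cron", runonce=["discover"])  # "discover" is a hypothetical job id
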
diff --git a/common/management/commands/setup.py b/common/management/commands/setup.py
index 6f3a23fc..cfc57c12 100644
--- a/common/management/commands/setup.py
+++ b/common/management/commands/setup.py
@@ -18,6 +18,7 @@ class Command(BaseCommand):
             domain=domain,
             local=True,
             service_domain=service_domain,
+            state="updated",
             notes="NeoDB",
             nodeinfo={},
         )
diff --git a/common/models.py b/common/models.py
index 7ee65b4f..98c7b012 100644
--- a/common/models.py
+++ b/common/models.py
@@ -53,12 +53,12 @@ class JobManager:
         return target
 
     @classmethod
-    def schedule(cls):
+    def schedule_all(cls):
         for j in cls.registry:
             j.schedule()
 
     @classmethod
-    def cancel(cls):
+    def cancel_all(cls):
         for j in cls.registry:
             j.cancel()
 
@@ -77,11 +77,11 @@
         return registry.get_job_ids()
 
     @classmethod
-    def schedule_all(cls):
+    def reschedule_all(cls):
         # TODO rewrite lazy import in a better way
         from catalog.jobs import DiscoverGenerator, PodcastUpdater
         from mastodon.jobs import MastodonSiteCheck
         from users.jobs import MastodonUserSync
 
-        cls.cancel()
-        cls.schedule()
+        cls.cancel_all()
+        cls.schedule_all()
diff --git a/common/setup.py b/common/setup.py
index ca830d3b..6ed36de8 100644
--- a/common/setup.py
+++ b/common/setup.py
@@ -20,12 +20,15 @@ class Setup:
     """
 
     def create_site(self, domain, service_domain):
-        TakaheDomain.objects.create(
+        TakaheDomain.objects.update_or_create(
             domain=domain,
-            local=True,
-            service_domain=service_domain,
-            notes="NeoDB",
-            nodeinfo={},
+            defaults={
+                "local": True,
+                "service_domain": service_domain,
+                "notes": "NeoDB",
+                "nodeinfo": {},
+                "state": "updated",
+            },
         )
         TakaheConfig.objects.update_or_create(
             key="public_timeline",
@@ -156,8 +159,9 @@ class Setup:
 
         # Register cron jobs if not yet
         if settings.DISABLE_CRON:
             logger.info("Cron jobs are disabled.")
+            JobManager.cancel_all()
         else:
-            JobManager.schedule_all()
+            JobManager.reschedule_all()
 
         logger.info("Finished post-migration setup.")
diff --git a/compose.yml b/compose.yml
index ca74f737..91c90e8a 100644
--- a/compose.yml
+++ b/compose.yml
@@ -39,6 +39,8 @@ x-shared:
     NEODB_SEARCH_URL: ${NEODB_SEARCH_URL:-typesense://user:eggplant@typesense:8108/catalog}
     NEODB_EMAIL_URL:
     NEODB_EMAIL_FROM: no-reply@${NEODB_SITE_DOMAIN}
+    NEODB_FANOUT_LIMIT_DAYS:
+    TAKAHE_FANOUT_LIMIT_DAYS:
     NEODB_DOWNLOADER_PROXY_LIST:
     NEODB_DOWNLOADER_BACKUP_PROXY:
     NEODB_DOWNLOADER_SAVE_DIR:
@@ -53,8 +55,8 @@
     TAKAHE_MEDIA_BACKEND: local://www/media/
     TAKAHE_MEDIA_ROOT: /www/media
     TAKAHE_USE_PROXY_HEADERS: true
-    TAKAHE_STATOR_CONCURRENCY: 4
-    TAKAHE_STATOR_CONCURRENCY_PER_MODEL: 2
+    TAKAHE_STATOR_CONCURRENCY: ${TAKAHE_STATOR_CONCURRENCY:-4}
+    TAKAHE_STATOR_CONCURRENCY_PER_MODEL: ${TAKAHE_STATOR_CONCURRENCY_PER_MODEL:-2}
    TAKAHE_DEBUG: ${NEODB_DEBUG:-True}
     TAKAHE_VENV: /takahe-venv
     SPOTIFY_API_KEY:
diff --git a/journal/migrations/0020_shelflogentry_unique_shelf_log_entry.py b/journal/migrations/0020_shelflogentry_unique_shelf_log_entry.py
index ecd7ab99..86256a00 100644
--- a/journal/migrations/0020_shelflogentry_unique_shelf_log_entry.py
+++ b/journal/migrations/0020_shelflogentry_unique_shelf_log_entry.py
@@ -2,6 +2,14 @@
 
 from django.db import migrations, models
 
+_sql = """DELETE
+FROM journal_shelflogentry a USING journal_shelflogentry b
+WHERE a.ctid < b.ctid
+  AND a.item_id=b.item_id
+  AND a.owner_id=b.owner_id
+  AND a.timestamp=b.timestamp
+  AND a.shelf_type=b.shelf_type"""
+
 
 class Migration(migrations.Migration):
 
@@ -10,6 +18,12 @@ class Migration(migrations.Migration):
     ]
 
     operations = [
+        migrations.RunSQL("SET CONSTRAINTS ALL IMMEDIATE;"),
+        migrations.RunSQL(
+            sql=_sql,
+            reverse_sql=migrations.RunSQL.noop,
+        ),
+        migrations.RunSQL("SET CONSTRAINTS ALL DEFERRED;"),
         migrations.AddConstraint(
             model_name="shelflogentry",
             constraint=models.UniqueConstraint(
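
The raw SQL above leans on PostgreSQL's ctid, a physical row locator, to delete every row of a duplicate group except the physically-first one before the unique constraint is added; the surrounding SET CONSTRAINTS statements switch any deferred constraints to per-statement checking while the cleanup runs, then restore deferred checking. A minimal sketch of the same dedup-then-constrain pattern for a hypothetical app_entry table (PostgreSQL only; the table, fields and names are illustrative):

    from django.db import migrations, models

    _dedup = """DELETE
    FROM app_entry a USING app_entry b
    WHERE a.ctid < b.ctid  -- keep one physical row per duplicate group
      AND a.owner_id = b.owner_id
      AND a.key = b.key"""


    class Migration(migrations.Migration):
        dependencies = [("app", "0001_initial")]

        operations = [
            migrations.RunSQL(_dedup, reverse_sql=migrations.RunSQL.noop),
            migrations.AddConstraint(
                model_name="entry",
                constraint=models.UniqueConstraint(
                    fields=["owner", "key"], name="unique_entry"
                ),
            ),
        ]
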
diff --git a/mastodon/api.py b/mastodon/api.py
index e378272a..02b98b28 100644
--- a/mastodon/api.py
+++ b/mastodon/api.py
@@ -53,7 +53,7 @@ API_CREATE_APP = "/api/v1/apps"
 # GET
 API_SEARCH = "/api/v2/search"
 
-USER_AGENT = f"NeoDB/{settings.NEODB_VERSION} (+{settings.SITE_INFO.get('site_url', 'undefined')})"
+USER_AGENT = settings.NEODB_USER_AGENT
 
 get = functools.partial(requests.get, timeout=settings.MASTODON_TIMEOUT)
 put = functools.partial(requests.put, timeout=settings.MASTODON_TIMEOUT)
diff --git a/takahe/management/commands/backfill_takahe.py b/takahe/management/commands/backfill_takahe.py
new file mode 100644
index 00000000..956caa3f
--- /dev/null
+++ b/takahe/management/commands/backfill_takahe.py
@@ -0,0 +1,88 @@
+from django.conf import settings
+from django.contrib.contenttypes.models import ContentType
+from django.core.management.base import BaseCommand
+from django.db.models import Count, F
+from loguru import logger
+from tqdm import tqdm
+
+from catalog.common import *
+from catalog.common.models import *
+from catalog.models import *
+from journal.models import *
+from takahe.utils import *
+from users.models import APIdentity
+from users.models import User as NeoUser
+
+
+def content_type_id(cls):
+    return ContentType.objects.get(app_label="journal", model=cls.__name__.lower()).pk
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--verbose",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--post",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--like",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--post-new",
+            action="store_true",
+        )
+        parser.add_argument("--start", default=0, action="store")
+        parser.add_argument("--count", default=0, action="store")
+
+    def process_post(self):
+        logger.info(f"Processing posts...")
+        qs = Piece.objects.filter(
+            polymorphic_ctype__in=[
+                content_type_id(ShelfMember),
+                content_type_id(Comment),
+                content_type_id(Review),
+            ]
+        ).order_by("id")
+        if self.starting_id:
+            qs = qs.filter(id__gte=self.starting_id)
+        tracker = tqdm(qs.iterator(), total=self.count_est or qs.count())
+        for p in tracker:
+            tracker.set_postfix_str(f"{p.id}")
+            if p.__class__ == ShelfMember:
+                mark = Mark(p.owner, p.item)
+                Takahe.post_mark(mark, self.post_new)
+            elif p.__class__ == Comment:
+                if p.item.__class__ in [PodcastEpisode, TVEpisode]:
+                    Takahe.post_comment(p, self.post_new)
+            elif p.__class__ == Review:
+                Takahe.post_review(p, self.post_new)
+
+    def process_like(self):
+        logger.info(f"Processing likes...")
+        qs = Like.objects.order_by("id")
+        tracker = tqdm(qs)
+        for like in tracker:
+            post_id = like.target.latest_post_id
+            if post_id:
+                Takahe.like_post(post_id, like.owner.pk)
+            else:
+                logger.warning(f"Post not found for like {like.id}")
+
+    def handle(self, *args, **options):
+        self.verbose = options["verbose"]
+        self.post_new = options["post_new"]
+        self.starting_id = int(options["start"])
+        self.count_est = int(options["count"])
+
+        if options["post"]:
+            self.process_post()
+
+        if options["like"]:
+            self.process_like()
+
+        self.stdout.write(self.style.SUCCESS(f"Done."))
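
backfill_takahe re-creates Takahe posts for existing marks, episode comments and reviews, and can then mirror existing likes onto the new posts. A minimal usage sketch, assuming migrations have completed; the id and count values are hypothetical:

    from django.core.management import call_command

    # rebuild posts, resuming from Piece id 1000 (--start), with a progress total (--count)
    call_command("backfill_takahe", post=True, start=1000, count=50000)
    # then replay likes against the posts created above
    call_command("backfill_takahe", like=True)

Passing post_new=True (--post-new) makes marks, comments and reviews create fresh posts instead of updating the latest existing ones.
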
diff --git a/takahe/models.py b/takahe/models.py
index 1be80dd8..3221ad8e 100644
--- a/takahe/models.py
+++ b/takahe/models.py
@@ -526,7 +526,7 @@ class Identity(models.Model):
         based on probing host-meta.
         """
         with httpx.Client(
-            timeout=settings.SETUP.REMOTE_TIMEOUT,
+            timeout=settings.TAKAHE_REMOTE_TIMEOUT,
             headers={"User-Agent": settings.TAKAHE_USER_AGENT},
         ) as client:
             try:
@@ -565,7 +565,7 @@
 
         # Go make a Webfinger request
         with httpx.Client(
-            timeout=settings.SETUP.REMOTE_TIMEOUT,
+            timeout=settings.TAKAHE_REMOTE_TIMEOUT,
             headers={"User-Agent": settings.TAKAHE_USER_AGENT},
         ) as client:
             try:
@@ -1066,6 +1066,7 @@
         attachments: list | None = None,
         attachment_attributes: list | None = None,
         type_data: dict | None = None,
+        edited: datetime.datetime | None = None,
     ):
         with transaction.atomic():
             # Strip all HTML and apply linebreaks filter
@@ -1099,6 +1100,12 @@
         self.state_changed = timezone.now()
         self.state_next_attempt = None
         self.state_locked_until = None
+        if edited and edited < timezone.now():
+            self.published = edited
+            if timezone.now() - edited > datetime.timedelta(
+                days=settings.FANOUT_LIMIT_DAYS
+            ):
+                self.state = "edited_fanned_out"  # add post quietly if it's old
         self.save()
 
     @classmethod
@@ -1109,13 +1116,13 @@
             handle = handle.lower()
             if "@" in handle:
                 username, domain = handle.split("@", 1)
+                local = False
             else:
                 username = handle
                 domain = author.domain_id
+                local = author.local
             identity = Identity.by_username_and_domain(
-                username=username,
-                domain=domain,
-                fetch=True,
+                username=username, domain=domain, fetch=True, local=local
             )
             if identity is not None:
                 mentions.add(identity)
diff --git a/takahe/utils.py b/takahe/utils.py
index 3b08adb8..9fdadcc1 100644
--- a/takahe/utils.py
+++ b/takahe/utils.py
@@ -366,7 +366,11 @@ class Takahe:
             raise ValueError(f"Cannot find post to reply: {reply_to_pk}")
         if post:
             post.edit_local(
-                pre_conetent, content, visibility=visibility, type_data=data
+                pre_conetent,
+                content,
+                visibility=visibility,
+                type_data=data,
+                edited=post_time,
             )
         else:
             post = Post.create_local(
@@ -435,7 +439,7 @@
             comment.visibility, user.preference.mastodon_publish_public
         )
         existing_post = None if share_as_new_post else comment.latest_post
-        post = Takahe.post(  # TODO post as Article?
+        post = Takahe.post(
             comment.owner.pk,
             pre_conetent,
             content,
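
The new edited parameter lets backfilled content stay backdated: Takahe.post forwards the original post_time into Post.edit_local, which sets published to it and, when the post is older than FANOUT_LIMIT_DAYS, parks it directly in the edited_fanned_out state so it is added quietly and never re-announced to followers. A minimal sketch with hypothetical content values, assuming post is an existing local Post and that visibility/type_data keep their defaults:

    import datetime
    from django.utils import timezone

    original_time = timezone.now() - datetime.timedelta(days=400)
    post.edit_local(
        "",                           # summary / content warning (first positional arg)
        "<p>reviewed years ago</p>",  # hypothetical content
        edited=original_time,         # backdates published; old enough to skip fanout
    )
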
diff --git a/users/management/commands/generate_usernames.py b/users/management/commands/generate_usernames.py
new file mode 100644
index 00000000..5165a8d8
--- /dev/null
+++ b/users/management/commands/generate_usernames.py
@@ -0,0 +1,77 @@
+from datetime import timedelta
+
+from django.apps import apps
+from django.core.management.base import BaseCommand
+from django.utils import timezone
+from tqdm import tqdm
+
+from users.models.user import _RESERVED_USERNAMES
+
+User = apps.get_model("users", "User")
+_RESERVED_USERNAMES = [
+    "connect",
+    "oauth2_login",
+    "__",
+    "admin",
+    "api",
+    "me",
+]
+
+
+class Command(BaseCommand):
+    help = "Generate unique username"
+
+    def process_users(self, users):
+        count = 0
+        for user in users:
+            if not user.is_active:
+                un = f"-{user.pk}-"
+            else:
+                un = user.mastodon_username
+                if not un:
+                    un = f"_{user.pk}"
+                if un.lower() in _RESERVED_USERNAMES:
+                    un = f"__{un}"
+                if User.objects.filter(username__iexact=un).exists():  # type: ignore
+                    un = f"{un}_{user.pk}"
+            print(f"{user} -> {un}")
+            user.username = un
+            user.save(update_fields=["username"])
+            count += 1
+        print(f"{count} users updated")
+
+    def handle(self, *args, **options):
+        print("Processing active users")
+        # recent logged in users
+        proactive_users = User.objects.filter(  # type: ignore
+            username__isnull=True,
+            is_active=True,
+            last_login__gt=timezone.now() - timedelta(days=30),
+        ).order_by("date_joined")
+        # users with mastodon token still active
+        active_users = (
+            User.objects.filter(  # type: ignore
+                username__isnull=True,
+                is_active=True,
+            )
+            .exclude(mastodon_token="")
+            .order_by("date_joined")
+        )
+        # users with mastodon handler still reachable
+        reachable_users = User.objects.filter(  # type: ignore
+            username__isnull=True,
+            is_active=True,
+            mastodon_last_reachable__gt=timezone.now() - timedelta(days=7),
+        ).order_by("date_joined")
+        # all other users
+        users = User.objects.filter(  # type: ignore
+            username__isnull=True,
+        ).order_by("date_joined")
+        print(f"{proactive_users.count()} proactive users")
+        self.process_users(proactive_users)
+        print(f"{active_users.count()} active users")
+        self.process_users(active_users)
+        print(f"{reachable_users.count()} reachable users")
+        self.process_users(reachable_users)
+        print(f"{users.count()} other users")
+        self.process_users(users)
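
generate_usernames backfills the new username field in four passes: users who logged in within 30 days, then users whose Mastodon token is still set, then users whose Mastodon server was reachable within 7 days, then everyone else, so the most active accounts win any name contention (later claimants get a _{pk} suffix; reserved or missing names get placeholders). A minimal sketch, assuming a database migrated from 0.8.x where username is still null:

    from django.core.management import call_command

    call_command("generate_usernames")
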
"notes": "NeoDB", + "nodeinfo": {}, + "state": "updated", + }, + ) TakaheConfig.objects.update_or_create( key="public_timeline", user=None, @@ -85,7 +86,9 @@ def init_identity(apps, schema_editor): local=True, username=username, domain_name=domain, - deleted=None if user.is_active else user.updated, + deleted=None + if user.is_active + else user.mastodon_last_reachable + timedelta(days=90), ) takahe_user = TakaheUser.objects.create( pk=user.pk, email=handler, admin=user.is_superuser @@ -93,12 +96,14 @@ def init_identity(apps, schema_editor): takahe_identity = TakaheIdentity.objects.create( pk=user.pk, actor_uri=f"https://{service_domain or domain}/@{username}@{domain}/", - profile_uri=user.url, + profile_uri=f"https://{service_domain or domain}/users/{username}/", username=username, domain=tdomain, name=username, local=True, - discoverable=not user.preference.no_anonymous_view, + discoverable=True, + state="updated", + state_next_attempt=timezone.now() + timedelta(days=365 * 99), ) takahe_identity.generate_keypair() takahe_user.identities.add(takahe_identity) diff --git a/users/migrations/0014_preference_mastodon_skip_relationship_and_more.py b/users/migrations/0014_preference_mastodon_skip_relationship_and_more.py index 3dce5286..14a8d631 100644 --- a/users/migrations/0014_preference_mastodon_skip_relationship_and_more.py +++ b/users/migrations/0014_preference_mastodon_skip_relationship_and_more.py @@ -6,9 +6,13 @@ from tqdm import tqdm def migrate_relationships(apps, schema_editor): - User = apps.get_model("users", "User") - APIdentity = apps.get_model("users", "APIdentity") - logger.info(f"Migrate user relationship") + # User = apps.get_model("users", "User") + # APIdentity = apps.get_model("users", "APIdentity") + from takahe.models import Block as TakaheBlock + from takahe.models import Follow as TakaheFollow + from users.models import APIdentity, User + + logger.info(f"Migrate local user relationship") for user in tqdm(User.objects.filter(is_active=True)): identity = APIdentity.objects.get(user=user) for target in user.local_following.all(): @@ -20,11 +24,12 @@ def migrate_relationships(apps, schema_editor): for target in user.local_muting.all(): target_identity = APIdentity.objects.get(user=target) identity.mute(target_identity) - user.sync_relationship() + logger.info(f"Migrate sync user relationship") for user in tqdm(User.objects.filter(is_active=True)): - identity = APIdentity.objects.get(user=user) - for target_identity in identity.follow_requesting_identities: - target_identity.accept_follow_request(identity) + user.sync_relationship() + logger.info(f"Update relationship states") + TakaheBlock.objects.all().update(state="sent") + TakaheFollow.objects.all().update(state="accepted") class Migration(migrations.Migration): diff --git a/users/models/user.py b/users/models/user.py index 899f5cf0..c543afd7 100644 --- a/users/models/user.py +++ b/users/models/user.py @@ -236,26 +236,23 @@ class User(AbstractUser): def sync_relationship(self): from .apidentity import APIdentity - for target in self.mastodon_followers: - t = target.split("@") - target_identity = APIdentity.objects.filter( - user__mastodon_username=t[0], user__mastodon_site=t[1] - ).first() - if target_identity and not self.identity.is_following(target_identity): + def get_identities(accts: list): + q = Q(pk__in=[]) + for acct in accts or []: + t = acct.split("@") if acct else [] + if len(t) == 2: + q = q | Q(mastodon_username=t[0], mastodon_site=t[1]) + users = User.objects.filter(is_active=True).filter(q) + return 
diff --git a/users/models/user.py b/users/models/user.py
index 899f5cf0..c543afd7 100644
--- a/users/models/user.py
+++ b/users/models/user.py
@@ -236,26 +236,23 @@ class User(AbstractUser):
     def sync_relationship(self):
         from .apidentity import APIdentity
 
-        for target in self.mastodon_followers:
-            t = target.split("@")
-            target_identity = APIdentity.objects.filter(
-                user__mastodon_username=t[0], user__mastodon_site=t[1]
-            ).first()
-            if target_identity and not self.identity.is_following(target_identity):
+        def get_identities(accts: list):
+            q = Q(pk__in=[])
+            for acct in accts or []:
+                t = acct.split("@") if acct else []
+                if len(t) == 2:
+                    q = q | Q(mastodon_username=t[0], mastodon_site=t[1])
+            users = User.objects.filter(is_active=True).filter(q)
+            return APIdentity.objects.filter(user__in=users)
+
+        for target_identity in get_identities(self.mastodon_following):
+            if not self.identity.is_following(target_identity):
                 self.identity.follow(target_identity)
-        for target in self.mastodon_blocks:
-            t = target.split("@")
-            target_identity = APIdentity.objects.filter(
-                user__mastodon_username=t[0], user__mastodon_site=t[1]
-            ).first()
-            if target_identity and not self.identity.is_blocking(target_identity):
+        for target_identity in get_identities(self.mastodon_blocks):
+            if not self.identity.is_blocking(target_identity):
                 self.identity.block(target_identity)
-        for target in self.mastodon_mutes:
-            t = target.split("@")
-            target_identity = APIdentity.objects.filter(
-                user__mastodon_username=t[0], user__mastodon_site=t[1]
-            ).first()
-            if target_identity and not self.identity.is_muting(target_identity):
+        for target_identity in get_identities(self.mastodon_mutes):
+            if not self.identity.is_muting(target_identity):
                 self.identity.mute(target_identity)
 
     def sync_identity(self):
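
sync_relationship now resolves remote handles in one query per relationship type instead of one lookup per handle: get_identities folds each user@host acct into a Q expression, starting from Q(pk__in=[]) which matches nothing, so empty or malformed lists safely yield no rows. A standalone sketch of the pattern with hypothetical handles:

    from django.db.models import Q

    accts = ["alice@mastodon.social", "bob@fosstodon.org", "not-a-handle"]
    q = Q(pk__in=[])  # match-nothing base case
    for acct in accts:
        t = acct.split("@")
        if len(t) == 2:
            q = q | Q(mastodon_username=t[0], mastodon_site=t[1])
    users = User.objects.filter(is_active=True).filter(q)  # one SQL query when evaluated
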