streamline migration from 0.8.x #397

Her Email 2023-11-26 17:23:53 -05:00 committed by Henri Dickson
parent bec9f2f8c3
commit 80874804c8
18 changed files with 279 additions and 68 deletions

View file

@@ -192,6 +192,10 @@ REDIRECT_URIS = env(
 # Timeout of requests to Mastodon, in seconds
 MASTODON_TIMEOUT = env("NEODB_LOGIN_MASTODON_TIMEOUT", default=10)  # type: ignore
+TAKAHE_REMOTE_TIMEOUT = MASTODON_TIMEOUT
+
+NEODB_USER_AGENT = f"NeoDB/{NEODB_VERSION} (+{SITE_INFO.get('site_url', 'undefined')})"
+TAKAHE_USER_AGENT = NEODB_USER_AGENT
 # Scope when creating Mastodon apps
 # Alternatively, use "read write follow" to avoid re-authorize when migrating to a future version with more features
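
The new NEODB_USER_AGENT and TAKAHE_USER_AGENT settings replace the assorted hard-coded strings ("Neodb/0.1", "NeoDB/0.5") that the hunks below remove. A minimal sketch of how any outbound request can now share the single UA string; the fetch_json helper is hypothetical, only the two settings come from this diff:

    import requests
    from django.conf import settings

    def fetch_json(url: str) -> dict:
        # hypothetical helper: one UA string for all outbound HTTP calls
        resp = requests.get(
            url,
            headers={"User-Agent": settings.NEODB_USER_AGENT},
            timeout=settings.MASTODON_TIMEOUT,  # reused here as a plausible default
        )
        resp.raise_for_status()
        return resp.json()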

View file

@@ -135,7 +135,7 @@ class DiscogsMaster(AbstractSite):
 def get_discogs_data(data_type: str, discogs_id):
     if data_type not in ("releases", "masters"):
         raise ValueError("data_type can only be in ('releases' or masters')")
-    user_agent_string = "Neodb/0.1"
+    user_agent_string = settings.NEODB_USER_AGENT
     user_token = settings.DISCOGS_API_KEY
     headers = {
         "User-Agent": user_agent_string,

View file

@@ -32,7 +32,10 @@ class FediverseInstance(AbstractSite):
         "Performance": Performance,
         "PerformanceProduction": PerformanceProduction,
     }
-    request_header = {"User-Agent": "NeoDB/0.5", "Accept": "application/activity+json"}
+    request_header = {
+        "User-Agent": settings.NEODB_USER_AGENT,
+        "Accept": "application/activity+json",
+    }

     @classmethod
     def id_to_url(cls, id_value):

View file

@@ -41,7 +41,7 @@ class RSS(AbstractSite):
             feed = pickle.load(open(_local_response_path + get_mock_file(url), "rb"))
         else:
             req = urllib.request.Request(url)
-            req.add_header("User-Agent", "NeoDB/0.5")
+            req.add_header("User-Agent", settings.NEODB_USER_AGENT)
             try:
                 feed = podcastparser.parse(url, urllib.request.urlopen(req, timeout=3))
             except:

View file

@@ -41,10 +41,10 @@ class Command(BaseCommand):
     def handle(self, *args, **options):
         if options["cancel"]:
-            JobManager.cancel()
+            JobManager.cancel_all()
         if options["schedule"]:
-            JobManager.cancel()  # cancel previously scheduled jobs if any
-            JobManager.schedule()
+            JobManager.cancel_all()  # cancel previously scheduled jobs if any
+            JobManager.schedule_all()
         if options["runonce"]:
             for job_id in options["runonce"]:
                 run = JobManager.run(job_id)

View file

@@ -18,6 +18,7 @@ class Command(BaseCommand):
             domain=domain,
             local=True,
             service_domain=service_domain,
+            state="updated",
             notes="NeoDB",
             nodeinfo={},
         )

View file

@@ -53,12 +53,12 @@ class JobManager:
         return target

     @classmethod
-    def schedule(cls):
+    def schedule_all(cls):
         for j in cls.registry:
             j.schedule()

     @classmethod
-    def cancel(cls):
+    def cancel_all(cls):
         for j in cls.registry:
             j.cancel()
@@ -77,11 +77,11 @@ class JobManager:
         return registry.get_job_ids()

     @classmethod
-    def schedule_all(cls):
+    def reschedule_all(cls):
         # TODO rewrite lazy import in a better way
         from catalog.jobs import DiscoverGenerator, PodcastUpdater
         from mastodon.jobs import MastodonSiteCheck
         from users.jobs import MastodonUserSync

-        cls.cancel()
-        cls.schedule()
+        cls.cancel_all()
+        cls.schedule_all()
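
The renames separate plain registry iteration (schedule_all, cancel_all) from the full reset used after migrations (reschedule_all, which cancels then re-schedules). A minimal sketch of the registry pattern these methods iterate, inferred from the `return target` in the hunk above; the decorator usage and the job class are assumptions, not shown in this diff:

    class JobManager:
        registry: set = set()

        @classmethod
        def register(cls, target):
            cls.registry.add(target)
            return target  # returning target lets this work as a class decorator

    @JobManager.register
    class NightlyJob:  # hypothetical job; real ones live in catalog/mastodon/users
        @classmethod
        def schedule(cls):
            print("enqueue into the scheduler")  # placeholder

        @classmethod
        def cancel(cls):
            print("remove from the scheduler")  # placeholder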

View file

@@ -20,12 +20,15 @@ class Setup:
     """

     def create_site(self, domain, service_domain):
-        TakaheDomain.objects.create(
+        TakaheDomain.objects.update_or_create(
             domain=domain,
-            local=True,
-            service_domain=service_domain,
-            notes="NeoDB",
-            nodeinfo={},
+            defaults={
+                "local": True,
+                "service_domain": service_domain,
+                "notes": "NeoDB",
+                "nodeinfo": {},
+                "state": "updated",
+            },
         )
         TakaheConfig.objects.update_or_create(
             key="public_timeline",
@@ -156,8 +159,9 @@ class Setup:
         # Register cron jobs if not yet
         if settings.DISABLE_CRON:
             logger.info("Cron jobs are disabled.")
+            JobManager.cancel_all()
         else:
-            JobManager.schedule_all()
+            JobManager.reschedule_all()
         logger.info("Finished post-migration setup.")

View file

@@ -39,6 +39,8 @@ x-shared:
       NEODB_SEARCH_URL: ${NEODB_SEARCH_URL:-typesense://user:eggplant@typesense:8108/catalog}
       NEODB_EMAIL_URL:
       NEODB_EMAIL_FROM: no-reply@${NEODB_SITE_DOMAIN}
+      NEODB_FANOUT_LIMIT_DAYS:
+      TAKAHE_FANOUT_LIMIT_DAYS:
       NEODB_DOWNLOADER_PROXY_LIST:
       NEODB_DOWNLOADER_BACKUP_PROXY:
       NEODB_DOWNLOADER_SAVE_DIR:
@@ -53,8 +55,8 @@ x-shared:
       TAKAHE_MEDIA_BACKEND: local://www/media/
       TAKAHE_MEDIA_ROOT: /www/media
       TAKAHE_USE_PROXY_HEADERS: true
-      TAKAHE_STATOR_CONCURRENCY: 4
-      TAKAHE_STATOR_CONCURRENCY_PER_MODEL: 2
+      TAKAHE_STATOR_CONCURRENCY: ${TAKAHE_STATOR_CONCURRENCY:-4}
+      TAKAHE_STATOR_CONCURRENCY_PER_MODEL: ${TAKAHE_STATOR_CONCURRENCY_PER_MODEL:-2}
       TAKAHE_DEBUG: ${NEODB_DEBUG:-True}
       TAKAHE_VENV: /takahe-venv
       SPOTIFY_API_KEY:

View file

@@ -2,6 +2,14 @@
 from django.db import migrations, models

+_sql = """DELETE
+FROM journal_shelflogentry a USING journal_shelflogentry b
+WHERE a.ctid < b.ctid
+AND a.item_id=b.item_id
+AND a.owner_id=b.owner_id
+AND a.timestamp=b.timestamp
+AND a.shelf_type=b.shelf_type"""
+

 class Migration(migrations.Migration):
@@ -10,6 +18,12 @@ class Migration(migrations.Migration):
     ]

     operations = [
+        migrations.RunSQL("SET CONSTRAINTS ALL IMMEDIATE;"),
+        migrations.RunSQL(
+            sql=_sql,
+            reverse_sql=migrations.RunSQL.noop,
+        ),
+        migrations.RunSQL("SET CONSTRAINTS ALL DEFERRED;"),
        migrations.AddConstraint(
             model_name="shelflogentry",
             constraint=models.UniqueConstraint(
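
The raw SQL deduplicates journal_shelflogentry before the new UniqueConstraint lands: `a.ctid < b.ctid` compares PostgreSQL physical row ids, keeping exactly one arbitrary survivor per (item, owner, timestamp, shelf_type) group, something the ORM cannot express. The SET CONSTRAINTS ALL IMMEDIATE wrapper appears to force any deferred constraints to be checked inside the migration transaction rather than at commit. A rough ORM sketch for locating (not deleting) the same duplicates as a sanity check, assuming ShelfLogEntry is the model behind the table:

    from django.db.models import Count
    from journal.models import ShelfLogEntry

    dupes = (
        ShelfLogEntry.objects.values("item_id", "owner_id", "timestamp", "shelf_type")
        .annotate(n=Count("id"))
        .filter(n__gt=1)
    )
    for group in dupes:
        print(group)  # each group had n rows before the migration kept one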

View file

@@ -53,7 +53,7 @@ API_CREATE_APP = "/api/v1/apps"
 # GET
 API_SEARCH = "/api/v2/search"

-USER_AGENT = f"NeoDB/{settings.NEODB_VERSION} (+{settings.SITE_INFO.get('site_url', 'undefined')})"
+USER_AGENT = settings.NEODB_USER_AGENT

 get = functools.partial(requests.get, timeout=settings.MASTODON_TIMEOUT)
 put = functools.partial(requests.put, timeout=settings.MASTODON_TIMEOUT)

View file

@@ -0,0 +1,88 @@
+from django.conf import settings
+from django.contrib.contenttypes.models import ContentType
+from django.core.management.base import BaseCommand
+from django.db.models import Count, F
+from loguru import logger
+from tqdm import tqdm
+
+from catalog.common import *
+from catalog.common.models import *
+from catalog.models import *
+from journal.models import *
+from takahe.utils import *
+from users.models import APIdentity
+from users.models import User as NeoUser
+
+
+def content_type_id(cls):
+    return ContentType.objects.get(app_label="journal", model=cls.__name__.lower()).pk
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--verbose",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--post",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--like",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--post-new",
+            action="store_true",
+        )
+        parser.add_argument("--start", default=0, action="store")
+        parser.add_argument("--count", default=0, action="store")
+
+    def process_post(self):
+        logger.info(f"Processing posts...")
+        qs = Piece.objects.filter(
+            polymorphic_ctype__in=[
+                content_type_id(ShelfMember),
+                content_type_id(Comment),
+                content_type_id(Review),
+            ]
+        ).order_by("id")
+        if self.starting_id:
+            qs = qs.filter(id__gte=self.starting_id)
+        tracker = tqdm(qs.iterator(), total=self.count_est or qs.count())
+        for p in tracker:
+            tracker.set_postfix_str(f"{p.id}")
+            if p.__class__ == ShelfMember:
+                mark = Mark(p.owner, p.item)
+                Takahe.post_mark(mark, self.post_new)
+            elif p.__class__ == Comment:
+                if p.item.__class__ in [PodcastEpisode, TVEpisode]:
+                    Takahe.post_comment(p, self.post_new)
+            elif p.__class__ == Review:
+                Takahe.post_review(p, self.post_new)
+
+    def process_like(self):
+        logger.info(f"Processing likes...")
+        qs = Like.objects.order_by("id")
+        tracker = tqdm(qs)
+        for like in tracker:
+            post_id = like.target.latest_post_id
+            if post_id:
+                Takahe.like_post(post_id, like.owner.pk)
+            else:
+                logger.warning(f"Post not found for like {like.id}")
+
+    def handle(self, *args, **options):
+        self.verbose = options["verbose"]
+        self.post_new = options["post_new"]
+        self.starting_id = int(options["start"])
+        self.count_est = int(options["count"])
+        if options["post"]:
+            self.process_post()
+        if options["like"]:
+            self.process_like()
+        self.stdout.write(self.style.SUCCESS(f"Done."))
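
This new command replays existing marks, comments, reviews, and likes into Takahe posts during the migration. Its file name is not visible in this diff, so the command name below is hypothetical; only the option names come from add_arguments above:

    # Hypothetical invocation; substitute the command's actual registered name.
    from django.core.management import call_command

    call_command("backfill_takahe", post=True, like=True, start=0, count=0)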

View file

@@ -526,7 +526,7 @@ class Identity(models.Model):
         based on probing host-meta.
         """
         with httpx.Client(
-            timeout=settings.SETUP.REMOTE_TIMEOUT,
+            timeout=settings.TAKAHE_REMOTE_TIMEOUT,
             headers={"User-Agent": settings.TAKAHE_USER_AGENT},
         ) as client:
             try:
@@ -565,7 +565,7 @@ class Identity(models.Model):
         # Go make a Webfinger request
         with httpx.Client(
-            timeout=settings.SETUP.REMOTE_TIMEOUT,
+            timeout=settings.TAKAHE_REMOTE_TIMEOUT,
             headers={"User-Agent": settings.TAKAHE_USER_AGENT},
         ) as client:
             try:
@@ -1066,6 +1066,7 @@ class Post(models.Model):
         attachments: list | None = None,
         attachment_attributes: list | None = None,
         type_data: dict | None = None,
+        edited: datetime.datetime | None = None,
     ):
         with transaction.atomic():
             # Strip all HTML and apply linebreaks filter
@@ -1099,6 +1100,12 @@ class Post(models.Model):
             self.state_changed = timezone.now()
             self.state_next_attempt = None
             self.state_locked_until = None
+            if edited and edited < timezone.now():
+                self.published = edited
+                if timezone.now() - edited > datetime.timedelta(
+                    days=settings.FANOUT_LIMIT_DAYS
+                ):
+                    self.state = "edited_fanned_out"  # add post quietly if it's old
             self.save()

     @classmethod
@@ -1109,13 +1116,13 @@ class Post(models.Model):
         handle = handle.lower()
         if "@" in handle:
             username, domain = handle.split("@", 1)
+            local = False
         else:
             username = handle
             domain = author.domain_id
+            local = author.local
         identity = Identity.by_username_and_domain(
-            username=username,
-            domain=domain,
-            fetch=True,
+            username=username, domain=domain, fetch=True, local=local
         )
         if identity is not None:
             mentions.add(identity)

View file

@@ -366,7 +366,11 @@ class Takahe:
             raise ValueError(f"Cannot find post to reply: {reply_to_pk}")
         if post:
             post.edit_local(
-                pre_conetent, content, visibility=visibility, type_data=data
+                pre_conetent,
+                content,
+                visibility=visibility,
+                type_data=data,
+                edited=post_time,
             )
         else:
             post = Post.create_local(
@@ -435,7 +439,7 @@ class Takahe:
             comment.visibility, user.preference.mastodon_publish_public
         )
         existing_post = None if share_as_new_post else comment.latest_post
-        post = Takahe.post(  # TODO post as Article?
+        post = Takahe.post(
             comment.owner.pk,
             pre_conetent,
             content,

View file

@@ -0,0 +1,77 @@
+from datetime import timedelta
+
+from django.apps import apps
+from django.core.management.base import BaseCommand
+from django.utils import timezone
+from tqdm import tqdm
+
+from users.models.user import _RESERVED_USERNAMES
+
+User = apps.get_model("users", "User")
+
+_RESERVED_USERNAMES = [
+    "connect",
+    "oauth2_login",
+    "__",
+    "admin",
+    "api",
+    "me",
+]
+
+
+class Command(BaseCommand):
+    help = "Generate unique username"
+
+    def process_users(self, users):
+        count = 0
+        for user in users:
+            if not user.is_active:
+                un = f"-{user.pk}-"
+            else:
+                un = user.mastodon_username
+            if not un:
+                un = f"_{user.pk}"
+            if un.lower() in _RESERVED_USERNAMES:
+                un = f"__{un}"
+            if User.objects.filter(username__iexact=un).exists():  # type: ignore
+                un = f"{un}_{user.pk}"
+            print(f"{user} -> {un}")
+            user.username = un
+            user.save(update_fields=["username"])
+            count += 1
+        print(f"{count} users updated")
+
+    def handle(self, *args, **options):
+        print("Processing active users")
+        # recent logged in users
+        proactive_users = User.objects.filter(  # type: ignore
+            username__isnull=True,
+            is_active=True,
+            last_login__gt=timezone.now() - timedelta(days=30),
+        ).order_by("date_joined")
+        # users with mastodon token still active
+        active_users = (
+            User.objects.filter(  # type: ignore
+                username__isnull=True,
+                is_active=True,
+            )
+            .exclude(mastodon_token="")
+            .order_by("date_joined")
+        )
+        # users with mastodon handler still reachable
+        reachable_users = User.objects.filter(  # type: ignore
+            username__isnull=True,
+            is_active=True,
+            mastodon_last_reachable__gt=timezone.now() - timedelta(days=7),
+        ).order_by("date_joined")
+        # all other users
+        users = User.objects.filter(  # type: ignore
+            username__isnull=True,
+        ).order_by("date_joined")
+        print(f"{proactive_users.count()} proactive users")
+        self.process_users(proactive_users)
+        print(f"{active_users.count()} active users")
+        self.process_users(active_users)
+        print(f"{reachable_users.count()} reachable users")
+        self.process_users(reachable_users)
+        print(f"{users.count()} other users")
+        self.process_users(users)
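
The command processes users in priority order (recently logged in, then token holders, then reachable, then the rest) so that users most likely to return keep their preferred handle. The fallback chain reads as a pure function; a condensed sketch, with `taken` standing in for the case-insensitive existence check above:

    def pick_username(user, reserved: set, taken) -> str:
        # Sketch mirroring process_users; field names as used in this diff.
        if not user.is_active:
            return f"-{user.pk}-"  # placeholder for deactivated accounts
        un = user.mastodon_username or f"_{user.pk}"
        if un.lower() in reserved:
            un = f"__{un}"  # dodge reserved names like "admin" or "api"
        if taken(un):
            un = f"{un}_{user.pk}"  # resolve case-insensitive collisions
        return un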

View file

@@ -1,7 +1,10 @@
 # Generated by Django 4.2.4 on 2023-08-09 16:54
+from datetime import timedelta
+
 from django.conf import settings
 from django.db import migrations, models, transaction
+from django.utils import timezone
 from loguru import logger
 from tqdm import tqdm
@@ -22,19 +25,17 @@ def init_domain(apps, schema_editor):
             "No users found, skip domain migration (if you are running initial migration for new site, pls ignore this)"
         )
         return
-    d = TakaheDomain.objects.filter(domain=domain).first()
-    if not d:
-        logger.info(f"Creating takahe domain {domain}")
-        TakaheDomain.objects.create(
-            domain=domain,
-            local=True,
-            service_domain=service_domain,
-            notes="NeoDB",
-            nodeinfo={},
-        )
-    else:
-        logger.info(f"Takahe domain {domain} already exists")
+    logger.info(f"Creating takahe domain {domain}")
+    TakaheDomain.objects.update_or_create(
+        domain=domain,
+        defaults={
+            "local": True,
+            "service_domain": service_domain,
+            "notes": "NeoDB",
+            "nodeinfo": {},
+            "state": "updated",
+        },
+    )
     TakaheConfig.objects.update_or_create(
         key="public_timeline",
         user=None,
@@ -85,7 +86,9 @@ def init_identity(apps, schema_editor):
         local=True,
         username=username,
         domain_name=domain,
-        deleted=None if user.is_active else user.updated,
+        deleted=None
+        if user.is_active
+        else user.mastodon_last_reachable + timedelta(days=90),
     )
     takahe_user = TakaheUser.objects.create(
         pk=user.pk, email=handler, admin=user.is_superuser
@@ -93,12 +96,14 @@ def init_identity(apps, schema_editor):
     takahe_identity = TakaheIdentity.objects.create(
         pk=user.pk,
         actor_uri=f"https://{service_domain or domain}/@{username}@{domain}/",
-        profile_uri=user.url,
+        profile_uri=f"https://{service_domain or domain}/users/{username}/",
         username=username,
         domain=tdomain,
         name=username,
         local=True,
-        discoverable=not user.preference.no_anonymous_view,
+        discoverable=True,
+        state="updated",
+        state_next_attempt=timezone.now() + timedelta(days=365 * 99),
     )
     takahe_identity.generate_keypair()
     takahe_user.identities.add(takahe_identity)

View file

@@ -6,9 +6,13 @@ from tqdm import tqdm

 def migrate_relationships(apps, schema_editor):
-    User = apps.get_model("users", "User")
-    APIdentity = apps.get_model("users", "APIdentity")
-    logger.info(f"Migrate user relationship")
+    # User = apps.get_model("users", "User")
+    # APIdentity = apps.get_model("users", "APIdentity")
+    from takahe.models import Block as TakaheBlock
+    from takahe.models import Follow as TakaheFollow
+    from users.models import APIdentity, User
+
+    logger.info(f"Migrate local user relationship")
     for user in tqdm(User.objects.filter(is_active=True)):
         identity = APIdentity.objects.get(user=user)
         for target in user.local_following.all():
@@ -20,11 +24,12 @@ def migrate_relationships(apps, schema_editor):
         for target in user.local_muting.all():
             target_identity = APIdentity.objects.get(user=target)
             identity.mute(target_identity)
-        user.sync_relationship()
+    logger.info(f"Migrate sync user relationship")
     for user in tqdm(User.objects.filter(is_active=True)):
-        identity = APIdentity.objects.get(user=user)
-        for target_identity in identity.follow_requesting_identities:
-            target_identity.accept_follow_request(identity)
+        user.sync_relationship()
+    logger.info(f"Update relationship states")
+    TakaheBlock.objects.all().update(state="sent")
+    TakaheFollow.objects.all().update(state="accepted")

 class Migration(migrations.Migration):

View file

@@ -236,26 +236,23 @@ class User(AbstractUser):
     def sync_relationship(self):
         from .apidentity import APIdentity

-        for target in self.mastodon_followers:
-            t = target.split("@")
-            target_identity = APIdentity.objects.filter(
-                user__mastodon_username=t[0], user__mastodon_site=t[1]
-            ).first()
-            if target_identity and not self.identity.is_following(target_identity):
+        def get_identities(accts: list):
+            q = Q(pk__in=[])
+            for acct in accts or []:
+                t = acct.split("@") if acct else []
+                if len(t) == 2:
+                    q = q | Q(mastodon_username=t[0], mastodon_site=t[1])
+            users = User.objects.filter(is_active=True).filter(q)
+            return APIdentity.objects.filter(user__in=users)
+
+        for target_identity in get_identities(self.mastodon_following):
+            if not self.identity.is_following(target_identity):
                 self.identity.follow(target_identity)
-        for target in self.mastodon_blocks:
-            t = target.split("@")
-            target_identity = APIdentity.objects.filter(
-                user__mastodon_username=t[0], user__mastodon_site=t[1]
-            ).first()
-            if target_identity and not self.identity.is_blocking(target_identity):
+        for target_identity in get_identities(self.mastodon_blocks):
+            if not self.identity.is_blocking(target_identity):
                 self.identity.block(target_identity)
-        for target in self.mastodon_mutes:
-            t = target.split("@")
-            target_identity = APIdentity.objects.filter(
-                user__mastodon_username=t[0], user__mastodon_site=t[1]
-            ).first()
-            if target_identity and not self.identity.is_muting(target_identity):
+        for target_identity in get_identities(self.mastodon_mutes):
+            if not self.identity.is_muting(target_identity):
                 self.identity.mute(target_identity)

     def sync_identity(self):
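
The rewrite replaces per-acct `.first()` lookups with one OR'ed query per relationship type. `Q(pk__in=[])` is an always-false seed, so a list with zero valid accts yields an empty queryset instead of an unfiltered one. A standalone sketch of the trick, with hypothetical acct strings:

    from django.db.models import Q

    q = Q(pk__in=[])  # matches no rows
    for acct in ["alice@example.social", "bob@example.town"]:  # hypothetical accts
        username, site = acct.split("@", 1)
        q |= Q(mastodon_username=username, mastodon_site=site)
    # User.objects.filter(is_active=True).filter(q) resolves all accts in one query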