streamline migration from 0.8.x #397

Her Email 2023-11-26 17:23:53 -05:00 committed by Henri Dickson
parent bec9f2f8c3
commit 80874804c8
18 changed files with 279 additions and 68 deletions

View file

@@ -192,6 +192,10 @@ REDIRECT_URIS = env(
# Timeout of requests to Mastodon, in seconds
MASTODON_TIMEOUT = env("NEODB_LOGIN_MASTODON_TIMEOUT", default=10) # type: ignore
TAKAHE_REMOTE_TIMEOUT = MASTODON_TIMEOUT
NEODB_USER_AGENT = f"NeoDB/{NEODB_VERSION} (+{SITE_INFO.get('site_url', 'undefined')})"
TAKAHE_USER_AGENT = NEODB_USER_AGENT
# Scope when creating Mastodon apps
# Alternatively, use "read write follow" to avoid re-authorize when migrating to a future version with more features
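These two new settings centralize the outbound HTTP identity; the per-file changes below replace hard-coded "NeoDB/0.x" literals with them. A minimal sketch of the consuming side (the URL is hypothetical; only django.conf.settings and requests are assumed):

import requests
from django.conf import settings

# Any outbound call can now reuse the shared, version-stamped identity
# string instead of a stale hard-coded literal.
resp = requests.get(
    "https://example.org/nodeinfo/2.0",  # hypothetical endpoint
    headers={"User-Agent": settings.NEODB_USER_AGENT},
    timeout=settings.MASTODON_TIMEOUT,
)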

View file

@@ -135,7 +135,7 @@ class DiscogsMaster(AbstractSite):
def get_discogs_data(data_type: str, discogs_id):
if data_type not in ("releases", "masters"):
raise ValueError("data_type can only be in ('releases' or 'masters')")
user_agent_string = "Neodb/0.1"
user_agent_string = settings.NEODB_USER_AGENT
user_token = settings.DISCOGS_API_KEY
headers = {
"User-Agent": user_agent_string,

View file

@@ -32,7 +32,10 @@ class FediverseInstance(AbstractSite):
"Performance": Performance,
"PerformanceProduction": PerformanceProduction,
}
request_header = {"User-Agent": "NeoDB/0.5", "Accept": "application/activity+json"}
request_header = {
"User-Agent": settings.NEODB_USER_AGENT,
"Accept": "application/activity+json",
}
@classmethod
def id_to_url(cls, id_value):

View file

@@ -41,7 +41,7 @@ class RSS(AbstractSite):
feed = pickle.load(open(_local_response_path + get_mock_file(url), "rb"))
else:
req = urllib.request.Request(url)
req.add_header("User-Agent", "NeoDB/0.5")
req.add_header("User-Agent", settings.NEODB_USER_AGENT)
try:
feed = podcastparser.parse(url, urllib.request.urlopen(req, timeout=3))
except:

View file

@@ -41,10 +41,10 @@ class Command(BaseCommand):
def handle(self, *args, **options):
if options["cancel"]:
JobManager.cancel()
JobManager.cancel_all()
if options["schedule"]:
JobManager.cancel() # cancel previously scheduled jobs if any
JobManager.schedule()
JobManager.cancel_all() # cancel previously scheduled jobs if any
JobManager.schedule_all()
if options["runonce"]:
for job_id in options["runonce"]:
run = JobManager.run(job_id)

View file

@@ -18,6 +18,7 @@ class Command(BaseCommand):
domain=domain,
local=True,
service_domain=service_domain,
state="updated",
notes="NeoDB",
nodeinfo={},
)

View file

@@ -53,12 +53,12 @@ class JobManager:
return target
@classmethod
def schedule(cls):
def schedule_all(cls):
for j in cls.registry:
j.schedule()
@classmethod
def cancel(cls):
def cancel_all(cls):
for j in cls.registry:
j.cancel()
@@ -77,11 +77,11 @@ class JobManager:
return registry.get_job_ids()
@classmethod
def schedule_all(cls):
def reschedule_all(cls):
# TODO rewrite lazy import in a better way
from catalog.jobs import DiscoverGenerator, PodcastUpdater
from mastodon.jobs import MastodonSiteCheck
from users.jobs import MastodonUserSync
cls.cancel()
cls.schedule()
cls.cancel_all()
cls.schedule_all()
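After the rename, the lifecycle reads unambiguously: schedule_all and cancel_all act on every job class in the registry, while reschedule_all is the cancel-then-schedule combination run after migrations. A sketch of the intended usage (import path assumed):

from common.models import JobManager  # assumed import path

JobManager.cancel_all()      # cancel every registered job
JobManager.schedule_all()    # schedule every registered job
JobManager.reschedule_all()  # lazy-import job classes, then cancel_all() + schedule_all()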

View file

@@ -20,12 +20,15 @@ class Setup:
"""
def create_site(self, domain, service_domain):
TakaheDomain.objects.create(
TakaheDomain.objects.update_or_create(
domain=domain,
local=True,
service_domain=service_domain,
notes="NeoDB",
nodeinfo={},
defaults={
"local": True,
"service_domain": service_domain,
"notes": "NeoDB",
"nodeinfo": {},
"state": "updated",
},
)
TakaheConfig.objects.update_or_create(
key="public_timeline",
@@ -156,8 +159,9 @@ class Setup:
# Register cron jobs if not yet
if settings.DISABLE_CRON:
logger.info("Cron jobs are disabled.")
JobManager.cancel_all()
else:
JobManager.schedule_all()
JobManager.reschedule_all()
logger.info("Finished post-migration setup.")
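Swapping objects.create for objects.update_or_create is what makes this setup step safe to re-run: the keyword arguments outside defaults select the row, and defaults are written whether the row is created or updated. The general shape, with hypothetical values for illustration:

obj, created = TakaheDomain.objects.update_or_create(
    domain="neodb.example",  # lookup key (hypothetical value)
    defaults={
        "local": True,
        "service_domain": "social.neodb.example",  # hypothetical
        "notes": "NeoDB",
        "nodeinfo": {},
        "state": "updated",  # mark as already processed by the state machine
    },
)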

View file

@@ -39,6 +39,8 @@ x-shared:
NEODB_SEARCH_URL: ${NEODB_SEARCH_URL:-typesense://user:eggplant@typesense:8108/catalog}
NEODB_EMAIL_URL:
NEODB_EMAIL_FROM: no-reply@${NEODB_SITE_DOMAIN}
NEODB_FANOUT_LIMIT_DAYS:
TAKAHE_FANOUT_LIMIT_DAYS:
NEODB_DOWNLOADER_PROXY_LIST:
NEODB_DOWNLOADER_BACKUP_PROXY:
NEODB_DOWNLOADER_SAVE_DIR:
@@ -53,8 +55,8 @@ x-shared:
TAKAHE_MEDIA_BACKEND: local://www/media/
TAKAHE_MEDIA_ROOT: /www/media
TAKAHE_USE_PROXY_HEADERS: true
TAKAHE_STATOR_CONCURRENCY: 4
TAKAHE_STATOR_CONCURRENCY_PER_MODEL: 2
TAKAHE_STATOR_CONCURRENCY: ${TAKAHE_STATOR_CONCURRENCY:-4}
TAKAHE_STATOR_CONCURRENCY_PER_MODEL: ${TAKAHE_STATOR_CONCURRENCY_PER_MODEL:-2}
TAKAHE_DEBUG: ${NEODB_DEBUG:-True}
TAKAHE_VENV: /takahe-venv
SPOTIFY_API_KEY:
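The ${VAR:-default} form is standard shell parameter expansion as understood by docker compose: the default applies when the variable is unset or empty, so operators can now tune stator concurrency without editing the file. The same semantics in Python, for reference:

import os

# "${TAKAHE_STATOR_CONCURRENCY:-4}": unset and empty string both
# fall through to the default.
concurrency = int(os.environ.get("TAKAHE_STATOR_CONCURRENCY") or 4)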

View file

@@ -2,6 +2,14 @@
from django.db import migrations, models
_sql = """DELETE
FROM journal_shelflogentry a USING journal_shelflogentry b
WHERE a.ctid < b.ctid
AND a.item_id=b.item_id
AND a.owner_id=b.owner_id
AND a.timestamp=b.timestamp
AND a.shelf_type=b.shelf_type"""
class Migration(migrations.Migration):
@@ -10,6 +18,12 @@ class Migration(migrations.Migration):
]
operations = [
migrations.RunSQL("SET CONSTRAINTS ALL IMMEDIATE;"),
migrations.RunSQL(
sql=_sql,
reverse_sql=migrations.RunSQL.noop,
),
migrations.RunSQL("SET CONSTRAINTS ALL DEFERRED;"),
migrations.AddConstraint(
model_name="shelflogentry",
constraint=models.UniqueConstraint(
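The raw SQL is a standard PostgreSQL dedup: self-join the table on the four key columns and delete every row that has a duplicate with a larger physical row id (ctid), leaving exactly one survivor per group so the unique constraint can then be added. A hedged ORM sketch that surfaces the same duplicate groups (model import path assumed):

from django.db.models import Count
from journal.models import ShelfLogEntry  # assumed import path

duplicates = (
    ShelfLogEntry.objects
    .values("item_id", "owner_id", "timestamp", "shelf_type")
    .annotate(n=Count("id"))
    .filter(n__gt=1)
)  # the groups the DELETE above collapses to a single row each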

View file

@@ -53,7 +53,7 @@ API_CREATE_APP = "/api/v1/apps"
# GET
API_SEARCH = "/api/v2/search"
USER_AGENT = f"NeoDB/{settings.NEODB_VERSION} (+{settings.SITE_INFO.get('site_url', 'undefined')})"
USER_AGENT = settings.NEODB_USER_AGENT
get = functools.partial(requests.get, timeout=settings.MASTODON_TIMEOUT)
put = functools.partial(requests.put, timeout=settings.MASTODON_TIMEOUT)

View file

@@ -0,0 +1,88 @@
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand
from django.db.models import Count, F
from loguru import logger
from tqdm import tqdm
from catalog.common import *
from catalog.common.models import *
from catalog.models import *
from journal.models import *
from takahe.utils import *
from users.models import APIdentity
from users.models import User as NeoUser
def content_type_id(cls):
return ContentType.objects.get(app_label="journal", model=cls.__name__.lower()).pk
class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"--verbose",
action="store_true",
)
parser.add_argument(
"--post",
action="store_true",
)
parser.add_argument(
"--like",
action="store_true",
)
parser.add_argument(
"--post-new",
action="store_true",
)
parser.add_argument("--start", default=0, action="store")
parser.add_argument("--count", default=0, action="store")
def process_post(self):
logger.info(f"Processing posts...")
qs = Piece.objects.filter(
polymorphic_ctype__in=[
content_type_id(ShelfMember),
content_type_id(Comment),
content_type_id(Review),
]
).order_by("id")
if self.starting_id:
qs = qs.filter(id__gte=self.starting_id)
tracker = tqdm(qs.iterator(), total=self.count_est or qs.count())
for p in tracker:
tracker.set_postfix_str(f"{p.id}")
if p.__class__ == ShelfMember:
mark = Mark(p.owner, p.item)
Takahe.post_mark(mark, self.post_new)
elif p.__class__ == Comment:
if p.item.__class__ in [PodcastEpisode, TVEpisode]:
Takahe.post_comment(p, self.post_new)
elif p.__class__ == Review:
Takahe.post_review(p, self.post_new)
def process_like(self):
logger.info(f"Processing likes...")
qs = Like.objects.order_by("id")
tracker = tqdm(qs)
for like in tracker:
post_id = like.target.latest_post_id
if post_id:
Takahe.like_post(post_id, like.owner.pk)
else:
logger.warning(f"Post not found for like {like.id}")
def handle(self, *args, **options):
self.verbose = options["verbose"]
self.post_new = options["post_new"]
self.starting_id = int(options["start"])
self.count_est = int(options["count"])
if options["post"]:
self.process_post()
if options["like"]:
self.process_like()
self.stdout.write(self.style.SUCCESS(f"Done."))
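The command's name is taken from its filename, which this view does not show; assuming it were, say, backfill_takahe.py, a typical invocation would be:

from django.core.management import call_command

# Hypothetical command name; --start/--count narrow the Piece id range
# processed by process_post().
call_command("backfill_takahe", post=True, like=True, start=0, count=0)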

View file

@@ -526,7 +526,7 @@ class Identity(models.Model):
based on probing host-meta.
"""
with httpx.Client(
timeout=settings.SETUP.REMOTE_TIMEOUT,
timeout=settings.TAKAHE_REMOTE_TIMEOUT,
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
) as client:
try:
@@ -565,7 +565,7 @@ class Identity(models.Model):
# Go make a Webfinger request
with httpx.Client(
timeout=settings.SETUP.REMOTE_TIMEOUT,
timeout=settings.TAKAHE_REMOTE_TIMEOUT,
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
) as client:
try:
@@ -1066,6 +1066,7 @@ class Post(models.Model):
attachments: list | None = None,
attachment_attributes: list | None = None,
type_data: dict | None = None,
edited: datetime.datetime | None = None,
):
with transaction.atomic():
# Strip all HTML and apply linebreaks filter
@@ -1099,6 +1100,12 @@ class Post(models.Model):
self.state_changed = timezone.now()
self.state_next_attempt = None
self.state_locked_until = None
if edited and edited < timezone.now():
self.published = edited
if timezone.now() - edited > datetime.timedelta(
days=settings.FANOUT_LIMIT_DAYS
):
self.state = "edited_fanned_out" # add post quietly if it's old
self.save()
@classmethod
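This is where the FANOUT_LIMIT_DAYS window surfaced in the compose file above presumably lands: a backdated edit keeps its original published time, and if it predates the window the post is parked in the edited_fanned_out state so followers are not re-notified. The cutoff test in isolation, with a hypothetical window:

import datetime
from django.utils import timezone

FANOUT_LIMIT_DAYS = 9  # hypothetical value, wired via the env vars above
edited = timezone.now() - datetime.timedelta(days=30)
too_old = timezone.now() - edited > datetime.timedelta(days=FANOUT_LIMIT_DAYS)
# too_old -> True: save the edit quietly instead of fanning it out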
@@ -1109,13 +1116,13 @@ class Post(models.Model):
handle = handle.lower()
if "@" in handle:
username, domain = handle.split("@", 1)
local = False
else:
username = handle
domain = author.domain_id
local = author.local
identity = Identity.by_username_and_domain(
username=username,
domain=domain,
fetch=True,
username=username, domain=domain, fetch=True, local=local
)
if identity is not None:
mentions.add(identity)
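The added local flag fixes bare mentions: a handle with no domain part now resolves against the author's own domain and locality instead of always being treated as remote. The splitting rule on its own (a sketch, not the actual implementation):

def split_handle(handle, author):
    # "user@host" -> remote lookup; bare "user" -> author's own domain
    if "@" in handle:
        username, domain = handle.split("@", 1)
        return username, domain, False
    return handle, author.domain_id, author.local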

View file

@@ -366,7 +366,11 @@ class Takahe:
raise ValueError(f"Cannot find post to reply: {reply_to_pk}")
if post:
post.edit_local(
pre_conetent, content, visibility=visibility, type_data=data
pre_conetent,
content,
visibility=visibility,
type_data=data,
edited=post_time,
)
else:
post = Post.create_local(
@@ -435,7 +439,7 @@ class Takahe:
comment.visibility, user.preference.mastodon_publish_public
)
existing_post = None if share_as_new_post else comment.latest_post
post = Takahe.post( # TODO post as Article?
post = Takahe.post(
comment.owner.pk,
pre_conetent,
content,

View file

@@ -0,0 +1,77 @@
from datetime import timedelta
from django.apps import apps
from django.core.management.base import BaseCommand
from django.utils import timezone
from tqdm import tqdm
from users.models.user import _RESERVED_USERNAMES
User = apps.get_model("users", "User")
_RESERVED_USERNAMES = [
"connect",
"oauth2_login",
"__",
"admin",
"api",
"me",
]
class Command(BaseCommand):
help = "Generate unique username"
def process_users(self, users):
count = 0
for user in users:
if not user.is_active:
un = f"-{user.pk}-"
else:
un = user.mastodon_username
if not un:
un = f"_{user.pk}"
if un.lower() in _RESERVED_USERNAMES:
un = f"__{un}"
if User.objects.filter(username__iexact=un).exists(): # type: ignore
un = f"{un}_{user.pk}"
print(f"{user} -> {un}")
user.username = un
user.save(update_fields=["username"])
count += 1
print(f"{count} users updated")
def handle(self, *args, **options):
print("Processing active users")
# recent logged in users
proactive_users = User.objects.filter( # type: ignore
username__isnull=True,
is_active=True,
last_login__gt=timezone.now() - timedelta(days=30),
).order_by("date_joined")
# users with mastodon token still active
active_users = (
User.objects.filter( # type: ignore
username__isnull=True,
is_active=True,
)
.exclude(mastodon_token="")
.order_by("date_joined")
)
# users with mastodon handler still reachable
reachable_users = User.objects.filter( # type: ignore
username__isnull=True,
is_active=True,
mastodon_last_reachable__gt=timezone.now() - timedelta(days=7),
).order_by("date_joined")
# all other users
users = User.objects.filter( # type: ignore
username__isnull=True,
).order_by("date_joined")
print(f"{proactive_users.count()} proactive users")
self.process_users(proactive_users)
print(f"{active_users.count()} active users")
self.process_users(active_users)
print(f"{reachable_users.count()} reachable users")
self.process_users(reachable_users)
print(f"{users.count()} other users")
self.process_users(users)
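Condensed, the per-user fallback chain is: a placeholder for inactive accounts, else the Mastodon username, else an underscore-prefixed pk; then a double-underscore prefix to dodge reserved route names and a pk suffix to break case-insensitive collisions. As a pure function (a sketch of the logic above, not the actual helper):

def pick_username(user, reserved, taken):
    # `reserved` and `taken` are sets of lowercased names already in use
    if not user.is_active:
        un = f"-{user.pk}-"
    else:
        un = user.mastodon_username or f"_{user.pk}"
    if un.lower() in reserved:
        un = f"__{un}"
    if un.lower() in taken:
        un = f"{un}_{user.pk}"
    return un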

View file

@@ -1,7 +1,10 @@
# Generated by Django 4.2.4 on 2023-08-09 16:54
from datetime import timedelta
from django.conf import settings
from django.db import migrations, models, transaction
from django.utils import timezone
from loguru import logger
from tqdm import tqdm
@@ -22,19 +25,17 @@ def init_domain(apps, schema_editor):
"No users found, skip domain migration (if you are running initial migration for new site, pls ignore this)"
)
return
d = TakaheDomain.objects.filter(domain=domain).first()
if not d:
logger.info(f"Creating takahe domain {domain}")
TakaheDomain.objects.create(
TakaheDomain.objects.update_or_create(
domain=domain,
local=True,
service_domain=service_domain,
notes="NeoDB",
nodeinfo={},
defaults={
"local": True,
"service_domain": service_domain,
"notes": "NeoDB",
"nodeinfo": {},
"state": "updated",
},
)
else:
logger.info(f"Takahe domain {domain} already exists")
TakaheConfig.objects.update_or_create(
key="public_timeline",
user=None,
@@ -85,7 +86,9 @@ def init_identity(apps, schema_editor):
local=True,
username=username,
domain_name=domain,
deleted=None if user.is_active else user.updated,
deleted=None
if user.is_active
else user.mastodon_last_reachable + timedelta(days=90),
)
takahe_user = TakaheUser.objects.create(
pk=user.pk, email=handler, admin=user.is_superuser
@@ -93,12 +96,14 @@ def init_identity(apps, schema_editor):
takahe_identity = TakaheIdentity.objects.create(
pk=user.pk,
actor_uri=f"https://{service_domain or domain}/@{username}@{domain}/",
profile_uri=user.url,
profile_uri=f"https://{service_domain or domain}/users/{username}/",
username=username,
domain=tdomain,
name=username,
local=True,
discoverable=not user.preference.no_anonymous_view,
discoverable=True,
state="updated",
state_next_attempt=timezone.now() + timedelta(days=365 * 99),
)
takahe_identity.generate_keypair()
takahe_user.identities.add(takahe_identity)

View file

@@ -6,9 +6,13 @@ from tqdm import tqdm
def migrate_relationships(apps, schema_editor):
User = apps.get_model("users", "User")
APIdentity = apps.get_model("users", "APIdentity")
logger.info(f"Migrate user relationship")
# User = apps.get_model("users", "User")
# APIdentity = apps.get_model("users", "APIdentity")
from takahe.models import Block as TakaheBlock
from takahe.models import Follow as TakaheFollow
from users.models import APIdentity, User
logger.info(f"Migrate local user relationship")
for user in tqdm(User.objects.filter(is_active=True)):
identity = APIdentity.objects.get(user=user)
for target in user.local_following.all():
@@ -20,11 +24,12 @@ def migrate_relationships(apps, schema_editor):
for target in user.local_muting.all():
target_identity = APIdentity.objects.get(user=target)
identity.mute(target_identity)
user.sync_relationship()
logger.info(f"Migrate sync user relationship")
for user in tqdm(User.objects.filter(is_active=True)):
identity = APIdentity.objects.get(user=user)
for target_identity in identity.follow_requesting_identities:
target_identity.accept_follow_request(identity)
user.sync_relationship()
logger.info(f"Update relationship states")
TakaheBlock.objects.all().update(state="sent")
TakaheFollow.objects.all().update(state="accepted")
class Migration(migrations.Migration):

View file

@@ -236,26 +236,23 @@ class User(AbstractUser):
def sync_relationship(self):
from .apidentity import APIdentity
for target in self.mastodon_followers:
t = target.split("@")
target_identity = APIdentity.objects.filter(
user__mastodon_username=t[0], user__mastodon_site=t[1]
).first()
if target_identity and not self.identity.is_following(target_identity):
def get_identities(accts: list):
q = Q(pk__in=[])
for acct in accts or []:
t = acct.split("@") if acct else []
if len(t) == 2:
q = q | Q(mastodon_username=t[0], mastodon_site=t[1])
users = User.objects.filter(is_active=True).filter(q)
return APIdentity.objects.filter(user__in=users)
for target_identity in get_identities(self.mastodon_following):
if not self.identity.is_following(target_identity):
self.identity.follow(target_identity)
for target in self.mastodon_blocks:
t = target.split("@")
target_identity = APIdentity.objects.filter(
user__mastodon_username=t[0], user__mastodon_site=t[1]
).first()
if target_identity and not self.identity.is_blocking(target_identity):
for target_identity in get_identities(self.mastodon_blocks):
if not self.identity.is_blocking(target_identity):
self.identity.block(target_identity)
for target in self.mastodon_mutes:
t = target.split("@")
target_identity = APIdentity.objects.filter(
user__mastodon_username=t[0], user__mastodon_site=t[1]
).first()
if target_identity and not self.identity.is_muting(target_identity):
for target_identity in get_identities(self.mastodon_mutes):
if not self.identity.is_muting(target_identity):
self.identity.mute(target_identity)
def sync_identity(self):
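The rewrite replaces one filter().first() round-trip per remote handle with a single OR'd query per relationship type. The Q construction in isolation, with hypothetical handles:

from django.db.models import Q

accts = ["alice@mastodon.social", "bob@fosstodon.org"]  # hypothetical
q = Q(pk__in=[])  # always-false seed, so an empty list matches nothing
for acct in accts:
    username, site = acct.split("@", 1)
    q |= Q(mastodon_username=username, mastodon_site=site)
# User.objects.filter(is_active=True).filter(q): one query for all handles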