lib.itmens/catalog/jobs/discover.py
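# Hourly background job that precomputes data for the public "discover" page:
# per-category galleries of popular items, a short trends list, featured
# collections, popular tags and popular posts, all written to the cache.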

import time
from datetime import timedelta

from django.conf import settings
from django.core.cache import cache
from django.db.models import Count, F
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from loguru import logger

from catalog.models import *
from common.models import BaseJob, JobManager
from journal.models import (
    Collection,
    Comment,
    ShelfMember,
    TagManager,
    q_item_in_category,
)
from takahe.utils import Takahe
from users.models import APIdentity

MAX_ITEMS_PER_PERIOD = 12
MIN_MARKS = settings.MIN_MARKS_FOR_DISCOVER
MAX_DAYS_FOR_PERIOD = 96
MIN_DAYS_FOR_PERIOD = 6
DAYS_FOR_TRENDS = 3
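# Galleries keep at most MAX_ITEMS_PER_PERIOD items per time window, each with
# at least MIN_MARKS recent marks, over windows shrinking from
# MAX_DAYS_FOR_PERIOD down to MIN_DAYS_FOR_PERIOD days; the trends list only
# looks back DAYS_FOR_TRENDS days.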


@JobManager.register
class DiscoverGenerator(BaseJob):
    interval = timedelta(hours=1)
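
    # items in a category marked by at least MIN_MARKS users within the last
    # `days` days, most marked first, excluding ids already in existing_ids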
    def get_popular_marked_item_ids(self, category, days, existing_ids):
        item_ids = [
            m["item_id"]
            for m in ShelfMember.objects.filter(q_item_in_category(category))
            .filter(created_time__gt=timezone.now() - timedelta(days=days))
            .exclude(item_id__in=existing_ids)
            .values("item_id")
            .annotate(num=Count("item_id"))
            .filter(num__gte=MIN_MARKS)
            .order_by("-num")[:MAX_ITEMS_PER_PERIOD]
        ]
        return item_ids
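
    # podcast programs whose episodes drew at least MIN_MARKS comments within
    # the last `days` days, most commented first, excluding existing_ids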
    def get_popular_commented_podcast_ids(self, days, existing_ids):
        return list(
            Comment.objects.filter(q_item_in_category(ItemCategory.Podcast))
            .filter(created_time__gt=timezone.now() - timedelta(days=days))
            .annotate(p=F("item__podcastepisode__program"))
            .filter(p__isnull=False)
            .exclude(p__in=existing_ids)
            .values("p")
            .annotate(num=Count("p"))
            .filter(num__gte=MIN_MARKS)
            .order_by("-num")
            .values_list("p", flat=True)[:MAX_ITEMS_PER_PERIOD]
        )
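
    # if both a TV season and its parent show were selected, drop the show
    # and keep the season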
    def cleanup_shows(self, items):
        seasons = [i for i in items if i.__class__ == TVSeason]
        for season in seasons:
            if season.show in items:
                items.remove(season.show)
        return items
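
    # refresh galleries, trends, featured collections, popular tags and posts,
    # then write them all to the cache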
    def run(self):
        logger.info("Discover data update start.")
        cache_key = "public_gallery"
        gallery_categories = [
            ItemCategory.Book,
            ItemCategory.Movie,
            ItemCategory.TV,
            ItemCategory.Game,
            ItemCategory.Music,
            ItemCategory.Podcast,
        ]
        gallery_list = []
        trends = []
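        # one "popular" gallery per category; trends are derived from recent
        # marks while iterating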
        for category in gallery_categories:
            days = MAX_DAYS_FOR_PERIOD
            item_ids = []
            while days >= MIN_DAYS_FOR_PERIOD:
                ids = self.get_popular_marked_item_ids(category, days, item_ids)
                logger.info(f"Most marked {category} in last {days} days: {len(ids)}")
                item_ids = ids + item_ids
                days //= 2
            if category == ItemCategory.Podcast:
                days = MAX_DAYS_FOR_PERIOD // 4
                extra_ids = self.get_popular_commented_podcast_ids(days, item_ids)
                logger.info(
                    f"Most commented podcast in last {days} days: {len(extra_ids)}"
                )
                item_ids = extra_ids + item_ids
            items = [Item.objects.get(pk=i) for i in item_ids]
            if category == ItemCategory.TV:
                items = self.cleanup_shows(items)
            gallery_list.append(
                {
                    "name": "popular_" + category.value,
                    "category": category,
                    "items": items,
                }
            )
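            # trends: the five most marked items of the last DAYS_FOR_TRENDS
            # days, plus up to three most commented podcast programs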
            item_ids = self.get_popular_marked_item_ids(category, DAYS_FOR_TRENDS, [])[
                :5
            ]
            if category == ItemCategory.Podcast:
                item_ids += self.get_popular_commented_podcast_ids(
                    DAYS_FOR_TRENDS, item_ids
                )[:3]
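            # one trend entry per item, with a single history point counting
            # marks from the last 7 days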
            for i in Item.objects.filter(pk__in=set(item_ids)):
                cnt = ShelfMember.objects.filter(
                    item=i, created_time__gt=timezone.now() - timedelta(days=7)
                ).count()
                trends.append(
                    {
                        "title": i.title,
                        "description": i.brief,
                        "url": i.absolute_url,
                        "image": i.cover_image_url or "",
                        "provider_name": str(i.category.label),
                        "history": [
                            {
                                "day": str(int(time.time() / 38400 - 3) * 38400),
                                "accounts": str(cnt),
                                "uses": str(cnt),
                            }
                        ],
                    }
                )
        trends.sort(key=lambda x: int(x["history"][0]["accounts"]), reverse=True)
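
        # featured collections: the 40 most recently edited public
        # (visibility=0) collections with at least MIN_MARKS interactions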
        collection_ids = (
            Collection.objects.filter(visibility=0)
            .annotate(num=Count("interactions"))
            .filter(num__gte=MIN_MARKS)
            .order_by("-edited_time")
            .values_list("pk", flat=True)[:40]
        )

        tags = TagManager.popular_tags(days=14, limit=20)
        post_ids = Takahe.get_popular_posts(days=14, limit=20).values_list(
            "pk", flat=True
        )
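
        # cache entries never expire; they are simply overwritten on the next run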
        cache.set(cache_key, gallery_list, timeout=None)
        cache.set("trends_links", trends, timeout=None)
        cache.set("featured_collections", collection_ids, timeout=None)
        cache.set("popular_tags", list(tags), timeout=None)
        cache.set("popular_posts", list(post_ids), timeout=None)
        logger.info(
            f"Discover data updated, trends: {len(trends)}, collections: {len(collection_ids)}, tags: {len(tags)}, posts: {len(post_ids)}."
        )