This commit is contained in:
Your Name 2023-07-12 23:05:19 -04:00 committed by Henri Dickson
parent 9f83b7618c
commit 3e93dae5b7
5 changed files with 29 additions and 17 deletions

View file

@@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand
from catalog.common import *
import re
from urllib.parse import urljoin
from loguru import logger
class Command(BaseCommand):
@@ -12,6 +13,7 @@ class Command(BaseCommand):
parser.add_argument("--pattern", help="pattern to navigate", action="store")
def handle(self, *args, **options):
logger.info("Crawl starts.")
queue = [str(options["start"])]
pattern = options["pattern"] or ""
history = []
@@ -22,7 +24,7 @@ class Command(BaseCommand):
while queue and len(history) < 1000:
url = queue.pop(0)
history.append(url)
self.stdout.write(f"Navigating {url}")
logger.info(f"Navigating {url}")
content = ProxiedDownloader(url).download().html()
urls = content.xpath("//a/@href")
for _u in urls:
@@ -33,8 +35,8 @@ class Command(BaseCommand):
u = site.url
if u not in history:
history.append(u)
self.stdout.write(f"Fetching {u}")
logger.info(f"Fetching {u}")
site.get_resource_ready()
elif pattern and u.find(pattern) >= 0:
queue.append(u)
self.stdout.write(self.style.SUCCESS(f"Done."))
logger.info("Crawl finished.")

View file

@@ -1,12 +1,11 @@
from django.core.management.base import BaseCommand
from django.core.cache import cache
import pprint
from catalog.models import *
from journal.models import ShelfMember, query_item_category, ItemCategory, Comment
from datetime import timedelta
from django.utils import timezone
from django.db.models import Count, F
from loguru import logger
MAX_ITEMS_PER_PERIOD = 12
MIN_MARKS = 2
@@ -60,6 +59,7 @@ class Command(BaseCommand):
def handle(self, *args, **options):
if options["update"]:
logger.info("Discover data update start.")
cache_key = "public_gallery"
gallery_categories = [
ItemCategory.Book,
@@ -75,16 +75,16 @@ class Command(BaseCommand):
item_ids = []
while days >= MIN_DAYS_FOR_PERIOD:
ids = self.get_popular_marked_item_ids(category, days, item_ids)
self.stdout.write(
f"Marked {category} for last {days} days: {len(ids)}"
logger.info(
f"Most marked {category} in last {days} days: {len(ids)}"
)
item_ids = ids + item_ids
days //= 2
if category == ItemCategory.Podcast:
days = MAX_DAYS_FOR_PERIOD // 4
extra_ids = self.get_popular_commented_podcast_ids(days, item_ids)
self.stdout.write(
f"Most commented podcast for last {days} days: {len(extra_ids)}"
logger.info(
f"Most commented podcast in last {days} days: {len(extra_ids)}"
)
item_ids = extra_ids + item_ids
items = [Item.objects.get(pk=i) for i in item_ids]
@@ -99,4 +99,4 @@ class Command(BaseCommand):
}
)
cache.set(cache_key, gallery_list, timeout=None)
self.stdout.write(self.style.SUCCESS(f"Done."))
logger.info("Discover data updated.")

View file

@@ -9,6 +9,7 @@ from tqdm import tqdm
from time import sleep
from datetime import timedelta
from django.utils import timezone
from loguru import logger
class Command(BaseCommand):
@@ -31,15 +32,22 @@ class Command(BaseCommand):
self.stdout.write(f"{p.episodes.count()}\t{p.title}\n")
def update(self):
logger.info("Podcasts update start.")
count = 0
qs = Podcast.objects.filter(is_deleted=False, merged_to_item__isnull=True)
for p in tqdm(qs.order_by("pk")):
if (
p.primary_lookup_id_type == IdType.RSS
and p.primary_lookup_id_value is not None
):
logger.info(f"updating {p}")
c = p.episodes.count()
site = RSS(p.feed_url)
site.scrape_additional_data()
self.stdout.write(self.style.SUCCESS("Podcasts updated."))
c2 = p.episodes.count()
logger.info(f"updated {p}, {c2-c} new episodes.")
count += c2 - c
logger.info(f"Podcasts update finished, {count} new episodes total.")
def handle(self, *args, **options):
if options["update"]:

View file

@@ -394,7 +394,7 @@ class User(AbstractUser):
self.merge_relationships()
updated = True
elif code == 401:
logger.error(f"401 {self}")
logger.error(f"Refresh mastodon data error 401 for {self}")
self.mastodon_token = ""
return updated

View file

@@ -17,10 +17,11 @@ def refresh_mastodon_data_task(user_id, token=None):
user.save()
logger.info(f"{user} mastodon data refreshed")
else:
logger.error(f"{user} mastodon data refresh failed")
logger.warning(f"{user} mastodon data refresh failed")
def refresh_all_mastodon_data_task(ttl_hours):
logger.info(f"Mastodon data refresh start")
count = 0
for user in tqdm(
User.objects.filter(
@@ -29,15 +30,16 @@ def refresh_all_mastodon_data_task(ttl_hours):
)
):
if user.mastodon_token or user.mastodon_refresh_token:
tqdm.write(f"Refreshing {user}")
logger.info(f"Refreshing {user}")
if user.refresh_mastodon_data():
tqdm.write(f"Refreshed {user}")
logger.info(f"Refreshed {user}")
count += 1
else:
tqdm.write(f"Refresh failed for {user}")
logger.warning(f"Refresh failed for {user}")
user.save()
else:
tqdm.write(f"Missing token for {user}")
logger.warning(f"Missing token for {user}")
logger.info(f"{count} users updated")
c = User.merge_rejected_by()
logger.info(f"{c} users's rejecting list updated")
logger.info(f"Mastodon data refresh done")