From 8d1503e6eb303e5f58f32cae586fc6776810f44e Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 20 Jun 2022 12:21:54 -0400 Subject: [PATCH] resync --- common/scrapers/douban.py | 2 +- sync/management/commands/resync.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/common/scrapers/douban.py b/common/scrapers/douban.py index 6130203d..165ba8f8 100644 --- a/common/scrapers/douban.py +++ b/common/scrapers/douban.py @@ -128,7 +128,7 @@ class DoubanScrapperMixin: check_content() latest() - if content is None: + if content is None and settings.LOCAL_PROXY is None: wayback_cdx() if content is None: diff --git a/sync/management/commands/resync.py b/sync/management/commands/resync.py index 1fa655e9..a5a0a73c 100644 --- a/sync/management/commands/resync.py +++ b/sync/management/commands/resync.py @@ -13,7 +13,16 @@ import os class Command(BaseCommand): help = 'Re-scrape failed urls (via local proxy)' + def add_arguments(self, parser): + parser.add_argument('action', type=str, help='list/download') + def handle(self, *args, **options): + if options['action'] == 'list': + self.do_list() + else: + self.do_download() + + def do_list(self): tasks = SyncTask.objects.filter(failed_urls__isnull=False) urls = [] for task in tqdm(tasks): @@ -30,8 +39,8 @@ class Command(BaseCommand): f = open("/tmp/resync_todo.txt", "w") f.write("\n".join(urls)) f.close() - return + def do_download(self): self.stdout.write(f'Checking local proxy...{settings.LOCAL_PROXY}') url = f'{settings.LOCAL_PROXY}?url=https://www.douban.com/doumail/' try: