From bd991412b460e013fdddc528c4cd105ba7dc03fb Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 22 Feb 2025 18:52:40 -0500 Subject: [PATCH] ignore download error during crawl --- catalog/management/commands/crawl.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/catalog/management/commands/crawl.py b/catalog/management/commands/crawl.py index 241fb0a8..0ef8a91f 100644 --- a/catalog/management/commands/crawl.py +++ b/catalog/management/commands/crawl.py @@ -3,6 +3,7 @@ from urllib.parse import urljoin from django.core.management.base import BaseCommand from loguru import logger +from lxml import html from catalog.common import * @@ -27,7 +28,10 @@ class Command(BaseCommand): url = queue.pop(0) history.append(url) logger.info(f"Navigating {url}") - content = ProxiedDownloader(url).download().html() + try: + content = ProxiedDownloader(url).download().html() + except Exception: + content = html.fromstring("<html />") urls = content.xpath("//a/@href") for _u in urls: # type:ignore u = urljoin(url, _u)