lib.itmens/catalog/management/commands/cat.py
2023-10-22 17:46:34 -04:00

49 lines
1.6 KiB
Python

import pprint
from django.core.management.base import BaseCommand
from catalog.common import SiteManager
from catalog.sites import *
class Command(BaseCommand):
    """Fetch a catalog item from an external URL and print what was found.

    The URL is resolved to a site via ``SiteManager.get_site_by_url``.
    Without ``--save`` the site is only scraped and the scraped resource's
    ``metadata`` and ``lookup_ids`` are printed. With ``--save`` the resource
    is obtained through ``get_resource_ready`` and the item returned by
    ``get_item`` is printed as well.
    """

    help = "Scrape a catalog item from external resource (and save it)"

    def add_arguments(self, parser):
        """Register the positional ``url`` and the ``--save``/``--force`` flags."""
        parser.add_argument("url", type=str, help="URL to scrape")
        parser.add_argument(
            "--save",
            action="store_true",
            help="save to database",
        )
        parser.add_argument(
            "--force",
            action="store_true",
            help="force redownload",
        )

    def _dump(self, value):
        """Pretty-print ``value`` through the command's own stdout wrapper.

        ``pprint.pp`` writes to ``sys.stdout`` directly, which bypasses the
        stream Django hands to the command (for example when it is invoked
        with ``call_command(..., stdout=buffer)``). Formatting first and
        writing via ``self.stdout`` keeps all output on a single stream.
        ``sort_dicts=False`` mirrors the default used by ``pprint.pp``.
        """
        self.stdout.write(pprint.pformat(value, sort_dicts=False))

    def handle(self, *args, **options):
        """Run the command.

        Args:
            *args: Unused positional arguments passed by Django.
            **options: Parsed command-line options; reads ``url``, ``save``
                and ``force``.
        """
        url = str(options["url"])
        site = SiteManager.get_site_by_url(url)
        if site is None:
            # No site was resolved for this URL: report and stop early.
            self.stdout.write(self.style.ERROR(f"Unknown site for {url}"))
            return
        self.stdout.write(f"Fetching from {site}")
        if options["save"]:
            # --force is forwarded as ignore_existing_content.
            resource = site.get_resource_ready(
                ignore_existing_content=options["force"]
            )
            if resource:
                self._dump(resource.metadata)
            else:
                self.stdout.write(
                    self.style.ERROR(f"Unable to get resource for {url}")
                )
            # The item lookup is attempted even if the resource was falsy.
            item = site.get_item()
            if item:
                self._dump(item.cover)
                self._dump(item.metadata)
                self._dump(item.absolute_url)
            else:
                self.stdout.write(self.style.ERROR(f"Unable to get item for {url}"))
        else:
            resource = site.scrape()
            self._dump(resource.metadata)
            self._dump(resource.lookup_ids)
        # Written whenever a site was resolved, regardless of the errors above.
        self.stdout.write(self.style.SUCCESS("Done."))