49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
import pprint
|
|
|
|
from django.core.management.base import BaseCommand
|
|
|
|
from catalog.common import SiteManager
|
|
from catalog.sites import *
|
|
|
|
|
|
class Command(BaseCommand):
    """Management command that scrapes one catalog item from an external URL.

    The URL is resolved to a site object via ``SiteManager.get_site_by_url``.
    Without ``--save`` the site is scraped and the scraped resource's
    ``metadata`` and ``lookup_ids`` are printed. With ``--save`` the command
    calls ``get_resource_ready`` and ``get_item`` on the site and prints the
    resulting resource metadata and item details.
    """

    help = "Scrape a catalog item from external resource (and save it)"

    def add_arguments(self, parser):
        """Register the positional ``url`` and the ``--save`` / ``--force`` flags."""
        parser.add_argument("url", type=str, help="URL to scrape")
        parser.add_argument(
            "--save",
            action="store_true",
            help="save to database",
        )
        parser.add_argument(
            "--force",
            action="store_true",
            help="force redownload",
        )

    def _dump(self, value):
        """Pretty-print ``value`` through ``self.stdout``.

        Going through ``self.stdout`` (rather than ``pprint.pp``, which writes
        to ``sys.stdout`` directly) keeps all command output on one stream, so
        it can be captured with ``call_command(..., stdout=...)``.
        ``sort_dicts=False`` matches the key ordering ``pprint.pp`` uses.
        """
        self.stdout.write(pprint.pformat(value, sort_dicts=False))

    def _error(self, message):
        """Write ``message`` to the command output using the error style."""
        self.stdout.write(self.style.ERROR(message))

    def handle(self, *args, **options):
        """Scrape ``options["url"]`` and print what was retrieved.

        Reads ``url``, ``save`` and ``force`` from ``options``. ``force`` is
        only consulted together with ``save``, where it is forwarded as
        ``ignore_existing_content``. The final "Done." line is printed only
        when no error was reported along the way.
        """
        url = str(options["url"])
        site = SiteManager.get_site_by_url(url)
        if site is None:
            self._error(f"Unknown site for {url}")
            return
        self.stdout.write(f"Fetching from {site}")

        failed = False
        if options["save"]:
            resource = site.get_resource_ready(ignore_existing_content=options["force"])
            if resource:
                self._dump(resource.metadata)
            else:
                self._error(f"Unable to get resource for {url}")
                failed = True
            # The item lookup is still attempted after a resource failure,
            # as before, so both problems are reported in one run.
            item = site.get_item()
            if item:
                self._dump(item.cover)
                self._dump(item.metadata)
                self._dump(item.absolute_url)
            else:
                self._error(f"Unable to get item for {url}")
                failed = True
        else:
            resource = site.scrape()
            self._dump(resource.metadata)
            self._dump(resource.lookup_ids)

        # Do not claim success right after printing an error message.
        if not failed:
            self.stdout.write(self.style.SUCCESS("Done."))
|