2022-12-07 19:09:05 -05:00
|
|
|
import pprint
|
2023-08-10 11:27:31 -04:00
|
|
|
|
|
|
|
from django.core.management.base import BaseCommand
|
|
|
|
|
2022-12-15 17:29:35 -05:00
|
|
|
from catalog.common import SiteManager
|
2022-12-07 19:09:05 -05:00
|
|
|
from catalog.sites import *
|
|
|
|
|
|
|
|
|
|
|
|
class Command(BaseCommand):
    """Management command that fetches one catalog entry from an external URL.

    The URL is matched to a site through ``SiteManager.get_site_by_url``.
    Without ``--save`` the result of the site's ``scrape()`` is only
    pretty-printed. With ``--save`` the command goes through
    ``get_resource_ready()`` and ``get_item()`` instead (presumably these
    persist the data, as the flag's help text suggests -- confirm against the
    site implementation).
    """

    help = "Scrape a catalog item from external resource (and save it)"

    def add_arguments(self, parser):
        """Declare the positional ``url`` plus the ``--save``/``--force`` switches."""
        parser.add_argument("url", type=str, help="URL to scrape")
        # Both switches are simple booleans (``store_true``), registered in
        # the same order as before: --save first, then --force.
        for flag, description in (
            ("--save", "save to database"),
            ("--force", "force redownload"),
        ):
            parser.add_argument(flag, action="store_true", help=description)

    def handle(self, *args, **options):
        """Look up the site for the given URL and print what it yields.

        Every message, including errors, is written to ``self.stdout``.
        An unrecognised URL ends the command early; otherwise the closing
        "Done." line is always written, even if an error was reported for
        the resource or the item along the way.
        """

        def complain(message):
            # Error-styled output; still goes to stdout, like all other output.
            self.stdout.write(self.style.ERROR(message))

        target = str(options["url"])
        matched = SiteManager.get_site_by_url(target)
        if matched is None:
            complain(f"Unknown site for {target}")
            return

        self.stdout.write(f"Fetching from {matched}")

        if not options["save"]:
            # Print-only mode: show the scraped metadata and lookup ids.
            scraped = matched.scrape()
            pprint.pp(scraped.metadata)
            pprint.pp(scraped.lookup_ids)
        else:
            # --force is forwarded as ``ignore_existing_content``.
            fetched = matched.get_resource_ready(ignore_existing_content=options["force"])
            if not fetched:
                complain(f"Unable to get resource for {target}")
            else:
                pprint.pp(fetched.metadata)

            # The item lookup runs regardless of whether the resource step
            # reported an error.
            entry = matched.get_item()
            if entry:
                for field in ("cover", "metadata", "absolute_url"):
                    pprint.pp(getattr(entry, field))
            else:
                complain(f"Unable to get item for {target}")

        self.stdout.write(self.style.SUCCESS("Done."))