fix goodreads: strip HTML tags from the scraped book description

This commit is contained in:
Your Name 2024-01-20 23:27:54 -05:00 committed by Henri Dickson
parent 8982934b6a
commit 1c1516d875
4 changed files with 4080 additions and 0 deletions

View file

@ -117,6 +117,14 @@ class GoodreadsTestCase(TestCase):
site.ready, True, "previous resource should still exist with data"
)
@use_local_response
def test_scrape2(self):
    """Check that the scraped Goodreads ``brief`` contains no ``<br`` markup.

    Looks up the site for the Fahrenheit 451 book URL via ``SiteManager``,
    calls ``get_resource_ready()`` on it, and inspects the ``brief`` entry
    of the resulting resource metadata.
    """
    site = SiteManager.get_site_by_url(
        "https://www.goodreads.com/book/show/13079982-fahrenheit-451"
    )
    site.get_resource_ready()
    brief = site.resource.metadata.get("brief")
    # ``.get`` yields None when the key is absent; fail with a readable
    # assertion message instead of a TypeError from the ``in`` test below.
    self.assertIsNotNone(brief, "scraped resource should have a brief")
    self.assertNotIn("<br", brief)
@use_local_response
def test_asin(self):
t_url = "https://www.goodreads.com/book/show/45064996-hyperion"

View file

@ -1,5 +1,6 @@
import json
import logging
import re
from datetime import datetime
from django.utils.timezone import make_aware
@ -68,6 +69,10 @@ class Goodreads(AbstractSite):
raise ParseError(self, "Book in __NEXT_DATA__ json")
data["title"] = b["title"]
data["brief"] = b["description"]
if data["brief"]:
data["brief"] = re.sub(
r"<[^>]*>", "", data["brief"].replace("<br />", "\n")
)
ids = {}
t, n = detect_isbn_asin(b["details"].get("asin"))
if t:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long