fix goodreads
This commit is contained in:
parent
8982934b6a
commit
1c1516d875
4 changed files with 4080 additions and 0 deletions
|
@@ -117,6 +117,14 @@ class GoodreadsTestCase(TestCase):
|
|||
site.ready, True, "previous resource should still exist with data"
|
||||
)
|
||||
|
||||
@use_local_response
def test_scrape2(self):
    """Check that the scraped brief of the Fahrenheit 451 page has no ``<br`` markup.

    NOTE(review): ``use_local_response`` presumably serves stored test data
    instead of hitting the network -- confirm against its definition.
    """
    book_url = "https://www.goodreads.com/book/show/13079982-fahrenheit-451"
    site = SiteManager.get_site_by_url(book_url)
    site.get_resource_ready()
    # The brief is read from the resource metadata after the resource is ready.
    brief = site.resource.metadata.get("brief")
    self.assertNotIn("<br", brief)
|
||||
|
||||
@use_local_response
|
||||
def test_asin(self):
|
||||
t_url = "https://www.goodreads.com/book/show/45064996-hyperion"
|
||||
|
|
|
@@ -1,5 +1,6 @@
|
|||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
from django.utils.timezone import make_aware
|
||||
|
@@ -68,6 +69,10 @@ class Goodreads(AbstractSite):
|
|||
raise ParseError(self, "Book in __NEXT_DATA__ json")
|
||||
data["title"] = b["title"]
|
||||
data["brief"] = b["description"]
|
||||
if data["brief"]:
|
||||
data["brief"] = re.sub(
|
||||
r"<[^>]*>", "", data["brief"].replace("<br />", "\n")
|
||||
)
|
||||
ids = {}
|
||||
t, n = detect_isbn_asin(b["details"].get("asin"))
|
||||
if t:
|
||||
|
|
31
test_data/https___www_goodreads_com_book_show_13079982
Normal file
31
test_data/https___www_goodreads_com_book_show_13079982
Normal file
File diff suppressed because one or more lines are too long
4036
test_data/https___www_goodreads_com_work_editions_1272463
Normal file
4036
test_data/https___www_goodreads_com_work_editions_1272463
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Reference in a new issue