Fix Goodreads scraper: strip HTML tags from the book description
This commit is contained in:
parent
8982934b6a
commit
1c1516d875
4 changed files with 4080 additions and 0 deletions
|
@@ -117,6 +117,14 @@ class GoodreadsTestCase(TestCase):
|
||||||
site.ready, True, "previous resource should still exist with data"
|
site.ready, True, "previous resource should still exist with data"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@use_local_response
|
||||||
|
def test_scrape2(self):
|
||||||
|
site = SiteManager.get_site_by_url(
|
||||||
|
"https://www.goodreads.com/book/show/13079982-fahrenheit-451"
|
||||||
|
)
|
||||||
|
site.get_resource_ready()
|
||||||
|
self.assertNotIn("<br", site.resource.metadata.get("brief"))
|
||||||
|
|
||||||
@use_local_response
|
@use_local_response
|
||||||
def test_asin(self):
|
def test_asin(self):
|
||||||
t_url = "https://www.goodreads.com/book/show/45064996-hyperion"
|
t_url = "https://www.goodreads.com/book/show/45064996-hyperion"
|
||||||
|
|
|
@@ -1,5 +1,6 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from django.utils.timezone import make_aware
|
from django.utils.timezone import make_aware
|
||||||
|
@@ -68,6 +69,10 @@ class Goodreads(AbstractSite):
|
||||||
raise ParseError(self, "Book in __NEXT_DATA__ json")
|
raise ParseError(self, "Book in __NEXT_DATA__ json")
|
||||||
data["title"] = b["title"]
|
data["title"] = b["title"]
|
||||||
data["brief"] = b["description"]
|
data["brief"] = b["description"]
|
||||||
|
if data["brief"]:
|
||||||
|
data["brief"] = re.sub(
|
||||||
|
r"<[^>]*>", "", data["brief"].replace("<br />", "\n")
|
||||||
|
)
|
||||||
ids = {}
|
ids = {}
|
||||||
t, n = detect_isbn_asin(b["details"].get("asin"))
|
t, n = detect_isbn_asin(b["details"].get("asin"))
|
||||||
if t:
|
if t:
|
||||||
|
|
31
test_data/https___www_goodreads_com_book_show_13079982
Normal file
31
test_data/https___www_goodreads_com_book_show_13079982
Normal file
File diff suppressed because one or more lines are too long
4036
test_data/https___www_goodreads_com_work_editions_1272463
Normal file
4036
test_data/https___www_goodreads_com_work_editions_1272463
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Reference in a new issue