fix douban book parsing
This commit is contained in:
parent
1738dbd46c
commit
ba80bf801b
2 changed files with 16 additions and 0 deletions
|
@ -202,6 +202,17 @@ class DoubanBookTestCase(TestCase):
|
|||
self.assertEqual(site.resource.item.isbn, "9781847498571")
|
||||
self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four")
|
||||
|
||||
@use_local_response
def test_publisher(self):
    """Check the ``pub_house`` metadata scraped for two Douban book pages.

    Each URL is resolved through ``SiteManager.get_site_by_url`` and the
    resource returned by ``get_resource_ready()`` must carry the expected
    publisher name under the ``"pub_house"`` metadata key.
    """
    # (page URL, expected publisher) pairs, checked in order.
    expected_publishers = (
        ("https://book.douban.com/subject/35902899/", "Alma Classics"),
        ("https://book.douban.com/subject/1089243/", "花城出版社"),
    )
    for t_url, publisher in expected_publishers:
        site = SiteManager.get_site_by_url(t_url)
        res = site.get_resource_ready()
        self.assertEqual(res.metadata.get("pub_house"), publisher)
|
||||
|
||||
@use_local_response
|
||||
def test_work(self):
|
||||
# url = 'https://www.goodreads.com/work/editions/153313'
|
||||
|
|
|
@ -55,6 +55,11 @@ class DoubanBook(AbstractSite):
|
|||
"//div[@id='info']//span[text()='出版社:']/following::text()"
|
||||
)
|
||||
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
|
||||
if not pub_house:
|
||||
pub_house_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()"
|
||||
)
|
||||
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
|
||||
|
||||
pub_date_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出版年:']/following::text()"
|
||||
|
|
Loading…
Add table
Reference in a new issue