fix douban book parsing

This commit is contained in:
Your Name 2023-01-14 19:55:22 -05:00 committed by Henri Dickson
parent 1738dbd46c
commit ba80bf801b
2 changed files with 16 additions and 0 deletions

View file

@@ -202,6 +202,17 @@ class DoubanBookTestCase(TestCase):
self.assertEqual(site.resource.item.isbn, "9781847498571")
self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four")
@use_local_response
def test_publisher(self):
    """Check the ``pub_house`` metadata scraped for two Douban book pages.

    Each subject URL is resolved to a site through ``SiteManager``, its
    resource is fetched with ``get_resource_ready()``, and the resulting
    ``pub_house`` metadata value is compared with the expected publisher.
    """
    # (subject URL, expected publisher) pairs, checked in this order.
    expected_publishers = (
        ("https://book.douban.com/subject/35902899/", "Alma Classics"),
        ("https://book.douban.com/subject/1089243/", "花城出版社"),
    )
    for subject_url, publisher in expected_publishers:
        douban_site = SiteManager.get_site_by_url(subject_url)
        resource = douban_site.get_resource_ready()
        self.assertEqual(resource.metadata.get("pub_house"), publisher)
@use_local_response
def test_work(self):
# url = 'https://www.goodreads.com/work/editions/153313'

View file

@@ -55,6 +55,11 @@ class DoubanBook(AbstractSite):
"//div[@id='info']//span[text()='出版社:']/following::text()"
)
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
if not pub_house:
pub_house_elem = content.xpath(
"//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()"
)
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
pub_date_elem = content.xpath(
"//div[@id='info']//span[text()='出版年:']/following::text()"