improve bookstw parsing

This commit is contained in:
Your Name 2023-04-25 20:33:49 -04:00 committed by Henri Dickson
parent 8e3b96ef70
commit 81cc165afe
2 changed files with 11 additions and 7 deletions

View file

@@ -41,6 +41,8 @@ class BooksTW(AbstractSite):
authors = content.xpath("string(//div/ul/li[contains(text(),'作者:')])")
authors = authors.strip().split("", 1)[1].split(",") if authors else []
if not authors:
authors = [content.xpath("string(//div/ul/li[contains(.,'作者:')]/a)")]
authors = [s.strip() for s in authors]
# author_orig = content.xpath("string(//div/ul/li[contains(text(),'原文作者:')])")
@@ -95,12 +97,14 @@ class BooksTW(AbstractSite):
else None
)
brief = content.xpath("string(//h3[text()='內容簡介']/following-sibling::div)")
contents = content.xpath("string(//h3[text()='目錄']/following-sibling::div)")
series = content.xpath("string(//div/ul/li[contains(text(),'叢書系列:')]/a)")
series = None
imprint = None
brief = content.xpath("string(//h3[text()='內容簡介']/following-sibling::div)")
contents = content.xpath("string(//h3[text()='目錄']/following-sibling::div)")
img_url = content.xpath(
"string(//div[contains(@class,'cover_img')]//img[contains(@class,'cover')]/@src)"
)

View file

@@ -30,6 +30,7 @@
<span> {% trans '评分:评分人数不足' %}</span>
{% endif %}
</div>
<div>{% if item.subtitle %}{% trans '副标题:' %}{{ item.subtitle }}{% endif %}</div>
<div>{% if item.isbn %}{% trans 'ISBN' %}{{ item.isbn }}{% endif %}</div>
<div>{% if item.author %}{% trans '作者:' %}
{% for author in item.author %}
@@ -37,18 +38,17 @@
{% endfor %}
{% endif %}</div>
<div>{% if item.pub_house %}{% trans '出版社:' %}{{ item.pub_house }}{% endif %}</div>
<div>{% if item.subtitle %}{% trans '副标题:' %}{{ item.subtitle }}{% endif %}</div>
<div>{% if item.orig_title %}{% trans '原作名:' %}{{ item.orig_title }}{% endif %}</div>
<div>{% if item.translator %}{% trans '译者:' %}
{% for translator in item.translator %}
<span>{{ translator }}</span>{% if not forloop.last %} / {% endif %}
{% endfor %}
{% endif %}</div>
<div>{% if item.orig_title %}{% trans '原作名:' %}{{ item.orig_title }}{% endif %}</div>
<div>{% if item.language %}{% trans '语言:' %}{{ item.language }}{% endif %}</div>
<div>{%if item.pub_year %}{% trans '出版时间:' %}{{ item.pub_year }}{% trans '年' %}{% if item.pub_month %}{{ item.pub_month }}{% trans '月' %}{% endif %}{% endif %}</div>
<div>{% if item.series %}{% trans '丛书系列:' %}{{ item.series }}{% endif %}</div>
</div>
<div class="entity-detail__fields">
<div>{% if item.language %}{% trans '语言:' %}{{ item.language }}{% endif %}</div>
<div>{% if item.binding %}{% trans '装帧:' %}{{ item.binding }}{% endif %}</div>
<div>{% if item.price %}{% trans '定价:' %}{{ item.price }}{% endif %}</div>
<div>{% if item.pages %}{% trans '页数:' %}{{ item.pages }}{% endif %}</div>