lib.itmens/catalog/sites/ao3.py

63 lines
1.9 KiB
Python
Raw Normal View History

2024-12-08 19:34:13 +00:00
from catalog.book.models import *
from catalog.common import *
@SiteManager.register
class ArchiveOfOurOwn(AbstractSite):
    """Site adapter for work pages on archiveofourown.org.

    Matches URLs of the form ``<scheme>://archiveofourown.org/works/<digits>``
    and scrapes title, authors, summary, language and publication date into
    a ``ResourceContent`` whose default model is ``Edition``.
    """

    SITE_NAME = SiteName.AO3
    ID_TYPE = IdType.AO3
    URL_PATTERNS = [
        r"\w+://archiveofourown\.org/works/(\d+)",
    ]
    # NOTE(review): "?" looks like a placeholder value -- confirm.
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical work URL for the numeric work id *id_value*."""
        # f-string instead of ``+`` so a non-str id does not raise TypeError.
        return f"https://archiveofourown.org/works/{id_value}"

    @staticmethod
    def _parse_year_month(published):
        """Split a ``YYYY-MM[-DD]`` string into ``(year, month)``.

        Either element is ``None`` when it cannot be parsed, so a malformed
        date never aborts the scrape; the date is optional metadata.
        """
        parts = published.strip().split("-")
        try:
            year = int(parts[0])
        except ValueError:
            return None, None
        try:
            month = int(parts[1])
        except (IndexError, ValueError):
            month = None
        return year, month

    def scrape(self):
        """Download the work page and extract its metadata.

        Returns:
            ResourceContent whose ``metadata`` dict holds the localized
            title and description, author list, language list, publication
            year/month and the WEB book format.

        Raises:
            ParseError: if ``self.url`` is not set, or no title is found on
                the downloaded page.
        """
        if not self.url:
            raise ParseError(self, "url")
        # presumably ``view_adult=true`` skips the adult-content
        # confirmation page -- verify against the site.
        content = BasicDownloader(self.url + "?view_adult=true").download().html()

        title = content.xpath("string(//h2[@class='title heading'])")
        if not title:
            raise ParseError(self, "title")

        authors = content.xpath("//h3[@class='byline heading']/a/text()")
        summary = content.xpath(
            "string(//div[@class='summary module']//blockquote[@class='userstuff'])"
        )
        language = [
            s.strip()
            for s in content.xpath("//dd[@class='language']/text()")  # type:ignore
        ]

        published = content.xpath("string(//dd[@class='published']/text())")
        if published:
            pub_year, pub_month = self._parse_year_month(published)  # type:ignore
        else:
            pub_year = None
            pub_month = None

        # NOTE(review): title and summary are always tagged "en", regardless
        # of the language scraped above -- confirm this is intended.
        data = {
            "localized_title": [{"lang": "en", "text": title.strip()}],  # type:ignore
            "localized_description": (
                [
                    {"lang": "en", "text": summary.strip()}  # type:ignore
                ]
                if summary
                else []
            ),
            "author": authors,
            "language": language,
            "pub_year": pub_year,
            "pub_month": pub_month,
            "format": Edition.BookFormat.WEB,
            # TODO: word count ("words") is not scraped yet.
        }
        return ResourceContent(metadata=data)