diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000..514df728 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,74 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "neo" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "neo" ] + schedule: + - cron: '35 0 * * 0' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'javascript', 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. 
+ + # For details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # Modify them (or add more) to build your code if your project requires it; please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/pysa.yml b/.github/workflows/pysa.yml new file mode 100644 index 00000000..e4e20af3 --- /dev/null +++ b/.github/workflows/pysa.yml @@ -0,0 +1,50 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow integrates Python Static Analyzer (Pysa) with +# GitHub's Code Scanning feature. +# +# Python Static Analyzer (Pysa) is a security-focused static +# analysis tool that tracks flows of data from where they +# originate to where they terminate in a dangerous location. 
+# +# See https://pyre-check.org/docs/pysa-basics/ + +name: Pysa + +on: + workflow_dispatch: + push: + branches: [ "neo" ] + pull_request: + branches: [ "neo" ] + schedule: + - cron: '45 12 * * 4' + +permissions: + contents: read + +jobs: + pysa: + permissions: + actions: read + contents: read + security-events: write + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Run Pysa + uses: facebook/pysa-action@f46a63777e59268613bd6e2ff4e29f144ca9e88b + with: + # To customize these inputs: + # See https://github.com/facebook/pysa-action#inputs + repo-directory: './' + requirements-path: 'requirements.txt' + infer-types: true + include-default-sapp-filters: true diff --git a/.gitignore b/.gitignore index 086f9d04..d1edae82 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,7 @@ migrations/ # debug log file /log -log \ No newline at end of file +log + +# conf folder for neodb +/neodb diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..cc9bf6e8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +# syntax=docker/dockerfile:1 +FROM python:3.8-slim +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential libpq-dev git \ + && rm -rf /var/lib/apt/lists/* +COPY requirements.txt /tmp/requirements.txt +RUN pip install --no-cache-dir -r /tmp/requirements.txt \ + && rm -rf /tmp/requirements.txt \ + && useradd -U app_user \ + && install -d -m 0755 -o app_user -g app_user /app/static + +ENV DJANGO_SETTINGS_MODULE=neodb.dev +WORKDIR /app +USER app_user:app_user +COPY --chown=app_user:app_user . . 
+RUN chmod +x docker/*.sh + +# Entrypoint and default command for the container +ENTRYPOINT [ "docker/entrypoint.sh" ] + +CMD [ "docker/start.sh", "server" ] \ No newline at end of file diff --git a/README.md b/README.md index 2cfde8ea..d9f46dbc 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,13 @@ An application allows you to mark any books, movies and more things you love. Depends on Mastodon. +## Install +Please see [doc/GUIDE.md](doc/GUIDE.md) + +## Bug Report + - To file a bug for NiceDB, please create an issue [here](https://github.com/doubaniux/boofilsic/issues/new) + - To file a bug or request a new feature for NeoDB, please contact NeoDB on [Fediverse](https://mastodon.social/@neodb) or [Twitter](https://twitter.com/NeoDBsocial) + ## Contribution The project is based on Django. If you are familiar with this technique and willing to read through the terrible code😝, your contribution would be the most welcome! @@ -11,8 +18,6 @@ Currently looking for someone to help with: - Explaining the structure of code - Refactoring (this is something big) -This project is still in its early stage, so you are not encouraged to deploy it on your own. If you do want to give it a try, please check the [fork of *alphatownsman*](https://github.com/alphatownsman/boofilsic), which is more friendly. - ## Sponsor -If you like this project, please consider sponsoring us on [Patreon](https://patreon.com/tertius). +If you like this project, please consider sponsoring NiceDB on [Patreon](https://patreon.com/tertius). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..cafb3b8b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please DM [us on Fediverse](https://mastodon.social/@neodb) or send an email to `dev`@`neodb.social` to report a vulnerability. Please do not disclose it publicly or open pull requests/issues directly. Thank you. 
diff --git a/boofilsic/context_processors.py b/boofilsic/context_processors.py new file mode 100644 index 00000000..6fd333b3 --- /dev/null +++ b/boofilsic/context_processors.py @@ -0,0 +1,5 @@ +from django.conf import settings + + +def site_info(request): + return settings.SITE_INFO diff --git a/boofilsic/settings.py b/boofilsic/settings.py index 86d4abf2..6393f4a0 100644 --- a/boofilsic/settings.py +++ b/boofilsic/settings.py @@ -12,10 +12,13 @@ https://docs.djangoproject.com/en/3.0/ref/settings/ import os import psycopg2.extensions +from git import Repo # Build paths inside the project like this: os.path.join(BASE_DIR, ...) BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +# https://docs.djangoproject.com/en/3.2/releases/3.2/#customizing-type-of-auto-created-primary-keys +DEFAULT_AUTO_FIELD = 'django.db.models.AutoField' # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ @@ -38,6 +41,8 @@ INTERNAL_IPS = [ INSTALLED_APPS = [ 'django.contrib.admin', + 'hijack', + 'hijack.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', @@ -45,6 +50,9 @@ INSTALLED_APPS = [ 'django.contrib.staticfiles', 'django.contrib.humanize', 'django.contrib.postgres', + 'django_sass', + 'django_rq', + 'simple_history', 'markdownx', 'management.apps.ManagementConfig', 'mastodon.apps.MastodonConfig', @@ -54,7 +62,12 @@ INSTALLED_APPS = [ 'movies.apps.MoviesConfig', 'music.apps.MusicConfig', 'games.apps.GamesConfig', + 'sync.apps.SyncConfig', + 'collection.apps.CollectionConfig', + 'timeline.apps.TimelineConfig', 'easy_thumbnails', + 'user_messages', + 'django_slack', ] MIDDLEWARE = [ @@ -65,6 +78,8 @@ MIDDLEWARE = [ 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', + 'hijack.middleware.HijackUserMiddleware', + 
'simple_history.middleware.HistoryRequestMiddleware', ] ROOT_URLCONF = 'boofilsic.urls' @@ -79,7 +94,9 @@ TEMPLATES = [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', + # 'django.contrib.messages.context_processors.messages', + "user_messages.context_processors.messages", + 'boofilsic.context_processors.site_info', ], }, }, @@ -95,10 +112,10 @@ if DEBUG: DATABASES = { 'default': { 'ENGINE': 'django.db.backends.postgresql', - 'NAME': 'test', - 'USER': 'donotban', - 'PASSWORD': 'donotbansilvousplait', - 'HOST': '172.18.116.29', + 'NAME': os.environ.get('DB_NAME', 'test'), + 'USER': os.environ.get('DB_USER', 'donotban'), + 'PASSWORD': os.environ.get('DB_PASSWORD', 'donotbansilvousplait'), + 'HOST': os.environ.get('DB_HOST', '172.18.116.29'), 'OPTIONS': { 'client_encoding': 'UTF8', # 'isolation_level': psycopg2.extensions.ISOLATION_LEVEL_DEFAULT, @@ -184,13 +201,29 @@ STATICFILES_STORAGE = 'django.contrib.staticfiles.storage.ManifestStaticFilesSto AUTH_USER_MODEL = 'users.User' +SILENCED_SYSTEM_CHECKS = [ + "auth.W004", # User.username is non-unique + "admin.E404" # Required by django-user-messages +] + MEDIA_URL = '/media/' MEDIA_ROOT = os.path.join(BASE_DIR, 'media/') +PROJECT_ROOT = os.path.abspath(os.path.dirname(__name__)) +SITE_INFO = { + 'site_name': 'NiceDB', + 'support_link': 'https://github.com/doubaniux/boofilsic/issues', + 'version_hash': None, + 'settings_module': os.getenv('DJANGO_SETTINGS_MODULE'), + 'sentry_dsn': None, +} + # Mastodon configs -CLIENT_NAME = 'NiceDB' -APP_WEBSITE = 'https://nicedb.org' -REDIRECT_URIS = "https://nicedb.org/users/OAuth2_login/\nhttps://www.nicedb.org/users/OAuth2_login/" +CLIENT_NAME = os.environ.get('APP_NAME', 'NiceDB') +SITE_INFO['site_name'] = os.environ.get('APP_NAME', 'NiceDB') +APP_WEBSITE = os.environ.get('APP_URL', 'https://nicedb.org') +REDIRECT_URIS = APP_WEBSITE + 
"/users/OAuth2_login/" + # Path to save report related images, ends with slash REPORT_MEDIA_PATH_ROOT = 'report/' @@ -205,10 +238,23 @@ ALBUM_MEDIA_PATH_ROOT = 'album/' DEFAULT_ALBUM_IMAGE = os.path.join(ALBUM_MEDIA_PATH_ROOT, 'default.svg') GAME_MEDIA_PATH_ROOT = 'game/' DEFAULT_GAME_IMAGE = os.path.join(GAME_MEDIA_PATH_ROOT, 'default.svg') +COLLECTION_MEDIA_PATH_ROOT = 'collection/' +DEFAULT_COLLECTION_IMAGE = os.path.join(COLLECTION_MEDIA_PATH_ROOT, 'default.svg') +SYNC_FILE_PATH_ROOT = 'sync/' +EXPORT_FILE_PATH_ROOT = 'export/' + +# Allow user to login via any Mastodon/Pleroma sites +MASTODON_ALLOW_ANY_SITE = False # Timeout of requests to Mastodon, in seconds MASTODON_TIMEOUT = 30 +MASTODON_CLIENT_SCOPE = 'read write follow' +#use the following if it's a new site +#MASTODON_CLIENT_SCOPE = 'read:accounts read:follows read:search read:blocks read:mutes write:statuses write:media' + +MASTODON_LEGACY_CLIENT_SCOPE = 'read write follow' + # Tags for toots posted from this site MASTODON_TAGS = '#NiceDB #NiceDB%(category)s #NiceDB%(category)s%(type)s' @@ -217,7 +263,7 @@ STAR_SOLID = ':star_solid:' STAR_HALF = ':star_half:' STAR_EMPTY = ':star_empty:' -# Default password for each user. since assword is not used any way, +# Default password for each user. 
since password is not used any way, # any string that is not empty is ok DEFAULT_PASSWORD = 'ab7nsm8didusbaqPgq' @@ -231,8 +277,12 @@ ADMIN_URL = 'tertqX7256n7ej8nbv5cwvsegdse6w7ne5rHd' LUMINATI_USERNAME = 'lum-customer-hl_nw4tbv78-zone-static' LUMINATI_PASSWORD = 'nsb7te9bw0ney' +SCRAPING_TIMEOUT = 90 + # ScraperAPI api key SCRAPERAPI_KEY = 'wnb3794v675b8w475h0e8hr7tyge' +PROXYCRAWL_KEY = None +SCRAPESTACK_KEY = None # Spotify credentials SPOTIFY_CREDENTIAL = "NzYzNkYTE6MGQ0ODY0NTY2Y2b3n645sdfgAyY2I1ljYjg3Nzc0MjIwODQ0ZWE=" @@ -240,6 +290,17 @@ SPOTIFY_CREDENTIAL = "NzYzNkYTE6MGQ0ODY0NTY2Y2b3n645sdfgAyY2I1ljYjg3Nzc0MjIwODQ0 # IMDb API service https://imdb-api.com/ IMDB_API_KEY = "k23fwewff23" +# The Movie Database (TMDB) API Keys +TMDB_API3_KEY = "deadbeef" +TMDB_API4_KEY = "deadbeef.deadbeef.deadbeef" + +# Google Books API Key +GOOGLE_API_KEY = 'deadbeef-deadbeef-deadbeef' + +# IGDB +IGDB_CLIENT_ID = 'deadbeef' +IGDB_ACCESS_TOKEN = 'deadbeef' + # Thumbnail setting # It is possible to optimize the image size even more: https://easy-thumbnails.readthedocs.io/en/latest/ref/optimize/ THUMBNAIL_ALIASES = { @@ -257,3 +318,47 @@ if DEBUG: # https://django-debug-toolbar.readthedocs.io/en/latest/ # maybe benchmarking before deployment + +REDIS_HOST = os.environ.get('REDIS_HOST', '127.0.0.1') + +RQ_QUEUES = { + 'mastodon': { + 'HOST': REDIS_HOST, + 'PORT': 6379, + 'DB': 0, + 'DEFAULT_TIMEOUT': -1, + }, + 'export': { + 'HOST': REDIS_HOST, + 'PORT': 6379, + 'DB': 0, + 'DEFAULT_TIMEOUT': -1, + }, + 'doufen': { + 'HOST': REDIS_HOST, + 'PORT': 6379, + 'DB': 0, + 'DEFAULT_TIMEOUT': -1, + } +} + +RQ_SHOW_ADMIN_LINK = True + +SEARCH_INDEX_NEW_ONLY = False + +SEARCH_BACKEND = None + +# SEARCH_BACKEND = 'MEILISEARCH' +# MEILISEARCH_SERVER = 'http://127.0.0.1:7700' +# MEILISEARCH_KEY = 'deadbeef' + +# SEARCH_BACKEND = 'TYPESENSE' +# TYPESENSE_CONNECTION = { +# 'api_key': 'deadbeef', +# 'nodes': [{ +# 'host': 'localhost', +# 'port': '8108', +# 'protocol': 'http' +# }], +# 
'connection_timeout_seconds': 2 +# } diff --git a/boofilsic/urls.py b/boofilsic/urls.py index dd52087a..38d74a5a 100644 --- a/boofilsic/urls.py +++ b/boofilsic/urls.py @@ -27,10 +27,16 @@ urlpatterns = [ path('movies/', include('movies.urls')), path('music/', include('music.urls')), path('games/', include('games.urls')), + path('collections/', include('collection.urls')), + path('timeline/', include('timeline.urls')), path('sync/', include('sync.urls')), path('announcement/', include('management.urls')), + path('hijack/', include('hijack.urls')), path('', include('common.urls')), +] +urlpatterns += [ + path(settings.ADMIN_URL + '-rq/', include('django_rq.urls')) ] if settings.DEBUG: diff --git a/books/admin.py b/books/admin.py index 942dccb4..75df663b 100644 --- a/books/admin.py +++ b/books/admin.py @@ -1,7 +1,8 @@ from django.contrib import admin from .models import * +from simple_history.admin import SimpleHistoryAdmin -admin.site.register(Book) +admin.site.register(Book, SimpleHistoryAdmin) admin.site.register(BookMark) admin.site.register(BookReview) admin.site.register(BookTag) diff --git a/books/apps.py b/books/apps.py index f716137a..b03e2d23 100644 --- a/books/apps.py +++ b/books/apps.py @@ -3,3 +3,8 @@ from django.apps import AppConfig class BooksConfig(AppConfig): name = 'books' + + def ready(self): + from common.index import Indexer + from .models import Book + Indexer.update_model_indexable(Book) diff --git a/books/forms.py b/books/forms.py index da7ecee6..27abda07 100644 --- a/books/forms.py +++ b/books/forms.py @@ -1,17 +1,12 @@ from django import forms from django.utils.translation import gettext_lazy as _ -from .models import Book, BookMark, BookReview +from .models import Book, BookMark, BookReview, BookMarkStatusTranslation from common.models import MarkStatusEnum from common.forms import * def BookMarkStatusTranslator(status): - trans_dict = { - MarkStatusEnum.DO.value: _("在读"), - MarkStatusEnum.WISH.value: _("想读"), - 
MarkStatusEnum.COLLECT.value: _("读过") - } - return trans_dict[status] + return BookMarkStatusTranslation[status] class BookForm(forms.ModelForm): @@ -96,11 +91,8 @@ class BookMarkForm(MarkForm): 'status', 'rating', 'text', - 'is_private', - ] - labels = { - 'rating': _("评分"), - } + 'visibility', + ] widgets = { 'book': forms.TextInput(attrs={"hidden": ""}), } @@ -115,14 +107,8 @@ class BookReviewForm(ReviewForm): 'book', 'title', 'content', - 'is_private' + 'visibility' ] - labels = { - 'book': "", - 'title': _("标题"), - 'content': _("正文"), - 'share_to_mastodon': _("分享到长毛象") - } widgets = { 'book': forms.TextInput(attrs={"hidden": ""}), } diff --git a/books/management/commands/fix-book-cover.py b/books/management/commands/fix-book-cover.py new file mode 100644 index 00000000..ae9227b5 --- /dev/null +++ b/books/management/commands/fix-book-cover.py @@ -0,0 +1,200 @@ +from django.core.management.base import BaseCommand +from django.core.files.uploadedfile import SimpleUploadedFile +from django.conf import settings +from common.scraper import * +from books.models import Book +from books.forms import BookForm +import requests +import re +import filetype +from lxml import html +from PIL import Image +from io import BytesIO + + +class DoubanPatcherMixin: + @classmethod + def download_page(cls, url, headers): + url = cls.get_effective_url(url) + r = None + error = 'DoubanScrapper: error occured when downloading ' + url + content = None + + def get(url, timeout): + nonlocal r + # print('Douban GET ' + url) + try: + r = requests.get(url, timeout=timeout) + except Exception as e: + r = requests.Response() + r.status_code = f"Exception when GET {url} {e}" + url + # print('Douban CODE ' + str(r.status_code)) + return r + + def check_content(): + nonlocal r, error, content + content = None + if r.status_code == 200: + content = r.content.decode('utf-8') + if content.find('关于豆瓣') == -1: + # with open('/tmp/temp.html', 'w', encoding='utf-8') as fp: + # fp.write(content) + content 
= None + error = error + 'Content not authentic' # response is garbage + elif re.search('不存在[^<]+', content, re.MULTILINE): + content = None + error = error + 'Not found or hidden by Douban' + else: + error = error + str(r.status_code) + + def fix_wayback_links(): + nonlocal content + # fix links + content = re.sub(r'href="http[^"]+http', r'href="http', content) + # https://img9.doubanio.com/view/subject/{l|m|s}/public/s1234.jpg + content = re.sub(r'src="[^"]+/(s\d+\.\w+)"', + r'src="https://img9.doubanio.com/view/subject/m/public/\1"', content) + # https://img9.doubanio.com/view/photo/s_ratio_poster/public/p2681329386.jpg + # https://img9.doubanio.com/view/photo/{l|m|s}/public/p1234.webp + content = re.sub(r'src="[^"]+/(p\d+\.\w+)"', + r'src="https://img9.doubanio.com/view/photo/m/public/\1"', content) + + # Wayback Machine: get latest available + def wayback(): + nonlocal r, error, content + error = error + '\nWayback: ' + get('http://archive.org/wayback/available?url=' + url, 10) + if r.status_code == 200: + w = r.json() + if w['archived_snapshots'] and w['archived_snapshots']['closest']: + get(w['archived_snapshots']['closest']['url'], 10) + check_content() + if content is not None: + fix_wayback_links() + else: + error = error + 'No snapshot available' + else: + error = error + str(r.status_code) + + # Wayback Machine: guess via CDX API + def wayback_cdx(): + nonlocal r, error, content + error = error + '\nWayback: ' + get('http://web.archive.org/cdx/search/cdx?url=' + url, 10) + if r.status_code == 200: + dates = re.findall(r'[^\s]+\s+(\d+)\s+[^\s]+\s+[^\s]+\s+\d+\s+[^\s]+\s+\d{5,}', + r.content.decode('utf-8')) + # assume snapshots whose size >9999 contain real content, use the latest one of them + if len(dates) > 0: + get('http://web.archive.org/web/' + dates[-1] + '/' + url, 10) + check_content() + if content is not None: + fix_wayback_links() + else: + error = error + 'No snapshot available' + else: + error = error + str(r.status_code) + + def latest(): + 
nonlocal r, error, content + if settings.SCRAPESTACK_KEY is None: + error = error + '\nDirect: ' + get(url, 60) + else: + error = error + '\nScrapeStack: ' + get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}', 60) + check_content() + + wayback_cdx() + if content is None: + latest() + + if content is None: + logger.error(error) + content = '
'
+        return html.fromstring(content)
+
+    @classmethod
+    def download_image(cls, url, item_url=None):
+        """
+        Fetch the image at `url` and return it as a `(raw_bytes, extension)` pair.
+
+        When `settings.SCRAPESTACK_KEY` is set, the request is routed through
+        the ScrapeStack endpoint and a failed attempt is repeated once;
+        otherwise a single direct request is made.
+
+        :param url: image URL; `None` is logged and yields `(None, None)`
+        :param item_url: URL of the item the image belongs to, used in log messages only
+        :return: `(raw_img, ext)` on success, `(None, None)` when every attempt failed
+        """
+        if url is None:
+            logger.error(f"Douban: no image url for {item_url}")
+            return None, None
+
+        via_scrapestack = settings.SCRAPESTACK_KEY is not None
+        dl_url = url
+        if via_scrapestack:
+            dl_url = f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}'
+
+        # one attempt for a direct download, two when going through ScrapeStack
+        for _attempt in range(2 if via_scrapestack else 1):
+            try:
+                img_response = requests.get(dl_url, timeout=90)
+                if img_response.status_code != 200:
+                    logger.error(f"Douban: download image failed {img_response.status_code} {dl_url} {item_url}")
+                    continue
+                payload = img_response.content
+                Image.open(BytesIO(payload)).load()  # corrupted image will trigger exception
+                mime = img_response.headers.get('Content-Type').partition(';')[0].strip()
+                # any exception raised while validating the image or resolving
+                # the extension counts as a failed attempt
+                return payload, filetype.get_type(mime=mime).extension
+            except Exception as e:
+                logger.error(f"Douban: download image failed {e} {dl_url} {item_url}")
+        return None, None
+
+
+class DoubanBookPatcher(DoubanPatcherMixin, AbstractScraper):
+    site_name = SourceSiteEnum.DOUBAN.value
+    host = 'book.douban.com'
+    data_class = Book
+    form_class = BookForm
+
+    regex = re.compile(r"https://book\.douban\.com/subject/\d+/{0,1}")
+
+    def
scrape(self, url):
+        headers = DEFAULT_REQUEST_HEADERS.copy()
+        headers['Host'] = self.host
+        content = self.download_page(url, headers)
+        img_url_elem = content.xpath("//*[@id='mainpic']/a/img/@src")
+        img_url = img_url_elem[0].strip() if img_url_elem else None
+        raw_img, ext = self.download_image(img_url, url)
+        return raw_img, ext
+
+
+class Command(BaseCommand):
+    """
+    Re-fetch cover images for Douban books that still use the default cover.
+
+    The work is split into 8 shards by `id % 8`; each invocation processes
+    the shard selected by the `threadId` argument.
+    """
+    help = 'fix cover image'
+
+    def add_arguments(self, parser):
+        """Register the positional `threadId` shard index."""
+        # argparse %-formats help strings, so a literal percent sign must be
+        # written as %%; a bare '% 8' makes `--help` fail with
+        # "ValueError: incomplete format".
+        parser.add_argument('threadId', type=int,
+                            help='shard index: only books whose id %% 8 equals this value are processed')
+
+    def handle(self, *args, **options):
+        """
+        Walk the Douban books with the default cover, and for those in this
+        shard scrape the cover again and save it on the book.
+
+        Each book is handled best-effort: an exception is reported on stderr
+        and the loop moves on to the next book.
+        """
+        t = int(options['threadId'])
+        for m in Book.objects.filter(cover='book/default.svg', source_site='douban'):
+            if m.id % 8 == t:
+                self.stdout.write(f'Re-fetching {m.source_url}')
+                try:
+                    raw_img, img_ext = DoubanBookPatcher.scrape(m.source_url)
+                    if img_ext is not None:
+                        m.cover = SimpleUploadedFile('temp.' + img_ext, raw_img)
+                        m.save()
+                        self.stdout.write(self.style.SUCCESS(f'Saved {m.source_url}'))
+                    else:
+                        self.stdout.write(self.style.ERROR(f'Skipped {m.source_url}'))
+                except Exception as e:
+                    # report through the command's stderr wrapper, with the
+                    # item URL so the failing book can be identified
+                    self.stderr.write(f'Failed {m.source_url}: {e}')
diff --git a/books/models.py b/books/models.py
index 4f898709..8b23e9a6 100644
--- a/books/models.py
+++ b/books/models.py
@@ -1,98 +1,184 @@
-import uuid
 import django.contrib.postgres.fields as postgres
-from django.utils.translation import ugettext_lazy as _
+from django.utils.translation import gettext_lazy as _
 from django.db import models
-from django.core.serializers.json import DjangoJSONEncoder
 from django.shortcuts import reverse
-from common.models import Entity, Mark, Review, Tag
+from common.models import Entity, Mark, Review, Tag, MarkStatusEnum
 from common.utils import GenerateDateUUIDMediaFilePath
-from boofilsic.settings import BOOK_MEDIA_PATH_ROOT, DEFAULT_BOOK_IMAGE
-from django.utils import timezone
+from django.conf import settings
+from django.db.models import Q
+from simple_history.models import HistoricalRecords
+
+
+BookMarkStatusTranslation = {
+    MarkStatusEnum.DO.value: _("在读"),
+    MarkStatusEnum.WISH.value:
_("想读"), + MarkStatusEnum.COLLECT.value: _("读过") +} def book_cover_path(instance, filename): - return GenerateDateUUIDMediaFilePath(instance, filename, BOOK_MEDIA_PATH_ROOT) + return GenerateDateUUIDMediaFilePath(instance, filename, settings.BOOK_MEDIA_PATH_ROOT) class Book(Entity): # widely recognized name, usually in Chinese - title = models.CharField(_("title"), max_length=200) - subtitle = models.CharField(_("subtitle"), blank=True, default='', max_length=200) + title = models.CharField(_("title"), max_length=500) + subtitle = models.CharField( + _("subtitle"), blank=True, default='', max_length=500) # original name, for books in foreign language - orig_title = models.CharField(_("original title"), blank=True, default='', max_length=200) + orig_title = models.CharField( + _("original title"), blank=True, default='', max_length=500) author = postgres.ArrayField( - models.CharField(_("author"), blank=True, default='', max_length=100), + models.CharField(_("author"), blank=True, default='', max_length=200), null=True, blank=True, default=list, ) translator = postgres.ArrayField( - models.CharField(_("translator"), blank=True, default='', max_length=100), + models.CharField(_("translator"), blank=True, + default='', max_length=200), null=True, blank=True, default=list, ) - language = models.CharField(_("language"), blank=True, default='', max_length=10) - pub_house = models.CharField(_("publishing house"), blank=True, default='', max_length=200) + language = models.CharField( + _("language"), blank=True, default='', max_length=50) + pub_house = models.CharField( + _("publishing house"), blank=True, default='', max_length=200) pub_year = models.IntegerField(_("published year"), null=True, blank=True) - pub_month = models.IntegerField(_("published month"), null=True, blank=True) - binding = models.CharField(_("binding"), blank=True, default='', max_length=50) + pub_month = models.IntegerField( + _("published month"), null=True, blank=True) + binding = 
models.CharField( + _("binding"), blank=True, default='', max_length=200) # since data origin is not formatted and might be CNY USD or other currency, use char instead - price = models.CharField(_("pricing"), blank=True, default='', max_length=50) + price = models.CharField(_("pricing"), blank=True, + default='', max_length=50) pages = models.PositiveIntegerField(_("pages"), null=True, blank=True) - isbn = models.CharField(_("ISBN"), blank=True, null=False, max_length=20, db_index=True, default='') - # to store previously scrapped data - cover = models.ImageField(_("cover picture"), upload_to=book_cover_path, default=DEFAULT_BOOK_IMAGE, blank=True) + isbn = models.CharField(_("ISBN"), blank=True, null=False, + max_length=20, db_index=True, default='') + # to store previously scrapped data + cover = models.ImageField(_("cover picture"), upload_to=book_cover_path, + default=settings.DEFAULT_BOOK_IMAGE, blank=True) contents = models.TextField(blank=True, default="") + history = HistoricalRecords() class Meta: - # more info: https://docs.djangoproject.com/en/2.2/ref/models/options/ - # set managed=False if the model represents an existing table or - # a database view that has been created by some other means. 
- # check the link above for further info - # managed = True - # db_table = 'book' constraints = [ - models.CheckConstraint(check=models.Q(pub_year__gte=0), name='pub_year_lowerbound'), - models.CheckConstraint(check=models.Q(pub_month__lte=12), name='pub_month_upperbound'), - models.CheckConstraint(check=models.Q(pub_month__gte=1), name='pub_month_lowerbound'), + models.CheckConstraint(check=models.Q( + pub_year__gte=0), name='pub_year_lowerbound'), + models.CheckConstraint(check=models.Q( + pub_month__lte=12), name='pub_month_upperbound'), + models.CheckConstraint(check=models.Q( + pub_month__gte=1), name='pub_month_lowerbound'), ] def __str__(self): return self.title - + + def get_json(self): + r = { + 'subtitle': self.subtitle, + 'original_title': self.orig_title, + 'author': self.author, + 'translator': self.translator, + 'publisher': self.pub_house, + 'publish_year': self.pub_year, + 'publish_month': self.pub_month, + 'language': self.language, + 'isbn': self.isbn, + } + r.update(super().get_json()) + return r + def get_absolute_url(self): return reverse("books:retrieve", args=[self.id]) + @property + def wish_url(self): + return reverse("books:wish", args=[self.id]) + def get_tags_manager(self): return self.book_tags + def get_related_books(self): + qs = Q(orig_title=self.title) + if self.isbn: + qs = qs | Q(isbn=self.isbn) + if self.orig_title: + qs = qs | Q(title=self.orig_title) + qs = qs | Q(orig_title=self.orig_title) + qs = qs & ~Q(id=self.id) + return Book.objects.filter(qs) + + def get_identicals(self): + qs = Q(orig_title=self.title) + if self.isbn: + qs = Q(isbn=self.isbn) + # qs = qs & ~Q(id=self.id) + return Book.objects.filter(qs) + else: + return [self] # Book.objects.filter(id=self.id) + @property def verbose_category_name(self): return _("书籍") + @property + def mark_class(self): + return BookMark + + @property + def tag_class(self): + return BookTag + class BookMark(Mark): - book = models.ForeignKey(Book, on_delete=models.CASCADE, 
related_name='book_marks', null=True) + book = models.ForeignKey( + Book, on_delete=models.CASCADE, related_name='book_marks', null=True) + class Meta: constraints = [ - models.UniqueConstraint(fields=['owner', 'book'], name="unique_book_mark") + models.UniqueConstraint( + fields=['owner', 'book'], name="unique_book_mark") ] + @property + def translated_status(self): + return BookMarkStatusTranslation[self.status] + class BookReview(Review): - book = models.ForeignKey(Book, on_delete=models.CASCADE, related_name='book_reviews', null=True) + book = models.ForeignKey( + Book, on_delete=models.CASCADE, related_name='book_reviews', null=True) + class Meta: constraints = [ - models.UniqueConstraint(fields=['owner', 'book'], name="unique_book_review") - ] + models.UniqueConstraint( + fields=['owner', 'book'], name="unique_book_review") + ] + + @property + def url(self): + return settings.APP_WEBSITE + reverse("books:retrieve_review", args=[self.id]) + + @property + def item(self): + return self.book class BookTag(Tag): - book = models.ForeignKey(Book, on_delete=models.CASCADE, related_name='book_tags', null=True) - mark = models.ForeignKey(BookMark, on_delete=models.CASCADE, related_name='bookmark_tags', null=True) + book = models.ForeignKey( + Book, on_delete=models.CASCADE, related_name='book_tags', null=True) + mark = models.ForeignKey( + BookMark, on_delete=models.CASCADE, related_name='bookmark_tags', null=True) + class Meta: constraints = [ - models.UniqueConstraint(fields=['content', 'mark'], name="unique_bookmark_tag") + models.UniqueConstraint( + fields=['content', 'mark'], name="unique_bookmark_tag") ] + + @property + def item(self): + return self.book diff --git a/books/templates/books/create_update.html b/books/templates/books/create_update.html index fdd7ec61..de4b8ca7 100644 --- a/books/templates/books/create_update.html +++ b/books/templates/books/create_update.html @@ -10,8 +10,8 @@ -