modernize importers

parent c5434b44eb · commit 0e41a1e5ef
9 changed files with 233 additions and 243 deletions
@@ -2,14 +2,11 @@ import os
 import re
 from datetime import datetime

-import django_rq
 import openpyxl
 import pytz
-from auditlog.context import set_actor
 from django.conf import settings
 from loguru import logger
 from markdownify import markdownify as md
-from user_messages import api as msg

 from catalog.common import *
 from catalog.common.downloaders import *
@@ -17,6 +14,7 @@ from catalog.models import *
 from catalog.sites.douban import DoubanDownloader
 from common.utils import GenerateDateUUIDMediaFilePath
 from journal.models import *
+from users.models import Task

 _tz_sh = pytz.timezone("Asia/Shanghai")

@@ -40,77 +38,22 @@ def _fetch_remote_image(url):
     return url


-class DoubanImporter:
-    total = 0
-    processed = 0
-    skipped = 0
-    imported = 0
-    failed = []
-    visibility = 0
-    mode = 0
-    file = ""
-
-    def __init__(self, user, visibility, mode):
-        self.user = user
-        self.visibility = visibility
-        self.mode = mode
-
-    def update_user_import_status(self, status):
-        self.user.preference.import_status["douban_pending"] = status
-        self.user.preference.import_status["douban_file"] = self.file
-        self.user.preference.import_status["douban_visibility"] = self.visibility
-        self.user.preference.import_status["douban_mode"] = self.mode
-        self.user.preference.import_status["douban_total"] = self.total
-        self.user.preference.import_status["douban_processed"] = self.processed
-        self.user.preference.import_status["douban_skipped"] = self.skipped
-        self.user.preference.import_status["douban_imported"] = self.imported
-        self.user.preference.import_status["douban_failed"] = self.failed
-        self.user.preference.save(update_fields=["import_status"])
-
-    @classmethod
-    def reset(cls, user):
-        user.preference.import_status["douban_pending"] = 0
-        user.preference.save(update_fields=["import_status"])
-
-    @classmethod
-    def redo(cls, user):
-        file = user.preference.import_status["douban_file"]
-        imp = cls(
-            user,
-            user.preference.import_status["douban_visibility"],
-            user.preference.import_status["douban_mode"],
-        )
-        imp.file = file
-        jid = f"Douban_{user.id}_{os.path.basename(file)}_redo"
-        django_rq.get_queue("import").enqueue(imp.import_from_file_task, job_id=jid)
-
-    def import_from_file(self, uploaded_file):
-        try:
-            wb = openpyxl.open(
-                uploaded_file, read_only=True, data_only=True, keep_links=False
-            )
-            wb.close()
-            file = (
-                settings.MEDIA_ROOT
-                + "/"
-                + GenerateDateUUIDMediaFilePath("x.xlsx", settings.SYNC_FILE_PATH_ROOT)
-            )
-            os.makedirs(os.path.dirname(file), exist_ok=True)
-            with open(file, "wb") as destination:
-                for chunk in uploaded_file.chunks():
-                    destination.write(chunk)
-            self.file = file
-            self.update_user_import_status(2)
-            jid = f"Douban_{self.user.id}_{os.path.basename(self.file)}"
-            django_rq.get_queue("import").enqueue(
-                self.import_from_file_task, job_id=jid
-            )
-        except Exception as e:
-            logger.error(
-                f"unable to enqueue import {uploaded_file}", extra={"exception": e}
-            )
-            return False
-        return True
+class DoubanImporter(Task):
+    class Meta:
+        app_label = "journal"  # workaround bug in TypedModel
+
+    TaskQueue = "import"
+    DefaultMetadata = {
+        "total": 0,
+        "processed": 0,
+        "skipped": 0,
+        "imported": 0,
+        "failed": 0,
+        "mode": 0,
+        "visibility": 0,
+        "failed_urls": [],
+        "file": None,
+    }

     mark_sheet_config = {
         "想读": [ShelfType.WISHLIST],
@@ -135,13 +78,30 @@ class DoubanImporter:
         "剧评": [Performance],
         "游戏评论&攻略": [Game],
     }

+    @classmethod
+    def validate_file(cls, uploaded_file):
+        try:
+            wb = openpyxl.open(
+                uploaded_file, read_only=True, data_only=True, keep_links=False
+            )
+            sheets = cls.mark_sheet_config.keys() | cls.review_sheet_config.keys()
+            for name in sheets:
+                if name in wb:
+                    return True
+        except Exception as e:
+            logger.error(
+                f"unable to validate excel file {uploaded_file}", extra={"exception": e}
+            )
+        return False
+
     mark_data = {}
     review_data = {}
     entity_lookup = {}

     def load_sheets(self):
         """Load data into mark_data / review_data / entity_lookup"""
-        f = open(self.file, "rb")
+        f = open(self.metadata["file"], "rb")
         wb = openpyxl.load_workbook(f, read_only=True, data_only=True, keep_links=False)
         for data, config in [
             (self.mark_data, self.mark_sheet_config),
@@ -164,8 +124,9 @@ class DoubanImporter:
                     self.entity_lookup[k].append(v)
                 else:
                     self.entity_lookup[k] = [v]
-        self.total = sum(map(lambda a: len(a), self.mark_data.values()))
-        self.total += sum(map(lambda a: len(a), self.review_data.values()))
+        self.metadata["total"] = sum(map(lambda a: len(a), self.mark_data.values()))
+        self.metadata["total"] += sum(map(lambda a: len(a), self.review_data.values()))
+        self.save()

     def guess_entity_url(self, title, rating, timestamp):
         k = f"{title}|{rating}"
@@ -189,28 +150,20 @@ class DoubanImporter:
     #         if cells[0] == title and cells[5] == rating:
     #             return cells[3]

-    def import_from_file_task(self):
+    def run(self):
         logger.info(f"{self.user} import start")
-        msg.info(self.user, f"开始导入豆瓣标记和评论")
-        self.update_user_import_status(1)
-        with set_actor(self.user):
-            self.load_sheets()
-            logger.info(f"{self.user} sheet loaded, {self.total} lines total")
-            self.update_user_import_status(1)
-            for name, param in self.mark_sheet_config.items():
-                self.import_mark_sheet(self.mark_data[name], param[0], name)
-            for name, param in self.review_sheet_config.items():
-                self.import_review_sheet(self.review_data[name], name)
-            self.update_user_import_status(0)
-            msg.success(
-                self.user,
-                f"豆瓣标记和评论导入完成,共处理{self.total}篇,已存在{self.skipped}篇,新增{self.imported}篇。",
-            )
-            if len(self.failed):
-                msg.error(
-                    self.user,
-                    f'豆瓣评论导入时未能处理以下网址:\n{" , ".join(self.failed)}',
-                )
+        self.load_sheets()
+        logger.info(f"{self.user} sheet loaded, {self.metadata['total']} lines total")
+        for name, param in self.mark_sheet_config.items():
+            self.import_mark_sheet(self.mark_data[name], param[0], name)
+        for name, param in self.review_sheet_config.items():
+            self.import_review_sheet(self.review_data[name], name)
+        self.message = f"豆瓣标记和评论导入完成,共处理{self.metadata['total']}篇,已存在{self.metadata['skipped']}篇,新增{self.metadata['imported']}篇。"
+        if len(self.metadata["failed_urls"]) > 0:
+            self.message += (
+                f'导入时未能处理以下网址:\n{" , ".join(self.metadata["failed_urls"])}'
+            )
+        self.save()

     def import_mark_sheet(self, worksheet, shelf_type, sheet_name):
         prefix = f"{self.user} {sheet_name}|"
@@ -234,7 +187,7 @@ class DoubanImporter:
             except Exception:
                 tags = []
             comment = cells[7] if len(cells) >= 8 else None
-            self.processed += 1
+            self.metadata["processed"] += 1
             try:
                 if type(time) == str:
                     time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
@@ -243,10 +196,10 @@ class DoubanImporter:
                 time = None
             r = self.import_mark(url, shelf_type, comment, rating_grade, tags, time)
             if r == 1:
-                self.imported += 1
+                self.metadata["imported"] += 1
             elif r == 2:
-                self.skipped += 1
-            self.update_user_import_status(1)
+                self.metadata["skipped"] += 1
+            self.save()

     def import_mark(self, url, shelf_type, comment, rating_grade, tags, time):
         """
@@ -257,7 +210,7 @@ class DoubanImporter:
             logger.warning(f"{self.user} | match/fetch {url} failed")
             return
         mark = Mark(self.user.identity, item)
-        if self.mode == 0 and (
+        if self.metadata["mode"] == 0 and (
             mark.shelf_type == shelf_type
             or mark.shelf_type == ShelfType.COMPLETE
             or (
@@ -268,7 +221,12 @@ class DoubanImporter:
             print("-", end="", flush=True)
             return 2
         mark.update(
-            shelf_type, comment, rating_grade, tags, self.visibility, created_time=time
+            shelf_type,
+            comment,
+            rating_grade,
+            tags,
+            self.metadata["visibility"],
+            created_time=time,
         )
         print("+", end="", flush=True)
         return 1
@@ -289,7 +247,7 @@ class DoubanImporter:
             time = cells[3]
             rating = cells[4]
             content = cells[6]
-            self.processed += 1
+            self.metadata["processed"] += 1
             if time:
                 if type(time) == str:
                     time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
@@ -304,12 +262,12 @@ class DoubanImporter:
                 entity_title, rating, title, review_url, content, time
             )
             if r == 1:
-                self.imported += 1
+                self.metadata["imported"] += 1
             elif r == 2:
-                self.skipped += 1
+                self.metadata["skipped"] += 1
             else:
-                self.failed.append(review_url)
-            self.update_user_import_status(1)
+                self.metadata["failed_urls"].append(review_url)
+            self.save()

     def get_item_by_url(self, url):
         item = None
@@ -337,7 +295,7 @@ class DoubanImporter:
         except Exception as e:
             logger.error(f"fetching error: {url}", extra={"exception": e})
         if item is None:
-            self.failed.append(str(url))
+            self.metadata["failed_urls"].append(str(url))
         return item

     def import_review(self, entity_title, rating, title, review_url, content, time):
@@ -367,7 +325,7 @@ class DoubanImporter:
             logger.warning(f"{prefix} match/fetch {url} failed")
             return
         if (
-            self.mode == 1
+            self.metadata["mode"] == 1
             and Review.objects.filter(owner=self.user.identity, item=item).exists()
         ):
             return 2
@@ -387,7 +345,7 @@ class DoubanImporter:
             "edited_time": time,
             "title": title,
             "body": content,
-            "visibility": self.visibility,
+            "visibility": self.metadata["visibility"],
         }
         try:
             Review.objects.update_or_create(
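The heart of the Douban change: the importer stops writing a dozen `douban_*` keys into `user.preference.import_status` and instead becomes a `Task` row whose `metadata` dict carries the counters, with `self.save()` persisting progress and `self.message` replacing the `msg.*` toasts. A minimal, self-contained sketch of that pattern — `FakeTask` below is a hypothetical stand-in for illustration, not the project's actual `users.models.Task` API:

```python
# FakeTask is a hypothetical stand-in for users.models.Task, illustration only.
class FakeTask:
    DefaultMetadata: dict = {}

    def __init__(self, user, **kwargs):
        self.user = user
        self.message = ""
        # each task row carries its own metadata blob, replacing the
        # per-user preference.import_status bookkeeping
        self.metadata = {**self.DefaultMetadata, **kwargs}

    @classmethod
    def create(cls, user, **kwargs):
        return cls(user, **kwargs)

    def enqueue(self):
        # the real Task enqueues self.run on the "import" queue via django-rq;
        # this sketch just runs synchronously
        self.run()
        return self

    def save(self):
        pass  # the real Task persists metadata and message to the database

    def run(self):
        raise NotImplementedError


class MiniImporter(FakeTask):
    DefaultMetadata = {"total": 0, "processed": 0, "imported": 0}

    def run(self):
        for _ in range(self.metadata["total"]):
            self.metadata["processed"] += 1
            self.metadata["imported"] += 1
            self.save()  # progress becomes visible to the status page on each save
        self.message = f"imported {self.metadata['imported']} items"


task = MiniImporter.create("alice", total=3).enqueue()
print(task.message)  # -> imported 3 items
```

The same shape repeats verbatim for the Goodreads importer below; only the `DefaultMetadata` keys differ (`url` instead of `file`/`mode`).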
@@ -1,16 +1,14 @@
 import re
 from datetime import datetime

-import django_rq
-from auditlog.context import set_actor
 from django.utils import timezone
 from django.utils.timezone import make_aware
-from user_messages import api as msg

 from catalog.common import *
 from catalog.common.downloaders import *
 from catalog.models import *
 from journal.models import *
+from users.models import Task

 re_list = r"^https://www\.goodreads\.com/list/show/\d+"
 re_shelf = r"^https://www\.goodreads\.com/review/list/\d+[^\?]*\?shelf=[^&]+"
@@ -24,93 +22,104 @@ gr_rating = {
 }


-class GoodreadsImporter:
+class GoodreadsImporter(Task):
+    class Meta:
+        app_label = "journal"  # workaround bug in TypedModel
+
+    TaskQueue = "import"
+    DefaultMetadata = {
+        "total": 0,
+        "processed": 0,
+        "skipped": 0,
+        "imported": 0,
+        "failed": 0,
+        "visibility": 0,
+        "failed_urls": [],
+        "url": None,
+    }
+
     @classmethod
-    def import_from_url(cls, raw_url, user):
+    def validate_url(cls, raw_url):
         match_list = re.match(re_list, raw_url)
         match_shelf = re.match(re_shelf, raw_url)
         match_profile = re.match(re_profile, raw_url)
         if match_profile or match_shelf or match_list:
-            django_rq.get_queue("import").enqueue(
-                cls.import_from_url_task, raw_url, user
-            )
             return True
         else:
             return False

-    @classmethod
-    def import_from_url_task(cls, url, user):
+    def run(self):
+        url = self.metadata["url"]
+        user = self.user
         match_list = re.match(re_list, url)
         match_shelf = re.match(re_shelf, url)
         match_profile = re.match(re_profile, url)
         total = 0
         visibility = user.preference.default_visibility
-        with set_actor(user):
-            shelf = None
-            if match_shelf:
-                shelf = cls.parse_shelf(match_shelf[0], user)
-            elif match_list:
-                shelf = cls.parse_list(match_list[0], user)
-            if shelf:
-                if shelf["title"] and shelf["books"]:
-                    collection = Collection.objects.create(
-                        title=shelf["title"],
-                        brief=shelf["description"]
-                        + "\n\nImported from [Goodreads]("
-                        + url
-                        + ")",
-                        owner=user.identity,
-                    )
-                    for book in shelf["books"]:
-                        collection.append_item(book["book"], note=book["review"])
-                        total += 1
-                    collection.save()
-                msg.success(
-                    user,
-                    f'Imported {total} books from Goodreads as a Collection {shelf["title"]}.',
-                )
-            elif match_profile:
-                uid = match_profile[1]
-                shelves = {
-                    ShelfType.WISHLIST: f"https://www.goodreads.com/review/list/{uid}?shelf=to-read",
-                    ShelfType.PROGRESS: f"https://www.goodreads.com/review/list/{uid}?shelf=currently-reading",
-                    ShelfType.COMPLETE: f"https://www.goodreads.com/review/list/{uid}?shelf=read",
-                }
-                for shelf_type in shelves:
-                    shelf_url = shelves.get(shelf_type)
-                    shelf = cls.parse_shelf(shelf_url, user)
-                    for book in shelf["books"]:
-                        mark = Mark(user.identity, book["book"])
-                        if (
-                            (
-                                mark.shelf_type == shelf_type
-                                and mark.comment_text == book["review"]
-                            )
-                            or (
-                                mark.shelf_type == ShelfType.COMPLETE
-                                and shelf_type != ShelfType.COMPLETE
-                            )
-                            or (
-                                mark.shelf_type == ShelfType.PROGRESS
-                                and shelf_type == ShelfType.WISHLIST
-                            )
-                        ):
-                            print(
-                                f'Skip {shelf_type}/{book["book"]} bc it was marked {mark.shelf_type}'
-                            )
-                        else:
-                            mark.update(
-                                shelf_type,
-                                book["review"],
-                                book["rating"],
-                                visibility=visibility,
-                                created_time=book["last_updated"] or timezone.now(),
-                            )
-                            total += 1
-                msg.success(user, f"Imported {total} records from Goodreads profile.")
+        shelf = None
+        if match_shelf:
+            shelf = self.parse_shelf(match_shelf[0])
+        elif match_list:
+            shelf = self.parse_list(match_list[0])
+        if shelf:
+            if shelf["title"] and shelf["books"]:
+                collection = Collection.objects.create(
+                    title=shelf["title"],
+                    brief=shelf["description"]
+                    + "\n\nImported from [Goodreads]("
+                    + url
+                    + ")",
+                    owner=user.identity,
+                )
+                for book in shelf["books"]:
+                    collection.append_item(book["book"], note=book["review"])
+                    total += 1
+                collection.save()
+            self.message = f'Imported {total} books from Goodreads as a Collection {shelf["title"]}.'
+        elif match_profile:
+            uid = match_profile[1]
+            shelves = {
+                ShelfType.WISHLIST: f"https://www.goodreads.com/review/list/{uid}?shelf=to-read",
+                ShelfType.PROGRESS: f"https://www.goodreads.com/review/list/{uid}?shelf=currently-reading",
+                ShelfType.COMPLETE: f"https://www.goodreads.com/review/list/{uid}?shelf=read",
+            }
+            for shelf_type in shelves:
+                shelf_url = shelves.get(shelf_type)
+                shelf = self.parse_shelf(shelf_url)
+                for book in shelf["books"]:
+                    mark = Mark(user.identity, book["book"])
+                    if (
+                        (
+                            mark.shelf_type == shelf_type
+                            and mark.comment_text == book["review"]
+                        )
+                        or (
+                            mark.shelf_type == ShelfType.COMPLETE
+                            and shelf_type != ShelfType.COMPLETE
+                        )
+                        or (
+                            mark.shelf_type == ShelfType.PROGRESS
+                            and shelf_type == ShelfType.WISHLIST
+                        )
+                    ):
+                        print(
+                            f'Skip {shelf_type}/{book["book"]} bc it was marked {mark.shelf_type}'
+                        )
+                    else:
+                        mark.update(
+                            shelf_type,
+                            book["review"],
+                            book["rating"],
+                            visibility=visibility,
+                            created_time=book["last_updated"] or timezone.now(),
+                        )
+                        total += 1
+            self.message = f"Imported {total} records from Goodreads profile."
+        self.metadata["total"] = total
+        self.save()

     @classmethod
-    def get_book(cls, url, user):
+    def get_book(cls, url):
         site = SiteManager.get_site_by_url(url)
         if site:
             book = site.get_item()
@@ -121,7 +130,7 @@ class GoodreadsImporter:
         return book

     @classmethod
-    def parse_shelf(cls, url, user):
+    def parse_shelf(cls, url):
         # return {'title': 'abc', books: [{'book': obj, 'rating': 10, 'review': 'txt'}, ...]}
         title = ""
         books = []
@@ -194,7 +203,7 @@ class GoodreadsImporter:
             except Exception:
                 print(f"Error loading/parsing review{url_review}, ignored")
             try:
-                book = cls.get_book(url_book, user)
+                book = cls.get_book(url_book)
                 books.append(
                     {
                         "url": url_book,
@@ -216,7 +225,7 @@ class GoodreadsImporter:
         return {"title": title, "description": "", "books": books}

     @classmethod
-    def parse_list(cls, url, user):
+    def parse_list(cls, url):
         # return {'title': 'abc', books: [{'book': obj, 'rating': 10, 'review': 'txt'}, ...]}
         title = ""
         description = ""
@@ -237,7 +246,7 @@ class GoodreadsImporter:
         for link in links:  # type:ignore
             url_book = "https://www.goodreads.com" + link
             try:
-                book = cls.get_book(url_book, user)
+                book = cls.get_book(url_book)
                 books.append(
                     {
                         "url": url_book,
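`validate_url` now only answers yes/no; enqueueing moved out to the view via `create(...).enqueue()`. The check itself is three anchored regexes. A standalone sketch of it (`re_profile` is not shown in this hunk, so the pattern below is a guess shaped to match the placeholder URL used in the template):

```python
import re

re_list = r"^https://www\.goodreads\.com/list/show/\d+"
re_shelf = r"^https://www\.goodreads\.com/review/list/\d+[^\?]*\?shelf=[^&]+"
# assumed shape of re_profile, which this diff does not show
re_profile = r"^https://www\.goodreads\.com/user/show/(\d+)"


def validate_url(raw_url: str) -> bool:
    # mirrors GoodreadsImporter.validate_url: accept list, shelf, or profile URLs
    return any(re.match(p, raw_url) for p in (re_list, re_shelf, re_profile))


assert validate_url("https://www.goodreads.com/list/show/1.Best_Books_Ever")
assert validate_url("https://www.goodreads.com/user/show/12345-janedoe")
assert not validate_url("https://example.com/shelf")
```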
@@ -25,16 +25,6 @@ class Command(BaseCommand):
             action="store_true",
             help="purge invalid data (visibility=99)",
         )
-        parser.add_argument(
-            "--douban-import-redo",
-            action="store",
-            help="reimport for user id",
-        )
-        parser.add_argument(
-            "--douban-import-reset",
-            action="store",
-            help="reset for user id",
-        )
         parser.add_argument(
             "--integrity",
             action="store_true",
@@ -66,14 +56,4 @@ class Command(BaseCommand):
                 self.stdout.write(f"Cleaning up {cls}...")
                 cls.objects.filter(visibility=99).delete()

-        if options["douban_import_redo"]:
-            user = User.objects.get(pk=options["douban_import_redo"])
-            self.stdout.write(f"Redo import for {user}...")
-            DoubanImporter.redo(user)
-
-        if options["douban_import_reset"]:
-            user = User.objects.get(pk=options["douban_import_reset"])
-            self.stdout.write(f"Reset import for {user}...")
-            DoubanImporter.reset(user)
-
         self.stdout.write(self.style.SUCCESS(f"Done."))
@@ -4,7 +4,6 @@ from django.db import migrations
-

 class Migration(migrations.Migration):

     dependencies = [
         ("users", "0006_alter_task_type"),
         ("journal", "0003_note_progress"),
@@ -21,4 +20,24 @@ class Migration(migrations.Migration):
             },
             bases=("users.task",),
         ),
+        migrations.CreateModel(
+            name="DoubanImporter",
+            fields=[],
+            options={
+                "proxy": True,
+                "indexes": [],
+                "constraints": [],
+            },
+            bases=("users.task",),
+        ),
+        migrations.CreateModel(
+            name="GoodreadsImporter",
+            fields=[],
+            options={
+                "proxy": True,
+                "indexes": [],
+                "constraints": [],
+            },
+            bases=("users.task",),
+        ),
     ]
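Why the new migration entries carry `fields=[]`: a TypedModel subclass (the `# workaround bug in TypedModel` comment above refers to this machinery) adds no columns of its own — it shares the parent `users.Task` table and is distinguished by its `type` column, so Django records it as a proxy-style model. A sketch of the shape involved, assuming the django-typedmodels package; this needs a configured Django project to actually run, and the field list here is illustrative, not the real `users.models.Task`:

```python
from django.db import models
from typedmodels.models import TypedModel


class Task(TypedModel):  # simplified stand-in for users.models.Task
    user = models.ForeignKey("users.User", on_delete=models.CASCADE)
    metadata = models.JSONField(default=dict)

    class Meta:
        app_label = "users"


class DoubanImporter(Task):
    # no new fields, so makemigrations emits
    # CreateModel(name=..., fields=[], options={"proxy": True, ...}, bases=("users.task",))
    class Meta:
        app_label = "journal"  # workaround bug in TypedModel, as in the diff
```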
@@ -14,7 +14,9 @@ class Migration(migrations.Migration):
             name="type",
             field=models.CharField(
                 choices=[
+                    ("journal.doubanimporter", "douban importer"),
                     ("journal.doufenexporter", "doufen exporter"),
+                    ("journal.goodreadsimporter", "goodreads importer"),
                     ("journal.letterboxdimporter", "letterboxd importer"),
                 ],
                 db_index=True,
@@ -21,10 +21,10 @@ class Preference(models.Model):
     )
     export_status = models.JSONField(
         blank=True, null=True, encoder=DjangoJSONEncoder, default=dict
-    )
+    )  # deprecated
     import_status = models.JSONField(
         blank=True, null=True, encoder=DjangoJSONEncoder, default=dict
-    )
+    )  # deprecated
     # 0: public, 1: follower only, 2: private
     default_visibility = models.PositiveSmallIntegerField(null=False, default=0)
     # 0: public, 1: unlisted, 4: local
@@ -64,7 +64,7 @@
         </label>
       </p>
       <input type="submit"
-             {% if import_status.douban_pending %} onclick="return confirm('{% trans "Another import is in progress, starting a new import may cause issues, sure to import?" %}')" value="{% trans "Import in progress, please wait" %}" {% else %} value="{% trans 'Import' %}" {% endif %} />
+             {% if import_task.status == "pending" %} onclick="return confirm('{% trans "Another import is in progress, starting a new import may cause issues, sure to import?" %}')" value="{% trans "Import in progress, please wait" %}" {% else %} value="{% trans 'Import' %}" {% endif %} />
     </form>
     <div hx-get="{% url 'users:import_status' %}"
          hx-trigger="load delay:1s"
@@ -84,6 +84,15 @@
              placeholder="https://www.goodreads.com/user/show/12345-janedoe"
              required>
       <input type="submit" value="{% trans 'Import' %}" />
+      <small>
+        {% if goodreads_task %}
+          <br>
+          {% trans 'Last import started' %}: {{ goodreads_task.created_time }}
+          {% trans 'Status' %}: {{ goodreads_task.get_state_display }}。
+          <br>
+          {{ goodreads_task.message }}
+        {% endif %}
+      </small>
     </div>
     <ul>
       <li>
@@ -1,19 +1,15 @@
 {% load i18n %}
-{% if import_status.douban_pending == 2 %}
-  正在等待
-{% elif import_status.douban_pending == 1 %}
-  <div hx-get="{% url 'users:import_status' %} "
-       hx-trigger="every 15s"
-       hx-swap="outerHTML">
-    正在导入
-    {% if import_status.douban_total %}
-      <br>
-      <progress value="{{ import_status.douban_processed }}"
-                max="{{ import_status.douban_total }}"></progress>
-      共{{ import_status.douban_total }}篇,目前已处理{{ import_status.douban_processed }}篇,其中已存在{{ import_status.douban_skipped }}篇,新增{{ import_status.douban_imported }}篇
-    {% endif %}
-  </div>
-{% elif import_status.douban_file %}
-  上次结果
-  共计{{ import_status.douban_total }}篇,处理{{ import_status.douban_processed }}篇,其中已存在{{ import_status.douban_skipped }}篇,新增{{ import_status.douban_imported }}篇
-{% endif %}
+{% trans 'Last import started' %}: {{ import_task.created_time }}
+{% trans 'Status' %}: {{ import_task.get_state_display }}。
+{% if import_task.metadata.total %}
+  <br>
+  <progress value="{{ import_task.metadata.processed }}"
+            max="{{ import_task.metadata.total }}"></progress>
+  共{{ import_task.metadata.total }}篇,已处理{{ import_task.metadata.processed }}篇,其中已存在{{ import_task.metadata.skipped }}篇,新增{{ import_task.metadata.imported }}篇
+  <br>
+  {% if import_task.metadata.failed_urls %}
+    {% trans 'Failed links, you may have to mark them manually' %}
+    <br>
+    <textarea readonly>{% for url in import_task.metadata.failed_urls %}{{url}} {% endfor %}</textarea>
+  {% endif %}
+{% endif %}
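The status partial above is now pure presentation over `import_task.metadata`: the same counters drive the `<progress>` bar and the failed-URL textarea. A tiny standalone illustration of the contract the template relies on (plain Python with dummy data, not project code):

```python
def render_status(metadata, message=""):
    # mirrors the template: progress value/max, counters, then failed URLs if any
    pct = 100 * metadata["processed"] // max(metadata["total"], 1)
    lines = [f"{pct}% ({metadata['processed']}/{metadata['total']})", message]
    if metadata["failed_urls"]:
        lines.append("failed: " + " , ".join(metadata["failed_urls"]))
    return "\n".join(lines)


print(
    render_status(
        {"total": 120, "processed": 45, "skipped": 10, "imported": 35,
         "failed_urls": ["https://example.com/review/1"]},  # dummy values
        message="in progress",
    )
)
```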
@@ -95,9 +95,10 @@ def data(request):
         "users/data.html",
         {
             "allow_any_site": settings.MASTODON_ALLOW_ANY_SITE,
-            "import_status": request.user.preference.import_status,
+            "import_task": DoubanImporter.latest_task(request.user),
             "export_task": DoufenExporter.latest_task(request.user),
             "letterboxd_task": LetterboxdImporter.latest_task(request.user),
+            "goodreads_task": GoodreadsImporter.latest_task(request.user),
             "years": years,
         },
     )
@@ -109,7 +110,7 @@ def data_import_status(request):
         request,
         "users/data_import_status.html",
         {
-            "import_status": request.user.preference.import_status,
+            "import_task": DoubanImporter.latest_task(request.user),
         },
     )

@@ -185,7 +186,12 @@ def reset_visibility(request):
 def import_goodreads(request):
     if request.method == "POST":
         raw_url = request.POST.get("url")
-        if GoodreadsImporter.import_from_url(raw_url, request.user):
+        if GoodreadsImporter.validate_url(raw_url):
+            GoodreadsImporter.create(
+                request.user,
+                visibility=int(request.POST.get("visibility", 0)),
+                url=raw_url,
+            ).enqueue()
             messages.add_message(request, messages.INFO, _("Import in progress."))
         else:
             messages.add_message(request, messages.ERROR, _("Invalid URL."))
@@ -194,18 +200,29 @@ def import_goodreads(request):

 @login_required
 def import_douban(request):
-    if request.method == "POST":
-        importer = DoubanImporter(
-            request.user,
-            int(request.POST.get("visibility", 0)),
-            int(request.POST.get("import_mode", 0)),
-        )
-        if importer.import_from_file(request.FILES["file"]):
-            messages.add_message(
-                request, messages.INFO, _("File is uploaded and will be imported soon.")
-            )
-        else:
-            messages.add_message(request, messages.ERROR, _("Invalid file."))
+    if request.method != "POST":
+        return redirect(reverse("users:data"))
+    f = (
+        settings.MEDIA_ROOT
+        + "/"
+        + GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT)
+    )
+    os.makedirs(os.path.dirname(f), exist_ok=True)
+    with open(f, "wb+") as destination:
+        for chunk in request.FILES["file"].chunks():
+            destination.write(chunk)
+    if not DoubanImporter.validate_file(request.FILES["file"]):
+        messages.add_message(request, messages.ERROR, _("Invalid file."))
+        return redirect(reverse("users:data"))
+    DoubanImporter.create(
+        request.user,
+        visibility=int(request.POST.get("visibility", 0)),
+        mode=int(request.POST.get("import_mode", 0)),
+        file=f,
+    ).enqueue()
+    messages.add_message(
+        request, messages.INFO, _("File is uploaded and will be imported soon.")
+    )
     return redirect(reverse("users:data"))