fix uploading and parsing bugs

This commit is contained in:
doubaniux 2021-09-01 11:41:21 +02:00
parent 8dd90644bc
commit c309628af3
6 changed files with 29 additions and 28 deletions

View file

@ -3,3 +3,7 @@ from django.apps import AppConfig
class SyncConfig(AppConfig):
name = 'sync'
def ready(self):
from sync.jobs import sync_task_manager
sync_task_manager.start()

View file

@ -10,6 +10,7 @@ class SyncTaskForm(forms.ModelForm):
model = SyncTask
fields = [
"user",
"file",
"overwrite",
"sync_book",
"sync_movie",

View file

@ -21,6 +21,8 @@ from .models import SyncTask
__all__ = ['sync_task_manager']
logger = logging.getLogger(__name__)
class SyncTaskManger:
# in seconds
@ -41,7 +43,8 @@ class SyncTaskManger:
def __start_new_worker(self, task):
new_worker = threading.Thread(
target=sync_doufen_job, args=[task, self.is_stopped])
target=sync_doufen_job, args=[task, self.is_stopped], daemon=True
)
self.__worker_threads.append(new_worker)
new_worker.start()
@ -71,10 +74,9 @@ class SyncTaskManger:
def start(self):
self.__enqueue_existing_tasks() # enqueue
self.__listen_for_new_task() # enqueue
listen_new_task_thread = threading.Thread(
target=self.__listen_for_new_task)
target=self.__listen_for_new_task, daemon=True)
self.__worker_threads.append(listen_new_task_thread)
@ -169,12 +171,12 @@ class DoufenParser:
# parse data
for i in range(start_row_index, ws.max_row + 1):
# url definitely exists
url = ws.cell(row=i, column=URL_INDEX).value
url = ws.cell(row=i, column=self.URL_INDEX).value
tags = ws.cell(row=i, column=TAG_INDEX).value
tags = ws.cell(row=i, column=self.TAG_INDEX).value
tags = tags.split(',') if tags else None
time = ws.cell(row=i, column=TIME_INDEX).value
time = ws.cell(row=i, column=self.TIME_INDEX).value
if time:
time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
tz = pytz.timezone('Asia/Shanghai')
@ -182,11 +184,11 @@ class DoufenParser:
else:
time = None
content = ws.cell(row=i, column=CONTENT_INDEX).value
content = ws.cell(row=i, column=self.CONTENT_INDEX).value
if not content:
content = ""
rating = ws.cell(row=i, column=RATING_INDEX).value
rating = ws.cell(row=i, column=self.RATING_INDEX).value
rating = int(rating) * 2 if rating else None
# store result
@ -293,6 +295,9 @@ def overwrite_mark(entity, entity_class, mark, mark_class, tag_class, data, shee
def sync_doufen_job(task, stop_check_func):
"""
TODO: Update the task status only once every fixed number of items to reduce I/O.
"""
parser = DoufenParser(task)
items = parser.parse()
@ -351,15 +356,14 @@ def sync_doufen_job(task, stop_check_func):
task.failed_urls.append(data.url)
task.save(update_fields=['failed_urls'])
continue
task.success_items += 1
task.save(update_fields=["success_items"])
# if task finish
if len(items) == 0:
task.is_finished = True
task.clear_progress()
task.clear_breakpoint()
task.save(update_fields=['is_finished', 'break_point'])
@ -376,6 +380,8 @@ def translate_status(sheet_name):
sync_task_manager = SyncTaskManger()
# sync_task_manager.start()
signal.signal(signal.SIGTERM, sync_task_manager.stop)
if sys.platform.startswith('linux'):
signal.signal(signal.SIGHUP, sync_task_manager.stop)

View file

@ -18,7 +18,7 @@ class SyncTask(models.Model):
#-----------------------------------#
user = models.ForeignKey(
User, on_delete=models.CASCADE, related_name='user_%(class)ss')
file = models.FileField(upload_to=sync_file_path)
file = models.FileField(upload_to=sync_file_path, default='')
#-----------------------------------#
# options
@ -60,15 +60,6 @@ class SyncTask(models.Model):
started_time = models.DateTimeField(auto_now_add=True)
ended_time = models.DateTimeField(auto_now=True)
# how many items are overwritten
# overwrite_books = models.PositiveIntegerField(default=0)
# overwrite_movies = models.PositiveIntegerField(default=0)
# overwrite_music = models.PositiveIntegerField(default=0)
# overwrite_games = models.PositiveIntegerField(default=0)
# thread pid
# pid = models.PositiveIntegerField(blank=True, null=True)
class Meta:
"""Meta definition for SyncTask."""
@ -107,7 +98,8 @@ class SyncTask(models.Model):
def get_breakpoint(self):
if not self.break_point:
return None, None
return self.break_point.split('-')
progress = self.break_point.split('-')
return progress[0], int(progress[1])
def set_breakpoint(self, sheet_name, row_index, save=False):
self.break_point = f"{sheet_name}-{row_index}"

View file

@ -21,7 +21,7 @@ def sync_douban(request):
if request.method == 'POST':
# validate submitted data
try:
uploaded_file = request.FILES['xlsx']
uploaded_file = request.FILES['file']
wb = openpyxl.open(uploaded_file, read_only=True,
data_only=True, keep_links=False)
wb.close()
@ -29,6 +29,7 @@ def sync_douban(request):
# raise e
return HttpResponseBadRequest(content="invalid excel file")
# file_data = {'file': request.FILES['xlsx']}
form = SyncTaskForm(request.POST, request.FILES)
if form.is_valid():
# stop all previous tasks

View file

@ -547,9 +547,8 @@
</div>
</div>
<!-- import douban data -->
{% comment %}
<!-- import douban data -->
{% if user == request.user %}
<div class="aside-section-wrapper aside-section-wrapper--transparent aside-section-wrapper--collapse">
<div class="import-panel">
@ -593,7 +592,7 @@
<span id="visibilityHelp" class="import-panel__help">?</span>
<div></div>
<div class="import-panel__file-input">
<input type="file" name="xlsx" id="excelFile" required accept=".xlsx">
<input type="file" name="file" id="excelFile" required accept=".xlsx">
</div>
<input type="submit" class="import-panel__button" value="{% trans '导入' %}" id="uploadBtn">
</form>
@ -654,8 +653,6 @@
</div>
</div>
{% endif %}
{% endcomment %}
<div
class="aside-section-wrapper aside-section-wrapper--transparent aside-section-wrapper--collapse">