From 5f61502a53ce4981ffaaf86f3f31945a0ad7d77b Mon Sep 17 00:00:00 2001 From: Karl Engelhardt Date: Wed, 21 Dec 2022 17:43:21 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Zip=20Download:=20Handle=20dupli?= =?UTF-8?q?cate=20filenames?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/filingcabinet/views.py | 35 ++++++++++++++++++++++++++++------- tests/test_misc.py | 18 ++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 tests/test_misc.py diff --git a/src/filingcabinet/views.py b/src/filingcabinet/views.py index 4a2fc23..50936fd 100644 --- a/src/filingcabinet/views.py +++ b/src/filingcabinet/views.py @@ -1,4 +1,7 @@ import json +import os.path +from collections import defaultdict +from pathlib import Path from django.http import HttpResponse, StreamingHttpResponse from django.shortcuts import Http404, get_object_or_404, redirect @@ -195,6 +198,7 @@ def render_to_response(self, context): archive_stream = zipstream.ZipFile(mode="w") coll_docs = CollectionDocument.objects.filter(collection=self.object) directory_dirname_map = {} + filename_counter = defaultdict(int) for doc in coll_docs: if doc.directory is not None: if doc.directory.pk not in directory_dirname_map: @@ -202,19 +206,20 @@ def render_to_response(self, context): *(x.name for x in doc.directory.get_ancestors()), doc.directory.name, ] - dirname = "/".join(path_to_root) + dirname = os.path.join(*path_to_root) directory_dirname_map[doc.directory.pk] = dirname else: dirname = directory_dirname_map[doc.directory.pk] - filename = "/".join( - [ - dirname, - doc.document.get_document_filename(), - ] + filename = os.path.join( + dirname, + doc.document.get_document_filename(), ) else: filename = doc.document.get_document_filename() - archive_stream.write(doc.document.get_file_path(), arcname=filename) + archive_stream.write( + doc.document.get_file_path(), + arcname=ensure_unique_filename(filename_counter, filename), + ) resp = StreamingHttpResponse( archive_stream, @@ -224,6 +229,22 @@ def render_to_response(self, context): return resp +def ensure_unique_filename(filename_counter: defaultdict, filename: str): + if filename_counter[filename] > 0: + original_path = Path(filename) + original_filename = filename + while filename_counter[filename] > 0: + filename = str( + original_path.with_stem( + original_path.stem + "-" + str(filename_counter[original_filename]) + ) + ) + filename_counter[original_filename] += 1 + + filename_counter[filename] += 1 + return filename + + class DocumentCollectionEmbedView(DocumentCollectionView): template_name = "filingcabinet/documentcollection_detail_embed.html" redirect_url_name = "filingcabinet:document-collection_embed" diff --git a/tests/test_misc.py b/tests/test_misc.py new file mode 100644 index 0000000..500e229 --- /dev/null +++ b/tests/test_misc.py @@ -0,0 +1,18 @@ +from collections import defaultdict + +from filingcabinet.views import ensure_unique_filename + + +def test_ensure_unique_filename(): + filename_counter = defaultdict(int) + + assert ensure_unique_filename(filename_counter, "file.pdf") == "file.pdf" + assert ensure_unique_filename(filename_counter, "file.pdf") == "file-1.pdf" + assert ensure_unique_filename(filename_counter, "file.pdf") == "file-2.pdf" + assert ensure_unique_filename(filename_counter, "file.pdf") == "file-3.pdf" + assert ensure_unique_filename(filename_counter, "dir/file.pdf") == "dir/file.pdf" + assert ensure_unique_filename(filename_counter, "dir/file.pdf") == "dir/file-1.pdf" + assert ensure_unique_filename(filename_counter, "dir/file.pdf") == "dir/file-2.pdf" + assert ensure_unique_filename(filename_counter, "file-1.pdf") == "file-1-1.pdf" + assert ensure_unique_filename(filename_counter, "file-4.pdf") == "file-4.pdf" + assert ensure_unique_filename(filename_counter, "file.pdf") == "file-5.pdf"