From 95de2900fae8e8cb0fbf3f6dba3790f565499544 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 27 Mar 2025 07:51:47 -0400 Subject: [PATCH 1/2] [_132] data object put and create can raise an error rather than overwrite PUT will respect FORCE_FLAG_KW, to mimic iput; whereas create will now respect the force=True option when requested. --- irods/exception.py | 4 +++ irods/manager/data_object_manager.py | 20 ++++++++--- irods/test/data_obj_test.py | 54 ++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/irods/exception.py b/irods/exception.py index a4ab6aa5..1ff34150 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -24,6 +24,10 @@ class InvalidInputArgument(PycommandsException): pass +class LogicalPathAlreadyExists(PycommandsException): + pass + + class DataObjectDoesNotExist(DoesNotExist): pass diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 6b84f9f4..a0b28dd2 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -319,6 +319,9 @@ def put( ) else: obj = irods_path + if kw.FORCE_FLAG_KW not in options and self.exists(obj): + raise ex.OVERWRITE_WITHOUT_FORCE_FLAG + options.pop(kw.FORCE_FLAG_KW, None) with open(local_path, "rb") as f: sizelist = [] @@ -459,8 +462,18 @@ def parallel_put( updatables=updatables, ) - def create(self, path, resource=None, force=False, **options): - options[kw.DATA_TYPE_KW] = "generic" + def create(self, path, resource=None, force=None, **options): + """ + Create a new data object with the given logical path. + + 'resource', if provided, is the root node of a storage resource hierarchy where the object is preferentially to be created. + 'force', when False, raises an LogicalPathAlreadyExists if there is already a data object at the logical path specified. + """ + + if not force and self.exists(path): + raise ex.LogicalPathAlreadyExists + + options = {**options, kw.DATA_TYPE_KW: "generic"} if resource: options[kw.DEST_RESC_NAME_KW] = resource @@ -471,9 +484,6 @@ def create(self, path, resource=None, force=False, **options): except AttributeError: pass - if force: - options[kw.FORCE_FLAG_KW] = "" - message_body = FileOpenRequest( objPath=path, createMode=0o644, diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 428e0423..ae25edb7 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -3,6 +3,7 @@ from datetime import datetime, timezone import base64 import concurrent.futures +import collections import contextlib import errno import hashlib @@ -51,6 +52,7 @@ def is_localhost_synonym(name): from irods.access import iRODSAccess from irods.models import Collection, DataObject +from irods.path import iRODSPath from irods.test.helpers import iRODSUserLogins import irods.exception as ex from irods.column import Criterion @@ -2951,6 +2953,58 @@ def test_replica_truncate__issue_534(self): if data_objs.exists(data_path): data_objs.unlink(data_path, force=True) + def test_data_create_and_put_without_forcing__issue_132(self): + test_path = iRODSPath( + self.coll_path, unique_name(my_function_name(), datetime.now()) + ) + contents = b"** CONTENTS **" + Data_Object_Properties = collections.namedtuple( + "repl_properties", ["modify_times", "resource_names"] + ) + + def check_or_return_properties(data_repls, checked_value=None): + properties_dict = {_.resource_name: _.modify_time for _ in data_repls} + properties = Data_Object_Properties( + modify_times=list(properties_dict.values()), + resource_names=list(properties_dict.keys()), + ) + if (sentinel := checked_value) is not None: + return sentinel == properties + else: + return properties + + # Create a data object with known content. Record a reference state for later comparison. + test_data_object = self.sess.data_objects.create(test_path, force=True) + with test_data_object.open("w") as f: + f.write(contents) + orig_repl_state = check_or_return_properties(test_data_object.replicas) + + # Create should have made only a single replica. + self.assertEqual(1, len(orig_repl_state.resource_names)) + + # If using force=False, then an attempt to create an object at the same path should raise an exception. + with self.assertRaises(ex.LogicalPathAlreadyExists): + d2 = self.sess.data_objects.create(test_path, force=False) + + # Assert that a put without FORCE_FLAG_KW raises an Exception. + with NamedTemporaryFile() as t: + altered_contents = b"[[" + contents + b"]]" + t.write(altered_contents) + t.close() + with self.assertRaises(ex.OVERWRITE_WITHOUT_FORCE_FLAG): + self.sess.data_objects.put(t.name, test_path) + + # Assert that the data object's contents are unchanged. + with test_data_object.open("r") as f: + self.assertEqual(f.read(), contents) + + # Assert that the modify times and hosting resources haven't changed. + self.assertTrue( + check_or_return_properties( + self.sess.data_objects.get(test_path).replicas, orig_repl_state + ) + ) + if __name__ == "__main__": # let the tests find the parent irods lib From 362d1194df0e548dfa400497f9caa771413db7f9 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Sun, 4 May 2025 19:19:32 -0400 Subject: [PATCH 2/2] corrections --- irods/test/data_obj_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index ae25edb7..7f5a4430 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -2,8 +2,8 @@ from datetime import datetime, timezone import base64 -import concurrent.futures import collections +import concurrent.futures import contextlib import errno import hashlib @@ -2962,13 +2962,13 @@ def test_data_create_and_put_without_forcing__issue_132(self): "repl_properties", ["modify_times", "resource_names"] ) - def check_or_return_properties(data_repls, checked_value=None): + def check_or_return_properties(data_repls, reference_value=None): properties_dict = {_.resource_name: _.modify_time for _ in data_repls} properties = Data_Object_Properties( modify_times=list(properties_dict.values()), resource_names=list(properties_dict.keys()), ) - if (sentinel := checked_value) is not None: + if (sentinel := reference_value) is not None: return sentinel == properties else: return properties @@ -3001,7 +3001,7 @@ def check_or_return_properties(data_repls, checked_value=None): # Assert that the modify times and hosting resources haven't changed. self.assertTrue( check_or_return_properties( - self.sess.data_objects.get(test_path).replicas, orig_repl_state + self.sess.data_objects.get(test_path).replicas, reference_value = orig_repl_state ) )