Checking consistency of arguments in @batch calls

chr1st1ank · chr1st1ank · commit bb44552c27a6 · 2021-07-15T14:45:29.000+02:00
diff --git a/dike/__init__.py b/dike/__init__.py
@@ -106,23 +106,35 @@ async def limited_call(*args, **kwargs):
 
 # Deactivate mccabe's complexity warnings which doesn't like closures
 # flake8: noqa: C901
-def batch(*, target_batch_size: int, max_waiting_time: float, max_processing_time: float = 10.0):
+def batch(
+    *,
+    target_batch_size: int,
+    max_waiting_time: float,
+    max_processing_time: float = 10.0,
+    argument_type: str = "list",
+):
     """@batch is a decorator to cumulate function calls and process them in batches.
         Not thread-safe.
 
     Args:
         target_batch_size: As soon as the collected function arguments reach target_batch_size,
             the wrapped function is called and the results are returned. Note that the function
             may also be called with longer arguments than target_batch_size.
-        max_waiting_time: Maximum waiting time before calling the underlying function although
-            the target_batch_size hasn't been reached.
-        max_processing_time: Maximum time for the processing itself (without waiting) before an
-            asyncio.TimeoutError is raised. Note: It is strongly advised to set a reasonably
-            strict timeout here in order not to create starving tasks which never finish in case
-            something is wrong with the backend call.
+        max_waiting_time: Maximum waiting time in seconds before calling the underlying function
+            although the target_batch_size hasn't been reached.
+        max_processing_time: Maximum time in seconds for the processing itself (without waiting)
+            before an asyncio.TimeoutError is raised. Note: It is strongly advised to set a
+            reasonably strict timeout here in order not to create starving tasks which never finish
+            in case something is wrong with the backend call.
+        argument_type: The type of function argument used for batching. One of "list" or "numpy".
+            By default "list" is used, i.e. it is assumed that the input arguments to the
+            wrapped function are lists which can be concatenated. If set to "numpy", the arguments
+            are assumed to be numpy arrays which can be concatenated by numpy.concatenate()
+            along axis 0.
 
     Raises:
         ValueError: If the arguments target_batch_size or max_waiting time are not >= 0.
+        ValueError: When calling the function with incorrect or inconsistent arguments.
         asyncio.TimeoutError: Is raised when calling the wrapped function takes longer than
             max_processing_time
 
@@ -137,7 +149,9 @@ def batch(*, target_batch_size: int, max_waiting_time: float, max_processing_tim
         function in order to avoid race conditions.
     - The return value of the wrapped function must be a single iterable.
     - All calls to the underlying function need to have the same number of positional arguments and
-        the same keyword arguments.
+        the same keyword arguments. It also isn't possible to mix the two ways to pass an argument.
+        The same argument always has to be passed either as keyword argument or as positional
+        argument.
 
     Example:
         >>> import asyncio
@@ -197,16 +211,20 @@ def add_args_to_queue(args, kwargs):
             """Add a new argument vector to the queue and return result indices"""
             nonlocal queue, n_rows_in_queue
 
-            queue.append((args, kwargs))
-            offset = n_rows_in_queue
+            if queue and (len(args) != len(queue[0][0]) or kwargs.keys() != queue[0][1].keys()):
+                raise ValueError("Inconsistent use of positional and keyword arguments")
+            n_rows_call = 0
             if args:
-                n_rows_in_queue += len(args[0])
+                n_rows_call = len(args[0])
             elif kwargs:
                 for v in kwargs.values():
-                    n_rows_in_queue += len(v)
-                    break
-            else:
+                    n_rows_call = len(v)
+                    break  # We only need one arbitrary keyword argument
+            if n_rows_call == 0:
                 raise ValueError("Function called with empty collections as arguments")
+            queue.append((args, kwargs))
+            offset = n_rows_in_queue
+            n_rows_in_queue += n_rows_call
             return offset, n_rows_in_queue
 
         async def wait_for_calculation(batch_no_to_calculate):
@@ -237,7 +255,6 @@ async def calculate(batch_no_to_calculate):
                 results_ready[batch_no_to_calculate] = n_results
                 result_events[batch_no_to_calculate].set()
 
-
         def pop_args_from_queue():
             nonlocal batch_no, queue, n_rows_in_queue
 
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -30,3 +30,4 @@ plugins:
   - mkapi:
       src_dirs:
         - .
+      filters: [short, strict]
diff --git a/tests/test_batch.py b/tests/test_batch.py
@@ -2,55 +2,100 @@
 import asyncio
 import random
 
+import numpy as np
 import pytest
 
 import dike
 
 
+def exceptions_equal(exception1, exception2):
+    """Returns True if the exceptions have the same type and message"""
+    return type(exception1) == type(exception2) and str(exception1) == str(exception2)
+
+
 async def raise_error(message):
     raise RuntimeError(message)
 
 
-def test_single_items_batchsize_reached():
+@pytest.mark.parametrize("argument_type", [list, np.array])
+def test_single_items_batchsize_reached(argument_type):
     @dike.batch(target_batch_size=3, max_waiting_time=10)
     async def f(arg1, arg2):
-        assert arg1 == [0, 1, 2]
-        assert arg2 == ["a", "b", "c"]
-        return [10, 11, 12]
+        assert arg1 == argument_type([0, 1, 2])
+        assert arg2 == argument_type(["a", "b", "c"])
+        return argument_type([10, 11, 12])
 
     async def run_test():
         result = await asyncio.wait_for(
             asyncio.gather(
-                f([0], ["a"]),
-                f([1], ["b"]),
-                f([2], ["c"]),
+                f(argument_type([0]), argument_type(["a"])),
+                f(argument_type([1]), argument_type(["b"])),
+                f(argument_type([2]), argument_type(["c"])),
             ),
             timeout=1.0,
         )
 
-        assert result == [[10], [11], [12]]
+        assert result == [argument_type([10]), argument_type([11]), argument_type([12])]
 
     asyncio.run(run_test())
 
 
-def test_single_items_kwargs_batchsize_reached():
+@pytest.mark.parametrize("argument_type", [list, np.array])
+def test_single_items_kwargs_batchsize_reached(argument_type):
     @dike.batch(target_batch_size=3, max_waiting_time=10)
     async def f(arg1, arg2):
-        assert arg1 == [0, 1, 2]
-        assert arg2 == ["a", "b", "c"]
-        return [10, 11, 12]
+        assert arg1 == argument_type([0, 1, 2])
+        assert arg2 == argument_type(["a", "b", "c"])
+        return argument_type([10, 11, 12])
 
     async def run_test():
         result = await asyncio.wait_for(
             asyncio.gather(
-                f(arg1=[0], arg2=["a"]),
-                f(arg1=[1], arg2=["b"]),
-                f(arg2=["c"], arg1=[2]),
+                f(arg2=argument_type(["a"]), arg1=argument_type([0])),
+                f(arg2=argument_type(["b"]), arg1=argument_type([1])),
+                f(arg1=argument_type([2]), arg2=argument_type(["c"])),
+                # f(arg2=argument_type(["c"]), arg1=argument_type([2])),
             ),
             timeout=1.0,
         )
 
-        assert result == [[10], [11], [12]]
+        assert result == [argument_type([10]), argument_type([11]), argument_type([12])]
+
+    asyncio.run(run_test())
+
+
+@pytest.mark.parametrize("argument_type", [list, np.array])
+def test_single_items_mixed_kwargs_raises_value_error(argument_type):
+    @dike.batch(target_batch_size=3, max_waiting_time=0.01)
+    async def f(arg1, arg2):
+        assert arg1 == argument_type([0, 1])
+        assert arg2 == argument_type(["a", "b"])
+        return argument_type([10, 11])
+
+    async def run_test():
+        result = await asyncio.wait_for(
+            asyncio.gather(
+                f(argument_type([0]), argument_type(["a"])),
+                f(argument_type([1]), argument_type(["b"])),
+                f(arg2=argument_type(["c"]), arg1=argument_type([2])),
+                f(argument_type([1])),
+                f(argument_type([]), argument_type([])),
+                return_exceptions=True
+            ),
+            timeout=1.0,
+        )
+
+        assert result[0] == argument_type([10])
+        assert result[1] == argument_type([11])
+        assert exceptions_equal(
+            result[2], ValueError("Inconsistent use of positional and keyword arguments")
+        )
+        assert exceptions_equal(
+            result[3], ValueError("Inconsistent use of positional and keyword arguments")
+        )
+        assert exceptions_equal(
+            result[4], ValueError("Function called with empty collections as arguments")
+        )
 
     asyncio.run(run_test())
 
@@ -193,12 +238,7 @@ async def f(arg1, arg2):
 
     async def run_test():
         results = await asyncio.wait_for(
-            asyncio.gather(
-                f([0], ["a"]),
-                f([1], ["b"]),
-                f([2], ["c"]),
-                return_exceptions=True
-            ),
+            asyncio.gather(f([0], ["a"]), f([1], ["b"]), f([2], ["c"]), return_exceptions=True),
             timeout=1.0,
         )
         for r in results: