chore: connection pipeline cache does not shrink #4491

Open · wants to merge 3 commits into base: main
44 changes: 32 additions & 12 deletions src/facade/dragonfly_connection.cc
@@ -9,6 +9,7 @@
#include <absl/strings/str_cat.h>
#include <mimalloc.h>

+#include <chrono>
#include <numeric>
#include <variant>

@@ -93,7 +94,6 @@ using nonstd::make_unexpected;

namespace facade {

-
namespace {

void SendProtocolError(RedisParser::Result pres, SinkReplyBuilder* builder) {
@@ -266,8 +266,6 @@ void LogTraffic(uint32_t id, bool has_more, absl::Span<RespExpr> resp,

constexpr size_t kMinReadSize = 256;

-thread_local uint32_t free_req_release_weight = 0;
-
const char* kPhaseName[Connection::NUM_PHASES] = {"SETUP", "READ", "PROCESS", "SHUTTING_DOWN",
                                                  "PRECLOSE"};

@@ -316,6 +314,36 @@ QueueBackpressure& GetQueueBackpressure() {

thread_local vector<Connection::PipelineMessagePtr> Connection::pipeline_req_pool_;

+class PipelineCacheSizePaceMaker {
> Review comment (Collaborator): nit: maybe PipelineWatermarkTracker
+ public:
+  bool WatermarkReached(size_t pipeline_sz) {
> Review comment (Collaborator): nit: maybe CheckAndUpdateWatermark
+    const auto now = Clock::now();
+    const auto elapsed = now - last_check_;
+
+    const size_t max = Limits::max();
+    if (elapsed < std::chrono::milliseconds(10)) {
+      min_ = std::min(min_, pipeline_sz);
+      return false;
+    }
+
+    const bool watermark_reached = (min_ > 0) && (min_ != max);
+    min_ = max;
+    last_check_ = Clock::now();
+
+    return watermark_reached;
+  }
+
+ private:
+  using Tp = std::chrono::time_point<std::chrono::system_clock>;
+  using Clock = std::chrono::system_clock;
+  using Limits = std::numeric_limits<size_t>;
+
+  Tp last_check_ = Clock::now();
+  size_t min_ = Limits::max();
+};
+
+thread_local PipelineCacheSizePaceMaker tl_pipe_pace_maker;

void Connection::PipelineMessage::SetArgs(const RespVec& args) {
  auto* next = storage.data();
  for (size_t i = 0; i < args.size(); ++i) {
@@ -1589,14 +1617,7 @@ void Connection::ShrinkPipelinePool() {
  if (pipeline_req_pool_.empty())
    return;

-  // The request pool is shared by all the connections in the thread so we do not want
-  // to release it aggressively just because some connection is running in
-  // non-pipelined mode. So by using free_req_release_weight we wait at least N times,
-  // where N is the number of connections in the thread.
-  ++free_req_release_weight;
-
-  if (free_req_release_weight > stats_->num_conns) {
-    free_req_release_weight = 0;
+  if (tl_pipe_pace_maker.WatermarkReached(pipeline_req_pool_.size())) {
    stats_->pipeline_cmd_cache_bytes -= pipeline_req_pool_.back()->StorageCapacity();
    pipeline_req_pool_.pop_back();
  }
@@ -1606,7 +1627,6 @@ Connection::PipelineMessagePtr Connection::GetFromPipelinePool() {
  if (pipeline_req_pool_.empty())
    return nullptr;

-  free_req_release_weight = 0;  // Reset the release weight.
  auto ptr = std::move(pipeline_req_pool_.back());
  stats_->pipeline_cmd_cache_bytes -= ptr->StorageCapacity();
  pipeline_req_pool_.pop_back();
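For context on the core change: PipelineCacheSizePaceMaker samples the pool size on every ShrinkPipelinePool call, tracks the minimum over a ~10ms window, and signals shrinking only when that minimum stayed above zero, i.e. the pool was never fully drained during the window. Below is a minimal standalone sketch of this low-watermark heuristic; it is an illustration rather than the PR's exact code: the main() driver is hypothetical, and it uses steady_clock where the PR uses system_clock.

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <iostream>
#include <limits>
#include <thread>

// Sketch only: mirrors the PR's PipelineCacheSizePaceMaker (the review nit
// suggests the name PipelineWatermarkTracker); simplified for clarity.
class PipelineWatermarkTracker {
 public:
  bool WatermarkReached(size_t pool_size) {
    const auto now = std::chrono::steady_clock::now();
    if (now - last_check_ < std::chrono::milliseconds(10)) {
      // Inside the sampling window: remember the smallest pool size seen.
      min_ = std::min(min_, pool_size);
      return false;
    }
    // Window elapsed. A positive minimum means the pool never drained to
    // zero, so at least one cached message was never needed.
    const bool reached = min_ > 0 && min_ != kMax;
    min_ = kMax;  // reset for the next window
    last_check_ = now;
    return reached;
  }

 private:
  static constexpr size_t kMax = std::numeric_limits<size_t>::max();
  std::chrono::steady_clock::time_point last_check_ = std::chrono::steady_clock::now();
  size_t min_ = kMax;
};

int main() {
  PipelineWatermarkTracker tracker;
  tracker.WatermarkReached(5);  // window starts: min becomes 5
  tracker.WatermarkReached(3);  // min becomes 3, still within 10ms
  std::this_thread::sleep_for(std::chrono::milliseconds(11));
  // Window elapsed and min stayed at 3 (> 0): one message may be released.
  std::cout << tracker.WatermarkReached(3) << "\n";  // prints 1
}

Because WatermarkReached fires at most once per window, callers release at most one cached message per ~10ms, which is the gradual shrinking the new test below checks for.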
55 changes: 53 additions & 2 deletions tests/dragonfly/connection_test.py
@@ -1061,7 +1061,7 @@ async def test_timeout(df_server: DflyInstance, async_client: aioredis.Redis):
    assert len(clients) == 2

    await asyncio.sleep(2)

    @assert_eventually
    async def wait_for_conn_drop():
        clients = await async_client.client_list()
@@ -1070,4 +1070,55 @@ async def wait_for_conn_drop():

    await wait_for_conn_drop()
    info = await async_client.info("clients")
-    assert int(info["timeout_disconnects"]) >= 1
+    assert int(info["timeout_disconnects"]) >= 1


@dfly_args({"proactor_threads": 1})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add some comments on this test

+async def test_pipeline_cache_size(df_factory):
+    server = df_factory.create(proactor_threads=1)
+    server.start()
+
+    # Start 1 client.
+    good_client = server.client()
+
+    await good_client.execute_command("set foo bar")
+
+    bad_actor_client = server.client()
+
+    info = await bad_actor_client.info()
+
+    # Cache is empty.
+    assert info["pipeline_cache_bytes"] == 0
+    assert info["dispatch_queue_bytes"] == 0
+
+    async def push_pipeline(bad_actor_client, size=1):
+        # Fill cache.
+        p = bad_actor_client.pipeline(transaction=True)
+        for i in range(size):
+            p.lpush(str(i), "V")
+        await p.execute()
+
+    await push_pipeline(bad_actor_client, 32)
+    info = await good_client.info()
+
+    old_pipeline_cache_bytes = info["pipeline_cache_bytes"]
+    assert old_pipeline_cache_bytes > 0
+    assert info["dispatch_queue_bytes"] == 0
+
> Review comment (Contributor, author): We can drain the pipeline cache bytes once we stop dispatching async. But on a large pool of connections, only one command has to dispatch async for the counter to be reset internally. If this pattern continues, the size of the cache remains constant and is not released gradually.
+    for i in range(30):
+        await push_pipeline(bad_actor_client)
+        await good_client.execute_command(f"set foo{i} bar")
+
+    info = await good_client.info()
+
+    # Gradually release pipeline
+    assert old_pipeline_cache_bytes > info["pipeline_cache_bytes"]
+    assert info["dispatch_queue_bytes"] == 0
+
+    # Now drain it
+    async with async_timeout.timeout(5):
+        while info["pipeline_cache_bytes"] != 0:
+            await good_client.execute_command(f"set foo{i} bar")
+            info = await good_client.info()
+
+    assert info["dispatch_queue_bytes"] == 0
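The author's review comment embedded in the test above explains the failure mode of the old weight-based scheme that this PR removes. A sketch of that pathology, as a simplified model with hypothetical numbers rather than Dragonfly's actual code:

#include <cstdint>
#include <iostream>

// Old scheme (simplified): ShrinkPipelinePool releases one message only after
// being called more than num_conns times in a row, and GetFromPipelinePool
// resets the counter. One pipelined command per round blocks shrinking forever.
uint32_t free_req_release_weight = 0;
const uint32_t num_conns = 100;  // hypothetical: connections on this thread
int pool_size = 32;              // hypothetical: cached pipeline messages

void GetFromPipelinePool() {
  free_req_release_weight = 0;  // the reset the old code performed
}

void ShrinkPipelinePool() {
  if (++free_req_release_weight > num_conns) {
    free_req_release_weight = 0;
    --pool_size;  // release one cached message
  }
}

int main() {
  for (int round = 0; round < 1000; ++round) {
    GetFromPipelinePool();  // one async dispatch per round resets the weight,
    ShrinkPipelinePool();   // so the release threshold is never crossed
  }
  std::cout << pool_size << "\n";  // still 32: the cache never shrank
}

Under the watermark tracker, the decision instead depends on whether the pool actually drained during the ~10ms window, so this traffic pattern still releases one cached message per window, as the test verifies.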