Implemented GetCountsPartitioned and tested (k2-fsa#239)

qindazhu · web-flow · commit d4ca57143167 · 2020-10-10T20:42:17.000+08:00
diff --git a/k2/csrc/array_ops.cu b/k2/csrc/array_ops.cu
@@ -271,12 +271,16 @@ void RowIdsToRowSplits(const Array1<int32_t> &row_ids,
 }
 
 Array1<int32_t> GetCounts(const Array1<int32_t> &src, int32_t n) {
-  K2_CHECK_GE(n, 1);
+  K2_CHECK_GE(n, 0);
   ContextPtr c = src.Context();
   int32_t dim = src.Dim();
   const int32_t *src_data = src.Data();
   Array1<int32_t> ans(c, n, 0);  // init with 0
   int32_t *ans_data = ans.Data();
+  if (n == 0) {
+    K2_CHECK_EQ(dim, 0);
+    return ans;
+  }
 
   DeviceType d = c->GetDeviceType();
   if (d == kCpu) {
diff --git a/k2/csrc/array_ops_test.cu b/k2/csrc/array_ops_test.cu
@@ -1355,6 +1355,15 @@ void TestGetCounts() {
     context = GetCudaContext();
   }
 
+  {
+    // empty case
+    int32_t n = 0;
+    std::vector<int32_t> values;
+    Array1<int32_t> src(context, values);
+    Array1<int32_t> ans = GetCounts(src, n);
+    EXPECT_EQ(ans.Dim(), 0);
+  }
+
   {
     // simple case
     int32_t n = 8;
diff --git a/k2/csrc/ragged_ops.cu b/k2/csrc/ragged_ops.cu
@@ -793,4 +793,17 @@ RaggedShape TrivialShape(ContextPtr &c, int32_t num_elems) {
   return RaggedShape2(&row_splits, &row_ids, num_elems);
 }
 
+Ragged<int32_t> GetCountsPartitioned(Ragged<int32_t> &src,
+                                     RaggedShape &ans_ragged_shape) {
+  K2_CHECK_EQ(src.NumAxes(), 2);
+  K2_CHECK_EQ(ans_ragged_shape.NumAxes(), 2);
+  K2_CHECK(IsCompatible(src, ans_ragged_shape));
+  K2_CHECK_EQ(src.Dim0(), ans_ragged_shape.Dim0());  // axis-0 sizes agree
+  const Array1<int32_t> &values = src.values;
+  const Array1<int32_t> &row_splits = ans_ragged_shape.RowSplits(1);
+  int32_t n = ans_ragged_shape.NumElements();  // length of the counts array
+  Array1<int32_t> counts = GetCounts(values, n);  // row_splits is not used
+  return Ragged<int32_t>(ans_ragged_shape, counts);
+}
+
 }  // namespace k2
diff --git a/k2/csrc/ragged_ops.h b/k2/csrc/ragged_ops.h
@@ -357,7 +357,8 @@ inline Ragged<T> RaggedFromTotSizes(ContextPtr &c,
   src.values.Dim() which tells us the order in which these elements would
   appear if sorted by column.  (TODO: we can decide later whether to require
   sorting secondarily by row).  So `src.values[ans]` will be in sorted
-  order at exit, and `ans` will contain all numbers from 0 to `src.values.Dim() - 1`.
+  order at exit, and `ans` will contain all numbers from 0 to
+  `src.values.Dim() - 1`.
 
   If `src` has more than 2 axes, the earlier-numbered axes do not affect
   the result, except for an efficiency modification: we require that the
@@ -369,21 +370,21 @@ inline Ragged<T> RaggedFromTotSizes(ContextPtr &c,
   TODO(dan): we may at some point make, as an optional output, row-splits and/or
   row-ids of the rearranged matrix.
 
-  This problem has some relationship to the cusparse library, specifically the csr2csc
-  functions https://docs.nvidia.com/cuda/cusparse/index.html#csr2cscEx2).
-  However I'm not sure what it does when there are repeated elements.  It might
-  be easiest to implement it via sorting for now.
+  This problem has some relationship to the cusparse library, specifically the
+  csr2csc functions (see
+  https://docs.nvidia.com/cuda/cusparse/index.html#csr2cscEx2). However I'm not
+  sure what it does when there are repeated elements.  It might be easiest to
+  implement it via sorting for now.
+
 
-  
      @param [in] src  Input tensor, see above.
-     @param [in] num_cols  Number of columns in matrix to be transposed; 
+     @param [in] num_cols  Number of columns in matrix to be transposed;
                   we require 0 <= src.values[i] < num_cols.
 */
 Array1<int32_t> GetTransposeReordering(Ragged<int32_t> &src, int32_t num_cols);
 
-
 /*
-  This function is like GetCounts() that is declared in array_ops.h, 
+  This function is like GetCounts() that is declared in array_ops.h,
   but works on a partitioned problem (this should be faster).
 
    @param [in] src  A ragged array with src.NumAxes() == 2
@@ -403,7 +404,7 @@ Array1<int32_t> GetTransposeReordering(Ragged<int32_t> &src, int32_t num_cols);
   the result (with num_rows == ans_ragged_shape.NumElements()), then
   for each i, let ans.values[i] = row_splits[i+1]-row_splits[i] (where
   row_splits is the output of RowIdsToRowSplits() we just called).
-  
+
   This could actually be implemented using the GetCounts() of array_ops.h,
   ignoring the structure; the structure should help the speed though.
   This equivalence should be useful for testing.
diff --git a/k2/csrc/ragged_test.cu b/k2/csrc/ragged_test.cu
@@ -1016,4 +1016,48 @@ TEST(RaggedShapeOpsTest, TestRenumber) {
   TestRenumber<kCuda>();
 }
 
+template <DeviceType d>
+void TestGetCountsPartitioned() {
+  ContextPtr cpu = GetCpuContext();  // results are copied here for checking
+  ContextPtr context = nullptr;
+  if (d == kCpu) {
+    context = GetCpuContext();
+  } else {
+    K2_CHECK_EQ(d, kCuda);
+    context = GetCudaContext();
+  }
+
+  // A simple case is enough here: GetCounts() is already tested with large
+  // random inputs, and GetCountsPartitioned() just calls GetCounts().
+  std::vector<int32_t> src_row_splits_vec = {0, 3, 4, 6, 10};
+  Array1<int32_t> src_row_splits(context, src_row_splits_vec);
+  RaggedShape src_shape = RaggedShape2(&src_row_splits, nullptr, -1);
+  std::vector<int32_t> src_values_vec = {0, 1, 0, 2, 5, 5, 7, 7, 9, 7};
+  Array1<int32_t> src_values(context, src_values_vec);
+  Ragged<int32_t> src(src_shape, src_values);
+
+  std::vector<int32_t> ans_row_splits_vec = {0, 2, 4, 7, 10};
+  Array1<int32_t> ans_row_splits(context, ans_row_splits_vec);
+  RaggedShape ans_shape = RaggedShape2(&ans_row_splits, nullptr, -1);
+
+  Ragged<int32_t> result = GetCountsPartitioned(src, ans_shape);
+
+  ASSERT_EQ(result.NumAxes(), 2);
+  // The result must carry the same row_splits as ans_shape.
+  Array1<int32_t> row_splits = result.shape.RowSplits(1).To(cpu);
+  std::vector<int32_t> result_row_splits(row_splits.Data(),
+                                         row_splits.Data() + row_splits.Dim());
+  EXPECT_EQ(result_row_splits, ans_row_splits_vec);
+  // check values: expected_data[i] is how often i occurs in src_values_vec
+  std::vector<int32_t> expected_data = {2, 1, 1, 0, 0, 2, 0, 3, 0, 1};
+  Array1<int32_t> values = result.values.To(cpu);
+  std::vector<int32_t> data(values.Data(), values.Data() + values.Dim());
+  EXPECT_EQ(data, expected_data);
+}
+
+TEST(RaggedShapeOpsTest, TestGetCountsPartitioned) {
+  TestGetCountsPartitioned<kCpu>();   // runs with GetCpuContext()
+  TestGetCountsPartitioned<kCuda>();  // runs with GetCudaContext()
+}
+
 }  // namespace k2