Add ChangeSublistSize() and LinearFsas() (k2-fsa#265)

danpovey · web-flow · commit 471da7815e5f · 2020-10-16T21:01:06.000+08:00
diff --git a/k2/csrc/context.h b/k2/csrc/context.h
@@ -544,7 +544,7 @@ class ParallelRunner {
   // destructor of `this` You can pass this into the Eval() and Eval2()
   // functions, or invoke kernels directly with it; but if you want it
   // to be used in called functions you should do something like
-  //  With(pr.NewStream) w;
+  //  With w(pr.NewStream());
   // with that object alive in the scope where you want the stream to be
   // used.
   //
diff --git a/k2/csrc/fsa_algo.cu b/k2/csrc/fsa_algo.cu
@@ -147,6 +147,96 @@ void Intersect(FsaOrVec &a_fsas, FsaOrVec &b_fsas, FsaVec *out,
   *out = creator.GetFsaVec();
 }
 
+// Linear FSA accepting `symbols`: n symbols -> n+2 states, n+1 arcs.
+Fsa LinearFsa(Array1<int32_t> &symbols) {
+  ContextPtr c = symbols.Context();
+  int32_t n = symbols.Dim(), num_states = n + 2, num_arcs = n + 1;
+  // State i has exactly one leaving arc, arc i; the final state has none.
+  Array1<int32_t> row_splits1 = Range(c, num_states + 1, 0),
+                     row_ids1 = Range(c, num_arcs, 0);
+  int32_t *row_splits1_data = row_splits1.Data();
+  Array1<Arc> arcs(c, num_arcs);
+  Arc *arcs_data = arcs.Data();
+  const int32_t *symbols_data = symbols.Data();
+  auto lambda_set_arcs = [=] __host__ __device__ (int32_t arc_idx01) -> void {
+    // Arc n is the final-arc (-1 == kFinalSymbol); the others copy the input.
+    int32_t symbol = (arc_idx01 < n ? symbols_data[arc_idx01] : -1);
+    if (arc_idx01 < n) K2_CHECK_NE(symbol, -1);  // input must not contain -1
+    float score = 0.0;
+    arcs_data[arc_idx01] = Arc(arc_idx01, arc_idx01 + 1, symbol, score);
+    // Final state has no arcs: last row-split is num_arcs, not num_states.
+    if (arc_idx01 == 0) row_splits1_data[num_states] = num_arcs;
+  };
+  Eval(c, num_arcs, lambda_set_arcs);
+  return Ragged<Arc>(RaggedShape2(&row_splits1, &row_ids1, num_arcs), arcs);
+}
+
+
+// Builds one linear FSA per row of `symbols`; contract is in fsa_algo.h.
+Fsa LinearFsas(Ragged<int32_t> &symbols) {
+  K2_CHECK(symbols.NumAxes() == 2);
+  ContextPtr c = symbols.Context();
+
+  // if there are n symbols, there are n+2 states and n+1 arcs.
+  RaggedShape states_shape = ChangeSublistSize(symbols.shape, 2);
+
+  int32_t num_states = states_shape.NumElements(),
+            num_arcs = symbols.NumElements() + symbols.Dim0();
+
+  // row_splits2 maps from state_idx01 to arc_idx012; row_ids2 does the reverse.
+  // We'll set them in the lambda below.  row_splits2 has one entry per state
+  // plus a final entry equal to num_arcs, i.e. num_states + 1 entries.
+  Array1<int32_t> row_splits2(c, num_states + 1), row_ids2(c, num_arcs);
+
+  int32_t *row_ids2_data = row_ids2.Data(),
+       *row_splits2_data = row_splits2.Data();
+  const int32_t *row_ids1_data = states_shape.RowIds(1).Data(),
+             *row_splits1_data = states_shape.RowSplits(1).Data(),
+                 *symbols_data = symbols.values.Data();
+  Array1<Arc> arcs(c, num_arcs);
+  Arc *arcs_data = arcs.Data();
+  auto lambda = [=] __host__ __device__ (int32_t state_idx01) -> void {
+    int32_t fsa_idx0 = row_ids1_data[state_idx01],
+      state_idx0x = row_splits1_data[fsa_idx0],
+      next_state_idx0x = row_splits1_data[fsa_idx0 + 1],
+      idx1 = state_idx01 - state_idx0x;
+
+    // the following works because each FSA has one fewer arcs than states.
+    int32_t arc_idx0xx = state_idx0x - fsa_idx0,
+      next_arc_idx0xx = next_state_idx0x - (fsa_idx0 + 1),
+    // the following may look a bit wrong.. here, the idx1 is the same as
+    // the idx12 if the arc exists, because each state has one arc leaving
+    // it (except the last state).
+    arc_idx012 = arc_idx0xx + idx1;
+    // the following works because each FSA has one fewer symbols than arcs
+    // (however it doesn't work for the last arc of each FSA; we check below.)
+    int32_t symbol_idx01 = arc_idx012 - fsa_idx0;
+    if (arc_idx012 < next_arc_idx0xx) {
+      int32_t src_state = idx1,
+             dest_state = idx1 + 1,
+                 symbol = (arc_idx012 + 1 < next_arc_idx0xx ?
+                           symbols_data[symbol_idx01] : -1);  // kFinalSymbol
+      float score = 0.0;
+      arcs_data[arc_idx012] = Arc(src_state, dest_state, symbol, score);
+      row_ids2_data[arc_idx012] = state_idx01;
+    } else {
+      // Final state of an FSA: there is no arc with index `arc_idx012`.  The
+      // following ensures that the last element of row_splits2 (i.e.
+      // row_splits2[num_states]) is set to num_arcs.  For the final state of
+      // every FSA but the last it redundantly writes the entry of the next
+      // FSA's first state, so 2 threads write the same value to one location.
+      row_splits2_data[state_idx01+1] = arc_idx012;
+    }
+    row_splits2_data[state_idx01] = arc_idx012;
+  };
+  Eval(c, num_states, lambda);
+  // Axis 1 (fsa -> state) comes from states_shape; axis 2 (state -> arc) is
+  // described by row_splits2 together with row_ids2.
+  return Ragged<Arc>(RaggedShape3(&states_shape.RowSplits(1),
+                                  &states_shape.RowIds(1), num_states,
+                                  &row_splits2, &row_ids2, num_arcs), arcs);
+}
+
 namespace {
 struct ArcComparer {
   __host__ __device__ __forceinline__ bool operator()(const Arc &lhs,
diff --git a/k2/csrc/fsa_algo.h b/k2/csrc/fsa_algo.h
@@ -131,6 +131,35 @@ void IntersectDensePruned(FsaVec &a_fsas, DenseFsaVec &b_fsas, float beam,
 void Intersect(FsaOrVec &a_fsas, FsaOrVec &b_fsas, FsaVec *out,
                Array1<int32_t> *arc_map_a, Array1<int32_t> *arc_map_b);
 
+/*
+  Create a linear FSA from a sequence of symbols.
+
+    @param [in] symbols  Input symbol sequence (must not contain
+                kFinalSymbol == -1).
+
+    @return     Returns an FSA that accepts only this symbol sequence, with
+                zero score.  Note: if `symbols.Dim() == n`, the returned FSA
+                will have n+1 arcs (including the final-arc) and n+2 states;
+                arc i leaves state i and enters state i+1, and the last arc
+                is the final-arc.
+*/
+Fsa LinearFsa(Array1<int32_t> &symbols);
+
+/*
+  Create an FsaVec containing linear FSAs, given a list of sequences of
+  symbols.  All arcs have zero score.
+
+    @param [in] symbols  Input symbol sequences (must not contain
+                kFinalSymbol == -1); must have exactly 2 axes.
+
+    @return     Returns an FsaVec with `ans.Dim0() == symbols.Dim0()`.  Note: if
+                the i'th row of `symbols` has n elements, the i'th returned FSA
+                will have n+1 arcs (including the final-arc) and n+2 states.
+ */
+Fsa LinearFsas(Ragged<int32_t> &symbols);
+
+
+
 }  // namespace k2
 
 #endif  // K2_CSRC_FSA_ALGO_H_
diff --git a/k2/csrc/ragged_ops.cu b/k2/csrc/ragged_ops.cu
@@ -939,4 +939,76 @@ Array1<int32_t> GetTransposeReordering(Ragged<int32_t> &src, int32_t num_cols) {
   return ans;
 }
 
+// Returns a copy of `src` whose last-axis sub-lists each have `size_delta`
+// more elements (fewer if negative); see ragged_ops.h for the contract.
+RaggedShape ChangeSublistSize(RaggedShape &src, int32_t size_delta) {
+  K2_CHECK(src.NumAxes() >= 2);
+  // the result will have the same num-axes as `src` (the NumAxes() of the
+  // object is not the same as the number of RaggedShapeDim axes).
+  std::vector<RaggedShapeDim> ans_axes(src.NumAxes() - 1);
+  int32_t last_axis = src.NumAxes() - 1;
+  // The following will only do something if src.NumAxes() > 2.
+  for (int32_t i = 0; i + 1 < last_axis; i++) ans_axes[i] = src.Axes()[i];
+
+  ContextPtr c = src.Context();
+  int32_t num_rows = src.TotSize(last_axis - 1),
+    src_num_elems = src.TotSize(last_axis),
+    num_elems = src_num_elems + size_delta * num_rows;
+  // Only the last axis is rebuilt.  It lives in ans_axes.back(), which is not
+  // ans_axes[0] when src.NumAxes() > 2 (entry 0 was copied from `src` above).
+  RaggedShapeDim &last = ans_axes.back();
+  last.row_splits = Array1<int32_t>(c, num_rows + 1);
+  last.row_ids = Array1<int32_t>(c, num_elems);
+  last.cached_tot_size = num_elems;
+  const int32_t *src_row_splits_data = src.RowSplits(last_axis).Data(),
+    *src_row_ids_data = src.RowIds(last_axis).Data();
+  int32_t *row_splits_data = last.row_splits.Data(),
+    *row_ids_data = last.row_ids.Data();
+
+  {
+    // The kernels below are launched on separate streams, so none of them may
+    // read what another one writes: each recomputes the new row-splits value
+    // it needs as src_row_splits[i] + size_delta * i.
+    ParallelRunner pr(c);
+    {
+      With w(pr.NewStream());
+      auto lambda_set_row_splits = [=] __host__ __device__ (int32_t idx0) -> void {
+        row_splits_data[idx0] = src_row_splits_data[idx0] + size_delta * idx0;
+      };
+      Eval(c, num_rows + 1, lambda_set_row_splits);
+    }
+
+    {
+      // Copies the row-id of every source element that survives the resize.
+      With w(pr.NewStream());
+      auto lambda_set_row_ids1 = [=] __host__ __device__ (int32_t src_idx01) -> void {
+        int32_t src_idx0 = src_row_ids_data[src_idx01],
+        src_idx0x = src_row_splits_data[src_idx0],
+        src_idx1 = src_idx01 - src_idx0x,
+        new_size = src_row_splits_data[src_idx0 + 1] - src_idx0x + size_delta,
+        new_idx01 = src_idx0x + size_delta * src_idx0 + src_idx1;
+        // the guard is only needed because size_delta might be negative.
+        if (src_idx1 < new_size)
+          row_ids_data[new_idx01] = src_idx0;
+      };
+      Eval(c, src_num_elems, lambda_set_row_ids1);  // one thread per src elem
+    }
+    if (size_delta > 0) {
+      // This sets the row-ids that are not set by lambda_set_row_ids1.
+      With w(pr.NewStream());
+      auto lambda_set_row_ids2 = [=] __host__ __device__ (int32_t i) -> void {
+        int32_t idx0 = i / size_delta, n = i % size_delta, next_idx0 = idx0 + 1;
+        int32_t next_idx0x = src_row_splits_data[next_idx0] +
+            size_delta * next_idx0;
+        row_ids_data[next_idx0x - 1 - n] = idx0;
+      };
+      Eval(c, num_rows * size_delta, lambda_set_row_ids2);
+    }
+    // make the ParallelRunner go out of scope (should do this before any
+    // validation code invoked by the constructor of RaggedShape below).
+  }
+  return RaggedShape(ans_axes);
+}
+
+
 }  // namespace k2
diff --git a/k2/csrc/ragged_ops.h b/k2/csrc/ragged_ops.h
@@ -139,6 +139,32 @@ void SortSublists(Ragged<T> &src, Array1<int32_t> *order);
  */
 RaggedShape Stack(int32_t axis, int32_t src_size, RaggedShape **src);
 
+
+/*
+  Return a modified version of `src` in which all sub-lists on the last axis of
+  the tensor have size modified by `size_delta`.  `size_delta` may have either
+  sign.  A sub-list whose size becomes 0 is kept (as an empty sub-list); sizes
+  are not clamped, so `cur_size + size_delta < 0` is an error (see below).
+
+
+     @param [in] src  Source tensor; must have NumAxes() >= 2, i.e. be valid.
+                      Only the last axis, i.e. the last RowSplits/RowIds(),
+                      will be affected by this.
+     @param [in] size_delta  Amount by which to change the size of sub-lists.
+                      May be either sign; if negative, we'll reduce the
+                      sub-list size by this amount, possibly leaving empty
+                      sub-lists (but it's an error if this would reduce any sub-list
+                      size below zero).
+     @return          Returns the modified RaggedShape.  The RowSplits()
+                      and RowIds() of its last axis will not be shared
+                      with `src`.
+
+  Example: ChangeSublistSize( [ [ x x ] [ x x x ] ], 1) returns
+    [ [ x x x ] [ x x x x ] ]
+  (using the x as placeholders for the values since these are unknown).
+ */
+RaggedShape ChangeSublistSize(RaggedShape &src, int32_t size_delta);
+
 /*
   Insert a new axis at position `axis`, with 0 <= axis <= src.NumAxes(), for
   which the only allowed index will be 0 (which is another way of saying: all