Introduce Array to template-ize current code

qindazhu · danpovey · web-flow · commit 071b0b3b96f3 · 2020-06-05T17:01:19.000+08:00
* Added interface for auxiliary labels

* Add notes on Python interface

* Fix typos

* Fix conflicts, remove some typedefs

* Fixes from review

* Small fixes in determinization code

* Progress on determinization code; add new declarations of un-pruned functions

* Fix compile error

* Resolve conflicts

* Add LogAdd

* Fix compile errors in util.h

* More progress on determinization code

* More progress on determinization draft.

* More work on Determinize code.

* Draft of ConstFsa interface and CfsaVec

* Small fixes to ConstFsa interface

* Changes from review

* Fix style issues

* Add itf for DenseFsa (not compiled)

* Draft of Array2/Array3

* Some notes on how this would work in Python

* Fix conflict

* [src] More drafts in array stuff, RE interface of functions.

* Further changes

* merge Dan's PR about Array2

Co-authored-by: Daniel Povey <dpovey@gmail.com>
diff --git a/k2/csrc/array.h b/k2/csrc/array.h
@@ -0,0 +1,121 @@
+// k2/csrc/array.h
+
+// Copyright (c)  2020  Xiaomi Corporation (author: Daniel Povey)
+
+// See ../../LICENSE for clarification regarding multiple authors
+
+#ifndef K2_CSRC_ARRAY_H_
+#define K2_CSRC_ARRAY_H_
+
+#include <functional>
+#include <limits>
+#include <memory>
+#include <vector>
+
+namespace k2 {
+
+/*
+   We will use e.g. StridedPtr<int32_t, T> when the stride is not 1, and
+   otherwise just T* (which will presumably be faster).
+*/
+template <typename I, typename T>
+struct StridedPtr {
+  T *data;   // pointer to logical element 0
+  I stride;  // distance, in units of T, between consecutive logical elements
+  T &operator[](I i) { return data[i * stride]; }  // honor the stride
+  StridedPtr(T *data, I stride) : data(data), stride(stride) {}
+};
+
+/* MIGHT NOT NEED THIS */
+template <typename I, typename Ptr>
+struct Array1 {
+  // One dimensional array of something, like vector<X>, where Ptr is, or
+  // behaves like, X*.
+  using IndexT = I;
+  using PtrT = Ptr;
+
+  // 'begin' and 'end' are the first and one-past-the-last indexes into `data`
+  // that we are allowed to use.
+  IndexT begin;
+  IndexT end;
+
+  PtrT data;  // data[begin] through data[end - 1] are the usable elements
+};
+
+/*
+  This struct stores the size of an Array2 object; it will generally be used as
+  an output argument by functions that work out this size.
+ */
+template <typename I>
+struct Array2Size {
+  using IndexT = I;
+  // `size1` is the top-level size of the array, equal to the object's `size`
+  // member.
+  I size1;
+  // `size2` is the number of elements in the array, equal to
+  // o->indexes[o->size] - o->indexes[0] (if the Array2 object o is
+  // initialized).
+  I size2;
+};
+
+template <typename I, typename Ptr>
+struct Array2 {
+  // Irregular two dimensional array of something, like vector<vector<X> >
+  // where Ptr is, or behaves like, X*.
+  using IndexT = I;
+  using PtrT = Ptr;
+
+  IndexT size;  // top-level size, i.e. the number of sub-lists
+  const IndexT *indexes;  // indexes[0,1,...size] should be defined; note, this
+                          // means the array must be of at least size+1.  We
+                          // require that indexes[i] <= indexes[i+1], but it is
+                          // not required that indexes[0] == 0, it may be
+                          // greater than 0.
+
+  PtrT data;  // `data` might be an actual pointer, or might be some object
+              // supporting operator [].  data[indexes[0]] through
+              // data[indexes[size] - 1] must be accessible through this
+              // object.
+
+  /* initialized definition:
+
+        An Array2 object is initialized if its `size` member is set and its
+        `indexes` and `data` pointer allocated, and the values of its `indexes`
+        array are set for indexes[0] and indexes[size].
+  */
+};
+
+template <typename I, typename Ptr>
+struct Array3 {
+  // Irregular three dimensional array of something, like
+  // vector<vector<vector<X> > > where Ptr is or behaves like X*.
+  using IndexT = I;
+  using PtrT = Ptr;
+
+  IndexT size;
+  const IndexT *indexes1;  // indexes1[0,1,...size] should be defined; note,
+                           // this means the array must be of at least size+1.
+                           // We require that indexes1[i] <= indexes1[i+1], but
+                           // it is not required that indexes1[0] == 0, it may
+                           // be greater than 0.
+
+  const IndexT *indexes2;  // indexes2[indexes1[0]]
+                           // .. indexes2[indexes1[size]] should be defined.
+
+  Ptr data;  // `data` might be an actual pointer, or might be some object
+             // supporting operator [].  data[indexes2[indexes1[0]]] through
+             // data[indexes2[indexes1[size]] - 1] must be accessible through
+             // this object.
+
+  Array2<I, Ptr> operator[](I i) {
+    // Returns sub-array i (0 <= i < size, unchecked), a view sharing `data`.
+    return Array2<I, Ptr>{static_cast<I>(indexes1[i + 1] - indexes1[i]),
+                          indexes2 + indexes1[i], data};
+  }
+};
+
+// Note: we can create Array4 later if we need it.
+
+}  // namespace k2
+
+#endif  // K2_CSRC_ARRAY_H_
diff --git a/k2/csrc/aux_labels.h b/k2/csrc/aux_labels.h
@@ -9,6 +9,7 @@
 
 #include <vector>
 
+#include "k2/csrc/array.h"
 #include "k2/csrc/fsa.h"
 #include "k2/csrc/fsa_util.h"
 #include "k2/csrc/properties.h"
@@ -46,6 +47,9 @@ struct AuxLabels {
   std::vector<int32_t> labels;
 };
 
+// TODO(haowen): replace AuxLabels above with below definition
+using AuxLabels_ = Array2<int32_t, int32_t *>;
+
 // Swap AuxLabels; it's cheap to to this as we are actually doing shallow swap.
 void Swap(AuxLabels *labels1, AuxLabels *labels2);
 
@@ -95,6 +99,39 @@ void MapAuxLabels2(const AuxLabels &labels_in,
 void InvertFst(const Fsa &fsa_in, const AuxLabels &labels_in, Fsa *fsa_out,
                AuxLabels *aux_labels_out);
 
+class FstInverter {
+ public:
+  /* Constructor.  Lightweight. */
+  FstInverter(const Fsa &fsa_in, const AuxLabels &labels_in);
+
+  /*
+    Do enough work to know how much memory will be needed, and output it.
+        @param [out] fsa_size   The num-states and num-arcs of the FSA
+                                will be written to here
+        @param [out] aux_size   The number of lists in the AuxLabels
+                                output (==num-arcs) and the number of
+                                elements will be written to here.
+  */
+  void GetSizes(Array2Size<int32_t> *fsa_size, Array2Size<int32_t> *aux_size);
+
+  /*
+    Finish the operation and output inverted FSA to `fsa_out` and
+    auxiliary labels to `labels_out`.
+       @param [out]  fsa_out  The inverted FSA will be written to
+                         here.  Must be initialized; search for
+                         'initialized definition' in class Array2
+                         in array.h for meaning.
+       @param [out]  labels_out  The auxiliary labels will be written to
+                         here.  Must be initialized; search for
+                         'initialized definition' in class Array2
+                         in array.h for meaning.
+   */
+  void GetOutput(Fsa *fsa_out, AuxLabels *labels_out);
+
+ private:
+  // ...
+};
+
 }  // namespace k2
 
 #endif  // K2_CSRC_AUX_LABELS_H_
diff --git a/k2/csrc/fsa.h b/k2/csrc/fsa.h
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include "glog/logging.h"
+#include "k2/csrc/array.h"
 #include "k2/csrc/util.h"
 
 namespace k2 {
@@ -129,6 +130,10 @@ struct Fsa {
   }
 };
 
+// TODO(haowen): replace Cfsa and CfsaVec with below definitions
+using Cfsa_ = Array2<int32_t, Arc *>;
+using CfsaVec_ = Array3<int32_t, Arc *>;
+
 /*
   Cfsa is a 'const' FSA, which we'll use as the input to operations.  It is
   designed in such a way that the storage underlying it may either be an Fsa
@@ -157,7 +162,7 @@ struct Cfsa {
                                // are valid.  CAUTION: arc_indexes[0] may be
                                // greater than zero.
 
-  Arc *arcs;  // Note: arcs[BeginArcIndex()] through arcs[EndArcIndex() - 1]
+  Arc *arcs;  // Note: arcs[begin_arc] through arcs[end_arc - 1]
               // are valid.
 
   Cfsa();
diff --git a/notes/array.txt b/notes/array.txt
@@ -0,0 +1,57 @@
+
+
+
+
+
+# defining type k2.Array
+
+
+class Array:
+
+    # `indexes` is a Tensor with one axis (i.e. it is 1-dimensional)
+    Tensor indexes;
+
+    # `data` is either:
+    #   - of type Tensor (if this corresponds to Array2 == 2-dimensional
+    #     array in C++)
+    #   - of type Array (if this corresponds to Array3 or higher-dimensional
+    #     array in C++)
+    # The Python code is structured a bit differently from the C++ code,
+    # due to the differences in the languages.
+    # When we dispatch things to C++ code there would be some
+    # big switch statement or if-statement to select the right
+    # template instantiation.
+    data;
+
+    def __len__(self):
+        return indexes.shape[0] - 1
+
+    @property
+    def shape(self):
+        # e.g. if indexes.shape is (15,) and
+        # data.shape is (150) -> this.shape would be (15,None)
+        # If data.shape is (150,4), this.shape would be (15,4)
+        # If data.shape is (150,None) (since data is an Array), this.shape
+        #     would be (15,None,None).
+        # The Nones are for dimensions where the shape is not known
+        # because it is variable.
+        return (indexes.shape[0] - 1, None, *data.shape[1:])
+
+
+
+class Fsa(Array):
+
+    # Think of this as a vector of vector of Arc, or in C++,
+    # an Array2<Arc>.
+    # An Arc has 3 int32_t's, so this.data is a Tensor with
+    # dtype int32 and shape (_, 3).
+
+
+
+class FsaVec(Array):
+
+    # Think of this as a vector of vector of vector of Arc, or in C++,
+    # an Array3<Arc>.
+    #
+    # this.data is an Array, and this.data.data is a Tensor with
+    # dtype int32 and shape (_, 3).
diff --git a/notes/python.txt b/notes/python.txt
@@ -207,6 +207,19 @@
    # note: fsas_det still has `phone_syms` and `nnet_arc_indexes`, now as sequences.
 
 
+def PseudoTensor:
+    """This is a class that behaves like a torch.Tensor but in fact only supports one kind of
+       operation, namely indexing with another torch.Tensor"""
+    def __init__(self, t, divisor):
+       """ Constructor.
+           Parameters:
+           t:  torch.LongTensor
+           divisor: int
+        """
+        self.t = t
+        self.divisor = divisor
+    def __getitem__(self, indexes):
+        return self.t[indexes / self.divisor]
 
 def DenseFsaVec:
 
@@ -246,66 +259,36 @@ def DenseFsaVec:
         # loglikes, one per arc of the CfsaVec object.  This is
         # a repeat of `loglikes` but possibly in a different
         # order.
+
+
         pass
 
     @property
     def loglikes(self):
         return self.arc_loglikes
 
 
-    def seg_frames_for_arcs(self, arc_indexes):
-        """
-        Returns the frame-indexes relative to the start of each segment
-        for each of a provided list of arc indexes, as a torch.LongTensor.
-        """
-
-        # Note: self.seg_frame_indexes will be a torch.IntTensor containing
-        # the frame index for each arc.  Later we'll address not being
-        # able to index with IntTensor but only LongTensor.
-        return self.seg_frame_indexes[arc_indexes / self.num_symbols]
-
-    def seq_frames_for_arcs(self, arc_indexes):
-        """
-        Returns the frame-indexes relative to the start of each sequence
-        for each of a provided list of arc indexes, as a torch.LongTensor.
-
-        Note: if a returned frame-index equals num_frames, then that
-        frame was a `final-arc` (a special arc going to the final state),
-        which cannot be used to index the `loglikes` array provided to
-        the constructor because it's out-of-range.
-        """
-
-        # Note: self.seq_frame_indexes will be a torch.IntTensor containing
-        # the frame index for each arc.  Later we'll address not being
-        # able to index with IntTensor but only LongTensor.
-        return self.seq_frame_indexes[arc_indexes / self.num_symbols]
-
-    def segments_for_arcs(self, arc_indexes):
-        """
-        Return the segment-indexes for each of a provided list of arcs,
-        which tells you which segment it was a part of.
-        """
-        return self.segment_indexes[arc_indexes / self.num_symbols]
-
-    def seqs_for_arcs(self, arc_indexes):
-        """
-        Return the segment-indexes for each of a provided list of arcs,
-        which tells you which sequence it was a part of.
+    @property
+    def seg_frames(self):
+        """Return something that 'acts' like a tensor, indexed by arc, of
+           the frame-index relative to the segment start corresponding to that
+           arc.  NOTE: self.frame_loglikes will actually be a sub-Tensor
+           of the Tensor created at the C++ level as the DenseFsaVecMeta object.
         """
-        return self.input_seq_indexes[self.segments_for_arcs(arc_indexes)]
-
-
-
-
-
-
-# compute posteriors..
-   first_pass_posts =
-
-
-
+        return PseudoTensor(self.frame_loglikes, self.num_symbols)
 
 
+    @property
+    def seq_frames(self):
+        """ as for seg_frames"""
+        pass
 
+    @property
+    def seq_ids(self):
+        """ as for seg_frames"""
+        pass
 
-   nnet_post = log_softmax(nnet_output)  # might use this later for something..
+    @property
+    def seg_ids(self):
+        """ as for seg_frames"""
+        pass