Implement ComposeArcMaps for 1-D arrays. #726

Merged: 10 commits, Apr 24, 2021
52 changes: 40 additions & 12 deletions k2/csrc/array_ops.cu
@@ -304,10 +304,39 @@ Array1<int32_t> GetCounts(ContextPtr c, const int32_t *src_data,
nullptr, temp_storage_bytes, src_data, ans_data, n + 1, 0, n, src_dim,
c->GetCudaStream())); // The first time is to determine temporary
// device storage requirements.
Array1<int8_t> d_temp_storage(c, temp_storage_bytes);
K2_CHECK_CUDA_ERROR(cub::DeviceHistogram::HistogramEven(
d_temp_storage.Data(), temp_storage_bytes, src_data, ans_data, n + 1, 0,
n, src_dim, c->GetCudaStream()));

constexpr std::size_t kThreshold = (static_cast<std::size_t>(1) << 33);

if (temp_storage_bytes < kThreshold) {
RegionPtr temp_storage = NewRegion(c, temp_storage_bytes);
K2_CHECK_CUDA_ERROR(cub::DeviceHistogram::HistogramEven(
temp_storage->data, temp_storage_bytes, src_data, ans_data, n + 1, 0,
n, src_dim, c->GetCudaStream()));
} else {
// split the array and do a recursive call
//
// See https://github.com/NVIDIA/cub/issues/288
// for why we split it

Review comment (Collaborator Author): Relates to NVIDIA/cub#288
Review comment (Collaborator): kThreshold.
Review comment (Collaborator Author): Fixed. Thanks
int32_t first_start = 0; // inclusive
int32_t first_end = src_dim / 2; // exclusive
int32_t first_dim = first_end - first_start;

int32_t second_start = first_end; // inclusive
int32_t second_end = src_dim; // exclusive
int32_t second_dim = second_end - second_start;

Array1<int32_t> first_subset =
GetCounts(c, src_data + first_start, first_dim, n);
Array1<int32_t> second_subset =
GetCounts(c, src_data + second_start, second_dim, n);

const int32_t *first_subset_data = first_subset.Data();
const int32_t *second_subset_data = second_subset.Data();
K2_EVAL(
c, n, set_ans, (int32_t i)->void {
ans_data[i] = first_subset_data[i] + second_subset_data[i];
});
}
}
return ans;
}
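
For illustration, here is a minimal CPU-side sketch of why the split is valid (not part of this PR; CountsCpu and CountsBySplitting are hypothetical helpers). Per-bin counts are additive over any partition of the input, so counting the two halves separately and summing the per-bin results reproduces the single-pass histogram, which is what the K2_EVAL merge above does.

#include <cstdint>
#include <vector>

// Count how many times each value in [0, n) occurs in src[0..dim).
std::vector<int32_t> CountsCpu(const int32_t *src, int32_t dim, int32_t n) {
  std::vector<int32_t> counts(n, 0);
  for (int32_t i = 0; i < dim; ++i) ++counts[src[i]];  // assumes 0 <= src[i] < n
  return counts;
}

// Same result via a two-way split, mirroring the recursion in GetCounts().
std::vector<int32_t> CountsBySplitting(const int32_t *src, int32_t dim,
                                       int32_t n) {
  int32_t half = dim / 2;
  std::vector<int32_t> first = CountsCpu(src, half, n);
  std::vector<int32_t> second = CountsCpu(src + half, dim - half, n);
  for (int32_t i = 0; i < n; ++i) first[i] += second[i];  // merge the histograms
  return first;
}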
@@ -341,14 +370,13 @@ Array1<int32_t> InvertMonotonicDecreasing(const Array1<int32_t> &src) {
MonotonicDecreasingUpperBound(ans, &ans);
#ifndef NDEBUG
K2_EVAL(
c, ans_dim, lambda_check_values, (int32_t i) -> void {
c, ans_dim, lambda_check_values, (int32_t i)->void {
int32_t j = ans_data[i];
K2_CHECK((j == src_dim || src_data[j] <= i) &&
(j == 0 || src_data[j-1] > i));
(j == 0 || src_data[j - 1] > i));
});
#endif


return ans;
}

@@ -482,17 +510,17 @@ bool IsPermutation(const Array1<int32_t> &a) {
return Equal(ones, 0);
}


void RowSplitsToRowIdsOffset(const Array1<int32_t> &row_splits_part,
Array1<int32_t> *row_ids_part) {
Array1<int32_t> *row_ids_part) {
NVTX_RANGE(K2_FUNC);
ContextPtr c = row_splits_part.Context();
Array1<int32_t> row_splits(c, row_splits_part.Dim());
int32_t *row_splits_data = row_splits.Data();
const int32_t *row_splits_part_data = row_splits_part.Data();
K2_EVAL(c, row_splits_part.Dim(), lambda_subtract_offset, (int32_t i) {
row_splits_data[i] = row_splits_part_data[i] - row_splits_part_data[0];
});
K2_EVAL(
c, row_splits_part.Dim(), lambda_subtract_offset, (int32_t i) {
row_splits_data[i] = row_splits_part_data[i] - row_splits_part_data[0];
});
RowSplitsToRowIds(row_splits, row_ids_part);
}
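
A small worked example (hypothetical values, not from the PR) of what this helper computes:

// row_splits_part = [3, 5, 8]   (a slice of a larger row_splits array)
// subtracting the leading offset 3 gives row_splits = [0, 2, 5]
// RowSplitsToRowIds([0, 2, 5]) then yields row_ids_part = [0, 0, 1, 1, 1],
// i.e. two elements belong to row 0 and three elements belong to row 1.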

6 changes: 3 additions & 3 deletions k2/csrc/array_ops.h
@@ -665,14 +665,14 @@ bool ApproxEqual(const Array2<T> &a, const Array2<T> &b, T tol = T(0.0001));
`0 <= indexes[i] < src.Dim()` if
`allow_minus_one == false`,
else -1 is also allowed and the corresponding
output element will be zero.
output element will be `default_value`.
@return Returns an `Array1<T>` of dimension indexes.Dim(),
with `ans[i] = src[indexes[i]]` (or zero if
with `ans[i] = src[indexes[i]]` (or `default_value` if
`allow_minus_one == true` and `indexes[i] == -1`).
*/
template <typename T>
Array1<T> Index(const Array1<T> &src, const Array1<int32_t> &indexes,
bool allow_minus_one);
bool allow_minus_one, T default_value);
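
For clarity, a plain-C++ sketch of the documented behaviour when allow_minus_one == true (IndexWithDefault is a hypothetical stand-in, not the k2 API):

#include <cstdint>
#include <vector>

std::vector<float> IndexWithDefault(const std::vector<float> &src,
                                    const std::vector<int32_t> &indexes,
                                    float default_value) {
  std::vector<float> ans(indexes.size());
  for (std::size_t i = 0; i != indexes.size(); ++i)
    ans[i] = (indexes[i] < 0) ? default_value : src[indexes[i]];  // -1 -> default_value
  return ans;
}

// Example: src = {10, 20, 30}, indexes = {2, -1, 0}, default_value = -1
// gives ans = {30, -1, 10}.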

/*
Index src's rows with `indexes` which contains the row indexes.
6 changes: 3 additions & 3 deletions k2/csrc/array_ops_inl.h
@@ -760,7 +760,7 @@ Array1<T> Plus(const Array1<T> &src, T t) {

template <typename T>
Array1<T> Index(const Array1<T> &src, const Array1<int32_t> &indexes,
bool allow_minus_one) {
bool allow_minus_one, T default_value) {
NVTX_RANGE(K2_FUNC);
ContextPtr &c = src.Context();
K2_CHECK(c->IsCompatible(*indexes.Context()));
@@ -775,13 +775,13 @@ Array1<T> Index(const Array1<T> &src, const Array1<int32_t> &indexes,
#pragma unroll(4)
for (int32_t i = 0; i < ans_dim; i++) {
int32_t index = index_data[i];
T value = (index < 0 ? T(0) : src_data[index]);
T value = (index < 0 ? default_value : src_data[index]);
ans_data[i] = value;
}
} else {
auto lambda_set_values = [=] __device__(int32_t i) -> void {
int32_t index = index_data[i];
T value = (index < 0 ? T(0) : src_data[index]);
T value = (index < 0 ? default_value : src_data[index]);
ans_data[i] = value;
};
EvalDevice(c, ans_dim, lambda_set_values);
18 changes: 11 additions & 7 deletions k2/csrc/array_ops_test.cu
@@ -1519,20 +1519,23 @@ TEST(OpsTest, Array1IndexTest) {
RandUniformArray1<int32_t>(c, ans_dim, 0, src_dim - 1),
indexes_minus_one =
RandUniformArray1<int32_t>(c, ans_dim, -1, src_dim - 1);
Array1<T> ans_no_minus_one = Index(src, indexes_no_minus_one, false),

T default_value = loop - 1;

Array1<T> ans_no_minus_one = Index(src, indexes_no_minus_one, false, default_value),
ans_no_minus_one_check = src[indexes_no_minus_one],
ans_no_minus_one_check2 = Index(src, indexes_no_minus_one, true);
ans_no_minus_one_check2 = Index(src, indexes_no_minus_one, true, default_value);
ASSERT_TRUE(Equal(ans_no_minus_one, ans_no_minus_one_check));
ASSERT_TRUE(Equal(ans_no_minus_one, ans_no_minus_one_check2));

Array1<T> ans_minus_one = Index(src, indexes_minus_one, true);
Array1<T> ans_minus_one = Index(src, indexes_minus_one, true, default_value);

ans_minus_one = ans_minus_one.To(cpu_context);
src = src.To(cpu_context);
indexes_minus_one = indexes_minus_one.To(cpu_context);
for (int32_t i = 0; i < indexes_minus_one.Dim(); i++) {
int32_t index = indexes_minus_one[i];
ASSERT_EQ(ans_minus_one[i], (index < 0 ? 0 : src[index]));
ASSERT_EQ(ans_minus_one[i], (index < 0 ? default_value : src[index]));
}
}
}
@@ -1717,7 +1720,8 @@ void Array2ContiguousTest() {
Array2<T> src_part = src.ColArange(slice_dim1_begin, slice_dim1_end),
src_part_contiguous1 = ToContiguous(src_part);
Array2<Any> src_part_contiguous_generic2 = ToContiguous(src_part.Generic());
Array2<T> src_part_contiguous2 = src_part_contiguous_generic2.Specialize<T>();
Array2<T> src_part_contiguous2 =
src_part_contiguous_generic2.Specialize<T>();

K2_CHECK_EQ(Equal(src_part_contiguous1, src_part_contiguous2),
true);
@@ -1729,8 +1733,8 @@
TEST(OpsTest, ApproxEqualTest) {
Array2<float> array1("[ [ 1 2 3 ] [4 5 6 ]]"),
array2("[ [ 1.1 2 3 ] [4 5 6 ]]");
K2_CHECK_EQ(ApproxEqual(array1, array2, float(0.2)), true);
K2_CHECK_EQ(ApproxEqual(array1, array2, float(0.01)), false);
K2_CHECK_EQ(ApproxEqual(array1, array2, 0.2f), true);
K2_CHECK_EQ(ApproxEqual(array1, array2, 0.01f), false);
}

TEST(OpsTest, Array2Contiguous) {
7 changes: 4 additions & 3 deletions k2/csrc/array_test.cu
@@ -67,7 +67,6 @@ void TestArray1() {
// it is necessary
ASSERT_THROW(array.Generic().template Specialize<unsigned char>(),
std::runtime_error); // assuming T != unsigned char

}

{
@@ -268,12 +267,14 @@ void TestArray2() {
EXPECT_EQ(array.Data(), array_copy.Data());

const Array2<T> &array_copy_const = array;
const Array2<T> &array_copy_const2 = array_copy_const.Generic().template Specialize<T>();
const Array2<T> &array_copy_const2 =
array_copy_const.Generic().template Specialize<T>();
EXPECT_EQ(array_copy_const.Data(), array_copy.Data());
EXPECT_EQ(array_copy_const2.Data(), array_copy.Data());

const Array1<T> &array1_copy_const = arr1;
const Array1<T> &array1_copy_const2 = array1_copy_const.Generic().template Specialize<T>();
const Array1<T> &array1_copy_const2 =
array1_copy_const.Generic().template Specialize<T>();
EXPECT_EQ(array1_copy_const.Data(), arr1.Data());
EXPECT_EQ(array1_copy_const2.Data(), arr1.Data());

6 changes: 4 additions & 2 deletions k2/csrc/dtype.cu
@@ -11,8 +11,10 @@ namespace k2 {
const DtypeTraits g_dtype_traits_array[] = {
{kUnknownBase, 0, "Any", 0},
{kFloatBase, 4, "half"}, {kFloatBase, 4, "float"}, {kFloatBase, 8, "double"},
{kIntBase, 1, "int8"}, {kIntBase, 2, "int16"}, {kIntBase, 4, "int32"}, {kIntBase, 8, "int64"},
{kUintBase, 1, "uint8"}, {kUintBase, 2, "uint16"}, {kUintBase, 4, "uint32"}, {kUintBase, 8, "uint64"},
{kIntBase, 1, "int8"}, {kIntBase, 2, "int16"},
{kIntBase, 4, "int32"}, {kIntBase, 8, "int64"},
{kUintBase, 1, "uint8"}, {kUintBase, 2, "uint16"},
{kUintBase, 4, "uint32"}, {kUintBase, 8, "uint64"},
{kUnknownBase, 16, "Arc", 4}, {kUnknownBase, 0, "Other", 0}
};

8 changes: 4 additions & 4 deletions k2/csrc/dtype.h
@@ -81,7 +81,7 @@ enum class Dtype {

// This is needed because the comma in std::is_same<T,Any>::value prevents it
// from appearing inside macro arguments.
#define K2_TYPE_IS_ANY(T) std::is_same<T,Any>::value
#define K2_TYPE_IS_ANY(T) (std::is_same<T, Any>::value)
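
A hypothetical illustration (not from the PR) of the problem the wrapper avoids: the preprocessor does not understand template angle brackets, so the bare expression is split at the comma when passed to a function-like macro such as K2_CHECK:

// K2_CHECK(std::is_same<T, Any>::value);  // preprocessor sees two macro arguments
// K2_CHECK(K2_TYPE_IS_ANY(T));            // fine: a single, parenthesised argument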

constexpr Dtype kAnyDtype = Dtype::kAnyDtype;
constexpr Dtype kHalfDtype = Dtype::kHalfDtype;
@@ -112,10 +112,10 @@ struct DtypeOf<Any> {
static const Dtype dtype = kAnyDtype;
};

//template <>
//struct DtypeOf<half> {
// template <>
// struct DtypeOf<half> {
// static const Dtype dtype = kHalfDtype;
//};
// };

template <>
struct DtypeOf<float> {
2 changes: 1 addition & 1 deletion k2/csrc/dtype_test.cu
@@ -39,7 +39,7 @@ template <typename T> void CheckDtypes() {
}

TEST(DtypeTest, CheckDtypes) {
//CheckDtypes<half>();
// CheckDtypes<half>();
CheckDtypes<float>();
CheckDtypes<double>();
CheckDtypes<int8_t>();
1 change: 0 additions & 1 deletion k2/csrc/fsa_utils.h
@@ -527,7 +527,6 @@ FsaVec FsaVecFromArcIndexes(FsaVec &fsas, Ragged<int32_t> &best_arc_indexes);
Ragged<int32_t> ComposeArcMaps(Ragged<int32_t> &step1_arc_map,
Ragged<int32_t> &step2_arc_map);
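
The PR title says ComposeArcMaps is also being implemented for 1-D arrays. As a hedged sketch only (the 1-D overload is not shown in this diff, so its exact form is an assumption), composing two 1-D arc maps is a gather that propagates -1, which is what the new default_value argument to Index() makes convenient:

#include <cstdint>
#include <vector>

// ans[i] is the original arc that step2's arc i traces back to, or -1 if
// either step introduced the arc (i.e. there is no source arc).
std::vector<int32_t> ComposeArcMaps1D(const std::vector<int32_t> &step1_arc_map,
                                      const std::vector<int32_t> &step2_arc_map) {
  std::vector<int32_t> ans(step2_arc_map.size());
  for (std::size_t i = 0; i != step2_arc_map.size(); ++i) {
    int32_t j = step2_arc_map[i];
    ans[i] = (j < 0) ? -1 : step1_arc_map[j];
  }
  return ans;
}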


/*
Return a ragged array that represents the cumulative distribution function
(cdf) of the probability of arcs leaving each state of `fsas`.
10 changes: 4 additions & 6 deletions k2/csrc/hash.cu
@@ -20,7 +20,7 @@ void Hash::CheckEmpty() {
if (~(hash_data[i]) != 0) error_data[0] = i;
});
int32_t i = error[0];
if (i >= 0) { // there was an error; i is the index into the hash where
if (i >= 0) { // there was an error; i is the index into the hash where
// there was an element.
int64_t elem = data_[i];
// We don't know the number of bits the user was using for the key vs.
@@ -30,10 +30,9 @@ void Hash::CheckEmpty() {
}
}

void Hash::Resize(int32_t new_num_buckets,
int32_t num_key_bits,
int32_t num_value_bits, // = -1,
bool copy_data) { // = true
void Hash::Resize(int32_t new_num_buckets, int32_t num_key_bits,
int32_t num_value_bits, // = -1,
bool copy_data) { // = true
NVTX_RANGE(K2_FUNC);
if (num_value_bits < 0)
num_value_bits = 64 - num_key_bits;
@@ -66,5 +65,4 @@ void Hash::Resize(int32_t new_num_buckets,
// expect the hash to be empty when destroyed).
}


} // namespace k2
9 changes: 5 additions & 4 deletions k2/csrc/hash_test.cu
@@ -42,7 +42,8 @@ void TestHashConstruct() {
*values_data = values.Data(),
*success_data = success.Data();
int32_t *counts_data = count_per_key.Data();
Hash::Accessor<NUM_KEY_BITS> acc = hash.GetAccessor<Hash::Accessor<NUM_KEY_BITS>>();
Hash::Accessor<NUM_KEY_BITS> acc =
hash.GetAccessor<Hash::Accessor<NUM_KEY_BITS>>();
K2_EVAL(c, num_elems, lambda_insert_pairs, (int32_t i) -> void {
uint32_t key = keys_data[i],
value = values_data[i],
@@ -237,7 +238,7 @@ void TestHashConstructPacked(int32_t num_key_bits,
success_data[i] = success;
});

if (size != 65535) // just for some variety..
if (size != 65535) // just for some variety..
num_value_bits += 1; // Try changing the number of value bits, so we
// can test Resize() with changes in that.

@@ -254,8 +255,8 @@

int32_t num_implicit_key_bits = num_key_bits + num_value_bits - 64,
num_kept_key_bits = num_key_bits - num_implicit_key_bits;
uint64_t implicit_key_bits_mask = (uint64_t(1) << num_implicit_key_bits) - 1;

uint64_t implicit_key_bits_mask =
(uint64_t(1) << num_implicit_key_bits) - 1;

uint64_t val = 0;
uint64_t *key_val_addr = nullptr;
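A worked example of the bit arithmetic above, with hypothetical numbers (the reading of "implicit" bits is an assumption based on this test): with num_key_bits = 40 and num_value_bits = 28, the pair needs 68 > 64 bits, so

// num_implicit_key_bits  = 40 + 28 - 64 = 4
// num_kept_key_bits      = 40 - 4       = 36
// implicit_key_bits_mask = (1 << 4) - 1 = 0xF
// i.e. the low 4 key bits are carried implicitly rather than stored in the
// 64-bit slot alongside the value.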
3 changes: 1 addition & 2 deletions k2/csrc/ragged.h
@@ -325,8 +325,7 @@ struct Ragged {
}

explicit Ragged(const RaggedShape &shape, Dtype dtype = DtypeOf<T>::dtype)
: shape(shape), values(shape.Context(), shape.NumElements(), dtype) {
}
: shape(shape), values(shape.Context(), shape.NumElements(), dtype) {}

// Defined in ragged_ops_inl.h
// This will crash if T == Any.
11 changes: 6 additions & 5 deletions k2/csrc/tensor.h
@@ -106,12 +106,12 @@ class Shape {

int32_t num_axes_; // Must be >= 0

// num_elements_ is the number of distinct tuples of indexes; since strides may
// be zero, we do not guarantee that all these elements occupy distinct memory
// locations. See NumElements()
// num_elements_ is the number of distinct tuples of indexes; since strides
// may be zero, we do not guarantee that all these elements occupy distinct
// memory locations. See NumElements()
int64_t num_elements_;
// see documentation for IsContiguous() for its meaning. This is "derived data";
// it is computed by IsContiguous().
// see documentation for IsContiguous() for its meaning. This is "derived
// data"; it is computed by IsContiguous().
bool is_contiguous_;

// elements of dims_ and strides_ >= num_axes_ are currently not set;
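
A hypothetical example (not from the diff) of the zero-stride case mentioned above: with dims = {3, 4} and strides = {0, 1}, num_elements_ is 3 * 4 = 12 index tuples, but the zero stride on axis 0 makes all three "rows" alias the same four memory locations, so only 4 distinct addresses are actually referenced.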
@@ -233,6 +233,7 @@ class Tensor {
TensorImplPtr Impl() const { return impl_; }
// This is for use by implementation code; be careful with it.
explicit Tensor(TensorImplPtr impl);

private:
// For use when `shape` and `dtype` are already set up; sets data and
// byte_offset.