analog/fastnoise: avoid modulo operation when picking indices

When the pool is power-of-2-sized, index generation can be done using a simple bitmask. Document this, add logging info.

- use unsigned and fixed-length int where due
- avoid expensive integer modulo operation when possible
- extract power-of-two constexpr
- don't clutter the logs for small pool sizes

Signed-off-by: Marcus Müller <[email protected]>
klchiu · Mar 25, 2021 · 4dd02da · 4dd02da
1 parent 9a08594
commit 4dd02da
Show file tree

Hide file tree

Showing 5 changed files with 69 additions and 21 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,12 @@ Older Logs can be found in `docs/RELEASE-NOTES-*`.
   - requires MSVC 1914 (Microsoft VS 2017 15.7)
 - Windows build: removed unnecessary MSVC-specific system include overrides
 
+#### gr-analog
+
+- `fastnoise_source`: Use `uint64_t` seed API, use `size_t` for vector length/indices
+- `fastnoise_source`: Use a simple bitmask if the random pool length is a power
+  of 2 to determine indices, instead of `%`, which consumed considerable CPU
+
 ### Added
 
 - New in-tree module gr-pdu

diff --git a/gr-analog/include/gnuradio/analog/fastnoise_source.h b/gr-analog/include/gnuradio/analog/fastnoise_source.h
@@ -46,10 +46,11 @@ class ANALOG_API fastnoise_source : virtual public sync_block
      * \param seed seed for random generators. Note that for uniform
      *        and Gaussian distributions, this should be a negative
      *        number.
-     * \param samples Number of samples to pre-generate
+     * \param samples Number of samples to pre-generate. For performance
+     *        reasons, prefer a power of 2.
      */
     static sptr
-    make(noise_type_t type, float ampl, long seed = 0, long samples = 1024 * 16);
+    make(noise_type_t type, float ampl, uint64_t seed = 0, size_t samples = 1024 * 16);
     virtual T sample() = 0;
     virtual T sample_unbiased() = 0;
     virtual const std::vector<T>& samples() const = 0;

diff --git a/gr-analog/lib/fastnoise_source_impl.cc b/gr-analog/lib/fastnoise_source_impl.cc
@@ -13,6 +13,8 @@
 #include <config.h>
 #endif
 
+#include <boost/format.hpp>
+
 #include "fastnoise_source_impl.h"
 #include <gnuradio/io_signature.h>
 #include <gnuradio/xoroshiro128p.h>
@@ -22,26 +24,40 @@
 namespace gr {
 namespace analog {
 
+bool constexpr is_pwr_of_two(size_t value)
+{
+    // Bit trick: a power of 2 has exactly one bit set; value - 1 clears that bit and
+    // sets every bit below it, so value & (value - 1) is 0 only for a single-bit value.
+    // The leading `value &&` rejects zero, which is not a power of two.
+    return value && !(value & (value - 1));
+}
 template <class T>
 typename fastnoise_source<T>::sptr
-fastnoise_source<T>::make(noise_type_t type, float ampl, long seed, long samples)
+fastnoise_source<T>::make(noise_type_t type, float ampl, uint64_t seed, size_t samples)
 {
     return gnuradio::make_block_sptr<fastnoise_source_impl<T>>(type, ampl, seed, samples);
 }
 
 template <>
 void fastnoise_source_impl<gr_complex>::generate()
 {
-    int noutput_items = d_samples.size();
+    size_t noutput_items = d_samples.size();
+    if (noutput_items >= 1 << 23) {
+        GR_LOG_INFO(
+            d_logger,
+            boost::format("Generating %d complex values. This might take a while.") %
+                noutput_items);
+    }
+
     switch (d_type) {
     case GR_UNIFORM:
-        for (int i = 0; i < noutput_items; i++)
+        for (size_t i = 0; i < noutput_items; i++)
             d_samples[i] = gr_complex(d_ampl * ((d_rng.ran1() * 2.0) - 1.0),
                                       d_ampl * ((d_rng.ran1() * 2.0) - 1.0));
         break;
 
     case GR_GAUSSIAN:
-        for (int i = 0; i < noutput_items; i++)
+        for (size_t i = 0; i < noutput_items; i++)
             d_samples[i] = d_ampl * d_rng.rayleigh_complex();
         break;
     default:
@@ -52,33 +68,47 @@ void fastnoise_source_impl<gr_complex>::generate()
 template <class T>
 fastnoise_source_impl<T>::fastnoise_source_impl(noise_type_t type,
                                                 float ampl,
-                                                long seed,
-                                                long samples)
+                                                uint64_t seed,
+                                                size_t samples)
     : sync_block("fastnoise_source",
                  io_signature::make(0, 0, 0),
                  io_signature::make(1, 1, sizeof(T))),
       d_type(type),
       d_ampl(ampl),
-      d_rng(seed)
+      d_rng(seed),
+      d_bitmask(is_pwr_of_two(samples) ? samples - 1 : 0) // 0: no mask, sample() uses %
 {
+    if (!d_bitmask) { // also taken for samples <= 1, where the mask is 0 as well
+        GR_LOG_INFO(this->d_logger,
+                    boost::format("Using non-power-of-2 sample pool size %d. This has "
+                                  "negative effect on performance.") %
+                        samples);
+    }
     d_samples.resize(samples);
-    xoroshiro128p_seed(d_state, (uint64_t)seed);
+    xoroshiro128p_seed(d_state, seed);
     generate();
 }
 
 
 template <>
 fastnoise_source_impl<gr_complex>::fastnoise_source_impl(noise_type_t type,
                                                          float ampl,
-                                                         long seed,
-                                                         long samples)
+                                                         uint64_t seed,
+                                                         size_t samples)
     : sync_block("fastnoise_source",
                  io_signature::make(0, 0, 0),
                  io_signature::make(1, 1, sizeof(gr_complex))),
       d_type(type),
       d_ampl(ampl / sqrtf(2.0f)),
-      d_rng(seed)
+      d_rng(seed),
+      d_bitmask(is_pwr_of_two(samples) ? samples - 1 : 0)
 {
+    if (!d_bitmask) {
+        GR_LOG_INFO(d_logger,
+                    boost::format("Using non-power-of-2 sample pool size %d. This has "
+                                  "negative effect on performance.") %
+                        samples);
+    }
     d_samples.resize(samples);
     xoroshiro128p_seed(d_state, (uint64_t)seed);
     generate();
@@ -117,25 +147,30 @@ void fastnoise_source_impl<gr_complex>::set_amplitude(float ampl)
 template <class T>
 void fastnoise_source_impl<T>::generate()
 {
-    int noutput_items = d_samples.size();
+    size_t noutput_items = d_samples.size();
+    if (noutput_items >= 1 << 23) {
+        GR_LOG_INFO(this->d_logger,
+                    boost::format("Generating %d values. This might take a while.") %
+                        noutput_items);
+    }
     switch (d_type) {
     case GR_UNIFORM:
-        for (int i = 0; i < noutput_items; i++)
+        for (size_t i = 0; i < noutput_items; i++)
             d_samples[i] = (T)(d_ampl * ((d_rng.ran1() * 2.0) - 1.0));
         break;
 
     case GR_GAUSSIAN:
-        for (int i = 0; i < noutput_items; i++)
+        for (size_t i = 0; i < noutput_items; i++)
             d_samples[i] = (T)(d_ampl * d_rng.gasdev());
         break;
 
     case GR_LAPLACIAN:
-        for (int i = 0; i < noutput_items; i++)
+        for (size_t i = 0; i < noutput_items; i++)
             d_samples[i] = (T)(d_ampl * d_rng.laplacian());
         break;
 
     case GR_IMPULSE: // FIXME changeable impulse settings
-        for (int i = 0; i < noutput_items; i++)
+        for (size_t i = 0; i < noutput_items; i++)
             d_samples[i] = (T)(d_ampl * d_rng.impulse(9));
         break;
     default:
@@ -163,7 +198,12 @@ int fastnoise_source_impl<T>::work(int noutput_items,
 template <class T>
 T fastnoise_source_impl<T>::sample()
 {
-    size_t idx = xoroshiro128p_next(d_state) % d_samples.size();
+    size_t idx;
+    if (d_bitmask) { // nonzero mask (constructor: samples - 1 for a power-of-2 pool)
+        idx = xoroshiro128p_next(d_state) & d_bitmask;
+    } else { // d_bitmask == 0: fall back to the integer modulo
+        idx = xoroshiro128p_next(d_state) % d_samples.size();
+    }
     return d_samples[idx];
 }
 

diff --git a/gr-analog/lib/fastnoise_source_impl.h b/gr-analog/lib/fastnoise_source_impl.h
@@ -28,9 +28,10 @@ class fastnoise_source_impl : public fastnoise_source<T>
     gr::random d_rng;
     std::vector<T> d_samples;
     uint64_t d_state[2];
+    size_t d_bitmask;
 
 public:
-    fastnoise_source_impl(noise_type_t type, float ampl, long seed, long samples);
+    fastnoise_source_impl(noise_type_t type, float ampl, uint64_t seed, size_t samples);
     ~fastnoise_source_impl() override;
 
     T sample() override;

diff --git a/gr-analog/python/analog/bindings/fastnoise_source_python.cc b/gr-analog/python/analog/bindings/fastnoise_source_python.cc
@@ -14,7 +14,7 @@
 /* BINDTOOL_GEN_AUTOMATIC(0)                                                       */
 /* BINDTOOL_USE_PYGCCXML(0)                                                        */
 /* BINDTOOL_HEADER_FILE(fastnoise_source.h)                                        */
-/* BINDTOOL_HEADER_FILE_HASH(a1bcac8382da0203f011bc01e8f42c98)                     */
+/* BINDTOOL_HEADER_FILE_HASH(8e91642118fc23a803672619b185d5cc)                     */
 /***********************************************************************************/
 
 #include <pybind11/complex.h>