Mandelbrot benchmark's SIMD doesn't give the same output as the scalar version. #26

Panhaolin2001 · 2024-11-06T13:03:00Z

#include <cassert>
#include <cstddef>
#include <vector>

#include "VecCore/VecCore"

using namespace vecCore;

/// Scalar reference implementation of the Mandelbrot escape-time iteration.
///
/// The rectangle [xmin, xmax) x [ymin, ymax) is sampled on an nx-by-ny grid.
/// For every sample c the recurrence z <- z*z + c is started from z = c and
/// run until either |z|^2 >= 4 or max_iter iterations have been performed.
///
/// @param xmin,xmax  real-axis bounds of the sampled rectangle
/// @param nx         number of samples along the real axis
/// @param ymin,ymax  imaginary-axis bounds of the sampled rectangle
/// @param ny         number of samples along the imaginary axis
/// @param max_iter   iteration cap (at least one iteration is always run)
/// @param image      output buffer of at least nx*ny bytes; the count for
///                   sample (i, j) is stored at image[ny*i + j], truncated
///                   to unsigned char
template<typename T>
void mandelbrot(T xmin, T xmax, size_t nx,
                T ymin, T ymax, size_t ny,
                size_t max_iter, unsigned char *image)
{
    const T step_re = (xmax - xmin) / T(nx);
    const T step_im = (ymax - ymin) / T(ny);

    for (size_t col = 0; col < nx; ++col) {
        const T c_re = xmin + T(col) * step_re;

        for (size_t row = 0; row < ny; ++row) {
            const T c_im = ymin + T(row) * step_im;

            // The orbit starts at z = c rather than z = 0.
            T z_re = c_re;
            T z_im = c_im;
            size_t count = 0;

            for (;;) {
                const T next_re = z_re*z_re - z_im*z_im + c_re;
                const T next_im = T(2.0) * z_re*z_im + c_im;
                z_re = next_re;
                z_im = next_im;

                // Stop on the iteration cap first, then on escape.
                ++count;
                if (!(count < max_iter))
                    break;
                if (!(z_re*z_re + z_im*z_im < T(4.0)))
                    break;
            }

            // Narrowing is intentional: counts above 255 wrap modulo 256.
            image[ny*col + row] = static_cast<unsigned char>(count);
        }
    }
}

/// SIMD Mandelbrot escape-time iteration; T is a VecCore vector type.
///
/// Each inner-loop step processes VectorSize<T>() consecutive samples along
/// the imaginary axis. Every lane iterates z <- z*z + c starting from z = c;
/// a lane stops updating (and keeps its iteration count) once |z|^2 >= 4.
/// The vector iterates until all lanes have escaped or max_iter is reached.
///
/// @param xmin,xmax  real-axis bounds of the sampled rectangle
/// @param nx         number of samples along the real axis
/// @param ymin,ymax  imaginary-axis bounds of the sampled rectangle
/// @param ny         number of samples along the imaginary axis; does not
///                   need to be a multiple of the vector width
/// @param max_iter   iteration cap (at least one iteration is always run)
/// @param image      output buffer of at least nx*ny bytes; the count for
///                   sample (i, j) is stored at image[ny*i + j], truncated
///                   to unsigned char
template<typename T>
void mandelbrot_v(Scalar<T> xmin, Scalar<T> xmax, size_t nx,
                  Scalar<T> ymin, Scalar<T> ymax, size_t ny,
                  Scalar<Index<T>> max_iter, unsigned char *image)
{
    // Per-lane offsets 0, 1, ..., VectorSize<T>() - 1.
    T iota;
    for (size_t i = 0; i < VectorSize<T>(); ++i)
        Set<T>(iota, i, i);

    T dx = T(xmax - xmin) / T((Scalar<T>)nx);
    T dy = T(ymax - ymin) / T((Scalar<T>)ny);

    for (size_t i = 0; i < nx; ++i) {
        for (size_t j = 0; j < ny; j += VectorSize<T>()) {
            Scalar<Index<T>> k(0);
            T x = xmin + T((Scalar<T>)i) * dx, cr = x, zr = x;
            // Build the lane index (j + lane) first and scale it once, so each
            // lane evaluates ymin + index * dy exactly like the scalar
            // mandelbrot() does. Summing (ymin + j*dy) + lane*dy rounds
            // differently and can change iteration counts near the boundary.
            // NOTE(review): compiler FMA contraction may still cause small
            // scalar/SIMD differences -- confirm with -ffp-contract=off.
            T y = ymin + (T((Scalar<T>)j) + iota) * dy, ci = y, zi = y;

            Index<T> kv(0);
            Mask<T> m(true);

            do {
                x = zr*zr - zi*zi + cr;
                y = T((Scalar<T>)2.0) * zr*zi + ci;
                // Only lanes that were still bounded advance and record the
                // new iteration count; escaped lanes stay frozen.
                MaskedAssign<T>(zr, m, x);
                MaskedAssign<T>(zi, m, y);
                MaskedAssign<Index<T>>(kv, m, ++k);
                m = zr*zr + zi*zi < T((Scalar<T>)4.0);
            } while (k < max_iter && !MaskEmpty(m));

            // Store only lanes that fall inside the current row, so a row
            // length that is not a multiple of the vector width neither
            // spills into the next row nor writes past the end of image.
            for (size_t lane = 0; lane < VectorSize<T>() && j + lane < ny; ++lane)
                image[ny*i + j + lane] = (unsigned char) Get(kv, lane);
        }
    }
}


// Renders the same region with the SIMD and the scalar kernels and checks
// that both produce identical per-pixel iteration counts.
// Note: the comparison uses assert, so it is compiled out under NDEBUG.
int main(){
    const double xmin = -2.1, xmax = 1.1;
    const double ymin = -1.35, ymax = 1.35;

    const size_t nx = 1024, ny = 864, max_iter = 500;

    // std::vector owns the buffers, so they are released on every exit path.
    std::vector<unsigned char> image_scalar(nx*ny);
    std::vector<unsigned char> image_simd(nx*ny);

    mandelbrot_v<backend::SIMDNative::Double_v>(xmin, xmax, nx, ymin, ymax, ny,
                            max_iter, image_simd.data());
    mandelbrot<double>(xmin, xmax, nx, ymin, ymax, ny,
                            max_iter, image_scalar.data());

    // size_t index: avoids the signed/unsigned comparison against nx*ny.
    for (size_t i = 0; i < nx*ny; ++i) {
        assert(image_simd[i] == image_scalar[i]);
    }
}

The text was updated successfully, but these errors were encountered:

Panhaolin2001 · 2024-11-06T13:05:28Z

My compiler version is clang 16.0.6, and the compile options are -march=native -O2 -std=c++2a

amadio · 2024-11-19T15:54:58Z

What hardware and ISA are you running on? It's expected that the SIMD and non-SIMD codes give outputs with minor differences. I checked on my machine with AVX2 and compared the output images and the differences are very minor. Only the intrinsics version of the benchmark has a bug that needs to be fixed.

amadio · 2024-11-19T15:59:18Z

Here are attached images for my test:

float scalar:

float std::simd:

differences highlighted:

With double you'd get fewer differences. Cheers,

amadio self-assigned this Nov 19, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Mandelbrot benchmark's SIMD doesn't give the same output as the scalar version. #26

Mandelbrot benchmark's SIMD doesn't give the same output as the scalar version. #26

Panhaolin2001 commented Nov 6, 2024

Panhaolin2001 commented Nov 6, 2024

amadio commented Nov 19, 2024

amadio commented Nov 19, 2024

Mandelbrot benchmark's SIMD doesn't give the same output as the scalar version. #26

Mandelbrot benchmark's SIMD doesn't give the same output as the scalar version. #26

Comments

Panhaolin2001 commented Nov 6, 2024

Panhaolin2001 commented Nov 6, 2024

amadio commented Nov 19, 2024

amadio commented Nov 19, 2024