diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..544e12f --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +Build +test +*.*sdf +*.suo +*.user \ No newline at end of file diff --git a/CLExpr.sln b/CLExpr.sln new file mode 100644 index 0000000..b0a419e --- /dev/null +++ b/CLExpr.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.30110.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CLExpr", "CLExpr\CLExpr.vcxproj", "{D5AA35BC-C854-4E44-B093-3BC38319E25C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Debug|Win32.ActiveCfg = Debug|Win32 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Debug|Win32.Build.0 = Debug|Win32 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Debug|x64.ActiveCfg = Debug|x64 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Debug|x64.Build.0 = Debug|x64 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Release|Win32.ActiveCfg = Release|Win32 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Release|Win32.Build.0 = Release|Win32 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Release|x64.ActiveCfg = Release|x64 + {D5AA35BC-C854-4E44-B093-3BC38319E25C}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/CLExpr/CLExpr.vcxproj b/CLExpr/CLExpr.vcxproj new file mode 100644 index 0000000..8443086 --- /dev/null +++ b/CLExpr/CLExpr.vcxproj @@ -0,0 +1,158 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {D5AA35BC-C854-4E44-B093-3BC38319E25C} + CLExpr + CLExpr + + + + DynamicLibrary + true + v120 + MultiByte + + + DynamicLibrary + true + v120 + MultiByte + + + DynamicLibrary + false + v110_xp + true + MultiByte + + + DynamicLibrary + false + v110_xp + true + MultiByte + + + + + + + + + + + + + + + + + + + $(SolutionDir)Build\$(Platform)\$(Configuration)\ + $(SolutionDir)Build\Temp\$(Platform)\$(Configuration)\ + $(AVISYNTH_SDK_PATH)\include;$(CUDA_INC_PATH);$(IncludePath) + $(CUDA_LIB_PATH)\..\Win32;$(LibraryPath) + + + $(SolutionDir)Build\$(Platform)\$(Configuration)\ + $(SolutionDir)Build\Temp\$(Platform)\$(Configuration)\ + $(AVISYNTH_SDK_PATH)\include;$(CUDA_INC_PATH);$(IncludePath) + $(CUDA_LIB_PATH)\..\x64;$(LibraryPath) + + + $(SolutionDir)Build\$(Platform)\$(Configuration)\ + $(SolutionDir)Build\Temp\$(Platform)\$(Configuration)\ + $(AVISYNTH_SDK_PATH)\include;$(CUDA_INC_PATH);$(IncludePath) + $(CUDA_LIB_PATH)\..\Win32;$(LibraryPath) + + + $(SolutionDir)Build\$(Platform)\$(Configuration)\ + $(SolutionDir)Build\Temp\$(Platform)\$(Configuration)\ + $(AVISYNTH_SDK_PATH)\include;$(CUDA_INC_PATH);$(IncludePath) + $(CUDA_LIB_PATH)\..\x64;$(LibraryPath) + + + + Level3 + Disabled + true + + + true + OpenCL.lib;%(AdditionalDependencies) + + + + + Level3 + Disabled + true + + + true + OpenCL.lib;%(AdditionalDependencies) + + + + + Level3 + MaxSpeed + true + true + true + + + true + true + true + OpenCL.lib;%(AdditionalDependencies) + + + + + Level3 + MaxSpeed + true + true + true + + + true + true + true + OpenCL.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/CLExpr/CLExpr.vcxproj.filters b/CLExpr/CLExpr.vcxproj.filters new file mode 100644 index 0000000..ee260f6 --- /dev/null +++ b/CLExpr/CLExpr.vcxproj.filters @@ -0,0 +1,39 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/CLExpr/clcode.h b/CLExpr/clcode.h new file mode 100644 index 0000000..702d51a --- /dev/null +++ b/CLExpr/clcode.h @@ -0,0 +1,157 @@ +#ifndef __EXPR_COMMON__ +#define __EXPR_COMMON__ + + +const char* common_ocl_functions = " \ +ulong clip_ulong(float x) { \ + return (ulong)round(clamp(x, 0.0f, (float)(ULONG_MAX))); \ +} \ + \ +long clip_long(float x) { \ + return (long)round(clamp(x, (float)(LONG_MIN), (float)(LONG_MAX))); \ +} \ + \ +float interrogation(float x, float y, float z) { \ + return x > 0 ? y : z; \ +} \ + \ +float equal(float x, float y) { \ + return fabs(x - y) < 0.000001f ? 1.0f : -1.0f; \ +} \ + \ +float notEqual(float x, float y) { \ + return fabs(x - y) >= 0.000001f ? 1.0f : -1.0f; \ +} \ + \ +float inferior(float x, float y) { \ + return x <= y ? 1.0f : -1.0f; \ +} \ + \ +float inferiorStrict(float x, float y) { \ + return x < y ? 1.0f : -1.0f; \ +} \ + \ +float superior(float x, float y) { \ + return x >= y ? 1.0f : -1.0f; \ +} \ + \ +float superiorStrict(float x, float y) { \ + return x > y ? 1.0f : -1.0f; \ +} \ + \ +float mt_and(float x, float y) { \ + return ((x > 0) && (y > 0)) ? 1.0f : -1.0f; \ +} \ + \ +float mt_or(float x, float y) { \ + return ((x > 0) || (y > 0)) ? 1.0f : -1.0f; \ +} \ + \ +float mt_andNot(float x, float y) { \ + return ((x > 0) && (y <= 0)) ? 1.0f : -1.0f; \ +} \ + \ +float mt_xor(float x, float y) { \ + return (((x > 0) && (y <= 0)) || ((x <= 0) && (y > 0))) ? 1.0f : -1.0f; \ +} \ + \ +float andUB(float x, float y) { \ + return (float)(clip_ulong(x) & clip_ulong(y)); \ +} \ + \ +float orUB(float x, float y) { \ + return (float)(clip_ulong(x) | clip_ulong(y)); \ +} \ + \ +float xorUB(float x, float y) { \ + return (float)(clip_ulong(x) ^ clip_ulong(y)); \ +} \ + \ +float negateUB(float x) { \ + return (float)(~clip_ulong(x)); \ +} \ + \ +float posshiftUB(float x, float y) { \ + return y >= 0 ? (float)(clip_ulong(x) << clip_long(y)) : (float)(clip_ulong(x) >> clip_long(-y)); \ +} \ + \ +float negshiftUB(float x, float y) { \ + return y >= 0 ? (float)(clip_ulong(x) >> clip_long(y)) : (float)(clip_ulong(x) << clip_long(-y)); \ +} \ + \ +float andSB(float x, float y) { \ + return (float)(clip_long(x) & clip_long(y)); \ +} \ + \ +float orSB(float x, float y) { \ + return (float)(clip_long(x) | clip_long(y)); \ +} \ + \ +float xorSB(float x, float y) { \ + return (float)(clip_long(x) ^ clip_long(y)); \ +} \ + \ +float negateSB(float x) { \ + return (float)(~clip_long(x)); \ +} \ + \ +float posshiftSB(float x, float y) { \ + return y >= 0 ? (float)(clip_long(x) << clip_long(y)) : (float)(clip_long(x) >> clip_long(-y)); \ +} \ + \ +float negshiftSB(float x, float y) { \ + return y >= 0 ? (float)(clip_long(x) >> clip_long(y)) : (float)(clip_long(x) << clip_long(-y)); \ +} \ +\ +"; + + +const char* expr_source = "__kernel void expr(__global uchar *dstp, __global uchar* srcp, int width) { \ + int offset = get_global_id(0); \ + float x = srcp[offset]; \ + dstp[offset] = (uchar)clamp((int)round({{expression}}), 0, 255); \ +}"; + +const char* exprxy_source = "__kernel void expr(__global uchar *dstp, __global uchar* srcp1, __global uchar* srcp2, int width) { \ + int offset = get_global_id(0); \ + float x = srcp1[offset]; \ + float y = srcp2[offset]; \ + dstp[offset] = (uchar)clamp((int)round({{expression}}), 0, 255); \ +}"; + +const char* exprxyz_source = "__kernel void expr(__global uchar *dstp, __global uchar* srcp1, __global uchar* srcp2, __global uchar* srcp3, int width) { \ + int offset = get_global_id(0); \ + float x = srcp1[offset]; \ + float y = srcp2[offset]; \ + float z = srcp3[offset]; \ + dstp[offset] = (uchar)clamp((int)round({{expression}}), 0, 255); \ +}"; + +const char* expr_source_lsb = "__kernel void expr(__global uchar *dstp, __global uchar* srcp, int width, int height) { \ + int offset = get_global_id(0); \ + float x = ((ushort)srcp[offset] << 8) + ((ushort)srcp[offset+width*height]); \ + ushort result = clamp((int)round({{expression}}), 0, 65535); \ + dstp[offset] = (uchar)((result >> 8) & 0xFF); \ + dstp[offset+width*height] = (uchar)(result & 0xFF); \ +}"; + +const char* exprxy_source_lsb = "__kernel void expr(__global uchar *dstp, __global uchar* srcp1, __global uchar* srcp2, int width, int height) { \ + int offset = get_global_id(0); \ + float x = ((ushort)srcp1[offset] << 8); + ((ushort)srcp1[offset+width*height]); \ + float y = ((ushort)srcp2[offset] << 8) + ((ushort)srcp2[offset+width*height]); \ + ushort result = clamp((int)round({{expression}}), 0, 65535); \ + dstp[offset] = (uchar)((result >> 8) & 0xFF); \ + dstp[offset+width*height] = (uchar)(result & 0xFF); \ +}"; + +const char* exprxyz_source_lsb = "__kernel void expr(__global uchar *dstp, __global uchar* srcp1, __global uchar* srcp2, __global uchar* srcp3, int width, int height) { \ + int offset = get_global_id(0); \ + float x = ((ushort)srcp1[offset] << 8) + ((ushort)srcp1[offset+width*height]); \ + float y = ((ushort)srcp2[offset] << 8) + ((ushort)srcp2[offset+width*height]); \ + float z = ((ushort)srcp3[offset] << 8) + ((ushort)srcp3[offset+width*height]); \ + ushort result = clamp((int)round({{expression}}), 0, 65535); \ + dstp[offset] = (uchar)((result >> 8) & 0xFF); \ + dstp[offset+width*height] = (uchar)(result & 0xFF); \ +}"; + +#endif \ No newline at end of file diff --git a/CLExpr/expr.cpp b/CLExpr/expr.cpp new file mode 100644 index 0000000..8c31b2d --- /dev/null +++ b/CLExpr/expr.cpp @@ -0,0 +1,553 @@ +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#define NOGDI +#include +#include +#include +#include +#include +#include +#include +#include +#include "parser\parser.h" +#include "clcode.h" + + +//http://www.khronos.org/message_boards/showthread.php/5912-error-to-string +static const char* get_cl_error_string(cl_int error) +{ + switch (error) + { + case CL_SUCCESS: return "Success!"; + case CL_DEVICE_NOT_FOUND: return "Device not found."; + case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; + case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure"; + case CL_OUT_OF_RESOURCES: return "Out of resources"; + case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; + case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; + case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; + case CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; + case CL_MAP_FAILURE: return "Map failure"; + case CL_INVALID_VALUE: return "Invalid value"; + case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; + case CL_INVALID_PLATFORM: return "Invalid platform"; + case CL_INVALID_DEVICE: return "Invalid device"; + case CL_INVALID_CONTEXT: return "Invalid context"; + case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; + case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; + case CL_INVALID_HOST_PTR: return "Invalid host pointer"; + case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; + case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; + case CL_INVALID_SAMPLER: return "Invalid sampler"; + case CL_INVALID_BINARY: return "Invalid binary"; + case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; + case CL_INVALID_PROGRAM: return "Invalid program"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; + case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; + case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; + case CL_INVALID_KERNEL: return "Invalid kernel"; + case CL_INVALID_ARG_INDEX: return "Invalid argument index"; + case CL_INVALID_ARG_VALUE: return "Invalid argument value"; + case CL_INVALID_ARG_SIZE: return "Invalid argument size"; + case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; + case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; + case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; + case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; + case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; + case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; + case CL_INVALID_EVENT: return "Invalid event"; + case CL_INVALID_OPERATION: return "Invalid operation"; + case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object"; + case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; + case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; + default: return "Unknown"; + } +} + +static inline void check_cl_error(cl_int error, IScriptEnvironment *env) +{ + if (error != CL_SUCCESS) + { + env->ThrowError(get_cl_error_string(error)); + } +} + +static void replace_first(std::string &source, const std::string &what, const std::string &new_value) +{ + size_t f = source.find(what); + source.replace(f, what.length(), new_value); +} + +enum ExprType +{ + EXPR_X = 1, + EXPR_XY = 2, + EXPR_XYZ = 3 +}; + +enum PlaneProcessMode +{ + DO_NOTHING = 1, + COPY_FIRST = 2, + PROCESS = 3, + COPY_SECOND = 4, + COPY_THIRD = 5 +}; + + +static std::string prepare_program(const std::string &expression, ExprType type, bool lsb) +{ + Parser parser = getDefaultParser(); + std::string expr; + switch (type) + { + case EXPR_X: + parser.addSymbol(Symbol::X); + expr = lsb ? expr_source_lsb : expr_source; + break; + case EXPR_XY: + parser.addSymbol(Symbol::X).addSymbol(Symbol::Y); + expr = lsb ? exprxy_source_lsb : exprxy_source; + break; + case EXPR_XYZ: + parser.addSymbol(Symbol::X).addSymbol(Symbol::Y).addSymbol(Symbol::Z); + expr = lsb ? exprxyz_source_lsb : exprxyz_source; + break; + default: + assert(0); + break; + } + + parser.parse(expression, " "); + Context context(parser.getExpression()); + + replace_first(expr, "{{expression}}", context.infix()); + + OutputDebugString(expr.c_str()); + + std::string program(common_ocl_functions); + return program + expr; +} + +struct PlaneData +{ + int mode; + std::string expr; + cl_kernel kernel; + cl_program program; + cl_mem dst_buffer; + cl_mem src_buffers[3]; + cl_command_queue command_queue; + bool own_program; + cl_event src_copy_events[3]; + cl_event kernel_run_event; + + PlaneData() : kernel(nullptr), program(nullptr), own_program(false), dst_buffer(nullptr), kernel_run_event(nullptr) + { + for (int i = 0; i < 3; ++i) + { + src_buffers[i] = nullptr; + src_copy_events[i] = nullptr; + } + } + + ~PlaneData() + { + if (own_program) + { + clReleaseProgram(program); + } + for (int i = 0; i < 3; i++) + { + clReleaseMemObject(src_buffers[i]); + clReleaseEvent(src_copy_events[i]); + } + clReleaseEvent(kernel_run_event); + clReleaseKernel(kernel); + clReleaseMemObject(dst_buffer); + clReleaseCommandQueue(command_queue); + } + +private: + PlaneData(const PlaneData &other); + PlaneData operator=(const PlaneData &other); +}; + +class ClExpr : public GenericVideoFilter +{ +public: + ClExpr(PClip clip1, PClip clip2, PClip clip3, + std::string expr, std::string yexpr, std::string uexpr, std::string vexpr, + int y, int u, int v, std::string chroma, bool lsb, ExprType mode, IScriptEnvironment* env); + + PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env) override; + + + int __stdcall SetCacheHints(int cachehints, int frame_range) override + { + return cachehints == CACHE_GET_MTMODE ? MT_MULTI_INSTANCE : 0; + } + + ~ClExpr() + { + clReleaseContext(context_); + } + +private: + cl_context context_; + PlaneData plane_params_[3]; + PClip clip1_, clip2_, clip3_; + ExprType filter_type_; + bool lsb_; +}; + +static int chroma_to_int(const std::string &chroma) +{ + if (chroma == "process") + return PROCESS; + else if (chroma == "copy" || chroma == "copy first") + return COPY_FIRST; + else if (chroma == "copy second") + return COPY_SECOND; + else if (chroma == "copy third") + return COPY_THIRD; + else + return -atoi(chroma.c_str()); +} + +static void to_lower(std::string &data) +{ + std::transform(data.begin(), data.end(), data.begin(), tolower); +} + +ClExpr::ClExpr(PClip clip1, PClip clip2, PClip clip3, + std::string expr, std::string yexpr, std::string uexpr, std::string vexpr, + int y, int u, int v, std::string chroma, bool lsb, ExprType filter_type, IScriptEnvironment* env) + : GenericVideoFilter(clip1), context_(nullptr), + clip1_(clip1), clip2_(clip2), clip3_(clip3), filter_type_(filter_type), lsb_(lsb) +{ + if (!vi.IsPlanar()) + { + env->ThrowError("cl_expr: only planar color formats supported"); + } + if (!chroma.empty()) + { + to_lower(chroma); + u = v = chroma_to_int(chroma); + } + plane_params_[0].mode = y; + plane_params_[1].mode = u; + plane_params_[2].mode = v; + + plane_params_[0].expr = yexpr.empty() ? expr : yexpr; + plane_params_[1].expr = uexpr.empty() ? expr : uexpr; + plane_params_[2].expr = vexpr.empty() ? expr : vexpr; + + for (int i = 0; i < 3; ++i) + { + to_lower(plane_params_[i].expr); + } + + //init ocl + cl_platform_id platform; + cl_int error = clGetPlatformIDs(1, &platform, NULL); + check_cl_error(error, env); + + cl_device_id device; + check_cl_error(clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL), env); + + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; + context_ = clCreateContext(cps, 1, &device, NULL, NULL, &error); + check_cl_error(error, env); + + const static int planes[] = { PLANAR_Y, PLANAR_U, PLANAR_V }; + int planes_count = (vi.IsPlanar() && !vi.IsY8()) ? 3 : 1; + + for (int i = 0; i < planes_count; i++) + { + auto ¤t = plane_params_[i]; + if (current.mode != PROCESS) + { + continue; + } + if (current.expr.empty() || current.expr == "x") + { + current.mode = COPY_FIRST; + continue; + } + + int width = vi.width >> vi.GetPlaneWidthSubsampling(planes[i]); + int height = vi.height >> vi.GetPlaneHeightSubsampling(planes[i]); + if (lsb && ((height % 2) != 0)) + { + env->ThrowError("cl_expr: height of all processed planes with lsb=true must be mod2"); + } + current.command_queue = clCreateCommandQueue(context_, device, NULL, &error); + check_cl_error(error, env); + + current.dst_buffer = clCreateBuffer(context_, CL_MEM_WRITE_ONLY, width * height, NULL, &error); + check_cl_error(error, env); + + for (int j = 0; j < filter_type; j++) + { + current.src_buffers[j] = clCreateBuffer(context_, CL_MEM_READ_ONLY, width * height, NULL, &error); + check_cl_error(error, env); + } + + for (int prev = 0; prev < i; prev++) + { + if (current.expr == plane_params_[prev].expr) + { + //some other plane uses the same expression, reuse the program + current.program = plane_params_[prev].program; + break; + } + } + + if (current.program == nullptr) + { + //build new program for this plane to own + auto source = prepare_program(current.expr, filter_type, lsb_); + auto cstr = source.c_str(); + + current.own_program = true; + current.program = clCreateProgramWithSource(context_, 1, (const char**)&cstr, NULL, &error); + check_cl_error(error, env); + + error = clBuildProgram(current.program, 0, NULL, NULL, NULL, NULL); + if (error != CL_SUCCESS) + { + size_t len; + const int buffer_size = 1024*100; + char buffer[buffer_size]; + memset(buffer, 0, buffer_size); + clGetProgramBuildInfo(current.program, device, CL_PROGRAM_BUILD_LOG, buffer_size*sizeof(char), buffer, &len); + OutputDebugString(buffer); + + env->ThrowError(get_cl_error_string(error)); + } + } + + current.kernel = clCreateKernel(current.program, "expr", &error); + check_cl_error(error, env); + } +} + +static void copy_source_buffers(PlaneData &pd, const std::vector> planes, + int width, int height, IScriptEnvironment* env) +{ + size_t offsets[] = { 0, 0, 0 }; + size_t dimensions[] = { width, height, 1 }; + for (size_t i = 0; i < planes.size(); ++i) + { + check_cl_error(clEnqueueWriteBufferRect(pd.command_queue, + pd.src_buffers[i], CL_FALSE, offsets, offsets, dimensions, + 0, 0, planes[i].second, 0, planes[i].first, + 0, NULL, &pd.src_copy_events[i]), env); + } +} + +static void run_kernel(PlaneData &pd, size_t src_planes_count, + int width, int height, bool lsb, IScriptEnvironment* env) +{ + int real_height = lsb ? height / 2 : height; + size_t global_work_size[] = { width * real_height }; + int arg_idx = 0; + + check_cl_error(clSetKernelArg(pd.kernel, arg_idx++, sizeof(cl_mem), &pd.dst_buffer), env); + for (size_t i = 0; i < src_planes_count; ++i) + { + check_cl_error(clSetKernelArg(pd.kernel, arg_idx++, sizeof(cl_mem), &pd.src_buffers[i]), env); + } + check_cl_error(clSetKernelArg(pd.kernel, arg_idx++, sizeof(int), &width), env); + if (lsb) + { + check_cl_error(clSetKernelArg(pd.kernel, arg_idx++, sizeof(int), &real_height), env); + } + + check_cl_error( + clEnqueueNDRangeKernel(pd.command_queue, pd.kernel, 1, NULL, global_work_size, NULL, + src_planes_count, pd.src_copy_events, &pd.kernel_run_event) + , env); +} + +static void copy_dst_buffer(const PlaneData &pd, + uint8_t* dstp, int dst_pitch, + int width, int height, IScriptEnvironment* env) +{ + size_t offsets[] = { 0, 0, 0 }; + size_t dimensions[] = { width, height, 1 }; + + cl_int error = clEnqueueReadBufferRect(pd.command_queue, pd.dst_buffer, CL_FALSE, + offsets, offsets, dimensions, 0, 0, dst_pitch, 0, dstp, 1, &pd.kernel_run_event, NULL); + check_cl_error(error, env); +} + +PVideoFrame ClExpr::GetFrame(int n, IScriptEnvironment* env) +{ + auto dst = env->NewVideoFrame(vi); + auto src1 = clip1_->GetFrame(n, env); + auto src2 = clip2_ == nullptr ? nullptr : clip2_->GetFrame(n, env); + auto src3 = clip3_ == nullptr ? nullptr : clip3_->GetFrame(n, env); + + const static int planes[] = { PLANAR_Y, PLANAR_U, PLANAR_V }; + size_t planes_count = (vi.IsPlanar() && !vi.IsY8()) ? 3 : 1; + + for (size_t i = 0; i < planes_count; i++) + { + auto& params = plane_params_[i]; + int plane = planes[i]; + if (params.mode == DO_NOTHING) + { + } + else if (params.mode == COPY_FIRST || + (params.mode == COPY_SECOND && clip2_ == nullptr) || + (params.mode == COPY_THIRD && clip3_ == nullptr)) + { + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), + src1->GetReadPtr(plane), src1->GetPitch(plane), + src1->GetRowSize(plane), src1->GetHeight(plane)); + } + else if (params.mode == COPY_SECOND) + { + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), + src2->GetReadPtr(plane), src2->GetPitch(plane), + src2->GetRowSize(plane), src2->GetHeight(plane)); + } + else if (params.mode == COPY_THIRD) + { + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), + src3->GetReadPtr(plane), src3->GetPitch(plane), + src3->GetRowSize(plane), src3->GetHeight(plane)); + } + else if (params.mode <= 0) + { + memset(dst->GetWritePtr(plane), -params.mode, dst->GetPitch(plane) * dst->GetHeight(plane)); + } + else if (params.mode == PROCESS) + { + //copy planes to device + std::vector> src_planes; + + src_planes.emplace_back(src1->GetReadPtr(plane), src1->GetPitch(plane)); + if (src2 != nullptr) + { + src_planes.emplace_back(src2->GetReadPtr(plane), src2->GetPitch(plane)); + } + if (src3 != nullptr) + { + src_planes.emplace_back(src3->GetReadPtr(plane), src3->GetPitch(plane)); + } + copy_source_buffers(params, src_planes, dst->GetRowSize(plane), dst->GetHeight(plane), env); + } + else + { + env->ThrowError("cl_expr: invalid mode. This is a bug"); + } + } + + //process kernels + for (size_t i = 0; i < planes_count; i++) + { + if (plane_params_[i].mode == PROCESS) + { + run_kernel(plane_params_[i], filter_type_, dst->GetRowSize(planes[i]), dst->GetHeight(planes[i]), lsb_, env); + } + } + + //copy results back + for (size_t i = 0; i < planes_count; i++) + { + if (plane_params_[i].mode == PROCESS) + { + copy_dst_buffer(plane_params_[i], dst->GetWritePtr(planes[i]), dst->GetPitch(planes[i]), dst->GetRowSize(planes[i]), dst->GetHeight(planes[i]), env); + } + } + + //wair for everything to complete + for (size_t i = 0; i < planes_count; i++) + { + if (plane_params_[i].mode == PROCESS) + { + clFinish(plane_params_[i].command_queue); + } + } + + return dst; +} + + +AVSValue __cdecl create_expr(AVSValue args, void*, IScriptEnvironment* env) +{ + enum { CLIP, EXPR, YEXPR, UEXPR, VEXPR, Y, U, V, CHROMA, LSB }; + return new ClExpr( + args[CLIP].AsClip(), + nullptr, + nullptr, + args[EXPR].AsString(""), + args[YEXPR].AsString(""), + args[UEXPR].AsString(""), + args[VEXPR].AsString(""), + args[Y].AsInt(3), + args[U].AsInt(2), + args[V].AsInt(2), + args[CHROMA].AsString(""), + args[LSB].AsBool(false), + EXPR_X, + env); +} + +AVSValue __cdecl create_exprxy(AVSValue args, void*, IScriptEnvironment* env) +{ + enum { CLIP, CLIP2, EXPR, YEXPR, UEXPR, VEXPR, Y, U, V, CHROMA, LSB }; + return new ClExpr( + args[CLIP].AsClip(), + args[CLIP2].AsClip(), + nullptr, + args[EXPR].AsString(""), + args[YEXPR].AsString(""), + args[UEXPR].AsString(""), + args[VEXPR].AsString(""), + args[Y].AsInt(3), + args[U].AsInt(2), + args[V].AsInt(2), + args[CHROMA].AsString(""), + args[LSB].AsBool(false), + EXPR_XY, + env); +} + +AVSValue __cdecl create_exprxyz(AVSValue args, void*, IScriptEnvironment* env) +{ + enum { CLIP, CLIP2, CLIP3, EXPR, YEXPR, UEXPR, VEXPR, Y, U, V, CHROMA, LSB }; + return new ClExpr(args[CLIP].AsClip(), + args[CLIP2].AsClip(), + args[CLIP3].AsClip(), + args[EXPR].AsString(""), + args[YEXPR].AsString(""), + args[UEXPR].AsString(""), + args[VEXPR].AsString(""), + args[Y].AsInt(3), + args[U].AsInt(2), + args[V].AsInt(2), + args[CHROMA].AsString(""), + args[LSB].AsBool(false), + EXPR_XYZ, + env); +} + +const AVS_Linkage *AVS_linkage = nullptr; + +extern "C" __declspec(dllexport) const char* __stdcall AvisynthPluginInit3(IScriptEnvironment* env, const AVS_Linkage* const vectors) +{ + AVS_linkage = vectors; + + env->AddFunction("cl_expr", "c[expr]s[yExpr]s[uExpr]s[vExpr]s[Y]i[U]i[V]i[chroma]s[lsb]b", create_expr, 0); + env->AddFunction("cl_exprxy", "cc[expr]s[yExpr]s[uExpr]s[vExpr]s[Y]i[U]i[V]i[chroma]s[lsb]b", create_exprxy, 0); + env->AddFunction("cl_exprxyz", "ccc[expr]s[yExpr]s[uExpr]s[vExpr]s[Y]i[U]i[V]i[chroma]s[lsb]b", create_exprxyz, 0); + return "I'd blame NVIDIA if I were you"; +} diff --git a/CLExpr/parser/parser.cpp b/CLExpr/parser/parser.cpp new file mode 100644 index 0000000..4e7463c --- /dev/null +++ b/CLExpr/parser/parser.cpp @@ -0,0 +1,80 @@ +#include "parser.h" + + +Parser::Parser() +{ +} + +Parser &Parser::addSymbol(const Symbol &symbol) +{ + symbols.push_back(symbol); + + return *this; +} + +const Symbol *Parser::findSymbol(const std::string &value) const +{ + for (auto &symbol: symbols) { + if (symbol.value == value || symbol.value2 == value) { + return &symbol; + } + } + + return nullptr; +} + +Symbol Parser::stringToSymbol(const std::string &value) const +{ + auto found = findSymbol(value); + return found == nullptr + ? Symbol(value, Symbol::NUMBER, 0, "") + : *found; +} + +Parser &Parser::parse(const std::string &parsed_string, const std::string &separators) +{ + this->parsed_string = parsed_string; + + size_t nPos = parsed_string.find_first_not_of(separators, 0); + size_t nEndPos; + + elements.clear(); + + while (nPos != std::string::npos && (nEndPos = parsed_string.find_first_of(separators, nPos)) != std::string::npos) + { + elements.push_back(stringToSymbol(parsed_string.substr(nPos, nEndPos - nPos))); + nPos = parsed_string.find_first_not_of(separators, nEndPos); + } + + if (nPos != std::string::npos) + elements.push_back(stringToSymbol(parsed_string.substr(nPos))); + + return *this; +} + + +Parser getDefaultParser() +{ + Parser parser; + + /* arithmetic operators */ + parser.addSymbol(Symbol::Addition).addSymbol(Symbol::Division).addSymbol(Symbol::Multiplication).addSymbol(Symbol::Substraction).addSymbol(Symbol::Modulo).addSymbol(Symbol::Power); + /* comparison operators */ + parser.addSymbol(Symbol::Equal).addSymbol(Symbol::Equal2).addSymbol(Symbol::NotEqual).addSymbol(Symbol::Inferior).addSymbol(Symbol::InferiorStrict).addSymbol(Symbol::Superior).addSymbol(Symbol::SuperiorStrict); + /* logic operators */ + parser.addSymbol(Symbol::And).addSymbol(Symbol::Or).addSymbol(Symbol::AndNot).addSymbol(Symbol::Xor); + /* unsigned binary operators */ + parser.addSymbol(Symbol::AndUB).addSymbol(Symbol::OrUB).addSymbol(Symbol::XorUB).addSymbol(Symbol::NegateUB).addSymbol(Symbol::PosShiftUB).addSymbol(Symbol::NegShiftUB); + /* signed binary operators */ + parser.addSymbol(Symbol::AndSB).addSymbol(Symbol::OrSB).addSymbol(Symbol::XorSB).addSymbol(Symbol::NegateSB).addSymbol(Symbol::PosShiftSB).addSymbol(Symbol::NegShiftSB); + /* ternary operator */ + parser.addSymbol(Symbol::Interrogation); + /* function */ + parser.addSymbol(Symbol::Abs).addSymbol(Symbol::Acos).addSymbol(Symbol::Asin).addSymbol(Symbol::Atan).addSymbol(Symbol::Cos).addSymbol(Symbol::Exp).addSymbol(Symbol::Log).addSymbol(Symbol::Sin).addSymbol(Symbol::Tan).addSymbol(Symbol::Min).addSymbol(Symbol::Max).addSymbol(Symbol::Clip); + /* rounding */ + parser.addSymbol(Symbol::Round).addSymbol(Symbol::Floor).addSymbol(Symbol::Trunc).addSymbol(Symbol::Ceil); + /* number */ + parser.addSymbol(Symbol::Pi); + + return parser; +} \ No newline at end of file diff --git a/CLExpr/parser/parser.h b/CLExpr/parser/parser.h new file mode 100644 index 0000000..2d1d8bb --- /dev/null +++ b/CLExpr/parser/parser.h @@ -0,0 +1,29 @@ +#ifndef __Mt_Parser_H__ +#define __Mt_Parser_H__ + +#include +#include "symbol.h" +#include + + + +class Parser +{ + std::string parsed_string; + std::deque elements; + std::deque symbols; + const Symbol *findSymbol(const std::string &value) const; + Symbol stringToSymbol(const std::string &value) const; + +public: + Parser &parse(const std::string &parsed_string, const std::string &separators); + Parser(); + Parser &addSymbol(const Symbol &symbol); + std::deque &getExpression() { return elements; } + +}; +Parser getDefaultParser(); + + + +#endif diff --git a/CLExpr/parser/symbol.cpp b/CLExpr/parser/symbol.cpp new file mode 100644 index 0000000..8a9eb4a --- /dev/null +++ b/CLExpr/parser/symbol.cpp @@ -0,0 +1,143 @@ +#include "symbol.h" +#include +#include + +Symbol Symbol::Addition ("+", OPERATOR, 2, "+"); +Symbol Symbol::Multiplication ("*", OPERATOR, 2, "*"); +Symbol Symbol::Division ("/", OPERATOR, 2, "/"); +Symbol Symbol::Substraction ("-", OPERATOR, 2, "-"); +Symbol Symbol::Power ("^", FUNCTION, 2, "pow"); + +Symbol Symbol::Modulo ("%", FUNCTION, 2, "fmod"); +Symbol Symbol::Interrogation ("?", FUNCTION, 3, "interrogation"); +Symbol Symbol::Equal ("==", FUNCTION, 2, "equal"); +Symbol Symbol::Equal2 ("=", FUNCTION, 2, "equal"); +Symbol Symbol::NotEqual ("!=", FUNCTION, 2, "notEqual"); +Symbol Symbol::Inferior ("<=", FUNCTION, 2, "inferior"); +Symbol Symbol::InferiorStrict ("<", FUNCTION, 2, "inferiorStrict"); +Symbol Symbol::Superior (">=", FUNCTION, 2, "superior"); +Symbol Symbol::SuperiorStrict (">", FUNCTION, 2, "superiorStrict"); +Symbol Symbol::And ("&", FUNCTION, 2, "mt_and"); +Symbol Symbol::Or ("|", FUNCTION, 2, "mt_or"); +Symbol Symbol::AndNot ("&!", FUNCTION, 2, "mt_andNot"); +Symbol Symbol::Xor ("°", "@", FUNCTION, 2, "mt_xor"); +Symbol Symbol::AndUB ("&u", FUNCTION, 2, "andUB"); +Symbol Symbol::OrUB ("|u", FUNCTION, 2, "orUB"); +Symbol Symbol::XorUB ("°u", "@u", FUNCTION, 2, "xorUB"); +Symbol Symbol::NegateUB ("~u", FUNCTION, 1, "negateUB"); +Symbol Symbol::PosShiftUB ("<<", "<>", ">>u", FUNCTION, 2, "negshiftUB"); +Symbol Symbol::AndSB ("&s", FUNCTION, 2, "andSB"); +Symbol Symbol::OrSB ("|s", FUNCTION, 2, "orSB"); +Symbol Symbol::XorSB ("°s", "@s", FUNCTION, 2, "xorSB"); +Symbol Symbol::NegateSB ("~s", FUNCTION, 1, "negateSB"); +Symbol Symbol::PosShiftSB ("<>s", FUNCTION, 2, "negshiftSB"); + +Symbol Symbol::Pi ("pi", 3.1415927f, NUMBER, 0, ""); +Symbol Symbol::X ("x", VARIABLE_X, 0, ""); +Symbol Symbol::Y ("y", VARIABLE_Y, 0, ""); +Symbol Symbol::Z ("z", VARIABLE_Z, 0, ""); + +Symbol Symbol::Cos ("cos", FUNCTION, 1, "cos"); +Symbol Symbol::Sin ("sin", FUNCTION, 1, "sin"); +Symbol Symbol::Tan ("tan", FUNCTION, 1, "tan"); +Symbol Symbol::Log ("log", FUNCTION, 1, "log"); +Symbol Symbol::Exp ("exp", FUNCTION, 1, "exp"); +Symbol Symbol::Abs ("abs", FUNCTION, 1, "fabs"); +Symbol Symbol::Atan ("atan", FUNCTION, 1, "atan"); +Symbol Symbol::Acos ("acos", FUNCTION, 1, "acos"); +Symbol Symbol::Asin ("asin", FUNCTION, 1, "asin"); +Symbol Symbol::Round ("round", FUNCTION, 1, "round"); +Symbol Symbol::Clip ("clip", FUNCTION, 3, "clamp"); +Symbol Symbol::Min ("min", FUNCTION, 2, "min"); +Symbol Symbol::Max ("max", FUNCTION, 2, "max"); +Symbol Symbol::Ceil ("ceil", FUNCTION, 1, "ceil"); +Symbol Symbol::Floor ("floor", FUNCTION, 1, "floor"); +Symbol Symbol::Trunc ("trunc", FUNCTION, 1, "trunc"); + +Symbol::Symbol() : +type(UNDEFINED), value(""), value2("") +{ +} + +Symbol::Symbol(std::string value, Type type, int nParameter, std::string op) : +type(type), value(value), value2(""), nParameter(nParameter), code(op) +{ + if (type == NUMBER) + { + dValue = (float)atof(value.c_str()); + } +} + +Symbol::Symbol(std::string value, std::string value2, Type type, int nParameter, std::string op) : +type(type), value(value), value2(value2), nParameter(nParameter), code(op) +{ + if (type == NUMBER) + { + dValue = (float)atof(value.c_str()); + } +} + +Symbol::Symbol(std::string value, float dValue, Type type, int nParameter, std::string op) : +type(type), value(value), value2(""), nParameter(nParameter), dValue(dValue), code(op) +{ +} + + +Context::Context(const std::deque &expression) +{ + nPos = -1; + nSymbols = expression.size(); + pSymbols = new Symbol[nSymbols]; + + auto it = expression.begin(); + + for ( int i = 0; i < nSymbols; i++, it++ ) + pSymbols[i] = *it; +} + +Context::~Context() +{ + delete[] pSymbols; +} + + +std::string Context::rec_infix() +{ + const Symbol &s = pSymbols[--nPos]; + + switch ( s.type ) + { + case Symbol::VARIABLE_X: + case Symbol::VARIABLE_Y: + case Symbol::VARIABLE_Z: + case Symbol::NUMBER: return s.value; + case Symbol::FUNCTION: + if (s.nParameter == 1) { + return s.code + "(" + rec_infix() + ")"; + } else if (s.nParameter == 2) { + auto op2 = rec_infix(); + return s.code + "(" + rec_infix() + "," + op2 + ")"; + } else { + auto op3 = rec_infix(); + auto op2 = rec_infix(); + return s.code + "(" + rec_infix() + "," + op2 + "," + op3 + ")"; + } + case Symbol::OPERATOR: + { + auto op2 = rec_infix(); + return "(" + rec_infix() + s.code + op2 + ")"; + } + default: + assert(0); + return ""; + } +} + +std::string Context::infix() +{ + nPos = nSymbols; + + return rec_infix(); +} diff --git a/CLExpr/parser/symbol.h b/CLExpr/parser/symbol.h new file mode 100644 index 0000000..4e7ba33 --- /dev/null +++ b/CLExpr/parser/symbol.h @@ -0,0 +1,110 @@ +#ifndef __Mt_Symbol_H__ +#define __Mt_Symbol_H__ + +#include +#include +#include + + +class Symbol { +public: + typedef enum { + NUMBER, + OPERATOR, + FUNCTION, + TERNARY, + VARIABLE_X, + VARIABLE_Y, + VARIABLE_Z, + + UNDEFINED + + } Type; + +public: + + Type type; + std::string value; + std::string value2; + int nParameter; + float dValue; + std::string code; + +private: + +private: +public: + + Symbol(); + Symbol(std::string value, Type type, int nParameter, std::string op); + Symbol(std::string value, std::string value2, Type type, int nParameter, std::string op); + Symbol(std::string value, float dValue, Type type, int nParameter, std::string op); + + static Symbol Addition; + static Symbol Multiplication; + static Symbol Division; + static Symbol Substraction; + static Symbol Power; + static Symbol Modulo; + static Symbol Interrogation; + static Symbol Equal; + static Symbol Equal2; + static Symbol NotEqual; + static Symbol Inferior; + static Symbol InferiorStrict; + static Symbol Superior; + static Symbol SuperiorStrict; + static Symbol And; + static Symbol Or; + static Symbol AndNot; + static Symbol Xor; + static Symbol AndUB; + static Symbol OrUB; + static Symbol XorUB; + static Symbol NegateUB; + static Symbol PosShiftUB; + static Symbol NegShiftUB; + static Symbol AndSB; + static Symbol OrSB; + static Symbol XorSB; + static Symbol NegateSB; + static Symbol PosShiftSB; + static Symbol NegShiftSB; + static Symbol Pi; + static Symbol X; + static Symbol Y; + static Symbol Z; + static Symbol Cos; + static Symbol Sin; + static Symbol Tan; + static Symbol Log; + static Symbol Abs; + static Symbol Exp; + static Symbol Acos; + static Symbol Atan; + static Symbol Asin; + static Symbol Round; + static Symbol Clip; + static Symbol Min; + static Symbol Max; + static Symbol Ceil; + static Symbol Floor; + static Symbol Trunc; +}; + +class Context { + Symbol *pSymbols; + int nSymbols; + int nPos; + + std::string rec_infix(); +public: + + Context(const std::deque &expression); + ~Context(); + std::string infix(); +}; + + + +#endif diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..213b94b --- /dev/null +++ b/LICENSE @@ -0,0 +1,23 @@ +Copyright (c) 2014 Victor Efimov +Code in subfolder "parser" is mostly taken from masktools2, originally written by Mathieu Monnier + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..9a51add --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +## CLExpr ## + +AviSynth Expr filter implemented in OpenCL. + +Provides three functions: + +* cl_expr - analogue of mt_lut +* cl_exprxy - analogue of mt_lutxy +* cl_exprxyz - analogue of mt_lutxyz + +All expressions are calculated in runtime. 16-bit stacked clips are supported with the *lsb* parameter. Full set of masktools lut operators and functions is supported. \ No newline at end of file