Skip to content

Commit 20c98f6

Browse files
committedMay 25, 2016
avisynth: almost rewrite.
1 parent 2ec6679 commit 20c98f6

14 files changed

+1734
-1924
lines changed
 

‎.gitignore

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
*.dll
2+
*.exe
3+
*.sln
4+
*.db
5+
*.opendb
6+
*.filters
7+
*.user
8+
avisynth/vs2015/.vs/*
9+
avisynth/vs2015/Debug/*
10+
avisynth/vs2015/Release/*
11+
avisynth/vs2015/x64/*
12+
avisynth/archives/*

‎avisynth/CombMask.cpp

-820
This file was deleted.

‎avisynth/CombMask.sln

-20
This file was deleted.

‎avisynth/CombMask.vcxproj.filters

-27
This file was deleted.

‎avisynth/CombMask.vcxproj.user

-3
This file was deleted.

‎avisynth/avisynth.h

-1,026
This file was deleted.

‎avisynth/readme.txt

+83-17
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CombMask - Combmask create filter for Avisynth2.6x
1+
CombMask - Combmask create filter for Avisynth2.6x/Avisynth+
22

33

44
description:
@@ -18,50 +18,116 @@ description:
1818

1919

2020
syntax:
21-
CombMask(clip, int cthresh, int mthresh, bool chroma, bool sse2)
21+
CombMask(clip, int "cthresh", int "mthresh", bool "chroma", bool "expand",
22+
int "metric", int "opt")
2223

23-
cthresh(0 to 255, default is 6):
24+
cthresh:
2425
spatial combing threshold.
26+
0 to 255, default is 6 (metric=0)
27+
0 to 65025, default is 10 (metric=1)
2528

26-
mthresh(0 to 255, default is 9):
29+
mthresh:
2730
motion adaptive threshold.
31+
0 to 255, default is 9.
2832

29-
chroma(default is true):
33+
chroma:
3034
Whether processing is performed to UV planes or not.
35+
default is true.
3136

32-
sse2(default true):
33-
enable SSE2 intrinsic code(faster).
37+
expand:
38+
When this is set to true, the pixels to the left and right of a combed pixel
39+
are also assumed to be combed.
40+
default is true.
3441

42+
metric:
43+
Sets which spatial combing metric is used to detect combed pixels.
44+
Possible options:
3545

36-
MaskedMerge(clip base, clip alt, clip mask, int MI, bool chroma, bool sse2)
46+
Assume 5 neighboring pixels (a,b,c,d,e) positioned vertically.
47+
48+
a
49+
b
50+
c
51+
d
52+
e
53+
54+
0: d1 = c - b;
55+
d2 = c - d;
56+
if ((d1 > cthresh && d2 > cthresh) || (d1 < -cthresh && d2 < -cthresh))
57+
{
58+
if (abs(a+4*c+e-3*(b+d)) > cthresh*6) it's combed;
59+
}
60+
61+
1: val = (b - c) * (d - c);
62+
if (val > cthresh) it's combed;
63+
64+
default is 0.
65+
66+
opt:
67+
Specifies which CPU optimizations are used.
68+
0 - Use C++ routine.
69+
1 - Use SSE2 routine if possible. When SSE2 can't be used, fallback to 0.
70+
others(default) - Use AVX2 routine if possible.
71+
When AVX2 can't be used, fallback to 1.
72+
73+
74+
MaskedMerge(clip base, clip alt, clip mask, int "MI", int "blockx", int "blocky",
75+
bool "chroma", int "opt")
3776

3877
base: base clip.
3978

4079
alt: alternate clip which will be merged to base.
4180

4281
mask: mask clip.
4382

44-
MI(0 to 128, default is 40):
45-
The # of combed pixels inside any of 8x16 size blocks on the Y-plane
83+
MI(0 to blockx*blocky , default is 80):
84+
The # of combed pixels inside any of blockx * blocky size blocks on the Y-plane
4685
for the frame to be detected as combed.
4786
if the frame is not combed, merge process will be skipped.
4887

49-
chroma(default is true):
88+
blockx:
89+
Sets the x-axis size of the window used during combed frame detection. This has
90+
to do with the size of the area in which MI number of pixels are required to be
91+
detected as combed for a frame to be declared combed.
92+
Possible values are 8, 16(default) or 32.
93+
94+
blocky:
95+
Sets the y-axis size of the window used during combed frame detection. This has
96+
to do with the size of the area in which MI number of pixels are required to be
97+
detected as combed for a frame to be declared combed.
98+
Possible values are 8, 16(default) or 32.
99+
100+
chroma:
50101
Whether processing is performed to UV planes or not.
102+
Default is true.
51103

52-
sse2(default true):
104+
opt:
53105
same as CombMask.
54106

55107

56-
IsCombed(clip, int cthresh, int mthresh,int MI, bool sse2)
108+
IsCombed(clip, int "cthresh", int "mthresh",int "MI", int "blockx", int "blocky",
109+
int "metric", int "opt")
57110

58111
cthresh: Same as CombMask.
59112

60113
mthresh: Same as CombMask.
61114

62115
MI: Same as MaskedMerge.
63116

64-
sse2: same as CombMask.
117+
blockx: Same as MaskedMerge.
118+
119+
blocky: Same as MaskedMerge.
120+
121+
metric: Same as CombMask.
122+
123+
opt: same as CombMask.
124+
125+
126+
note:
127+
128+
- CombMask_avx2.dll is compiled with /arch:AVX2.
129+
- On Avisynth2.6, AVX2 cannot be enabled even if you use CombMask_avx2.dll.
130+
- On Avisynth+MT, CombMask and MaskedMerge are set as MT_NICE_FILTER automatically.
65131

66132

67133
usage:
@@ -84,9 +150,9 @@ usage:
84150

85151
requirement:
86152

87-
- Avisynth2.6alpha4 or later
88-
- WindowsXPsp3 / Vista / 7 / 8
89-
- Microsoft Visual C++ 2010 Redistributable Package
153+
- Avisynth2.60 or later / Avisynth+ r1578 or greater.
154+
- Windows Vista sp2 / 7 sp1 / 8.1 / 10.
155+
- Microsoft Visual C++ 2015 Redistributable Package
90156
- SSE2 capable CPU
91157

92158

‎avisynth/src/CombMask.cpp

+482
Large diffs are not rendered by default.

‎avisynth/src/CombMask.h

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#ifndef COMB_MASK_H
2+
#define COMB_MASK_H
3+
4+
#include <stdexcept>
5+
#include <malloc.h>
6+
#define WIN32_LEAN_AND_MEAN
7+
#define VC_EXTRALEAN
8+
#define NOMINMAX
9+
#define NOGDI
10+
#include <windows.h>
11+
#include <avisynth.h>
12+
13+
#define CMASK_VERSION "1.0.0"
14+
15+
16+
typedef IScriptEnvironment ise_t;
17+
18+
19+
// Instruction-set level a filter instance runs with. The SIMD-aware code in
// this commit switches on these values to pick a kernel, and GVFmod derives
// its buffer alignment from them.
enum arch_t {
20+
NO_SIMD = 0,   // plain C++ routines
21+
USE_SSE2 = 1,  // 128-bit (__m128i) kernels
22+
USE_AVX2 = 2,  // 256-bit (__m256i) kernels
23+
};
24+
25+
26+
class GVFmod : public GenericVideoFilter {
27+
protected:
28+
bool isPlus;
29+
int numPlanes;
30+
size_t align;
31+
GVFmod(PClip c, bool chroma, arch_t a, bool ip) :
32+
GenericVideoFilter(c), align(a == USE_AVX2 ? 32 : 16), isPlus(ip)
33+
{
34+
numPlanes = vi.IsY8() || !chroma ? 1 : 3;
35+
}
36+
};
37+
38+
39+
// Filter that produces a comb mask for its source clip.
// Only the declaration is visible here; the constructor, GetFrame and the
// kernels the function pointers refer to are defined in CombMask.cpp.
class CombMask : public GVFmod {
40+
int cthresh;  // spatial combing threshold (see readme)
41+
int mthresh;  // motion adaptive threshold (see readme)
42+
bool expand;  // readme: also flag the left/right neighbours of a combed pixel
43+
size_t buffPitch;  // NOTE(review): presumably the pitch of a temporary mask buffer - confirm in CombMask.cpp
44+
45+
// Kernel pointers. Presumably assigned in the constructor according to
// arch/metric - TODO confirm in CombMask.cpp.
void (__stdcall *writeCombMask)(
46+
uint8_t* dstp, const uint8_t* srcp, const int dpitch, const int cpitch,
47+
const int cthresh, const int width, const int height);
48+
49+
void (__stdcall *writeMotionMask)(
50+
uint8_t* tmpp, uint8_t* dstp, const uint8_t* srcp, const uint8_t* prevp,
51+
const int tpitch, const int dpitch, const int spitch, const int ppitch,
52+
const int mthresh, const int width, const int height);
53+
54+
void (__stdcall *andMasks)(
55+
uint8_t* dstp, const uint8_t* altp, const int dpitch, const int apitch,
56+
const int width, const int height);
57+
58+
void (__stdcall *expandMask)(
59+
uint8_t* dstp, uint8_t* srcp, const int dpitch, const int spitch,
60+
const int width, const int height);
61+
62+
public:
63+
// cth/mth: thresholds; chroma: process U/V as well; arch: instruction set;
// expand/metric: see readme; is_avsplus: true when hosted by Avisynth+.
CombMask(PClip c, int cth, int mth, bool chroma, arch_t arch, bool expand,
64+
int metric, bool is_avsplus);
65+
~CombMask() {}
66+
PVideoFrame __stdcall GetFrame(int n, ise_t* env);
67+
};
68+
69+
70+
// Signature of the "is this frame combed?" test returned by get_check_combed().
// cmask is the comb mask frame, mi the per-block threshold, blockx/blocky the
// block size; is_avsplus and env are passed on to alloc_buffer()/free_buffer()
// by the SIMD implementation (the C implementation ignores them).
typedef bool (__stdcall *check_combed_t)(
71+
PVideoFrame& cmask, int mi, int blockx, int blocky, bool is_avsplus,
72+
ise_t* env);
73+
74+
75+
class MaskedMerge : public GVFmod {
76+
PClip altc;
77+
PClip maskc;
78+
int mi;
79+
int blockx;
80+
int blocky;
81+
82+
check_combed_t checkCombed;
83+
84+
void (__stdcall *mergeFrames)(
85+
int mum_planes, PVideoFrame& src, PVideoFrame& alt, PVideoFrame& mask,
86+
PVideoFrame& dst);
87+
88+
public:
89+
MaskedMerge(PClip c, PClip a, PClip m, int mi, int blockx, int blocky,
90+
bool chroma, arch_t arch, bool is_avsplus);
91+
~MaskedMerge() {}
92+
PVideoFrame __stdcall GetFrame(int n, ise_t* env);
93+
};
94+
95+
96+
check_combed_t get_check_combed(arch_t arch);
97+
98+
99+
// Throws std::runtime_error(msg) when `cond` is TRUE.
// Note the polarity: `cond` is the failure condition, not the requirement,
// e.g. validate(mi < 0, "...") rejects negative values.
static inline void validate(bool cond, const char* msg)
100+
{
101+
if (cond) throw std::runtime_error(msg);
102+
}
103+
104+
105+
static inline void*
106+
alloc_buffer(size_t size, size_t align, bool is_avsplus, ise_t* env)
107+
{
108+
if (is_avsplus) {
109+
return static_cast<IScriptEnvironment2*>(
110+
env)->Allocate(size, align, AVS_POOLED_ALLOC);
111+
}
112+
return _aligned_malloc(size, align);
113+
}
114+
115+
116+
static inline void
117+
free_buffer(void* buff, bool is_avsplus, ise_t* env)
118+
{
119+
if (is_avsplus) {
120+
static_cast<IScriptEnvironment2*>(env)->Free(buff);
121+
return;
122+
}
123+
_aligned_free(buff);
124+
}
125+
126+
#endif
127+

‎avisynth/src/MaskedMerge.cpp

+250
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
#include "CombMask.h"
2+
#include "simd.h"
3+
4+
5+
template <typename V>
6+
static bool __stdcall
7+
check_combed_simd(PVideoFrame& cmask, int mi, int blockx, int blocky,
8+
bool is_avsplus, ise_t* env)
9+
{
10+
const int width = cmask->GetRowSize(PLANAR_Y) & (~(blockx - 1));
11+
const int height = cmask->GetHeight(PLANAR_Y) & (~(blocky - 1));
12+
const int pitch = cmask->GetPitch(PLANAR_Y);
13+
14+
const uint8_t* srcp = cmask->GetReadPtr(PLANAR_Y);
15+
16+
size_t pitch_a = (width + 31) & (~31);
17+
uint8_t* arr = reinterpret_cast<uint8_t*>(
18+
alloc_buffer(pitch_a * 4, 32, is_avsplus, env));
19+
int64_t* array[] = {
20+
reinterpret_cast<int64_t*>(arr),
21+
reinterpret_cast<int64_t*>(arr + pitch_a),
22+
reinterpret_cast<int64_t*>(arr + pitch_a * 2),
23+
reinterpret_cast<int64_t*>(arr + pitch_a * 3),
24+
};
25+
int length = width / sizeof(int64_t);
26+
int stepx = blockx / 8;
27+
int stepy = blocky / 8;
28+
29+
const V zero = setzero<V>();
30+
31+
for (int y = 0; y < height; y += 32) {
32+
for (int j = 0; j < 4; ++j) {
33+
for (int x = 0; x < width; x += sizeof(V)) {
34+
// 0xFF == -1, thus the range of each bytes of sum is -8 to 0.
35+
V sum = load<V>(srcp + x);
36+
sum = add_i8(sum, load<V>(srcp + x + pitch * 1));
37+
sum = add_i8(sum, load<V>(srcp + x + pitch * 2));
38+
sum = add_i8(sum, load<V>(srcp + x + pitch * 3));
39+
sum = add_i8(sum, load<V>(srcp + x + pitch * 4));
40+
sum = add_i8(sum, load<V>(srcp + x + pitch * 5));
41+
sum = add_i8(sum, load<V>(srcp + x + pitch * 6));
42+
sum = add_i8(sum, load<V>(srcp + x + pitch * 7));
43+
sum = sad_u8(sub_i8(zero, sum), zero);
44+
store(arr + x + pitch_a * j, sum);
45+
}
46+
srcp += pitch * 8;
47+
}
48+
49+
for (int xx = 0; xx < length; xx += stepx) {
50+
int64_t sum = 0;
51+
for (int by = 0; by < stepy; ++by) {
52+
for (int bx = 0; bx < stepx; ++bx) {
53+
sum += array[by][xx + bx];
54+
}
55+
}
56+
if (sum > mi) {
57+
free_buffer(arr, is_avsplus, env);
58+
return true;
59+
}
60+
}
61+
}
62+
free_buffer(arr, is_avsplus, env);
63+
return false;
64+
}
65+
66+
67+
static bool __stdcall
68+
check_combed_c(PVideoFrame& cmask, int mi, int blockx, int blocky, bool, ise_t*)
69+
{
70+
const int width = cmask->GetRowSize(PLANAR_Y) & (~(blockx - 1));
71+
const int height = cmask->GetHeight(PLANAR_Y) & (~(blocky - 1));
72+
const int pitch = cmask->GetPitch(PLANAR_Y);
73+
74+
const uint8_t* srcp = cmask->GetReadPtr(PLANAR_Y);
75+
76+
for (int y = 0; y < height; y += blocky) {
77+
for (int x = 0; x < width; x += blockx) {
78+
int count = 0;
79+
for (int i = 0; i < blocky; ++i) {
80+
for (int j = 0; j < blockx; ++j) {
81+
count += (srcp[x + j + i * pitch] & 1);
82+
}
83+
}
84+
if (count > mi) {
85+
return true;
86+
}
87+
}
88+
srcp += pitch * blocky;
89+
}
90+
return false;
91+
}
92+
93+
94+
template <typename V>
95+
static void __stdcall
96+
merge_frames_simd(int num_planes, PVideoFrame& src, PVideoFrame& alt,
97+
PVideoFrame& mask, PVideoFrame& dst)
98+
{
99+
static const int planes[] = { PLANAR_Y, PLANAR_U, PLANAR_V };
100+
101+
for (int p = 0; p < num_planes; ++p) {
102+
const int plane = planes[p];
103+
104+
const uint8_t* srcp = src->GetReadPtr(plane);
105+
const uint8_t* altp = alt->GetReadPtr(plane);
106+
const uint8_t* mskp = mask->GetReadPtr(plane);
107+
uint8_t* dstp = dst->GetWritePtr(plane);
108+
109+
const int width = src->GetRowSize(plane);
110+
const int height = src->GetHeight(plane);
111+
112+
const int spitch = src->GetPitch(plane);
113+
const int apitch = alt->GetPitch(plane);
114+
const int mpitch = mask->GetPitch(plane);
115+
const int dpitch = dst->GetPitch(plane);
116+
117+
for (int y = 0; y < height; y++) {
118+
for (int x = 0; x < width; x += sizeof(V)) {
119+
const V s = load<V>(srcp + x);
120+
const V a = load<V>(altp + x);
121+
const V m = load<V>(mskp + x);
122+
123+
stream(dstp + x, blendv(s, a, m));
124+
}
125+
srcp += spitch;
126+
altp += apitch;
127+
mskp += mpitch;
128+
dstp += dpitch;
129+
}
130+
}
131+
}
132+
133+
134+
static void __stdcall
135+
merge_frames_c(int num_planes, PVideoFrame& src, PVideoFrame& alt,
136+
PVideoFrame& mask, PVideoFrame& dst)
137+
{
138+
static const int planes[] = { PLANAR_Y, PLANAR_U, PLANAR_V };
139+
140+
for (int p = 0; p < num_planes; ++p) {
141+
const int plane = planes[p];
142+
const uint8_t* srcp = src->GetReadPtr(plane);
143+
const uint8_t* altp = alt->GetReadPtr(plane);
144+
const uint8_t* mskp = mask->GetReadPtr(plane);
145+
uint8_t* dstp = dst->GetWritePtr(plane);
146+
147+
const int width = src->GetRowSize(plane);
148+
const int height = src->GetHeight(plane);
149+
150+
const int spitch = src->GetPitch(plane);
151+
const int apitch = alt->GetPitch(plane);
152+
const int mpitch = mask->GetPitch(plane);
153+
const int dpitch = dst->GetPitch(plane);
154+
155+
for (int y = 0; y < height; y++) {
156+
for (int x = 0; x < width; x++) {
157+
dstp[x] = (srcp[x] & (~mskp[x])) | (altp[x] & mskp[x]);
158+
}
159+
srcp += spitch;
160+
altp += apitch;
161+
mskp += mpitch;
162+
dstp += dpitch;
163+
}
164+
}
165+
}
166+
167+
168+
169+
170+
// Returns the combed-frame test for the requested instruction set.
// The AVX2 variant is only available when this file is compiled with AVX2
// enabled; in any other case that is not SSE2 the C++ routine is returned.
check_combed_t get_check_combed(arch_t arch)
{
    switch (arch) {
#if defined(__AVX2__)
    case USE_AVX2:
        return check_combed_simd<__m256i>;
#endif
    case USE_SSE2:
        return check_combed_simd<__m128i>;
    default:
        return check_combed_c;
    }
}
183+
184+
185+
186+
// Validates the arguments and selects the merge / combed-test kernels.
//
// c, a, m : base, alternate and mask clips (same colorspace and resolution)
// _mi     : combed-frame threshold, 0 .. bx * by (0 disables the test)
// bx, by  : block size of the combed-frame test; 8, 16 or 32
// chroma  : also merge the U/V planes
// arch    : instruction set to use
// ip      : true when hosted by Avisynth+
//
// Throws std::runtime_error (via validate) on invalid arguments.
MaskedMerge::
MaskedMerge(PClip c, PClip a, PClip m, int _mi, int bx, int by, bool chroma,
            arch_t arch, bool ip) :
    GVFmod(c, chroma, arch, ip), altc(a), maskc(m), mi(_mi), blockx(bx),
    blocky(by)
{
    validate(!vi.IsPlanar(), "planar format only.");

    // The combed test masks coordinates with ~(block - 1), which is only
    // correct for powers of two, so a multiple-of-8 check (which would let 24
    // through) is not sufficient.
    validate(blockx != 8 && blockx != 16 && blockx != 32,
             "blockx must be set to 8, 16 or 32.");
    validate(blocky != 8 && blocky != 16 && blocky != 32,
             "blocky must be set to 8, 16 or 32.");

    // A block holds blockx * blocky pixels, so that is the largest count the
    // test can ever see (documented range in readme.txt).
    validate(mi < 0 || mi > blockx * blocky,
             "mi must be between 0 and blockx * blocky.");

    const VideoInfo& a_vi = altc->GetVideoInfo();
    const VideoInfo& m_vi = maskc->GetVideoInfo();
    validate(!vi.IsSameColorspace(a_vi) || !vi.IsSameColorspace(m_vi),
             "unmatch colorspaces.");
    validate(vi.width != a_vi.width || vi.width != m_vi.width ||
             vi.height != a_vi.height || vi.height != m_vi.height,
             "unmatch resolutions.");

    switch (arch) {
#if defined(__AVX2__)
    case USE_AVX2:
        mergeFrames = merge_frames_simd<__m256i>;
        break;
#endif
    case USE_SSE2:
        mergeFrames = merge_frames_simd<__m128i>;
        break;
    default:
        mergeFrames = merge_frames_c;
    }

    checkCombed = get_check_combed(arch);
}
222+
223+
224+
// Produces frame n.
// With mi > 0 the mask is first tested by checkCombed; if it is not judged
// combed the base frame is returned as is and the alternate clip is never
// requested. Otherwise a new frame is created and filled by mergeFrames.
// When only luma is merged on a clip that does have chroma, U and V are copied
// from the base frame.
PVideoFrame __stdcall MaskedMerge::GetFrame(int n, ise_t* env)
{
    PVideoFrame src = child->GetFrame(n, env);
    PVideoFrame mask = maskc->GetFrame(n, env);

    const bool detection_enabled = mi > 0;
    if (detection_enabled) {
        const bool combed = checkCombed(mask, mi, blockx, blocky, isPlus, env);
        if (!combed) {
            return src;
        }
    }

    PVideoFrame alt = altc->GetFrame(n, env);
    PVideoFrame dst = env->NewVideoFrame(vi);

    mergeFrames(numPlanes, src, alt, mask, dst);

    // Nothing left to do when chroma was merged too or the clip has none.
    if (numPlanes != 1 || vi.IsY8()) {
        return dst;
    }

    // Both chroma planes are copied with the geometry queried for U.
    const int uv_src_pitch = src->GetPitch(PLANAR_U);
    const int uv_dst_pitch = dst->GetPitch(PLANAR_U);
    const int uv_width = src->GetRowSize(PLANAR_U);
    const int uv_height = src->GetHeight(PLANAR_U);

    static const int chroma_planes[] = { PLANAR_U, PLANAR_V };
    for (int i = 0; i < 2; ++i) {
        const int plane = chroma_planes[i];
        env->BitBlt(dst->GetWritePtr(plane), uv_dst_pitch,
                    src->GetReadPtr(plane), uv_src_pitch, uv_width, uv_height);
    }

    return dst;
}
250+

‎avisynth/src/cpu_check.cpp

+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/*
2+
cpu_check.cpp
3+
4+
This file is a part of TMM2
5+
6+
Copyright (C) 2016 OKA Motofumi
7+
8+
This program is free software; you can redistribute it and/or modify
9+
it under the terms of the GNU General Public License as published by
10+
the Free Software Foundation; either version 2 of the License, or
11+
(at your option) any later version.
12+
13+
This program is distributed in the hope that it will be useful,
14+
but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
GNU General Public License for more details.
17+
18+
You should have received a copy of the GNU General Public License
19+
along with this program; if not, write to the Free Software
20+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21+
*/
22+
23+
#include <cstdint>
24+
#include <intrin.h>
25+
26+
27+
// Bit flags OR-ed together by get_simd_support_info(); each value marks one
// instruction-set extension reported by CPUID.
enum {
28+
CPU_NO_X86_SIMD = 0x000000,
29+
CPU_SSE2_SUPPORT = 0x000001,
30+
CPU_SSE3_SUPPORT = 0x000002,
31+
CPU_SSSE3_SUPPORT = 0x000004,
32+
CPU_SSE4_1_SUPPORT = 0x000008,
33+
CPU_SSE4_2_SUPPORT = 0x000010,
34+
CPU_SSE4_A_SUPPORT = 0x000020,
35+
CPU_FMA4_SUPPORT = 0x000040,
36+
CPU_FMA3_SUPPORT = 0x000080,
37+
CPU_AVX_SUPPORT = 0x000100,
38+
CPU_AVX2_SUPPORT = 0x000200,
39+
CPU_AVX512F_SUPPORT = 0x000400,
40+
CPU_AVX512DQ_SUPPORT = 0x000800,
41+
CPU_AVX512IFMA52_SUPPORT = 0x001000,
42+
CPU_AVX512PF_SUPPORT = 0x002000,
43+
CPU_AVX512ER_SUPPORT = 0x004000,
44+
CPU_AVX512CD_SUPPORT = 0x008000,
45+
CPU_AVX512BW_SUPPORT = 0x010000,
46+
CPU_AVX512VL_SUPPORT = 0x020000,
47+
CPU_AVX512VBMI_SUPPORT = 0x040000,
48+
};
49+
50+
51+
52+
53+
// Returns true when bit number `bit` (0 = least significant, 31 = most
// significant) is set in `bitfield`.
// The mask is built from an unsigned 1 so that testing bit 31 does not shift
// a signed int into its sign bit.
static __forceinline bool is_bit_set(int bitfield, int bit)
{
    return (static_cast<uint32_t>(bitfield) & (UINT32_C(1) << bit)) != 0;
}
57+
58+
static uint32_t get_simd_support_info(void)
59+
{
60+
uint32_t ret = 0;
61+
int regs[4] = {0};
62+
63+
__cpuid(regs, 0x00000001);
64+
if (is_bit_set(regs[3], 26)) {
65+
ret |= CPU_SSE2_SUPPORT;
66+
}
67+
if (is_bit_set(regs[2], 0)) {
68+
ret |= CPU_SSE3_SUPPORT;
69+
}
70+
if (is_bit_set(regs[2], 9)) {
71+
ret |= CPU_SSSE3_SUPPORT;
72+
}
73+
if (is_bit_set(regs[2], 19)) {
74+
ret |= CPU_SSE4_1_SUPPORT;
75+
}
76+
if (is_bit_set(regs[2], 26)) {
77+
ret |= CPU_SSE4_2_SUPPORT;
78+
}
79+
if (is_bit_set(regs[2], 27)) {
80+
if (is_bit_set(regs[2], 28)) {
81+
ret |= CPU_AVX_SUPPORT;
82+
}
83+
if (is_bit_set(regs[2], 12)) {
84+
ret |= CPU_FMA3_SUPPORT;
85+
}
86+
}
87+
88+
regs[3] = 0;
89+
__cpuid(regs, 0x80000001);
90+
if (is_bit_set(regs[3], 6)) {
91+
ret |= CPU_SSE4_A_SUPPORT;
92+
}
93+
if (is_bit_set(regs[3], 16)) {
94+
ret |= CPU_FMA4_SUPPORT;
95+
}
96+
97+
__cpuid(regs, 0x00000000);
98+
if (regs[0] < 7) {
99+
return ret;
100+
}
101+
102+
__cpuidex(regs, 0x00000007, 0);
103+
if (is_bit_set(regs[1], 5)) {
104+
ret |= CPU_AVX2_SUPPORT;
105+
}
106+
if (!is_bit_set(regs[1], 16)) {
107+
return ret;
108+
}
109+
110+
ret |= CPU_AVX512F_SUPPORT;
111+
if (is_bit_set(regs[1], 17)) {
112+
ret |= CPU_AVX512DQ_SUPPORT;
113+
}
114+
if (is_bit_set(regs[1], 21)) {
115+
ret |= CPU_AVX512IFMA52_SUPPORT;
116+
}
117+
if (is_bit_set(regs[1], 26)) {
118+
ret |= CPU_AVX512PF_SUPPORT;
119+
}
120+
if (is_bit_set(regs[1], 27)) {
121+
ret |= CPU_AVX512ER_SUPPORT;
122+
}
123+
if (is_bit_set(regs[1], 28)) {
124+
ret |= CPU_AVX512CD_SUPPORT;
125+
}
126+
if (is_bit_set(regs[1], 30)) {
127+
ret |= CPU_AVX512BW_SUPPORT;
128+
}
129+
if (is_bit_set(regs[1], 31)) {
130+
ret |= CPU_AVX512VL_SUPPORT;
131+
}
132+
if (is_bit_set(regs[2], 1)) {
133+
ret |= CPU_AVX512VBMI_SUPPORT;
134+
}
135+
136+
return ret;
137+
}
138+
139+
bool has_sse2()
140+
{
141+
return (get_simd_support_info() & CPU_SSE2_SUPPORT) != 0;
142+
}
143+
144+
bool has_avx2()
145+
{
146+
return (get_simd_support_info() & CPU_AVX2_SUPPORT) != 0;
147+
}

‎avisynth/src/plugin.cpp

+157
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#include "CombMask.h"
2+
3+
extern bool has_sse2();
4+
extern bool has_avx2();
5+
6+
7+
// Maps the script's "opt" argument to the instruction set actually used.
//   opt == 0, or no SSE2 on this CPU        -> NO_SIMD
//   build without AVX2                      -> USE_SSE2
//   opt == 1, no AVX2, or not Avisynth+     -> USE_SSE2
//   anything else                           -> USE_AVX2
static arch_t get_arch(int opt, bool is_avsplus)
{
    const bool simd_requested = opt != 0;
    if (!(simd_requested && has_sse2())) {
        return NO_SIMD;
    }
#if defined(__AVX2__)
    const bool avx2_usable = opt != 1 && has_avx2() && is_avsplus;
    return avx2_usable ? USE_AVX2 : USE_SSE2;
#else
    return USE_SSE2;
#endif
}
21+
22+
23+
// Factory for the CombMask script function.
// Reads the arguments (cthresh defaults to 6 for metric 0 and to 10
// otherwise), builds the filter and converts any std::runtime_error raised
// during construction into an Avisynth script error prefixed "CombMask: ".
// A non-null user_data marks the host as Avisynth+.
static AVSValue __cdecl
create_combmask(AVSValue args, void* user_data, ise_t* env)
{
    enum { CLIP, CTHRESH, MTHRESH, CHROMA, EXPAND, METRIC, OPT };

    PClip clip = args[CLIP].AsClip();
    int metric = args[METRIC].AsInt(0);
    int cth = args[CTHRESH].AsInt(metric == 0 ? 6 : 10);
    int mth = args[MTHRESH].AsInt(9);
    bool ch = args[CHROMA].AsBool(true);
    bool expand = args[EXPAND].AsBool(true);
    bool is_avsplus = user_data != nullptr;
    arch_t arch = get_arch(args[OPT].AsInt(-1), is_avsplus);

    try {
        return new CombMask(clip, cth, mth, ch, arch, expand, metric, is_avsplus);
    } catch (const std::runtime_error& e) {
        // ThrowError's first argument is a printf-style format, so the
        // message is passed as data rather than as the format itself.
        env->ThrowError("CombMask: %s", e.what());
    }

    return 0;
}
46+
47+
48+
// Factory for the MaskedMerge script function.
// All three clips are mandatory. MI defaults to 80 and blockx/blocky to 16,
// the values documented in readme.txt and used by IsCombed. Any
// std::runtime_error raised while validating is turned into an Avisynth
// script error prefixed "MaskedMerge: ".
// A non-null user_data marks the host as Avisynth+.
static AVSValue __cdecl
create_maskedmerge(AVSValue args, void* user_data, IScriptEnvironment* env)
{
    enum { BASE, ALT, MASK, MI, BLOCKX, BLOCKY, CHROMA, OPT };
    try {
        validate(!args[BASE].Defined(), "base clip is not set.");
        validate(!args[ALT].Defined(), "alt clip is not set.");
        validate(!args[MASK].Defined(), "mask clip is not set.");

        PClip base = args[BASE].AsClip();
        PClip alt = args[ALT].AsClip();
        PClip mask = args[MASK].AsClip();

        int mi = args[MI].AsInt(80);
        int bx = args[BLOCKX].AsInt(16);
        int by = args[BLOCKY].AsInt(16);
        bool ch = args[CHROMA].AsBool(true);
        bool is_avsplus = user_data != nullptr;
        arch_t arch = get_arch(args[OPT].AsInt(-1), is_avsplus);

        return new MaskedMerge(base, alt, mask, mi, bx, by, ch, arch, is_avsplus);
    } catch (const std::runtime_error& e) {
        // ThrowError's first argument is a printf-style format, so the
        // message is passed as data rather than as the format itself.
        env->ThrowError("MaskedMerge: %s", e.what());
    }
    return 0;
}
74+
75+
76+
// Implements the IsCombed script function.
// Reads the script variable "current_frame", builds a temporary luma-only
// CombMask (chroma and expand disabled) for the clip, renders that frame of
// the mask and returns whether the combed-frame test fires on it.
// Any std::runtime_error is turned into an Avisynth script error prefixed
// "IsCombed: ". A non-null user_data marks the host as Avisynth+.
static AVSValue __cdecl
create_iscombed(AVSValue args, void* user_data, ise_t* env)
{
    enum { CLIP, CTHRESH, MTHRESH, MI, BLOCKX, BLOCKY, METRIC, OPT };

    try {
        AVSValue cf = env->GetVar("current_frame");
        validate(!cf.IsInt(),
                 "This filter can only be used within ConditionalFilter.");
        int n = cf.AsInt();

        PClip clip = args[CLIP].AsClip();
        int metric = args[METRIC].AsInt(0);
        int cth = args[CTHRESH].AsInt(metric == 0 ? 6 : 10);
        int mth = args[MTHRESH].AsInt(9);
        int mi = args[MI].AsInt(80);
        int blockx = args[BLOCKX].AsInt(16);
        int blocky = args[BLOCKY].AsInt(16);
        bool is_avsplus = user_data != nullptr;
        arch_t arch = get_arch(args[OPT].AsInt(-1), is_avsplus);

        validate(blockx != 8 && blockx != 16 && blockx != 32,
                 "blockx must be set to 8, 16 or 32.");
        validate(blocky != 8 && blocky != 16 && blocky != 32,
                 "blocky must be set to 8, 16 or 32.");
        // A block holds blockx * blocky pixels, which bounds the count the
        // test can see (documented range in readme.txt).
        validate(mi < 0 || mi > blockx * blocky,
                 "MI must be between 0 and blockx * blocky.");

        // PClip owns the temporary filter, so it is released on every exit
        // path, including exceptions that are not std::runtime_error.
        PClip cm(new CombMask(clip, cth, mth, false, arch, false, metric,
                              is_avsplus));

        // Named local: the test takes the frame by non-const reference.
        PVideoFrame mask = cm->GetFrame(n, env);
        bool is_combed = (get_check_combed(arch))(
            mask, mi, blockx, blocky, is_avsplus, env);

        return AVSValue(is_combed);

    } catch (const std::runtime_error& e) {
        // ThrowError's first argument is a printf-style format, so the
        // message is passed as data rather than as the format itself.
        env->ThrowError("IsCombed: %s", e.what());
    }
    return 0;
}
119+
120+
121+
122+
123+
124+
125+
126+
127+
const AVS_Linkage* AVS_linkage = nullptr;
128+
129+
130+
// Plugin entry point: stores the linkage table, registers the CombMask,
// MaskedMerge and IsCombed script functions and, when the host provides
// SetFilterMTMode (taken as the sign of Avisynth+), registers their MT modes.
// Returns the plugin description string.
extern "C" __declspec(dllexport) const char* __stdcall
AvisynthPluginInit3(IScriptEnvironment* env, const AVS_Linkage* const vectors)
{
    AVS_linkage = vectors;

    // The factories only test user_data against nullptr, so any stable
    // non-null address works as the "running under Avisynth+" tag. A static
    // object is used instead of a string literal, which is const and cannot
    // be converted to void* in standard C++.
    static char avsplus_tag = 1;
    void* is_avsplus =
        env->FunctionExists("SetFilterMTMode") ? &avsplus_tag : nullptr;

    env->AddFunction(
        "CombMask", "c[cthresh]i[mthresh]i[chroma]b[expand]b[metric]i[opt]i",
        create_combmask, is_avsplus);
    env->AddFunction(
        "MaskedMerge",
        "[base]c[alt]c[mask]c[MI]i[blockx]i[blocky]i[chroma]b[opt]i",
        create_maskedmerge, is_avsplus);
    env->AddFunction(
        "IsCombed",
        "c[cthresh]i[mthresh]i[MI]i[blockx]i[blocky]i[metric]i[opt]i",
        create_iscombed, is_avsplus);

    if (is_avsplus != nullptr) {
        auto env2 = static_cast<IScriptEnvironment2*>(env);
        env2->SetFilterMTMode("CombMask", MT_NICE_FILTER, true);
        env2->SetFilterMTMode("MaskedMerge", MT_NICE_FILTER, true);
        env2->SetFilterMTMode("IsCombed", MT_SERIALIZED, true);
    }

    return "CombMask filter for Avisynth2.6/Avisynth+ version " CMASK_VERSION;
}

‎avisynth/src/simd.h

+428
Large diffs are not rendered by default.

‎avisynth/CombMask.vcxproj ‎avisynth/vs2015/CombMask.vcxproj

+48-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<?xml version="1.0" encoding="utf-8"?>
2-
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
2+
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
33
<ItemGroup Label="ProjectConfigurations">
44
<ProjectConfiguration Include="Debug|Win32">
55
<Configuration>Debug</Configuration>
@@ -9,29 +9,61 @@
99
<Configuration>Release</Configuration>
1010
<Platform>Win32</Platform>
1111
</ProjectConfiguration>
12+
<ProjectConfiguration Include="Debug|x64">
13+
<Configuration>Debug</Configuration>
14+
<Platform>x64</Platform>
15+
</ProjectConfiguration>
16+
<ProjectConfiguration Include="Release|x64">
17+
<Configuration>Release</Configuration>
18+
<Platform>x64</Platform>
19+
</ProjectConfiguration>
1220
</ItemGroup>
1321
<PropertyGroup Label="Globals">
22+
<ProjectGuid>{6B5469DF-E6CA-4A4E-B879-9672E926A88A}</ProjectGuid>
1423
<Keyword>Win32Proj</Keyword>
24+
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
1525
</PropertyGroup>
1626
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
1727
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
1828
<ConfigurationType>DynamicLibrary</ConfigurationType>
1929
<UseDebugLibraries>true</UseDebugLibraries>
30+
<PlatformToolset>v140</PlatformToolset>
31+
<CharacterSet>MultiByte</CharacterSet>
2032
</PropertyGroup>
2133
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
2234
<ConfigurationType>DynamicLibrary</ConfigurationType>
2335
<UseDebugLibraries>false</UseDebugLibraries>
36+
<PlatformToolset>v140</PlatformToolset>
2437
<CharacterSet>MultiByte</CharacterSet>
38+
<WholeProgramOptimization>true</WholeProgramOptimization>
39+
</PropertyGroup>
40+
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
41+
<ConfigurationType>Application</ConfigurationType>
42+
<UseDebugLibraries>true</UseDebugLibraries>
43+
<PlatformToolset>v140</PlatformToolset>
44+
</PropertyGroup>
45+
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
46+
<ConfigurationType>Application</ConfigurationType>
47+
<UseDebugLibraries>false</UseDebugLibraries>
48+
<PlatformToolset>v140</PlatformToolset>
2549
</PropertyGroup>
2650
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
2751
<ImportGroup Label="ExtensionSettings">
2852
</ImportGroup>
53+
<ImportGroup Label="Shared">
54+
</ImportGroup>
2955
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
3056
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
3157
</ImportGroup>
3258
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
3359
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
3460
</ImportGroup>
61+
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
62+
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
63+
</ImportGroup>
64+
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
65+
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
66+
</ImportGroup>
3567
<PropertyGroup Label="UserMacros" />
3668
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
3769
<LinkIncremental>true</LinkIncremental>
@@ -46,8 +78,9 @@
4678
<WarningLevel>Level3</WarningLevel>
4779
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
4880
<Optimization>Disabled</Optimization>
49-
<IntrinsicFunctions>true</IntrinsicFunctions>
50-
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
81+
<AdditionalIncludeDirectories>C:\my_projects\AviSynthPlus\avs_core\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
82+
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
83+
<FloatingPointModel>Fast</FloatingPointModel>
5184
</ClCompile>
5285
<Link>
5386
<TargetMachine>MachineX86</TargetMachine>
@@ -61,28 +94,32 @@
6194
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
6295
<WarningLevel>Level3</WarningLevel>
6396
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
64-
<Optimization>Full</Optimization>
97+
<AdditionalIncludeDirectories>C:\my_projects\AviSynthPlus\avs_core\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
6598
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
6699
<IntrinsicFunctions>true</IntrinsicFunctions>
100+
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
67101
<OmitFramePointers>true</OmitFramePointers>
68-
<WholeProgramOptimization>true</WholeProgramOptimization>
69-
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
70-
<CallingConvention>StdCall</CallingConvention>
102+
<StringPooling>true</StringPooling>
103+
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
104+
<FloatingPointModel>Fast</FloatingPointModel>
71105
</ClCompile>
72106
<Link>
73107
<TargetMachine>MachineX86</TargetMachine>
74-
<GenerateDebugInformation>false</GenerateDebugInformation>
108+
<GenerateDebugInformation>true</GenerateDebugInformation>
75109
<SubSystem>Windows</SubSystem>
76110
<EnableCOMDATFolding>true</EnableCOMDATFolding>
77111
<OptimizeReferences>true</OptimizeReferences>
78-
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
79112
</Link>
80113
</ItemDefinitionGroup>
81114
<ItemGroup>
82-
<ClCompile Include="CombMask.cpp" />
115+
<ClCompile Include="..\src\CombMask.cpp" />
116+
<ClCompile Include="..\src\cpu_check.cpp" />
117+
<ClCompile Include="..\src\MaskedMerge.cpp" />
118+
<ClCompile Include="..\src\plugin.cpp" />
83119
</ItemGroup>
84120
<ItemGroup>
85-
<ClInclude Include="avisynth.h" />
121+
<ClInclude Include="..\src\CombMask.h" />
122+
<ClInclude Include="..\src\simd.h" />
86123
</ItemGroup>
87124
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
88125
<ImportGroup Label="ExtensionTargets">

0 commit comments

Comments
 (0)
Please sign in to comment.