diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..412eeda
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,22 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Custom for Visual Studio
+*.cs diff=csharp
+*.sln merge=union
+*.csproj merge=union
+*.vbproj merge=union
+*.fsproj merge=union
+*.dbproj merge=union
+
+# Standard to msysgit
+*.doc diff=astextplain
+*.DOC diff=astextplain
+*.docx diff=astextplain
+*.DOCX diff=astextplain
+*.dot diff=astextplain
+*.DOT diff=astextplain
+*.pdf diff=astextplain
+*.PDF diff=astextplain
+*.rtf diff=astextplain
+*.RTF diff=astextplain
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7cdee98
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,163 @@
+# Windows image file caches
+Thumbs.db
+ehthumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# =========================
+# Operating System Files
+# =========================
+
+# OSX
+# =========================
+
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+# Thumbnails
+._*
+
+# Files that might appear on external disk
+.Spotlight-V100
+.Trashes
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# User-specific files
+*.suo
+*.user
+*.userosscache
+*.sln.docstates
+
+# User-specific files (MonoDevelop/Xamarin Studio)
+*.userprefs
+
+# Build results
+[Dd]ebug/
+[Dd]ebugPublic/
+[Rr]elease/
+!unix_makefiles/Release/
+[Rr]eleases/
+x64/
+x86/
+build/
+bld/
+[Bb]in/
+[Oo]bj/
+
+# Visual Studio 2015 cache/options directory
+.vs/
+
+# MSTest test Results
+[Tt]est[Rr]esult*/
+[Bb]uild[Ll]og.*
+
+#NUNIT
+*.VisualState.xml
+TestResult.xml
+
+# Build Results of an ATL Project
+[Dd]ebugPS/
+[Rr]eleasePS/
+dlldata.c
+
+*_i.c
+*_p.c
+*_i.h
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.tmp_proj
+*.log
+*.vspscc
+*.vssscc
+.builds
+*.pidb
+*.svclog
+*.scc
+
+# Chutzpah Test files
+_Chutzpah*
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opensdf
+*.sdf
+*.cachefile
+
+*.VC.db
+
+# Visual Studio profiler
+*.psess
+*.vsp
+*.vspx
+
+#OpenWatcom files
+*.lk1
+*.map
+*.mk
+*.mk1
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5705a3a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,53 @@
+Репозиторий Fractal Image Compression
+=====================================
+
+Описание
+--------
+
+Простой кодек для фрактального сжатия изображений. Основывается на описании из первой главы книги Yuval Fisher, Fractal Image compression: Theory and Application.
+
+Кодирование реализовано на CPU (compress.exe) и GPU (cuda_compress.exe - cuda 7.5, версии спецификации >=2.0, и cudaold_compress.exe - cuda 6.5, версии спецификации 1.1, 2.0). Имеется версия с поддержкой mpi.
+
+Ссылки
+------
+
+[Репозиторий](https://github.com/ImageProcessing-ElectronicPublications/fractal_image_compression)
+
+[Скачать](https://github.com/ImageProcessing-ElectronicPublications/fractal_image_compression/releases)
+
+Для запуска под Windows необходим Windows XP и выше и Visual Studio 2013 runtime; для использования с mpi необходим mpich2 v1.4.1p1.
+
+Использование
+-------------
+
+Как сжимать:
+
+>compress.exe input.tga output.fi 32 4 5
+
+где 32 - размер рангового блока (ширина и высота изображения должны делиться на 32; чем больше размер рангового блока, тем лучше сжатие), 4 - количество потоков ЦПУ, 5 - максимальное значение погрешности для пикселя (чем больше число, тем меньше файл, но хуже качество получившегося изображения).
+
+Как разжимать:
+
+>decompress.exe output.fi decompressed.tga
+
+или
+
+>decompress.exe output.fi decompressed.tga 3
+
+если вы хотите увеличить ширину и высоту изображения в 3 раза (можно использовать любое целое число).
+
+Сборка
+------
+
+Под Windows просто используйте Visual Studio 2013 и выше.
+
+Под unix просто войдите в unix_makefiles и введите в консоли:
+
+>make all
+
+Для сборки под юниксами также необходимы gcc и mpi (тестировал под Debian 7 с mpich2 версии 1.4.1p1).
+
+Лицензия
+--------
+
+Нет её). Общественное достояние.
diff --git a/chihiro2.tga b/chihiro2.tga
new file mode 100644
index 0000000..2a6e850
Binary files /dev/null and b/chihiro2.tga differ
diff --git a/compress/compress.vcxproj b/compress/compress.vcxproj
new file mode 100644
index 0000000..5922b13
--- /dev/null
+++ b/compress/compress.vcxproj
@@ -0,0 +1,128 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Release (MPI)
+ Win32
+
+
+ Release
+ Win32
+
+
+
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}
+ Win32Proj
+ compress
+
+
+
+ Application
+ true
+ v120_xp
+ Unicode
+
+
+ Application
+ false
+ v120_xp
+ true
+ Unicode
+
+
+ Application
+ false
+ v120_xp
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ false
+
+
+ false
+ $(ProgramFiles)\MPICH2\include;$(VC_IncludePath);$(WindowsSDK_IncludePath)
+ $(ProgramFiles)\MPICH2\lib;$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86);
+
+
+
+
+
+ Level3
+ Disabled
+ WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
+
+
+ Console
+ true
+
+
+
+
+ Level3
+
+
+ MaxSpeed
+ true
+ true
+ WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+ Level3
+
+
+ MaxSpeed
+ true
+ true
+ I_USE_MPI;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
+
+
+ Console
+ true
+ true
+ true
+ mpi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/compress/compress.vcxproj.filters b/compress/compress.vcxproj.filters
new file mode 100644
index 0000000..55e1b33
--- /dev/null
+++ b/compress/compress.vcxproj.filters
@@ -0,0 +1,46 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;hm;inl;inc;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+ {9856afa4-a7d8-48a4-8188-a60d310d20fc}
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files\CPU driven
+
+
+
\ No newline at end of file
diff --git a/cuda_compress/cuda_compress.vcxproj b/cuda_compress/cuda_compress.vcxproj
new file mode 100644
index 0000000..da8a8af
--- /dev/null
+++ b/cuda_compress/cuda_compress.vcxproj
@@ -0,0 +1,256 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release (MPI)
+ Win32
+
+
+ Release (MPI)
+ x64
+
+
+ Release
+ Win32
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}
+ cuda_compress
+
+
+
+ Application
+ true
+ MultiByte
+ v120
+
+
+ Application
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ true
+
+
+ $(ProgramFiles)\MPICH2\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);
+ $(ProgramFiles)\MPICH2\lib;$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86);
+
+
+
+ Level3
+ Disabled
+ SINGLE_THREAD_BY_DEFAULT;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+ 5.01
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;
+
+
+
+
+ Level3
+ Disabled
+ WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ SINGLE_THREAD_BY_DEFAULT;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+ 5.01
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ SINGLE_THREAD_BY_DEFAULT;I_USE_MPI;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ mpi.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+ 5.01
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/cuda_compress/cuda_compress.vcxproj.filters b/cuda_compress/cuda_compress.vcxproj.filters
new file mode 100644
index 0000000..6e3e19a
--- /dev/null
+++ b/cuda_compress/cuda_compress.vcxproj.filters
@@ -0,0 +1,25 @@
+
+
+
+
+ {e0e0e71a-6e32-4e79-b5c7-afd5b1a6e698}
+
+
+ {04d7c5b1-6017-4c20-9d85-3b4efa11ce02}
+
+
+
+
+
+
+
+
+
+
+
+
+
+ GPU driven
+
+
+
\ No newline at end of file
diff --git a/cudaold_compress/cudaold_compress.vcxproj b/cudaold_compress/cudaold_compress.vcxproj
new file mode 100644
index 0000000..6194b84
--- /dev/null
+++ b/cudaold_compress/cudaold_compress.vcxproj
@@ -0,0 +1,256 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release (MPI)
+ Win32
+
+
+ Release (MPI)
+ x64
+
+
+ Release
+ Win32
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}
+ cudaold_compress
+
+
+
+ Application
+ true
+ MultiByte
+ v120
+
+
+ Application
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+ Application
+ false
+ true
+ MultiByte
+ v120
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ true
+
+
+ $(ProgramFiles)\MPICH2\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);
+ $(ProgramFiles)\MPICH2\lib;$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86);
+
+
+
+ Level3
+ Disabled
+ SINGLE_THREAD_BY_DEFAULT;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+ 5.01
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ compute_12,sm_12;compute_20,sm_20;
+
+
+
+
+ Level3
+ Disabled
+ WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ SINGLE_THREAD_BY_DEFAULT;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+ 5.01
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ compute_12,sm_12;compute_20,sm_20;
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ SINGLE_THREAD_BY_DEFAULT;I_USE_MPI;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ mpi.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+ 5.01
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ compute_12,sm_12;compute_20,sm_20;
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+
+
+ true
+ true
+ true
+ Console
+ cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
+
+
+ 64
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/cudaold_compress/cudaold_compress.vcxproj.filters b/cudaold_compress/cudaold_compress.vcxproj.filters
new file mode 100644
index 0000000..ec4a007
--- /dev/null
+++ b/cudaold_compress/cudaold_compress.vcxproj.filters
@@ -0,0 +1,22 @@
+
+
+
+
+ {3e52e746-98ea-452d-89aa-d1f6432b29fa}
+
+
+
+
+ GPU driven
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/decompress/decompress.vcxproj b/decompress/decompress.vcxproj
new file mode 100644
index 0000000..82a628c
--- /dev/null
+++ b/decompress/decompress.vcxproj
@@ -0,0 +1,88 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Release
+ Win32
+
+
+
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}
+ Win32Proj
+ decompress
+
+
+
+ Application
+ true
+ v120_xp
+ Unicode
+
+
+ Application
+ false
+ v120_xp
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ false
+
+
+
+
+
+ Level3
+ Disabled
+ WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
+
+
+ Console
+ true
+
+
+
+
+ Level3
+
+
+ MaxSpeed
+ true
+ true
+ WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)
+
+
+ Console
+ true
+ true
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/decompress/decompress.vcxproj.filters b/decompress/decompress.vcxproj.filters
new file mode 100644
index 0000000..a72b55f
--- /dev/null
+++ b/decompress/decompress.vcxproj.filters
@@ -0,0 +1,34 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;hm;inl;inc;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/fractal_image_compression.sln b/fractal_image_compression.sln
new file mode 100644
index 0000000..116543e
--- /dev/null
+++ b/fractal_image_compression.sln
@@ -0,0 +1,70 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.40629.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compress", "compress\compress.vcxproj", "{65DA45D0-C86F-4A4A-8169-2AA90B68AA47}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "decompress", "decompress\decompress.vcxproj", "{F97F886B-3E32-4790-B091-A4CBF1B91946}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuda_compress", "cuda_compress\cuda_compress.vcxproj", "{AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaold_compress", "cudaold_compress\cudaold_compress.vcxproj", "{D38F1866-2265-48A0-AD93-3A5E05C69163}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ Release (MPI)|Win32 = Release (MPI)|Win32
+ Release (MPI)|x64 = Release (MPI)|x64
+ Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Debug|Win32.ActiveCfg = Debug|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Debug|Win32.Build.0 = Debug|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Debug|x64.ActiveCfg = Debug|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release (MPI)|Win32.ActiveCfg = Release (MPI)|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release (MPI)|Win32.Build.0 = Release (MPI)|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release (MPI)|x64.ActiveCfg = Release (MPI)|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release|Win32.ActiveCfg = Release|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release|Win32.Build.0 = Release|Win32
+ {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release|x64.ActiveCfg = Release|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Debug|Win32.ActiveCfg = Debug|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Debug|Win32.Build.0 = Debug|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Debug|x64.ActiveCfg = Debug|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release (MPI)|Win32.ActiveCfg = Release|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release (MPI)|Win32.Build.0 = Release|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release (MPI)|x64.ActiveCfg = Release|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release|Win32.ActiveCfg = Release|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release|Win32.Build.0 = Release|Win32
+ {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release|x64.ActiveCfg = Release|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|Win32.ActiveCfg = Debug|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|Win32.Build.0 = Debug|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|x64.ActiveCfg = Debug|x64
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|x64.Build.0 = Debug|x64
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|Win32.ActiveCfg = Release (MPI)|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|Win32.Build.0 = Release (MPI)|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|x64.ActiveCfg = Release|x64
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|x64.Build.0 = Release|x64
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|Win32.ActiveCfg = Release|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|Win32.Build.0 = Release|Win32
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|x64.ActiveCfg = Release|x64
+ {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|x64.Build.0 = Release|x64
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|Win32.ActiveCfg = Debug|Win32
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|Win32.Build.0 = Debug|Win32
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|x64.ActiveCfg = Debug|x64
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|x64.Build.0 = Debug|x64
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|Win32.ActiveCfg = Release (MPI)|Win32
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|Win32.Build.0 = Release (MPI)|Win32
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|x64.ActiveCfg = Release|x64
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|x64.Build.0 = Release|x64
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|Win32.ActiveCfg = Release|Win32
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|Win32.Build.0 = Release|Win32
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|x64.ActiveCfg = Release|x64
+ {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/src/arrays.c b/src/arrays.c
new file mode 100644
index 0000000..d7e1f77
--- /dev/null
+++ b/src/arrays.c
@@ -0,0 +1,86 @@
+
+#include "arrays.h"
+/*
+ * Reads `size` bits from the bit cursor `a`, starting at the current
+ * byte/bit position, and accumulates them most-significant-bits first.
+ *
+ * Returns false without touching the cursor when `size` is larger than 32
+ * or when the request would run past a->bytes_size; returns true otherwise
+ * and stores the advanced position back into `a`.
+ *
+ * NOTE(review): in this copy of the patch the loop body is cut off in the
+ * middle of a line, so the code that advances to the next byte and the
+ * store of the result into *out are not visible here - confirm against the
+ * real source before relying on this description.
+ */
+bool arrayReadBits(array_type *a, unsigned int size, unsigned int *out)
+{
+ unsigned int output, bits_offset, bytes_offset;
+ unsigned char *bp;
+
+ if(size > 32) return false; // the result is returned through a single unsigned int
+
+ // Bounds check: whole bytes covered by (bits_offset+size), rounded up, must fit into bytes_size
+ if((a->bytes_offset+(a->bits_offset+size)/8+((((a->bits_offset+size)%8)>0)?1:0)) > a->bytes_size)
+ return false;
+
+ // Work on local copies of the cursor; they are written back only at the end
+ bp = a->bp;
+ bits_offset = a->bits_offset;
+ bytes_offset = a->bytes_offset;
+ output = 0;
+
+ while(size > 0) {
+ unsigned int this_step;
+
+ // Take at most the bits that remain in the current byte
+ if(size > (8-bits_offset))
+ this_step = 8-bits_offset;
+ else
+ this_step = size;
+ size -= this_step;
+ bits_offset += this_step;
+
+ output = (output << this_step)+((unsigned int)(*bp) >> (8-bits_offset))%(1<bp = bp; // NOTE(review): this line is truncated in this view - the end of the mask expression and the rest of the loop are missing
+ a->bits_offset = bits_offset;
+ a->bytes_offset = bytes_offset;
+
+ return true;
+}
+/*
+ * Writes the low `size` bits of `out` into the bit cursor `a`, starting at
+ * the current byte/bit position, highest bits first.
+ *
+ * Returns false without touching the cursor when `size` is larger than 32
+ * or when the request would run past a->bytes_size; returns true otherwise
+ * and stores the advanced position back into `a`.
+ *
+ * NOTE(review): in this copy of the patch the loop body is cut off in the
+ * middle of a line, so the exact bit placement and the step to the next
+ * byte are not visible here - confirm against the real source. The visible
+ * part adds into *bp with +=, which presumably relies on the destination
+ * bytes being zero beforehand - confirm with the caller.
+ */
+bool arrayWriteBits(array_type *a, unsigned int size, unsigned int out)
+{
+ unsigned int bits_offset, bytes_offset;
+ unsigned char *bp;
+
+ if(size > 32) return false; // the value is passed in a single unsigned int
+
+ // Bounds check: whole bytes covered by (bits_offset+size), rounded up, must fit into bytes_size
+ if((a->bytes_offset+(a->bits_offset+size)/8+((((a->bits_offset+size)%8)>0)?1:0)) > a->bytes_size)
+ return false;
+
+ // Work on local copies of the cursor; they are written back only at the end
+ bp = a->bp;
+ bits_offset = a->bits_offset;
+ bytes_offset = a->bytes_offset;
+
+ while(size > 0) {
+ unsigned int this_step;
+
+ // Put at most the bits that still fit into the current byte
+ if(size > (8-bits_offset))
+ this_step = 8-bits_offset;
+ else
+ this_step = size;
+ size -= this_step;
+ bits_offset += this_step;
+
+ *bp += ( (out>>size)%(1<> (8-bits_offset))%(1<bp = bp; // NOTE(review): this line is truncated in this view - parts of the expression and the rest of the loop are missing
+ a->bits_offset = bits_offset;
+ a->bytes_offset = bytes_offset;
+
+ return true;
+}
\ No newline at end of file
diff --git a/src/arrays.h b/src/arrays.h
new file mode 100644
index 0000000..cf4496c
--- /dev/null
+++ b/src/arrays.h
@@ -0,0 +1,18 @@
+
+#ifndef _ARRAYS_H
+#define _ARRAYS_H
+
+#include
+/*
+ * Bit-level cursor over a byte buffer, shared by arrayReadBits() and
+ * arrayWriteBits().
+ */
+typedef struct {
+ unsigned char *bytes; // presumably the start of the underlying buffer - not referenced by the read/write routines, confirm with the callers
+ unsigned char *bp; // byte the next read or write operates on
+ unsigned int bytes_offset; // byte position that the bounds check compares against bytes_size
+ unsigned int bits_offset; // number of bits already consumed in the byte at bp (the routines treat a byte as 8 bits)
+ unsigned int bytes_size; // limit used by the bounds check; presumably the buffer size in bytes
+} array_type;
+
+extern bool arrayReadBits(array_type *a, unsigned int size, unsigned int *out); // size must be <= 32; false when the request does not fit
+extern bool arrayWriteBits(array_type *a, unsigned int size, unsigned int out); // size must be <= 32; false when the request does not fit
+
+#endif
diff --git a/src/compress.c b/src/compress.c
new file mode 100644
index 0000000..174c0b2
--- /dev/null
+++ b/src/compress.c
@@ -0,0 +1,154 @@
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef I_USE_MPI
+#include
+#define RETURN_FROM_MAIN(v) {MPI_Abort(MPI_COMM_WORLD,v);return(v);}
+#else
+#define RETURN_FROM_MAIN(v) {return(v);}
+#endif
+
+#include "tga_load.h"
+#include "image_misc.h"
+#include "fi_save.h"
+
+/*
+ * Compressor entry point: parses the command line, loads the input TGA,
+ * unpacks it into separate channels, converts BGR to YCbCr and hands the
+ * result to fiSave().
+ *
+ * Command line: input.tga output.fi [block_size=8 [nof_threads [rms_error=256]]]
+ *   block_size  - clamped to the range [2, IMAGE_MAXBLOCKSIZE]
+ *   nof_threads - 0 is replaced by 1; defaults to 1 when
+ *                 SINGLE_THREAD_BY_DEFAULT is defined, otherwise to 4
+ *   rms_error   - per-pixel error used to derive the per-block threshold
+ *                 rms_error^2 * block_size^2 passed to fiSave()
+ *
+ * Return value (every early exit goes through RETURN_FROM_MAIN, which in
+ * I_USE_MPI builds also calls MPI_Abort):
+ *   0 - success, or too few arguments (usage text printed)
+ *   1 - tgaLoad() failed
+ *   2 - image dimensions do not fit the block size
+ *   3 - UnpackChannels() failed
+ *   4 - fiSave() failed
+ */
+int main(int argc, char **argv)
+{
+    char *input_filename, *output_filename;
+    image_pc_type image_pc;
+    image_uc_type image_uc;
+    unsigned int i, blocksize, nof_threads, rms_error, worst_diff;
+    unsigned long long diff_limit, block_area, wide_diff;
+    char *program_name;
+#ifdef I_USE_MPI
+    int my_rank, nof_procs;
+#endif
+
+#ifdef I_USE_MPI
+    MPI_Init(0, 0);
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nof_procs);
+
+    printf("Using MPI. Program rank is %d+1/%d\n", my_rank, nof_procs);
+#endif
+
+    // Strip the directory part of argv[0]; both '\\' and '/' separators are accepted
+    program_name = strrchr(argv[0], '\\');
+
+    if(program_name == 0)
+        program_name = strrchr(argv[0], '/');
+
+    if(program_name == 0)
+        program_name = argv[0];
+    else
+        program_name++;
+
+    // Read the command-line parameters
+    if(argc < 3) {
+        printf("\t%s input.tga output.fi [block_size=8 [nof_threads=4 [rms_error=256]]]\n\n"
+            "\t\tYou must specify input and output filenames\n", program_name);
+
+        RETURN_FROM_MAIN(0);
+    }
+
+    input_filename = argv[1];
+    output_filename = argv[2];
+    if(argc > 3) {
+        blocksize = abs(atoi(argv[3]));
+        if(blocksize < 2)
+            blocksize = 2;
+        if(blocksize > IMAGE_MAXBLOCKSIZE)
+            blocksize = IMAGE_MAXBLOCKSIZE;
+
+        printf("Default block size changed to %d\n", blocksize);
+    } else
+        blocksize = 8;
+
+    if(argc > 4) {
+        nof_threads = abs(atoi(argv[4]));
+        if(nof_threads == 0) nof_threads = 1;
+    } else {
+#ifdef SINGLE_THREAD_BY_DEFAULT
+        nof_threads = 1;
+#else
+        nof_threads = 4;
+#endif
+    }
+    printf("Number of threads set to %d\n", nof_threads);
+
+    if(argc > 5)
+        rms_error = abs(atoi(argv[5]));
+    else
+        rms_error = 256;
+
+    // worst_diff = rms_error^2 * blocksize^2. The product is evaluated in
+    // 64 bits and saturated at the largest unsigned int: a plain unsigned
+    // multiplication wraps around for a large rms_error (8192 with block
+    // size 8 already yields 0), which would turn "tolerate any error" into
+    // "tolerate no error".
+    diff_limit = (unsigned int)-1;
+    wide_diff = (unsigned long long)rms_error*rms_error;
+    if(wide_diff > diff_limit)
+        wide_diff = diff_limit;
+    block_area = (unsigned long long)blocksize*blocksize;
+    if(block_area > diff_limit)
+        block_area = diff_limit;
+    wide_diff *= block_area; // both factors are <= diff_limit, so this fits into 64 bits
+    if(wide_diff > diff_limit)
+        wide_diff = diff_limit;
+    worst_diff = (unsigned int)wide_diff;
+    printf("RMS error %u, worst difference for first block size will be %u\n", rms_error, worst_diff);
+
+    // Read the source image
+    switch(tgaLoad(input_filename, &image_pc)) {
+    case TGALOAD_OKAY:
+        break;
+    case TGALOAD_CANTOPENFILE:
+        printf("Can\'t open %s\n", input_filename);
+        RETURN_FROM_MAIN(1);
+    case TGALOAD_DAMAGEDFILE:
+        printf("Can\'t open %s, file is damaged\n", input_filename);
+        RETURN_FROM_MAIN(1);
+    case TGALOAD_UNSUPPORTEDFILETYPE:
+        printf("Can\'t open %s, unsupported file type\n", input_filename);
+        RETURN_FROM_MAIN(1);
+    case TGALOAD_MEMORYALLOCERROR:
+        printf("Can\'t open %s, memory allocation error\n", input_filename);
+        RETURN_FROM_MAIN(1);
+    default:
+        printf("Undefined error while opening %s\n", input_filename);
+        RETURN_FROM_MAIN(1);
+    }
+
+    // Both dimensions must be multiples of the block size and at least two blocks long
+    if((image_pc.w%blocksize) != 0 || (image_pc.h%blocksize) != 0 || image_pc.w < (blocksize*2) || image_pc.h < (blocksize*2)) {
+        printf("Image sizes must be divisions of %d and greater than %d\n", blocksize, blocksize*2);
+        RETURN_FROM_MAIN(2);
+    }
+
+    if(!UnpackChannels(&image_pc, &image_uc)) {
+        printf("Can\'t unpack image channels\n");
+        RETURN_FROM_MAIN(3);
+    }
+
+    // The packed pixels are no longer needed once the channels are unpacked
+    free(image_pc.data);
+
+    BGRtoYCBCR(&image_uc);
+
+    // Save the resulting image
+    switch(fiSave(output_filename, &image_uc, blocksize, nof_threads, worst_diff)) {
+    case FISAVE_OKAY:
+        break;
+    case FISAVE_CANTOPENFILE:
+        printf("Can\'t save %s\n", output_filename);
+        RETURN_FROM_MAIN(4);
+    case FISAVE_WRONGIMAGESIZE:
+        printf("Can\'t save %s, wrong image size\n", output_filename);
+        RETURN_FROM_MAIN(4);
+    case FISAVE_DAMAGEDIMAGESTRUCT:
+        printf("Can\'t save %s, internal error, damaged image structure\n", output_filename);
+        RETURN_FROM_MAIN(4);
+    default:
+        printf("Undefined error while saving %s\n", output_filename);
+        RETURN_FROM_MAIN(4);
+    }
+
+    for(i = 0; i < image_uc.nof_channels; i++)
+        free(image_uc.data[i]);
+    free(image_uc.data);
+
+#ifdef I_USE_MPI
+    printf("===\n\trank %d finished\n===\n", my_rank);
+
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
diff --git a/src/cpu_driven/fi_save_blocks_search_workers.c b/src/cpu_driven/fi_save_blocks_search_workers.c
new file mode 100644
index 0000000..7e13028
--- /dev/null
+++ b/src/cpu_driven/fi_save_blocks_search_workers.c
@@ -0,0 +1,204 @@
+
+#include
+#include
+
+#include "../image_misc.h"
+#include "../fi_save_blocks_search_workers.h"
+/*
+ * Finds the best matching domain block for one range block of a channel and
+ * stores the result in *block_p.
+ *
+ * rx, ry        - position of the range block inside data_p
+ * block_p       - receives dx, dy, tr, u, v (and is_divided when split)
+ * blocksize     - edge length of the range block
+ * w, h          - dimensions of data_p; data_p rows are w bytes apart
+ * scaled_data_p - domain pixels, indexed here with a row stride of w/2
+ * data_p        - source pixels the range block is copied from
+ * worst_diff    - largest summed squared error accepted before splitting
+ *
+ * The range block is copied into slot 0 of a local buffer and slots 1..7 are
+ * filled by ApplyReversedTransformToRangeBlock(). Every position (t, s) with
+ * s <= h/2-blocksize and t <= w/2-blocksize is then tried against all eight
+ * slots; for each pair a scale u (0..31, in 1/32 steps) and an offset v are
+ * derived from the pixel sums (the u formula has the shape of a least-squares
+ * slope), the summed squared error is measured and the lowest one is kept.
+ *
+ * If the best error still exceeds worst_diff and blocksize is even and
+ * greater than 2, the four quadrants are searched recursively with
+ * blocksize/2 and worst_diff/4 and block_p->is_divided is set.
+ * NOTE(review): block_p->divided_into is assumed to already point at four
+ * child entries, and is_divided is only ever set to true here - presumably
+ * the caller allocates and clears both; confirm.
+ */
+static void fiFindBestDomainBlock(unsigned int rx, unsigned int ry,
+ image_rangeblock_type *block_p, unsigned int blocksize,
+ unsigned int w, unsigned int h,
+ unsigned char *scaled_data_p,
+ unsigned char *data_p,
+ unsigned int worst_diff
+ )
+{
+ unsigned int best_diff;
+ long long range_sum, domain_sum, rd_sum; // Sum of the range-block pixels, sum of the domain-block pixels, sum of range pixel times domain pixel
+ long long domain_disp;
+ unsigned int l, m, s, t, tr;
+ unsigned char rotated_data_p[8*IMAGE_MAXBLOCKSIZE*IMAGE_MAXBLOCKSIZE]; // eight slots of blocksize*blocksize bytes: the range block plus seven transformed copies
+
+ for(l = 0; l < blocksize; l++) {
+ memcpy(rotated_data_p+blocksize*l, data_p+w*(ry+l)+rx, blocksize);
+ }
+ for(l = 1; l < 8; l++) {
+ ApplyReversedTransformToRangeBlock(rotated_data_p, rotated_data_p+l*blocksize*blocksize, l, blocksize);
+ }
+
+ range_sum = 0;
+ for(l = 0; l < blocksize; l++)
+ for(m = 0; m < blocksize; m++) {
+ range_sum += rotated_data_p[l*blocksize+m];
+ }
+ block_p->u = 0; block_p->v = (short)( ((range_sum/(blocksize*blocksize))*63+128)/255 ); // Fallback values in case no domain block fits at all
+ best_diff = 0; // Any number > 255*255*blocksize*blocksize=4161600 (for 8) would do here, but instead we measure the difference for the v and u set above
+ for(l = 0; l < blocksize; l++) {
+ for(m = 0; m < blocksize; m++) {
+ int br = rotated_data_p[l*blocksize+m];
+ int mult;
+
+ mult = (block_p->v*255/63-br);
+ best_diff += mult*mult;
+ }
+ }
+ //printf("range_disp %d, range_sum %d\n", range_disp, range_sum);
+
+ for(s = 0; s <= h/2-blocksize; s++) {
+ for(t = 0; t <= w/2-blocksize; t++) {
+ domain_disp = domain_sum = 0;
+ for(l = 0; l < blocksize; l++) {
+ for(m = 0; m < blocksize; m++) {
+ int bd = scaled_data_p[(s+l)*w/2+t+m];
+
+ domain_sum += bd;
+ domain_disp += bd*bd;
+ }
+ }
+ domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; // n*sum(d^2)-sum(d)^2; zero when all domain pixels are equal
+
+ for(tr = 0; tr < 8; tr++) {
+ unsigned int cur_diff = 0;
+ int u_int, v_int;
+
+ if(domain_disp == 0) { // flat domain block: no scale can be derived, only the offset is used
+ u_int = 0;
+ v_int = (int)( ((range_sum/(blocksize*blocksize))*63+128)/255 );
+ } else {
+ rd_sum = 0;
+ for(l = 0; l < blocksize; l++) {
+ for(m = 0; m < blocksize; m++) {
+ int br = rotated_data_p[tr*blocksize*blocksize+l*blocksize+m];
+ int bd = scaled_data_p[(s+l)*w/2+t+m];
+
+ rd_sum += br*bd;
+ }
+ }
+
+ u_int = (int)( 32*((int)(blocksize*blocksize)*rd_sum-range_sum*domain_sum)/domain_disp ); // brightness in the interval [0, 1.0), i.e. from [0 to 32)
+ // av_u += u_int; nof_u++;
+ //if(u_int < 0) {/*printf("u = %f\n", (float)(u_int/51.0));*/u_int = -u_int;} if(u_int > 255) {/*printf("u = %f\n", (float)(u_int/51.0));*/u_int = 255;}
+ //if(u_int < 0 || u_int > 255) continue;
+ if(u_int < 0) u_int = 0; if(u_int > 31) u_int = 31; // clamp the scale to 0..31
+ v_int = (int)( (range_sum-domain_sum*u_int/32)/(int)(blocksize*blocksize) );
+ if(v_int < -255 || v_int > 255) continue; // offset outside -255..255: skip this transform
+ if(v_int < 0)
+ v_int = (v_int*63-128)/255;
+ else
+ v_int = (v_int*63+128)/255;
+
+ //if(v_int < -255) v_int = -255; if(v_int > 255) v_int = 255;
+ //printf("resulted u %d v %d\n", u_int, v_int);
+ }
+
+ // Summed squared error using the quantised u (1/32 steps) and v (255/63 steps)
+ for(l = 0; l < blocksize; l++) {
+ for(m = 0; m < blocksize; m++) {
+ int br = rotated_data_p[tr*blocksize*blocksize+l*blocksize+m];
+ int bd = scaled_data_p[(s+l)*w/2+t+m];
+ int mult;
+
+ mult = (bd*u_int/32+v_int*255/63-br);
+ cur_diff += mult*mult;
+ }
+ }
+ //printf("%d < %d\n", cur_diff, best_diff);
+ if(cur_diff < best_diff) {
+ best_diff = cur_diff;
+ block_p->dx = t;
+ block_p->dy = s;
+ block_p->tr = tr;
+ block_p->u = u_int;
+ block_p->v = v_int;
+ }
+
+ if(domain_disp == 0 || best_diff == 0) // stop trying transforms for a flat domain block or after an exact match
+ break;
+ }
+ /*if(t%10 == 0)
+ printf("- %d dbs ok\n", i*max_dby*max_dbx+s*max_dbx+t);*/
+ if(best_diff == 0)
+ break;
+ }
+
+ if(best_diff == 0)
+ break;
+ }
+
+ if(best_diff > worst_diff && blocksize > 2 && blocksize%2 == 0) {
+ printf("best_diff %d/%d (=rms^2*blocksize^2), divided block size %d to %d\n", best_diff, worst_diff, blocksize, blocksize/2);
+
+ fiFindBestDomainBlock(rx, ry, // Position of the new block
+ (image_rangeblock_type *)block_p->divided_into, // Pointer to the new block
+ blocksize/2, // Halve the block size
+ w, h, scaled_data_p, data_p,
+ worst_diff/4); // Divide the allowed difference by 4 (the new block is 4 times smaller)
+
+ fiFindBestDomainBlock(rx+blocksize/2, ry, // Position of the new block
+ (image_rangeblock_type *)block_p->divided_into+1, // Pointer to the new block
+ blocksize/2, // Halve the block size
+ w, h, scaled_data_p, data_p,
+ worst_diff/4); // Divide the allowed difference by 4 (the new block is 4 times smaller)
+
+ fiFindBestDomainBlock(rx, ry+blocksize/2, // Position of the new block
+ (image_rangeblock_type *)block_p->divided_into+2, // Pointer to the new block
+ blocksize/2, // Halve the block size
+ w, h, scaled_data_p, data_p,
+ worst_diff/4); // Divide the allowed difference by 4 (the new block is 4 times smaller)
+
+ fiFindBestDomainBlock(rx+blocksize/2, ry+blocksize/2, // Position of the new block
+ (image_rangeblock_type *)block_p->divided_into+3, // Pointer to the new block
+ blocksize/2, // Halve the block size
+ w, h, scaled_data_p, data_p,
+ worst_diff/4); // Divide the allowed difference by 4 (the new block is 4 times smaller)
+
+ block_p->is_divided = true;
+ }
+}
+
+/*
+ * Worker routine: searches domain blocks for every worker_step-th range
+ * block of the image, beginning with block number worker_start.
+ *
+ * arg points at a fi_worker_arg_type describing the job. Range blocks are
+ * numbered channel by channel, nof_blocks_per_channel per channel, row by
+ * row inside a channel. A progress line is printed for every tenth block
+ * this worker handles. Always returns 0.
+ *
+ * The two signatures presumably match the thread entry points expected on
+ * Windows and on other systems - confirm where the threads are started.
+ */
+#ifdef WIN32
+unsigned int __stdcall fiWorkerFunc(void *arg)
+#else
+void *fiWorkerFunc(void *arg)
+#endif
+{
+    fi_worker_arg_type *job = (fi_worker_arg_type *)arg;
+
+    // Snapshot of the job description
+    unsigned char *scaled_planes = job->scaled_data;
+    unsigned char **planes = job->data;
+    image_rangeblock_type *blocks = job->block_p;
+    unsigned int per_channel = job->nof_blocks_per_channel;
+    unsigned int width = job->w;
+    unsigned int height = job->h;
+    unsigned int channels = job->nof_channels;
+    unsigned int alloc_memory = job->alloc_memory;
+    unsigned int side = job->blocksize;
+    unsigned int first = job->worker_start;
+    unsigned int stride = job->worker_step;
+    unsigned int worst_diff = job->worst_diff;
+
+    unsigned int total = per_channel*channels;
+    unsigned int idx;
+
+    for(idx = first; idx < total; idx += stride) {
+        unsigned int plane = idx / per_channel;
+        unsigned int col = ((idx % per_channel) % (width/side)) * side;
+        unsigned int row = ((idx % per_channel) / (width/side)) * side;
+        unsigned int done = (idx-first)/stride; // blocks this worker has already finished
+
+        if(done%10 == 0)
+            printf("worker %d %d rbs (%f) ok\n", first+1, done,
+                (float)( 100.0 * (double)done / (double)((total-first-1)/stride+1) ));
+
+        // The block record for range block idx sits idx entries after the start of the array
+        fiFindBestDomainBlock(col, row,
+            blocks+idx, side,
+            width, height,
+            scaled_planes+plane*alloc_memory/4,
+            planes[plane],
+            worst_diff);
+    }
+
+    return 0;
+}
diff --git a/src/decompress.c b/src/decompress.c
new file mode 100644
index 0000000..a6a1115
--- /dev/null
+++ b/src/decompress.c
@@ -0,0 +1,93 @@
+
+#include
+#include
+#include
+
+#include "fi_load.h"
+#include "image_misc.h"
+#include "tga_save.h"
+
+/*
+ * Decoder command-line entry point.
+ *
+ * Usage: <program> input.fi output.tga [scale=1]
+ *
+ * Loads the .fi file with fiLoad, converts it with YCBCRtoBGR, packs the
+ * channels with PackChannels and writes the result with tgaSave.
+ *
+ * Exit status: 0 on success or when only the usage text is printed,
+ * 1 if the input cannot be loaded, 3 if the channels cannot be packed,
+ * 4 if the output cannot be saved.
+ */
+int main(int argc, char **argv)
+{
+    char *input_filename, *output_filename;
+    image_pc_type image_pc;
+    image_uc_type image_uc;
+    unsigned int i, scale;
+    char *program_name;
+    int packed_ok, exit_code;
+
+    // argv[0] is not guaranteed to exist, so only dereference it when it does
+    if(argc > 0 && argv[0] != 0) {
+        // Strip the directory part, accepting both '\' and '/' separators
+        program_name = strrchr(argv[0], '\\');
+
+        if(program_name == 0)
+            program_name = strrchr(argv[0], '/');
+
+        if(program_name == 0)
+            program_name = argv[0];
+        else
+            program_name++;
+    } else
+        program_name = "decompress";
+
+    if(argc < 3) {
+        printf("\t%s input.fi output.tga [scale=1]\n\n"
+            "\t\tYou must specify input and output filenames\n", program_name);
+
+        return 0;
+    }
+    input_filename = argv[1];
+    output_filename = argv[2];
+    if(argc > 3) {
+        // A negative scale is taken by magnitude; 0 or unparsable input falls back to 1
+        scale = abs(atoi(argv[3]));
+        if(scale < 1)
+            scale = 1;
+
+        printf("Scale changed to %u\n", scale);
+    } else
+        scale = 1;
+
+    switch(fiLoad(input_filename, &image_uc, scale)) {
+    case FILOAD_OKAY:
+        break;
+    case FILOAD_CANTOPENFILE:
+        printf("Can\'t open %s\n", input_filename);
+        return 1;
+    case FILOAD_DAMAGEDFILE:
+        printf("Can\'t open %s, file is damaged\n", input_filename);
+        return 1;
+    case FILOAD_MEMORYALLOCERROR:
+        printf("Can\'t open %s, memory allocation error\n", input_filename);
+        return 1;
+    default:
+        printf("Undefined error while opening %s\n", input_filename);
+        return 1;
+    }
+
+    YCBCRtoBGR(&image_uc);
+
+    packed_ok = PackChannels(&image_uc, &image_pc) ? 1 : 0;
+
+    // The per-channel buffers are no longer needed whether or not packing worked
+    for(i = 0; i < image_uc.nof_channels; i++)
+        free(image_uc.data[i]);
+    free(image_uc.data);
+
+    if(!packed_ok) {
+        printf("Can\'t unpack image channels\n");
+        return 3;
+    }
+
+    exit_code = 4;
+    switch(tgaSave(output_filename, &image_pc)) {
+    case TGASAVE_OKAY:
+        exit_code = 0;
+        break;
+    case TGASAVE_CANTOPENFILE:
+        printf("Can\'t save %s\n", output_filename);
+        break;
+    case TGASAVE_DAMAGEDIMAGESTRUCT:
+        printf("Can\'t save %s, internal error, damaged image structure\n", output_filename);
+        break;
+    case TGASAVE_TOOMANYCHANNELS:
+        printf("Can\'t save %s, too many channels in input file\n", output_filename);
+        break;
+    default:
+        printf("Undefined error while saving %s\n", output_filename);
+        break;
+    }
+
+    // Release the packed image on the failure paths as well as on success
+    free(image_pc.data);
+
+    return exit_code;
+}
diff --git a/src/fi_file.h b/src/fi_file.h
new file mode 100644
index 0000000..8faa637
--- /dev/null
+++ b/src/fi_file.h
@@ -0,0 +1,15 @@
+
+#ifndef _FI_FILE_H
+#define _FI_FILE_H
+
+#define FI_SIGN 0xC0F1
+
+typedef struct { // Header stored at the start of a .fi file; fiLoad reads it with a single fread of sizeof(FI_HEADER)
+ unsigned short sign; // Signature; fiLoad requires it to equal FI_SIGN
+ unsigned short blocksize; // Block size. Original note: if(blocksize == 0) blocksize = 8; NOTE(review): fiLoad rejects blocksize < 2, so this fallback looks obsolete - confirm
+ unsigned int w; // Image width as stored (before any decoder scaling)
+ unsigned int h; // Image height as stored (before any decoder scaling)
+ unsigned int noc; // Number of channels
+} FI_HEADER;
+
+#endif
diff --git a/src/fi_load.c b/src/fi_load.c
new file mode 100644
index 0000000..f67f0be
--- /dev/null
+++ b/src/fi_load.c
@@ -0,0 +1,469 @@
+
+#include
+#include
+#include
+
+#include "fi_file.h"
+#include "fi_load.h"
+#include "image_misc.h"
+#include "arrays.h"
+
+/*
+ * Returns the length of the stream f in bytes and leaves the read position
+ * where it was.
+ *
+ * ftell reports failure as -1L and fseek as a non-zero result; in either case
+ * 0 is returned instead of converting the error value to a huge size_t.
+ */
+static size_t GetFileLength(FILE *f)
+{
+    long cur_pos, file_len;
+
+    cur_pos = ftell(f);
+    if(cur_pos < 0)
+        return 0;
+
+    if(fseek(f, 0, SEEK_END) != 0)
+        return 0;
+
+    file_len = ftell(f);
+
+    // Always try to restore the original position, even if the length query failed
+    if(fseek(f, cur_pos, SEEK_SET) != 0 || file_len < 0)
+        return 0;
+
+    return (size_t)file_len;
+}
+
+static bool fiDecompressBlocks(unsigned char *cb, unsigned int cb_size, image_rangeblock_type *b, unsigned int w, unsigned int h, unsigned int blocksize, unsigned int channels, unsigned int scale, unsigned int dxy_bitssize);
+static unsigned int fiCoverRangeBlock(image_rangeblock_type *block_p, unsigned int blocksize, unsigned char *range_p, unsigned char *prev_data, unsigned int w, unsigned int h);
+
+/*
+ * Loads a .fi file and decodes it into one buffer per channel.
+ *
+ * fname - path of the file to read.
+ * image - output. On FILOAD_OKAY image->w and image->h hold the scaled size,
+ *         and image->data holds image->nof_channels malloc'ed buffers of
+ *         image->alloc_memory bytes each; the caller frees every channel
+ *         buffer and then image->data. On any other result the contents of
+ *         *image must not be used.
+ * scale - integer magnification applied to the stored size; values below 1
+ *         are treated as 1.
+ *
+ * Returns FILOAD_OKAY, FILOAD_CANTOPENFILE, FILOAD_DAMAGEDFILE (bad header,
+ * payload larger than the worst case for the header's dimensions, short read
+ * or undecodable block data) or FILOAD_MEMORYALLOCERROR.
+ *
+ * The file is closed and all temporary buffers are released on every path.
+ */
+int fiLoad(char *fname, image_uc_type *image, unsigned int scale)
+{
+    FILE *f;
+    FI_HEADER head;
+    size_t file_len, blocks_bytes;
+    unsigned int i, j, k, channels, alloc_memory, nof_blocks, blocksize;
+    unsigned int compressed_capacity, compressed_blocks_size;
+    unsigned int nof_allocated_channels = 0;
+    unsigned int dxy_bitssize, cblock_bitssize; // Sizes in bits of the packed dx/dy value and of a whole block
+    unsigned char *prev_data = NULL; // Image data from the previous iteration
+    unsigned char *compressed_blocks = NULL;
+    image_rangeblock_type *blocks = NULL;
+    bool channel_table_allocated = false;
+    int result = FILOAD_DAMAGEDFILE;
+
+    if(scale < 1)
+        scale = 1;
+
+    f = fopen(fname, "rb");
+
+    if(!f)
+        return FILOAD_CANTOPENFILE;
+
+    file_len = GetFileLength(f);
+
+    if(fread(&head, sizeof(FI_HEADER), 1, f) != 1)
+        goto FAILED;
+
+    // A file without channels has nothing to decode and would later divide by zero
+    if(head.sign != FI_SIGN || head.blocksize < 2 || head.noc < 1)
+        goto FAILED;
+
+    blocksize = head.blocksize;
+
+    if((head.w%blocksize) != 0 || (head.h%blocksize) != 0 || head.w < (blocksize*2) || head.h < (blocksize*2))
+        goto FAILED;
+
+    GetDxyBitsSize(head.w, head.h, &dxy_bitssize);
+    cblock_bitssize = GetCblockBitsSize(dxy_bitssize);
+
+    image->w = head.w*scale;
+    image->h = head.h*scale;
+    image->nof_channels = channels = head.noc;
+    image->alloc_memory = alloc_memory = image->w*image->h;
+    blocksize *= scale;
+    nof_blocks = image->w*image->h*image->nof_channels/(blocksize*blocksize);
+
+    // Worst-case payload size:
+    //  - every block is 2x2 and takes cblock_bitssize bits,
+    //  - plus the subdivision flags: a block of size n (a power of two) can be split
+    //    1+4+16+...+pow(4, log2(n)-1) = (n*n-1)/3 times; a size that is not a power
+    //    of two needs fewer flags, so nof_blocks*(blocksize*blocksize-1)/3 bits suffice,
+    //  - plus 1 byte in case the number of bits is not a multiple of 8.
+    compressed_capacity = (cblock_bitssize*image->w*image->h*image->nof_channels/4+nof_blocks*(blocksize*blocksize-1)/3)/8+1;
+
+    // Never read more bytes than the buffer can hold: the payload length comes from the file
+    if(file_len < sizeof(FI_HEADER) || file_len-sizeof(FI_HEADER) > compressed_capacity)
+        goto FAILED;
+    compressed_blocks_size = (unsigned int)(file_len-sizeof(FI_HEADER));
+
+    compressed_blocks = malloc(compressed_capacity);
+    printf("compressed_blocks = malloc(%u)\n", compressed_capacity);
+    if(!compressed_blocks) {
+        result = FILOAD_MEMORYALLOCERROR;
+        goto FAILED;
+    }
+
+    // Read the packed blocks from the file
+    printf("compressed_blocks_size %u\n", compressed_blocks_size);
+    if(fread(compressed_blocks, 1, compressed_blocks_size, f) != compressed_blocks_size)
+        goto FAILED;
+    fclose(f);
+    f = NULL;
+
+    // From here until decoding starts every failure is an allocation failure
+    result = FILOAD_MEMORYALLOCERROR;
+
+    // (blocksize*blocksize-1)/3 is the worst-case number of subdivisions of one block
+    blocks_bytes = sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3;
+    blocks = malloc(blocks_bytes);
+    printf("blocks = malloc(%u)\n", (unsigned int)blocks_bytes);
+    if(!blocks)
+        goto FAILED;
+    SetBlocksPointers(blocks, blocksize, nof_blocks, image->w, image->h);
+
+    prev_data = malloc(alloc_memory/4);
+    printf("prev_data = malloc(%u)\n", alloc_memory/4);
+    if(!prev_data)
+        goto FAILED;
+
+    image->data = malloc(channels*sizeof(void *));
+    printf("image->data = malloc(%u)\n", (unsigned int)(channels*sizeof(void *)));
+    if(!image->data)
+        goto FAILED;
+    channel_table_allocated = true;
+
+    for(i = 0; i < channels; i++) {
+        image->data[i] = malloc(alloc_memory);
+        printf("image->data[%u] = malloc(%u)\n", i, alloc_memory);
+        if(!image->data[i])
+            goto FAILED;
+        nof_allocated_channels++;
+
+        // Decoding iterates starting from an all-zero image
+        memset(image->data[i], 0, alloc_memory);
+    }
+
+    if(!fiDecompressBlocks(compressed_blocks, compressed_blocks_size, blocks, head.w, head.h, head.blocksize, head.noc, scale, dxy_bitssize)) {
+        result = FILOAD_DAMAGEDFILE;
+        goto FAILED;
+    }
+    free(compressed_blocks);
+    compressed_blocks = NULL;
+
+    for(i = 0; i < channels; i++) {
+        unsigned char *data_p;
+        unsigned int nof_iterations = 0;
+
+        data_p = image->data[i];
+
+        // Re-apply the block transformations until the mean squared change per pixel
+        // drops to 0 or 1024 iterations have been made
+        while(1) {
+            unsigned long long diff = 0;
+            image_rangeblock_type *block_p;
+
+            block_p = blocks+i*nof_blocks/image->nof_channels;
+            Scale2to1(data_p, prev_data, image->w, image->h);
+            for(j = 0; j < image->h/blocksize; j++) {
+                for(k = 0; k < image->w/blocksize; k++) {
+                    unsigned char *range_p;
+
+                    range_p = &data_p[j*blocksize*image->w+k*blocksize];
+
+                    diff += fiCoverRangeBlock(block_p, blocksize, range_p, prev_data, image->w, image->h);
+
+                    block_p++;
+                }
+            }
+
+            diff /= image->w*image->h;
+            printf("diff %llu\n", diff);
+            nof_iterations++;
+            if(nof_iterations >= 1024 || diff == 0)
+                break;
+        }
+        printf("\nchannel %d completed after %d iterations\n", i+1, nof_iterations);
+    }
+
+    free(blocks);
+    free(prev_data);
+
+    return FILOAD_OKAY;
+
+FAILED:
+    // Single failure exit: release whatever has been acquired so far
+    if(f)
+        fclose(f);
+    if(channel_table_allocated) {
+        for(i = 0; i < nof_allocated_channels; i++)
+            free(image->data[i]);
+        free(image->data);
+    }
+    free(compressed_blocks);
+    free(blocks);
+    free(prev_data);
+
+    return result;
+}
+
+/*
+ * Computes where pixel (row l, column m) of a range block reads from, as an
+ * offset relative to the first pixel of its domain block in the half-resolution
+ * image, for transformation tr.
+ *
+ * The index expressions are the ones the decoder has always used; they are kept
+ * verbatim (including the order of multiplication and division by 2).
+ *
+ * Returns false, leaving *offset untouched, when tr is not a known transformation.
+ */
+static bool fiDomainPixelOffset(unsigned int tr, unsigned int l, unsigned int m, unsigned int blocksize, unsigned int w, unsigned int *offset)
+{
+    const unsigned int last = blocksize-1;
+
+    switch(tr) {
+    case IMAGE_RB_TRANSFORMATION_NONE:
+        *offset = l*(w/2)+m;
+        break;
+    case IMAGE_RB_TRANSFORMATION_90RIGHT:
+        *offset = (last-m)*w/2+l;
+        break;
+    case IMAGE_RB_TRANSFORMATION_180RIGHT:
+        *offset = (last-l)*w/2+last-m;
+        break;
+    case IMAGE_RB_TRANSFORMATION_270RIGHT:
+        *offset = m*w/2+last-l;
+        break;
+    case IMAGE_RB_TRANSFORMATION_VFLIP:
+        *offset = (last-l)*w/2+m;
+        break;
+    case IMAGE_RB_TRANSFORMATION_HFLIP:
+        *offset = l*w/2+last-m;
+        break;
+    case IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP:
+        *offset = m*w/2+l;
+        break;
+    case IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP:
+        *offset = (last-m)*w/2+last-l;
+        break;
+    default:
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Redraws one range block from the previous iteration's half-resolution image.
+ *
+ * block_p   - block description (dx, dy, tr, u, v, or a subdivision into four).
+ * blocksize - side of the block in pixels.
+ * range_p   - first pixel of the block in the full-size channel buffer (row stride w).
+ * prev_data - half-resolution copy of the channel from the previous iteration.
+ * w, h      - full-size image dimensions (h is not used here).
+ *
+ * Each pixel becomes domain*u/32+v clamped to 0..255. A divided block is handled
+ * by recursing into its four quarters.
+ *
+ * Returns the sum of squared differences between the old and new pixel values.
+ * A block with an unknown transformation is left untouched and contributes 0.
+ */
+static unsigned int fiCoverRangeBlock(image_rangeblock_type *block_p, unsigned int blocksize, unsigned char *range_p, unsigned char *prev_data, unsigned int w, unsigned int h)
+{
+    unsigned int l, m, offset;
+    unsigned int diff = 0;
+    unsigned char *domain_p;
+
+    if(block_p->is_divided) {
+        image_rangeblock_type *quarters = (image_rangeblock_type *)block_p->divided_into;
+
+        // Quarters are stored in the order top-left, top-right, bottom-left, bottom-right
+        diff += fiCoverRangeBlock(quarters, blocksize/2, range_p, prev_data, w, h);
+        diff += fiCoverRangeBlock(quarters+1, blocksize/2, range_p+blocksize/2, prev_data, w, h);
+        diff += fiCoverRangeBlock(quarters+2, blocksize/2, range_p+w*blocksize/2, prev_data, w, h);
+        diff += fiCoverRangeBlock(quarters+3, blocksize/2, range_p+(w+1)*blocksize/2, prev_data, w, h);
+
+        return diff;
+    }
+
+    domain_p = &prev_data[block_p->dy*w/2+block_p->dx];
+
+    for(l = 0; l < blocksize; l++) {
+        for(m = 0; m < blocksize; m++) {
+            int loc_diff, res;
+
+            if(!fiDomainPixelOffset(block_p->tr, l, m, blocksize, w, &offset))
+                return diff; // unknown transformation: nothing has been written yet
+
+            res = (int)domain_p[offset]*(int)block_p->u/32+(int)block_p->v;
+
+            if(res < 0)
+                res = 0;
+            if(res > 255)
+                res = 255;
+
+            loc_diff = (int)(*range_p)-res;
+            diff += loc_diff*loc_diff;
+
+            *range_p = res;
+
+            range_p++;
+        }
+
+        // Move to the start of the block's next row in the full-size image
+        range_p += w-blocksize;
+    }
+
+    return diff;
+}
+
+/*
+ * Reads one block (and, if it is divided, its four quarters recursively) from
+ * the bit stream cbarray into *bp.
+ *
+ * Bit layout, in reading order:
+ *   - 1 bit "divided" flag, present only when blocksize is even and above 3;
+ *   - for an undivided block: 5 bits u; then, if u > 0, 7 bits v (bit 6 is the
+ *     sign, the low 6 bits the magnitude), dxy_bitssize bits holding
+ *     dy*(w/2-1)+dx and 3 bits tr; if u == 0, only 6 bits v.
+ * v is rescaled by 255/63; dx and dy are multiplied by scale after validation.
+ *
+ * Returns false when the stream runs out or dx/dy lie outside the allowed range.
+ */
+static bool fiDecompressOneBlock(array_type *cbarray, image_rangeblock_type *bp, unsigned int blocksize, unsigned int w, unsigned int h, unsigned int scale, unsigned int dxy_bitssize)
+{
+    unsigned int divided_flag = 0, packed_dxy, raw_tr, raw_v, raw_u;
+
+    if((blocksize % 2) == 0 && blocksize > 3) {
+        if(!arrayReadBits(cbarray, 1, &divided_flag))
+            return false;
+    }
+
+    if(divided_flag) {
+        image_rangeblock_type *quarters = (image_rangeblock_type *)bp->divided_into;
+        unsigned int q;
+
+        for(q = 0; q < 4; q++) {
+            if(!fiDecompressOneBlock(cbarray, quarters+q, blocksize/2, w, h, scale, dxy_bitssize))
+                return false;
+        }
+
+        bp->is_divided = true;
+
+        return true;
+    }
+
+    if(!arrayReadBits(cbarray, 5, &raw_u))
+        return false;
+    bp->u = raw_u;
+
+    if(bp->u > 0) {
+        int magnitude;
+
+        if(!arrayReadBits(cbarray, 7, &raw_v))
+            return false;
+        magnitude = (int)raw_v&63;
+        if(raw_v & 64)
+            bp->v = -magnitude;
+        else
+            bp->v = magnitude;
+        bp->v = (bp->v*255)/63;
+
+        if(!arrayReadBits(cbarray, dxy_bitssize, &packed_dxy))
+            return false;
+
+        // dx and dy share one field: packed_dxy = dy*(w/2-1)+dx
+        bp->dx = packed_dxy%(w/2-1);
+        if(bp->dx > w/2-blocksize)
+            return false;
+        bp->dx *= scale;
+
+        bp->dy = packed_dxy/(w/2-1);
+        if(bp->dy > h/2-blocksize)
+            return false;
+        bp->dy *= scale;
+
+        if(!arrayReadBits(cbarray, 3, &raw_tr))
+            return false;
+        bp->tr = raw_tr;
+    } else {
+        // u == 0: only a 6-bit brightness is stored
+        if(!arrayReadBits(cbarray, 6, &raw_v))
+            return false;
+        bp->v = raw_v&255;
+        bp->v = (bp->v*255)/63;
+
+        bp->dx = 0;
+        bp->dy = 0;
+        bp->tr = 0;
+    }
+
+    bp->is_divided = false;
+
+    return true;
+}
+
+/*
+ * Unpacks every top-level block of every channel from the cb_size bytes at cb
+ * into consecutive entries of b, channel by channel and row by row.
+ *
+ * Returns false as soon as one block cannot be decoded.
+ */
+static bool fiDecompressBlocks(unsigned char *cb, unsigned int cb_size, image_rangeblock_type *b, unsigned int w, unsigned int h, unsigned int blocksize, unsigned int channels, unsigned int scale, unsigned int dxy_bitssize)
+{
+    array_type bits;
+    image_rangeblock_type *next = b;
+    unsigned int channel, row, col;
+
+    // Bit reader positioned at the first bit of the payload
+    bits.bytes = cb;
+    bits.bp = bits.bytes;
+    bits.bits_offset = 0;
+    bits.bytes_offset = 0;
+    bits.bytes_size = cb_size;
+
+    for(channel = 0; channel < channels; channel++) {
+        for(row = 0; row < h/blocksize; row++) {
+            for(col = 0; col < w/blocksize; col++, next++) {
+                if(!fiDecompressOneBlock(&bits, next, blocksize, w, h, scale, dxy_bitssize))
+                    return false;
+            }
+        }
+    }
+
+    return true;
+}
diff --git a/src/fi_load.h b/src/fi_load.h
new file mode 100644
index 0000000..f1983f9
--- /dev/null
+++ b/src/fi_load.h
@@ -0,0 +1,14 @@
+
+#ifndef _FI_LOAD_H
+#define _FI_LOAD_H
+
+#include "image.h"
+
+#define FILOAD_OKAY 0
+#define FILOAD_CANTOPENFILE 1
+#define FILOAD_DAMAGEDFILE 2
+#define FILOAD_MEMORYALLOCERROR 3
+
+extern int fiLoad(char *fname, image_uc_type *image, unsigned int scale);
+
+#endif
diff --git a/src/fi_save.c b/src/fi_save.c
new file mode 100644
index 0000000..7d564c6
--- /dev/null
+++ b/src/fi_save.c
@@ -0,0 +1,134 @@
+
+#include
+#include
+#include
+#include
+
+#ifdef I_USE_MPI
+#include
+#endif
+
+#include "arrays.h"
+#include "fi_file.h"
+#include "fi_save.h"
+#include "fi_save_blocks_compression.h"
+#include "fi_save_blocks_search.h"
+#include "image_misc.h"
+
+static int fiConvertImage(image_uc_type *image, fi_compresseddata_type *cdat, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff);
+static int fiSaveCompressedToFile(char *fname, fi_compresseddata_type *cdat);
+
+/*
+ * Compresses image and writes it to the file fname.
+ *
+ * blocksize, nof_threads and worst_diff are passed on to fiConvertImage.
+ * Prints the time spent in the conversion, measured with clock() (processor
+ * time, not wall-clock time) and converted to milliseconds.
+ *
+ * Returns a FISAVE_* code. With I_USE_MPI only rank 0 writes the file; the
+ * other ranks return FISAVE_OKAY once the conversion has succeeded.
+ */
+int fiSave(char *fname, image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff)
+{
+    int result;
+    fi_compresseddata_type cdat;
+    clock_t compression_time;
+#ifdef I_USE_MPI
+    int my_rank;
+#endif
+
+    compression_time = clock();
+    result = fiConvertImage(image, &cdat, blocksize, nof_threads, worst_diff);
+    compression_time = clock()-compression_time;
+    // clock() counts in units of 1/CLOCKS_PER_SEC seconds, which is not milliseconds on every platform
+    printf("compression time %d ms\n", (int)((double)compression_time*1000.0/(double)CLOCKS_PER_SEC));
+
+    if(result != FISAVE_OKAY)
+        return result;
+
+#ifdef I_USE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+
+    if(my_rank == 0)
+        return fiSaveCompressedToFile(fname, &cdat);
+    else
+        return FISAVE_OKAY;
+#else
+    return fiSaveCompressedToFile(fname, &cdat);
+#endif
+}
+
+static int fiConvertImage(image_uc_type *image, fi_compresseddata_type *cdat, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff)
+{ /* Runs the block search (fiFindBestDomainBlocks) and packs the found blocks together with an FI_HEADER into *cdat
+   * (fiCompressBlocksAndAddHeader). Returns a FISAVE_* code. If the search fails its code is returned at once;
+   * otherwise data.blocks is freed before returning. With I_USE_MPI, ranks above 0 only set cdat->len = 0 and
+   * return FISAVE_OKAY, while rank 0 collects the blocks of all ranks and does the packing. */
+ FI_HEADER head;
+ unsigned int result;
+ fi_data_type data;
+#ifdef I_USE_MPI
+ int my_rank, nof_procs;
+#endif
+
+ result = fiFindBestDomainBlocks(image, blocksize, nof_threads, worst_diff, &data);
+ if(result != FISAVE_OKAY)
+ return result;
+
+#ifdef I_USE_MPI
+ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &nof_procs);
+ if(nof_procs > 1) {
+ image_rangeblock_type *resulted_blocks = 0;
+
+ if(my_rank == 0) {
+ resulted_blocks = malloc(sizeof(image_rangeblock_type)*data.nof_blocks*(blocksize*blocksize-1)/3); // For the reason behind this size see fi_save_blocks_search.c
+
+ if(!resulted_blocks) {
+ result = FISAVE_MEMORYALLOCERROR;
+ goto EXIT_STATE; // NOTE(review): rank 0 skips the MPI_Barrier/MPI_Reduce below while the other ranks still enter them - looks like this could hang; confirm
+ }
+ }
+
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ // Merge the blocks of all ranks into rank 0's buffer (byte-wise OR). For the reason behind this size see fi_save_blocks_search.c
+ MPI_Reduce(data.blocks, resulted_blocks, sizeof(image_rangeblock_type)*data.nof_blocks*(blocksize*blocksize-1)/3, MPI_BYTE, MPI_BOR, 0, MPI_COMM_WORLD);
+
+ if(my_rank == 0) {
+ free(data.blocks);
+ data.blocks = resulted_blocks;
+
+ // Recompute the block pointers after MPI_Reduce (the original comment is cut off here; presumably the OR-merged pointer fields are not usable as they are - confirm)
+ SetBlocksPointers(data.blocks, blocksize, data.nof_blocks, image->w, image->h);
+ }
+ }
+
+ if(my_rank > 0) {
+ cdat->len = 0;
+ result = FISAVE_OKAY;
+
+ goto EXIT_STATE;
+ }
+#endif
+
+ // Compress the resulting blocks for writing to the file
+ head.sign = FI_SIGN;
+ head.blocksize = blocksize;
+ head.noc = image->nof_channels;
+ head.w = image->w;
+ head.h = image->h;
+ result = fiCompressBlocksAndAddHeader(&head ,cdat, data.blocks, data.nof_blocks);
+
+#ifdef I_USE_MPI
+EXIT_STATE:
+#endif
+
+ free(data.blocks);
+
+ return result;
+}
+
+/*
+ * Writes the cdat->len bytes at cdat->data to the file fname.
+ *
+ * cdat->data is freed and cdat is reset (data = NULL, len = 0) whether or not
+ * the write succeeds, so the caller must not use the buffer afterwards.
+ *
+ * Returns FISAVE_OKAY, or FISAVE_CANTOPENFILE when the file cannot be opened,
+ * fully written or closed.
+ */
+static int fiSaveCompressedToFile(char *fname, fi_compresseddata_type *cdat)
+{
+    FILE *f;
+    int result = FISAVE_OKAY;
+
+    f = fopen(fname, "wb");
+    if(!f)
+        result = FISAVE_CANTOPENFILE;
+    else {
+        // A short write (e.g. disk full) must not be reported as success
+        if(fwrite(cdat->data, 1, cdat->len, f) != cdat->len)
+            result = FISAVE_CANTOPENFILE;
+
+        // Buffered data may only hit the disk on close, so its result matters too
+        if(fclose(f) != 0)
+            result = FISAVE_CANTOPENFILE;
+    }
+
+    free(cdat->data);
+    cdat->data = NULL;
+    cdat->len = 0;
+
+    return result;
+}
diff --git a/src/fi_save.h b/src/fi_save.h
new file mode 100644
index 0000000..0f44d51
--- /dev/null
+++ b/src/fi_save.h
@@ -0,0 +1,15 @@
+
+#ifndef _FI_SAVE_H
+#define _FI_SAVE_H
+
+#include "image.h"
+
+#define FISAVE_OKAY 0
+#define FISAVE_CANTOPENFILE 1
+#define FISAVE_WRONGIMAGESIZE 2
+#define FISAVE_DAMAGEDIMAGESTRUCT 3
+#define FISAVE_MEMORYALLOCERROR 4
+
+extern int fiSave(char *fname, image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff);
+
+#endif
diff --git a/src/fi_save_blocks_compression.c b/src/fi_save_blocks_compression.c
new file mode 100644
index 0000000..3589a0f
--- /dev/null
+++ b/src/fi_save_blocks_compression.c
@@ -0,0 +1,111 @@
+
+#include
+#include
+#include
+
+#include "arrays.h"
+#include "fi_file.h"
+#include "fi_save.h"
+#include "image_misc.h"
+
+#include "fi_save_blocks_compression.h"
+
+static bool fiCompressOneBlock(array_type *cbarray, image_rangeblock_type *b, unsigned int blocksize, unsigned int w, unsigned int h, unsigned int dxy_bitssize);
+
+static unsigned int g_statistics_nofdivisions, g_statistics_nofblocks;
+
+/*
+ * Builds the complete file image in memory: a copy of *head followed by the
+ * bit-packed form of the nof_blocks top-level blocks in blocks.
+ *
+ * On FISAVE_OKAY cdat->data is a malloc'ed buffer owned by the caller and
+ * cdat->len the number of bytes in use. On failure FISAVE_MEMORYALLOCERROR is
+ * returned (also when a block could not be written) and no buffer is left
+ * allocated. Packing statistics are printed to stdout.
+ */
+int fiCompressBlocksAndAddHeader(FI_HEADER *head ,fi_compresseddata_type *cdat, image_rangeblock_type *blocks, unsigned int nof_blocks)
+{
+    array_type bits;
+    unsigned int offset_bits, record_bits; // Sizes in bits of the packed dx/dy value and of a whole block
+    unsigned int n;
+
+    GetDxyBitsSize(head->w, head->h, &offset_bits);
+    record_bits = GetCblockBitsSize(offset_bits);
+
+    bits.bits_offset = 0;
+    bits.bytes_offset = 0;
+    // Worst case: every block is 2x2 and takes record_bits bits,
+    // plus the subdivision flags (see fi_save_blocks_search.c),
+    // plus 1 byte in case the number of bits is not a multiple of 8
+    bits.bytes_size = (record_bits*head->w*head->h*head->noc/4+nof_blocks*(head->blocksize*head->blocksize-1)/3)/8+1;
+
+    cdat->data = malloc(sizeof(FI_HEADER)+bits.bytes_size);
+    if(!cdat->data)
+        return FISAVE_MEMORYALLOCERROR;
+
+    // The header goes first; the bit stream starts right behind it
+    memcpy(cdat->data, head, sizeof(FI_HEADER));
+    bits.bytes = cdat->data+sizeof(FI_HEADER);
+    bits.bp = bits.bytes;
+    memset(bits.bytes, 0, bits.bytes_size);
+
+    g_statistics_nofblocks = 0;
+    g_statistics_nofdivisions = 0;
+
+    for(n = 0; n < nof_blocks; n++) {
+        if(fiCompressOneBlock(&bits, &blocks[n], head->blocksize, head->w, head->h, offset_bits))
+            continue;
+
+        free(cdat->data);
+        return FISAVE_MEMORYALLOCERROR;
+    }
+
+    // A partly filled last byte still has to be written
+    cdat->len = sizeof(FI_HEADER)+bits.bytes_offset;
+    if(bits.bits_offset > 0)
+        cdat->len++;
+
+    printf("statistics:\n\t%d blocks ~ %d bytes\n", g_statistics_nofblocks, (g_statistics_nofblocks*(record_bits+1)+7)/8);
+    printf("\t%d divisions - %d bytes\n", g_statistics_nofdivisions, (g_statistics_nofdivisions+7)/8);
+    printf("\ttotal ~ %d bytes\n", (g_statistics_nofblocks*(record_bits+1)+g_statistics_nofdivisions+7)/8);
+    printf("\tcbarray bytes_offset - %d bits_offset - %d\n", bits.bytes_offset, bits.bits_offset);
+    printf("\tcdat array size - %d bytes\n", (int)cdat->len);
+
+    return FISAVE_OKAY;
+}
+
+/*
+ * Appends one block (and, if it is divided, its four quarters recursively) to
+ * the bit stream cbarray.
+ *
+ * A divided block writes a 1 bit followed by its quarters. An undivided block
+ * writes a 0 bit only when blocksize is even and above 3, then 5 bits u; for
+ * u > 0 it continues with 7 bits v (64 added to the magnitude for negative v),
+ * dxy_bitssize bits holding dy*(w/2-1)+dx and 3 bits tr; for u == 0 it writes
+ * just the low 6 bits of v.
+ *
+ * Updates the g_statistics_* counters. Returns false when a write fails.
+ */
+static bool fiCompressOneBlock(array_type *cbarray, image_rangeblock_type *b, unsigned int blocksize, unsigned int w, unsigned int h, unsigned int dxy_bitssize)
+{
+    if(b->is_divided) {
+        image_rangeblock_type *quarters = (image_rangeblock_type *)b->divided_into;
+        unsigned int q;
+
+        if(!arrayWriteBits(cbarray, 1, 1))
+            return false;
+
+        for(q = 0; q < 4; q++) {
+            if(!fiCompressOneBlock(cbarray, quarters+q, blocksize/2, w, h, dxy_bitssize))
+                return false;
+        }
+
+        g_statistics_nofdivisions++;
+
+        return true;
+    }
+
+    // Only blocks that could have been divided carry the "not divided" flag
+    if((blocksize % 2) == 0 && blocksize > 3 && !arrayWriteBits(cbarray, 1, 0))
+        return false;
+
+    if(!arrayWriteBits(cbarray, 5, (unsigned int)b->u))
+        return false;
+
+    if(b->u > 0) {
+        unsigned int packed_v, packed_dxy;
+
+        if(b->v < 0)
+            packed_v = (unsigned int)(64-b->v);
+        else
+            packed_v = (unsigned int)b->v;
+
+        packed_dxy = (w/2-1)*((unsigned int)b->dy)+(unsigned int)b->dx;
+
+        if(!arrayWriteBits(cbarray, 7, packed_v)
+            || !arrayWriteBits(cbarray, dxy_bitssize, packed_dxy)
+            || !arrayWriteBits(cbarray, 3, (unsigned int)b->tr))
+            return false;
+    } else if(!arrayWriteBits(cbarray, 6, (unsigned int)(b->v&63)))
+        return false;
+
+    g_statistics_nofblocks++;
+
+    return true;
+}
diff --git a/src/fi_save_blocks_compression.h b/src/fi_save_blocks_compression.h
new file mode 100644
index 0000000..3dacbb4
--- /dev/null
+++ b/src/fi_save_blocks_compression.h
@@ -0,0 +1,12 @@
+
+#ifndef _FI_SAVE_BLOCKS_COMPRESSION_H
+#define _FI_SAVE_BLOCKS_COMPRESSION_H
+
+typedef struct { // A complete encoded file held in memory
+ unsigned char *data; // malloc'ed buffer: an FI_HEADER followed by the packed block bits (filled by fiCompressBlocksAndAddHeader)
+ unsigned int len; // Number of bytes of data that are in use
+} fi_compresseddata_type;
+
+extern int fiCompressBlocksAndAddHeader(FI_HEADER *head ,fi_compresseddata_type *cdat, image_rangeblock_type *blocks, unsigned int nof_blocks);
+
+#endif
diff --git a/src/fi_save_blocks_search.c b/src/fi_save_blocks_search.c
new file mode 100644
index 0000000..0f03dc0
--- /dev/null
+++ b/src/fi_save_blocks_search.c
@@ -0,0 +1,171 @@
+
+#include
+#include
+#include
+#ifdef WIN32
+ #include
+ #include
+#else
+ #include
+#endif
+
+#ifdef I_USE_MPI
+#include
+#endif
+
+#include "image_misc.h"
+#include "fi_save.h"
+#include "fi_save_blocks_search.h"
+#include "fi_save_blocks_search_workers.h"
+
+/*
+ * Finds a domain block for every range block of image, spreading the work over
+ * nof_threads worker threads (fiWorkerFunc).
+ *
+ * image       - source image; w and h must be multiples of blocksize and at
+ *               least 2*blocksize, and alloc_memory must equal w*h.
+ * blocksize   - range block size, 2..IMAGE_MAXBLOCKSIZE.
+ * nof_threads - number of worker threads; 0 is treated as 1.
+ * worst_diff  - passed through to the workers.
+ * data        - output; on FISAVE_OKAY data->blocks is a malloc'ed array owned
+ *               by the caller and data->nof_blocks the number of top-level blocks.
+ *
+ * Returns FISAVE_OKAY, FISAVE_WRONGIMAGESIZE, FISAVE_DAMAGEDIMAGESTRUCT (also
+ * for an image without channels) or FISAVE_MEMORYALLOCERROR (also when a worker
+ * thread cannot be created).
+ *
+ * With I_USE_MPI every process handles only its own share of the blocks.
+ */
+int fiFindBestDomainBlocks(image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff, fi_data_type *data)
+{
+    unsigned int i, nof_blocks;
+    unsigned char *scaled_data; // Image data downscaled by 2 (for the domain block search); the channels follow one another
+    image_rangeblock_type *blocks;
+    fi_worker_arg_type *worker_args;
+    int result = FISAVE_OKAY;
+#ifdef WIN32
+    HANDLE *worker_thread_handles;
+#else
+    pthread_t *worker_thread_handles;
+    unsigned int nof_started;
+#endif
+#ifdef I_USE_MPI
+    int my_rank, nof_procs;
+#endif
+
+    if(blocksize < 2 || blocksize > IMAGE_MAXBLOCKSIZE) {
+        return FISAVE_WRONGIMAGESIZE;
+    }
+
+    if((image->w%blocksize) != 0 || (image->h%blocksize) != 0 || image->w < (blocksize*2) || image->h < (blocksize*2)) {
+        return FISAVE_WRONGIMAGESIZE;
+    }
+
+    if(image->w*image->h != image->alloc_memory)
+        return FISAVE_DAMAGEDIMAGESTRUCT;
+
+    // Without channels there is nothing to search and nof_blocks/nof_channels below would divide by zero
+    if(image->nof_channels < 1)
+        return FISAVE_DAMAGEDIMAGESTRUCT;
+
+    // With no worker at all every block would silently stay zeroed
+    if(nof_threads < 1)
+        nof_threads = 1;
+
+#ifdef I_USE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nof_procs);
+#endif
+
+    memset(data, 0, sizeof(fi_data_type));
+
+    nof_blocks = image->w*image->h*image->nof_channels/(blocksize*blocksize);
+
+    // Memory allocation starts here
+    blocks = malloc(sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3);
+    printf("blocks = malloc(%u), sizeof(image_rangeblock_type)=%u\n", (unsigned int)(sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3), (unsigned int)(sizeof(image_rangeblock_type)));
+    // Every block can be split into 4. For a block of size n (a power of two) that gives
+    // s = 1+4+16+...+pow(4, log2(n)-1) = (n*n-1)/3 entries. A size that is not a power of two
+    // (pow(2,x)*y) needs ((n/y)*(n/y)-1)/3 entries, which is less, so the power-of-two case
+    // is the worst one: s = (blocksize*blocksize-1)/3.
+    if(!blocks)
+        return FISAVE_MEMORYALLOCERROR;
+    memset(blocks, 0, sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3);
+    SetBlocksPointers(blocks, blocksize, nof_blocks, image->w, image->h);
+
+    scaled_data = malloc(image->nof_channels*image->alloc_memory/4);
+    printf("scaled_data = malloc(%u)\n", image->nof_channels*image->alloc_memory/4);
+    if(!scaled_data) {
+        free(blocks);
+        return FISAVE_MEMORYALLOCERROR;
+    }
+    for(i = 0; i < image->nof_channels; i++)
+        Scale2to1(image->data[i], &scaled_data[i*image->alloc_memory/4], image->w, image->h);
+
+    worker_args = malloc(nof_threads*sizeof(fi_worker_arg_type));
+    if(!worker_args) {
+        free(scaled_data);
+        free(blocks);
+        return FISAVE_MEMORYALLOCERROR;
+    }
+#ifdef WIN32
+    worker_thread_handles = malloc(nof_threads*sizeof(HANDLE));
+#else
+    worker_thread_handles = malloc(nof_threads*sizeof(pthread_t));
+#endif
+    if(!worker_thread_handles) {
+        free(worker_args);
+        free(scaled_data);
+        free(blocks);
+        return FISAVE_MEMORYALLOCERROR;
+    }
+    // Memory allocation ends here
+
+    // Fill in the arguments of the first worker
+    worker_args[0].scaled_data = scaled_data;
+    worker_args[0].data = image->data;
+    worker_args[0].block_p = blocks;
+    worker_args[0].nof_blocks_per_channel = nof_blocks/image->nof_channels;
+    worker_args[0].w = image->w;
+    worker_args[0].h = image->h;
+    worker_args[0].nof_channels = image->nof_channels;
+    worker_args[0].alloc_memory = image->alloc_memory;
+    worker_args[0].blocksize = blocksize;
+#ifdef I_USE_MPI
+    worker_args[0].worker_start = nof_threads*my_rank;
+    worker_args[0].worker_step = nof_threads*nof_procs;
+#else
+    worker_args[0].worker_start = 0;
+    worker_args[0].worker_step = nof_threads;
+#endif
+    worker_args[0].worst_diff = worst_diff;
+    for(i = 1; i < nof_threads; i++) { // The other workers differ only in their first block
+        memcpy(worker_args+i, worker_args, sizeof(fi_worker_arg_type));
+        worker_args[i].worker_start = worker_args[0].worker_start+i;
+    }
+
+#ifdef WIN32
+    // Create the worker threads (suspended, so that none runs if a later creation fails)
+    for(i = 0; i < nof_threads; i++) {
+        worker_thread_handles[i] = (HANDLE)_beginthreadex(NULL, 0, fiWorkerFunc, worker_args+i, CREATE_SUSPENDED, NULL);
+
+        if(!worker_thread_handles[i]) {
+            unsigned int j;
+
+            for(j = 0; j < i; j++) {
+                CloseHandle(worker_thread_handles[j]);
+            }
+
+            free(worker_thread_handles);
+            free(worker_args);
+            free(scaled_data);
+            free(blocks);
+            return FISAVE_MEMORYALLOCERROR;
+        }
+    }
+
+    // Start the worker threads
+    for(i = 0; i < nof_threads; i++)
+        ResumeThread(worker_thread_handles[i]);
+
+    // NOTE(review): WaitForMultipleObjects accepts at most MAXIMUM_WAIT_OBJECTS handles - confirm the caller's limit on nof_threads
+    WaitForMultipleObjects(nof_threads, worker_thread_handles, TRUE, INFINITE);
+
+    for(i = 0; i < nof_threads; i++)
+        CloseHandle(worker_thread_handles[i]);
+#else
+    // Create the worker threads; stop at the first failure so that only threads that exist are joined
+    for(nof_started = 0; nof_started < nof_threads; nof_started++) {
+        if(pthread_create(worker_thread_handles+nof_started, NULL, fiWorkerFunc, worker_args+nof_started) != 0) {
+            result = FISAVE_MEMORYALLOCERROR; // same code the WIN32 branch uses for this failure
+            break;
+        }
+    }
+
+    // The workers read worker_args and scaled_data, so wait for them before freeing anything
+    for(i = 0; i < nof_started; i++)
+        pthread_join(worker_thread_handles[i], 0);
+#endif
+
+    free(scaled_data);
+    free(worker_args);
+    free(worker_thread_handles);
+
+    if(result != FISAVE_OKAY) {
+        free(blocks);
+        return result;
+    }
+
+    data->blocks = blocks;
+    data->nof_blocks = nof_blocks;
+
+    return FISAVE_OKAY;
+}
diff --git a/src/fi_save_blocks_search.h b/src/fi_save_blocks_search.h
new file mode 100644
index 0000000..0dabfd6
--- /dev/null
+++ b/src/fi_save_blocks_search.h
@@ -0,0 +1,14 @@
+
+#ifndef _FI_SAVE_BLOCKS_SEARCH_H
+#define _FI_SAVE_BLOCKS_SEARCH_H
+
+#include "image.h"
+
+typedef struct { // Result of fiFindBestDomainBlocks
+ image_rangeblock_type *blocks; // malloc'ed block array; the receiver frees it (fiConvertImage does)
+ unsigned int nof_blocks; // Number of top-level blocks over all channels: w*h*nof_channels/(blocksize*blocksize)
+} fi_data_type;
+
+extern int fiFindBestDomainBlocks(image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff, fi_data_type *data);
+
+#endif
diff --git a/src/fi_save_blocks_search_workers.h b/src/fi_save_blocks_search_workers.h
new file mode 100644
index 0000000..65bd0c9
--- /dev/null
+++ b/src/fi_save_blocks_search_workers.h
@@ -0,0 +1,30 @@
+
+#ifndef _FI_SAVE_BLOCKS_SEARCH_WORKERS_H
+#define _FI_SAVE_BLOCKS_SEARCH_WORKERS_H
+
+#include "image.h"
+
+typedef struct { // Arguments handed to one fiWorkerFunc thread
+ unsigned char *scaled_data; // Half-resolution copies of all channels, alloc_memory/4 bytes per channel, one after another
+ unsigned char **data; // Full-size channel buffers, indexed by channel
+ image_rangeblock_type *block_p; // Start of the block array shared by all workers
+ unsigned int nof_blocks_per_channel; // Number of top-level blocks in one channel
+ unsigned int w; // Image width
+ unsigned int h; // Image height
+ unsigned int nof_channels; // Number of channels
+ unsigned int alloc_memory; // Size of one full-size channel buffer in bytes
+ unsigned int blocksize; // Side of a top-level block in pixels
+ unsigned int worker_start; // Index of the first block this worker handles
+ unsigned int worker_step; // Distance between the indices of consecutive blocks of this worker
+ unsigned int worst_diff; // Passed unchanged to fiFindBestDomainBlock; presumably the error limit above which a block is divided - confirm
+} fi_worker_arg_type;
+
+#ifdef WIN32
+ extern unsigned int __stdcall fiWorkerFunc(void *arg);
+#else
+ extern void *fiWorkerFunc(void *arg);
+#endif
+
+
+
+#endif
diff --git a/src/gpu_driven/fi_save_blocks_search_workers.cu b/src/gpu_driven/fi_save_blocks_search_workers.cu
new file mode 100644
index 0000000..feefebf
--- /dev/null
+++ b/src/gpu_driven/fi_save_blocks_search_workers.cu
@@ -0,0 +1,767 @@
+
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include
+#include
+#include
+#include
+
+extern "C" {
+#include "../image_misc.h"
+#include "../fi_save_blocks_search_workers.h"
+}
+
+#define IMAGE_MAXCUDABLOCKSIZE 64
+
+static void ShowMeSomeCUDAInfo(void);
+
+// Best candidate found by one CUDA block; one entry per block of the launch grid.
+typedef struct {
+ unsigned int dx; // X offset of the domain block
+ unsigned int dy; // Y offset of the domain block
+ unsigned int tr; // Transformation
+ unsigned int u; // Contrast
+ int v; // Brightness
+ unsigned int diff; // Sum of squared differences for this candidate
+} kernel_block_type;
+
+// Shared accumulation loop behind the eight fiCalcReversedTransformation* functions.
+//
+// For every position (l, m) of a blocksize x blocksize block it
+//   - reads the range pixel from data_cuda at row ry+row, column rx+col, where
+//     (row, col) is (l, m) optionally transposed and then optionally mirrored
+//     along each axis (transpose / flip_row / flip_col),
+//   - reads the domain pixel from scaled_data_cuda at row dy+l, column dx+m,
+//   - stores the range pixel into range_block[l*blocksize+m],
+//   - accumulates range_sum, domain_sum, rd_sum (range*domain) and the sum of
+//     squared domain pixels.
+// On return domain_disp holds n*sum(d^2) - sum(d)^2 with n = blocksize*blocksize.
+//
+// The scaled image has a row stride of w/2 (as written by Scale2to1), so the
+// domain row offset is (dy+l)*(w/2); the former (dy+l)*w/2 differs for odd w.
+static __device__ void fiAccumulateTransformedBlock(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w,
+        int *scaled_data_cuda,
+        int *data_cuda,
+        bool transpose, bool flip_row, bool flip_col
+        )
+{
+    const unsigned int last = blocksize-1;
+    const unsigned int scaled_stride = w/2;
+    unsigned int l, m;
+
+    rd_sum = domain_disp = domain_sum = range_sum = 0;
+
+    for(l = 0; l < blocksize; l++) {
+        for(m = 0; m < blocksize; m++) {
+            unsigned int row = transpose ? m : l;
+            unsigned int col = transpose ? l : m;
+
+            if(flip_row) row = last-row;
+            if(flip_col) col = last-col;
+
+            int br = data_cuda[(ry+row)*w+rx+col];
+            int bd = scaled_data_cuda[(dy+l)*scaled_stride+dx+m];
+
+            range_block[l*blocksize+m] = br;
+
+            range_sum += br;
+
+            domain_sum += bd;
+            domain_disp += bd*bd;
+            rd_sum += br*bd;
+        }
+    }
+
+    domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum;
+}
+
+// Identity: range pixel taken from (row l, column m).
+__device__ void fiCalcReversedTransformationNone(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        false, false, false);
+}
+
+// Range pixel taken from (row m, column blocksize-1-l).
+__device__ void fiCalcReversedTransformation90Right(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        true, false, true);
+}
+
+// Range pixel taken from (row blocksize-1-l, column blocksize-1-m).
+__device__ void fiCalcReversedTransformation180Right(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        false, true, true);
+}
+
+// Range pixel taken from (row blocksize-1-m, column l).
+__device__ void fiCalcReversedTransformation270Right(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        true, true, false);
+}
+
+// Range pixel taken from (row blocksize-1-l, column m).
+__device__ void fiCalcReversedTransformationVFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        false, true, false);
+}
+
+// Range pixel taken from (row l, column blocksize-1-m).
+__device__ void fiCalcReversedTransformationHFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        false, false, true);
+}
+
+// Range pixel taken from (row m, column l).
+__device__ void fiCalcReversedTransformationMainDiagFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        true, false, false);
+}
+
+// Range pixel taken from (row blocksize-1-m, column blocksize-1-l).
+__device__ void fiCalcReversedTransformationAntiDiagFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
+        unsigned int dx, unsigned int dy,
+        unsigned int rx, unsigned int ry, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    fiAccumulateTransformedBlock(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+        dx, dy, rx, ry, blocksize, w, scaled_data_cuda, data_cuda,
+        true, true, true);
+}
+
+// Evaluates candidate domain blocks for the range block at (rx, ry).
+//
+// Each thread handles one (dx, dy, tr) candidate: dx/dy come from the x/y grid
+// position (wrapped into the valid domain range), tr from the z position. The
+// thread computes contrast u and brightness v for its candidate, measures the
+// sum of squared differences, and compares it with a flat ("mono") block that
+// uses u = 0. The best thread of every CUDA block writes its result into the
+// kernel_blocks_cuda entry that belongs to that CUDA block.
+//
+// Parameters:
+//   rx, ry             position of the range block in data_cuda
+//   kernel_blocks_cuda one result slot per CUDA block of the grid
+//   blocksize          side length of the range block (<= IMAGE_MAXCUDABLOCKSIZE)
+//   w, h               dimensions of data_cuda; scaled_data_cuda has row stride w/2
+//   scaled_data_cuda   half-resolution image, one int per pixel
+//   data_cuda          full-resolution image, one int per pixel
+__global__ void fiFindBestDomainBlockKernel(unsigned int rx, unsigned int ry,
+        kernel_block_type *kernel_blocks_cuda, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        int *scaled_data_cuda,
+        int *data_cuda
+        )
+{
+    /*__shared__ */int range_block[IMAGE_MAXCUDABLOCKSIZE*IMAGE_MAXCUDABLOCKSIZE];
+    long long range_sum, domain_sum, rd_sum; // Sums of range pixels, domain pixels, and range*domain products
+    long long domain_disp;
+    unsigned int diff;
+    int u; int v;
+    unsigned int mono_diff;
+    int mono_u; int mono_v;
+    unsigned int local_diff;
+    __shared__ unsigned int best_diff;
+    int local_u; int local_v;
+    unsigned int dx, dy; // Domain block coordinates
+    unsigned int tr; // Transformation evaluated by the current thread
+    kernel_block_type *kernel_block_cuda; // Result slot of the current CUDA block
+
+    const unsigned int scaled_stride = w/2; // Row stride of scaled_data_cuda
+    const unsigned int threads_per_block = blockDim.x*blockDim.y*blockDim.z;
+    const unsigned int thread_rank = (threadIdx.z*blockDim.y+threadIdx.y)*blockDim.x+threadIdx.x;
+
+    unsigned int l, m;
+
+    // Start from the largest unsigned value so that every candidate is smaller.
+    // (The former product 255*255*blocksize^2*threads could wrap around.)
+    atomicExch(&best_diff, 0xFFFFFFFFu);
+    __syncthreads();
+
+    kernel_block_cuda = kernel_blocks_cuda+blockIdx.z*gridDim.y*gridDim.x+blockIdx.y*gridDim.x+blockIdx.x;
+
+    // Threads beyond the valid domain range wrap around and re-evaluate an existing position.
+    dx = (blockIdx.x*blockDim.x+threadIdx.x)%(w/2-blocksize+1);
+    dy = (blockIdx.y*blockDim.y+threadIdx.y)%(h/2-blocksize+1);
+    tr = blockIdx.z+threadIdx.z;
+
+    switch(tr) {
+        case IMAGE_RB_TRANSFORMATION_NONE:
+            fiCalcReversedTransformationNone(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_90RIGHT:
+            fiCalcReversedTransformation90Right(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_180RIGHT:
+            fiCalcReversedTransformation180Right(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_270RIGHT:
+            fiCalcReversedTransformation270Right(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_VFLIP:
+            fiCalcReversedTransformationVFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_HFLIP:
+            fiCalcReversedTransformationHFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP:
+            fiCalcReversedTransformationMainDiagFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+        case IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP:
+            fiCalcReversedTransformationAntiDiagFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block,
+                dx, dy, rx, ry, blocksize, w, h, scaled_data_cuda, data_cuda);
+            break;
+    }
+
+    mono_u = 0; mono_v = (int)( ((range_sum/(blocksize*blocksize))*63+128)/255 ); // Fallback values in case no domain block fits at all
+
+    if(domain_disp) {
+        u = (int)( 32*((int)(blocksize*blocksize)*rd_sum-range_sum*domain_sum)/domain_disp ); // Factor in [0, 1.0) stored as an integer in [0, 32)
+        if(u < 0) u = 0; if(u > 31) u = 31;
+        v = (int)( (range_sum-domain_sum*u/32)/(int)(blocksize*blocksize) );
+        if(v < -255 || v > 255) {
+            u = mono_u;
+            v = mono_v;
+        } else {
+            // Rescale v from [-255, 255] to [-63, 63], rounding away from zero at .5
+            if(v < 0)
+                v = (v*63-128)/255;
+            else
+                v = (v*63+128)/255;
+        }
+    } else {
+        u = mono_u;
+        v = mono_v;
+    }
+
+    mono_diff = 0; // Squared error when using mono_u and mono_v
+    diff = 0; // Squared error when using u and v with the domain block at dx, dy
+    for(l = 0; l < blocksize; l++) {
+        for(m = 0; m < blocksize; m++) {
+            int br = range_block[l*blocksize+m];
+            // Row stride of the scaled image is w/2; (dy+l)*w/2 would differ for odd w.
+            int bd = scaled_data_cuda[(dy+l)*scaled_stride+dx+m];
+
+            int mult;
+
+            mult = (mono_v*255/63-br);
+            mono_diff += mult*mult;
+
+            mult = (bd*u/32+v*255/63-br);
+            diff += mult*mult;
+        }
+    }
+
+    if(diff < mono_diff) {
+        local_diff = diff;
+        local_u = u;
+        local_v = v;
+    } else {
+        local_diff = mono_diff;
+        local_u = mono_u;
+        local_v = mono_v;
+    }
+
+    // Make the key unique per thread of the CUDA block (including the z index) so
+    // that exactly one thread wins the atomicMin and writes the result slot.
+    // Largest key: 255*64*1024+1023, far below 0xFFFFFFFF.
+    unsigned int uniq_diff = (unsigned int)sqrtf((float)local_diff)*threads_per_block+thread_rank;
+    atomicMin(&best_diff, uniq_diff);
+    __syncthreads();
+
+    if(best_diff == uniq_diff) {
+        kernel_block_cuda->dx = dx;
+        kernel_block_cuda->dy = dy;
+        kernel_block_cuda->tr = tr;
+        kernel_block_cuda->u = local_u;
+        kernel_block_cuda->v = local_v;
+        kernel_block_cuda->diff = local_diff;
+    }
+}
+
+// Finds the best domain block for the range block at (rx, ry) and stores it in *block_p.
+//
+// Launches fiFindBestDomainBlockKernel on a grid of `blocks` CUDA blocks with
+// `threads` threads each, copies the per-CUDA-block results back into
+// kernel_blocks and keeps the entry with the smallest diff. If that diff is
+// above worst_diff and the block can still be halved (blocksize > 2 and even),
+// the four quadrants are processed recursively into block_p->divided_into[0..3]
+// with a quarter of the threshold, and block_p is marked as divided.
+//
+// kernel_blocks_cuda (device) and kernel_blocks (host) must each hold
+// blocks.x*blocks.y*blocks.z entries.
+static void fiFindBestDomainBlock(unsigned int rx, unsigned int ry,
+        image_rangeblock_type *block_p, kernel_block_type *kernel_blocks_cuda, kernel_block_type *kernel_blocks, unsigned int blocksize,
+        unsigned int w, unsigned int h,
+        dim3 threads, dim3 blocks,
+        int *scaled_data_cuda,
+        int *data_cuda,
+        unsigned int worst_diff
+        )
+{
+    unsigned int best_diff, i;
+    kernel_block_type *kernel_block;
+    image_rangeblock_type *children;
+    const unsigned int half = blocksize/2;
+
+    // Clear the result slots on both sides before the launch.
+    memset(kernel_blocks, 0, sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z);
+    cudaMemcpy(kernel_blocks_cuda, kernel_blocks, sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z, cudaMemcpyHostToDevice);
+
+    // Execution configuration: grid dimensions first, then block dimensions.
+    fiFindBestDomainBlockKernel <<<blocks, threads>>>(rx, ry, kernel_blocks_cuda, blocksize, w, h, scaled_data_cuda, data_cuda);
+
+    cudaMemcpy(kernel_blocks, kernel_blocks_cuda, sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z, cudaMemcpyDeviceToHost);
+
+    // Reduce the per-CUDA-block winners to a single best candidate.
+    best_diff = kernel_blocks->diff;
+    kernel_block = kernel_blocks;
+    for(i = 0; i < blocks.x*blocks.y*blocks.z; i++) {
+        if(kernel_blocks[i].diff < best_diff) {
+            best_diff = kernel_blocks[i].diff;
+            kernel_block = kernel_blocks+i;
+        }
+    }
+
+    block_p->dx = kernel_block->dx;
+    block_p->dy = kernel_block->dy;
+    block_p->u = kernel_block->u;
+    block_p->v = kernel_block->v;
+    block_p->tr = kernel_block->tr;
+
+    if(best_diff > worst_diff && blocksize > 2 && blocksize%2 == 0) {
+        printf("best_diff %u/%u (=rms^2*blocksize^2), divided block size %u to %u\n", best_diff, worst_diff, blocksize, half);
+
+        children = (image_rangeblock_type *)block_p->divided_into;
+
+        // Each quadrant uses half the block size and a quarter of the threshold
+        // (the new block is four times smaller). The device and host result
+        // buffers are reused.
+        fiFindBestDomainBlock(rx, ry, // top-left quadrant
+            children, kernel_blocks_cuda, kernel_blocks,
+            half,
+            w, h, threads, blocks, scaled_data_cuda, data_cuda,
+            worst_diff/4);
+
+        fiFindBestDomainBlock(rx+half, ry, // top-right quadrant
+            children+1, kernel_blocks_cuda, kernel_blocks,
+            half,
+            w, h, threads, blocks, scaled_data_cuda, data_cuda,
+            worst_diff/4);
+
+        fiFindBestDomainBlock(rx, ry+half, // bottom-left quadrant
+            children+2, kernel_blocks_cuda, kernel_blocks,
+            half,
+            w, h, threads, blocks, scaled_data_cuda, data_cuda,
+            worst_diff/4);
+
+        fiFindBestDomainBlock(rx+half, ry+half, // bottom-right quadrant
+            children+3, kernel_blocks_cuda, kernel_blocks,
+            half,
+            w, h, threads, blocks, scaled_data_cuda, data_cuda,
+            worst_diff/4);
+
+        block_p->is_divided = true;
+    }
+}
+
+// Worker thread entry point for the CUDA-driven search.
+//
+// `arg` points to a fi_worker_arg_type. The worker allocates device buffers,
+// chooses a launch configuration from the properties of device 0, and then
+// processes range blocks worker_start, worker_start+worker_step, ... of the
+// image, uploading the channel data (widened to int) whenever the channel
+// changes. Progress and the accumulated GPU time are printed to stdout.
+// Always returns 0; failures are reported with printf and end the worker early.
+#ifdef WIN32
+unsigned int __stdcall fiWorkerFunc(void *arg)
+#else
+void *fiWorkerFunc(void *arg)
+#endif
+{
+    fi_worker_arg_type *worker_arg = (fi_worker_arg_type *)arg;
+
+    unsigned int i;
+    unsigned int rx, ry, channel, copied_channel;
+
+    unsigned char *scaled_data = worker_arg->scaled_data;
+    unsigned char **data = worker_arg->data;
+    int *data_in_int = 0;
+    image_rangeblock_type *block_start_p = worker_arg->block_p, *block_p;
+    unsigned int nof_blocks_per_channel = worker_arg->nof_blocks_per_channel;
+    unsigned int w = worker_arg->w;
+    unsigned int h = worker_arg->h;
+    unsigned int nof_channels = worker_arg->nof_channels;
+    unsigned int alloc_memory = worker_arg->alloc_memory;
+    unsigned int blocksize = worker_arg->blocksize;
+    unsigned int worker_start = worker_arg->worker_start;
+    unsigned int worker_step = worker_arg->worker_step;
+    unsigned int worst_diff = worker_arg->worst_diff;
+
+    unsigned int nof_blocks_per_image = nof_blocks_per_channel*nof_channels;
+
+    int *scaled_data_cuda;
+    kernel_block_type *kernel_blocks_cuda;
+    kernel_block_type *kernel_blocks;
+    int *data_cuda;
+
+    cudaEvent_t start, stop;
+    double cuda_working_time = 0;
+
+    cudaDeviceProp device_prop;
+    dim3 threads, blocks;
+
+    ShowMeSomeCUDAInfo();
+
+    // The kernel keeps the range block in a fixed-size per-thread array.
+    if(blocksize > IMAGE_MAXCUDABLOCKSIZE) {
+        printf("Sorry, but IMAGE_MAXCUDABLOCKSIZE=%d < blocksize=%u\n", IMAGE_MAXCUDABLOCKSIZE, blocksize);
+
+        return 0;
+    }
+
+    // Sizes are size_t; print them through long long like ShowMeSomeCUDAInfo does.
+    printf("cudaMalloc(&scaled_data_cuda, alloc_memory/4=%lld)\n", (long long)(sizeof(int)*alloc_memory/4));
+    if(cudaMalloc(&scaled_data_cuda, sizeof(int)*alloc_memory/4) != cudaSuccess) {
+        printf("Can\'t allocate memory for scaled_data_cuda\n");
+
+        return 0;
+    }
+    printf("cudaMalloc(&data_cuda, alloc_memory=%lld)\n", (long long)(sizeof(int)*alloc_memory));
+    if(cudaMalloc(&data_cuda, sizeof(int)*alloc_memory) != cudaSuccess) {
+        printf("Can\'t allocate memory for data_cuda_p\n");
+
+        cudaFree(scaled_data_cuda);
+
+        return 0;
+    }
+
+    if(cudaEventCreate(&start) != cudaSuccess) {
+        printf("Can\'t create event \"start\"\n");
+
+        cudaFree(scaled_data_cuda);
+        cudaFree(data_cuda);
+
+        return 0;
+    }
+    if(cudaEventCreate(&stop) != cudaSuccess) {
+        printf("Can\'t create event \"stop\"\n");
+
+        cudaFree(scaled_data_cuda);
+        cudaFree(data_cuda);
+
+        cudaEventDestroy(start);
+
+        return 0;
+    }
+
+    cudaGetDeviceProperties(&device_prop, 0);
+
+    int number_of_working_threads = device_prop.maxThreadsPerBlock/2; // "Magic" value chosen by the original author
+    int number_of_working_threads_multiplier = 1;
+    int threads_z = 1;
+
+    // If the grid cannot hold the 8 transformations along z, put them into the block instead.
+    if(device_prop.maxGridSize[2] < 8) {
+        number_of_working_threads /= 8;
+        threads_z = 8;
+    }
+
+    // Halve the thread count until it is a perfect square; the removed factor is
+    // given back to the x dimension through the multiplier.
+    while( ((unsigned int)(sqrt((float)number_of_working_threads)+0.5)*(unsigned int)(sqrt((float)number_of_working_threads)+0.5)) != number_of_working_threads) {
+        number_of_working_threads /= 2;
+        number_of_working_threads_multiplier *= 2;
+    }
+
+    threads = dim3((unsigned int)(sqrt((float)number_of_working_threads)+0.5)*number_of_working_threads_multiplier, (unsigned int)(sqrt((float)number_of_working_threads)+0.5), threads_z);
+    // Enough CUDA blocks to cover every domain position; z covers the 8 transformations.
+    blocks = dim3(((w/2-blocksize+1)+threads.x-1)/threads.x, ((h/2-blocksize+1)+threads.y-1)/threads.y, 8/threads.z);
+
+    printf("threads(%u,%u,%u) blocks(%u,%u,%u)\n", threads.x, threads.y, threads.z, blocks.x, blocks.y, blocks.z);
+
+    if((int)(threads.x*threads.y*threads.z) > device_prop.maxThreadsPerBlock) {
+        printf("threads.x*threads.y*threads.z is greater than %d, exiting...\n", device_prop.maxThreadsPerBlock);
+
+        cudaFree(scaled_data_cuda);
+        cudaFree(data_cuda);
+
+        cudaEventDestroy(start);
+        cudaEventDestroy(stop);
+
+        return 0;
+    }
+
+    printf("cudaMalloc(&kernel_blocks_cuda, sizeof(image_rangeblock_type)*blocks.x*blocks.y*blocks.z=%lld)\n", (long long)(sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z));
+    if(cudaMalloc(&kernel_blocks_cuda, sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z) != cudaSuccess) {
+        printf("Can\'t allocate memory for block_start_cuda_p\n");
+
+        cudaFree(scaled_data_cuda);
+        cudaFree(data_cuda);
+
+        cudaEventDestroy(start);
+        cudaEventDestroy(stop);
+
+        return 0;
+    }
+
+    printf("kernel_blocks = malloc(sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z=%lld), sizeof(kernel_block_type)=%lld\n", (long long)(sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z), (long long)sizeof(kernel_block_type));
+    kernel_blocks = (kernel_block_type *)malloc(sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z);
+    if(kernel_blocks == 0) {
+        printf("Can\'t allocate memory for block_start_cuda_p\n");
+
+        cudaFree(scaled_data_cuda);
+        cudaFree(data_cuda);
+        cudaFree(kernel_blocks_cuda);
+
+        cudaEventDestroy(start);
+        cudaEventDestroy(stop);
+
+        return 0;
+    }
+
+    copied_channel = nof_channels+1; // No valid channel: forces an upload on the first iteration
+    block_p = block_start_p+worker_start;
+
+    for(i = worker_start; i < nof_blocks_per_image; i += worker_step) {
+        float one_block_time;
+
+        channel = i / nof_blocks_per_channel;
+        rx = ((i % nof_blocks_per_channel) % (w/blocksize)) * blocksize;
+        ry = ((i % nof_blocks_per_channel) / (w/blocksize)) * blocksize;
+
+        // Upload the channel (converted from bytes to ints) only when it changes.
+        if(copied_channel != channel) {
+            unsigned int j;
+
+            if(!data_in_int)
+                data_in_int = (int *)malloc(sizeof(int)*alloc_memory);
+
+            if(!data_in_int) {
+                printf("Can\'t allocate memory for data_in_int\n");
+
+                break;
+            }
+
+            copied_channel = channel;
+
+            for(j = 0; j < alloc_memory/4; j++) {
+                data_in_int[j] = scaled_data[channel*alloc_memory/4+j];
+            }
+            cudaMemcpy(scaled_data_cuda, data_in_int, sizeof(int)*alloc_memory/4, cudaMemcpyHostToDevice);
+
+            for(j = 0; j < alloc_memory; j++) {
+                data_in_int[j] = data[channel][j];
+            }
+            cudaMemcpy(data_cuda, data_in_int, sizeof(int)*alloc_memory, cudaMemcpyHostToDevice);
+        }
+
+        // Progress report every 10 range blocks handled by this worker.
+        if(((i-worker_start)/worker_step)%10 == 0)
+            printf("worker %u %u rbs (%f) ok\n", worker_start+1, (i-worker_start)/worker_step,
+                (float)( 100.0 * (double)((i-worker_start)/worker_step) / (double)((nof_blocks_per_image-worker_start-1)/worker_step+1) ));
+
+        cudaEventRecord(start, 0);
+
+        fiFindBestDomainBlock(rx, ry,
+            block_p, kernel_blocks_cuda, kernel_blocks, blocksize,
+            w, h,
+            threads, blocks,
+            scaled_data_cuda,
+            data_cuda,
+            worst_diff);
+
+        cudaEventRecord(stop, 0);
+        cudaEventSynchronize(stop);
+        cudaEventElapsedTime(&one_block_time, start, stop);
+        cuda_working_time += one_block_time;
+
+        block_p += worker_step;
+    }
+
+    if(data_in_int)
+        free(data_in_int);
+    cudaFree(scaled_data_cuda);
+    cudaFree(data_cuda);
+    cudaFree(kernel_blocks_cuda);
+    free(kernel_blocks);
+
+    cudaEventDestroy(start);
+    cudaEventDestroy(stop);
+
+    if(worker_step == 1) { // Reset the device only when there is a single worker thread
+        if(cudaDeviceReset() != cudaSuccess) {
+            printf("Can\'t reset cuda device\n");
+        }
+    }
+
+    printf("cuda_working_time %f ms\n", cuda_working_time);
+
+    return 0;
+}
+
+// Prints the number of CUDA devices and a selection of properties of each one to stdout.
+// size_t fields are printed through long long; array fields are printed element by element.
+static void ShowMeSomeCUDAInfo(void)
+{
+    int device_count = 0; // Stays 0 if cudaGetDeviceCount fails
+    cudaDeviceProp device_prop;
+
+    cudaGetDeviceCount(&device_count);
+
+    printf("Device count: %d\n\n", device_count);
+
+    for(int i = 0; i < device_count; i++) {
+        cudaGetDeviceProperties(&device_prop, i);
+
+        printf("Device name: %s\n", device_prop.name);
+        printf("Compute capability: %d.%d\n", device_prop.major, device_prop.minor);
+        printf("Total constant memory: %lld\n", (long long)(device_prop.totalConstMem));
+        printf("Total global memory: %lld\n", (long long)(device_prop.totalGlobalMem));
+        printf("Shared memory per block: %lld\n", (long long)(device_prop.sharedMemPerBlock));
+        printf("Registers per block: %d\n", device_prop.regsPerBlock);
+        printf("Warp size: %d\n", device_prop.warpSize);
+        printf("Memory pitch: %lld\n", (long long)(device_prop.memPitch));
+        printf("Max threads per block: %d\n", device_prop.maxThreadsPerBlock);
+        printf("Max threads dimensions: x = %d, y = %d, z = %d\n",
+            device_prop.maxThreadsDim[0],
+            device_prop.maxThreadsDim[1],
+            device_prop.maxThreadsDim[2]);
+
+        printf("Max grid size: x = %d, y = %d, z = %d\n",
+            device_prop.maxGridSize[0],
+            device_prop.maxGridSize[1],
+            device_prop.maxGridSize[2]);
+
+        printf("Max Surface1D: %d\n", device_prop.maxSurface1D);
+        // maxSurface2D is a two-element array, so print both extents.
+        printf("Max Surface2D: x = %d, y = %d\n",
+            device_prop.maxSurface2D[0],
+            device_prop.maxSurface2D[1]);
+
+        printf("Clock rate: %d\n", device_prop.clockRate);
+        printf("Total constant memory: %lld\n", (long long)(device_prop.totalConstMem));
+        printf("Compute capability: %d.%d\n", device_prop.major, device_prop.minor);
+        printf("Texture alignment: %lld\n", (long long)(device_prop.textureAlignment));
+        printf("Device overlap: %d\n", device_prop.deviceOverlap);
+        printf("Multiprocessor count: %d\n", device_prop.multiProcessorCount);
+        printf("Kernel execution timeout enabled: %s\n",
+            device_prop.kernelExecTimeoutEnabled ? "true" : "false");
+    }
+
+    printf("\n");
+
+}
diff --git a/src/image.h b/src/image.h
new file mode 100644
index 0000000..ebbca84
--- /dev/null
+++ b/src/image.h
@@ -0,0 +1,49 @@
+
+#ifndef _IMAGE_H
+#define _IMAGE_H
+
+#include
+
+typedef struct {
+ unsigned int w;
+ unsigned int h;
+ unsigned int nof_channels;
+ unsigned int alloc_memory; // Total size of the data array
+ unsigned char *data;
+} image_pc_type; // pc == packed channels: the channels of one pixel are stored consecutively, all in a single array
+
+typedef struct {
+ unsigned int w;
+ unsigned int h;
+ unsigned int nof_channels;
+ unsigned int alloc_memory; // Size of the array allocated for each channel in data
+ unsigned char **data;
+} image_uc_type; // uc == unpacked channels: a separate array for each channel (red, green, blue)
+
+// Description of one range block: the chosen domain block and how to map it.
+typedef struct {
+ unsigned short dx; // X offset of the domain block
+ unsigned short dy; // Y offset of the domain block
+ unsigned char tr; // Transformation (one of IMAGE_RB_TRANSFORMATION_*)
+ unsigned char u; // Contrast
+ short v; // Brightness
+ void *divided_into; // Points to an array of 4 child blocks
+ bool is_divided; // true if the block has been divided
+ bool must_process; // NOTE(review): not used in the visible code - confirm meaning
+} image_rangeblock_type;
+
+// NOTE(review): the limits below are not referenced in the visible code; their
+// meaning is inferred from the names only - confirm against the callers.
+#define IMAGE_MAXBLOCKSIZE 128
+
+#define IMAGE_MAXDXY 1023
+#define IMAGE_LHALFDXY 512
+#define IMAGE_HHALFDXY 511
+
+// Identifiers of the eight block transformations, as stored in the `tr` fields.
+#define IMAGE_RB_TRANSFORMATION_NONE 0
+#define IMAGE_RB_TRANSFORMATION_90RIGHT 1
+#define IMAGE_RB_TRANSFORMATION_180RIGHT 2
+#define IMAGE_RB_TRANSFORMATION_270RIGHT 3
+#define IMAGE_RB_TRANSFORMATION_VFLIP 4
+#define IMAGE_RB_TRANSFORMATION_HFLIP 5
+#define IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP 6
+#define IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP 7
+
+#endif
diff --git a/src/image_misc.c b/src/image_misc.c
new file mode 100644
index 0000000..e55a182
--- /dev/null
+++ b/src/image_misc.c
@@ -0,0 +1,283 @@
+
+#include
+#include
+#include
+
+#include "image_misc.h"
+
+// Computes how many bits are needed to store a domain-block position index.
+//
+// The index runs from 0 to (w/2-(2-1))*(h/2-(2-1))-1, where 2 is the size of the
+// smallest range block for which domain blocks are searched. The result is the
+// bit length of that largest index (for example, storing 4 needs 3 bits) and is
+// written to *dxy_bitssize.
+//
+// The bit length is computed with integer shifts instead of log2(): this avoids
+// log2(0) when there is a single position, the unsigned wrap-around of the
+// former "*2", and floating-point rounding. Images too small to contain any
+// position (w/2 < 2 or h/2 < 2) yield 0.
+void GetDxyBitsSize(unsigned int w, unsigned int h, unsigned int *dxy_bitssize)
+{
+    unsigned int half_w = w/2;
+    unsigned int half_h = h/2;
+    unsigned int max_index, bits;
+
+    if(half_w < 2 || half_h < 2) {
+        *dxy_bitssize = 0;
+        return;
+    }
+
+    max_index = (half_w-1)*(half_h-1)-1;
+
+    for(bits = 0; max_index != 0; max_index >>= 1)
+        bits++;
+
+    *dxy_bitssize = bits;
+}
+
+// Returns the size in bits of one compressed block record: the position bits
+// plus 15 fixed bits (3 for the rotation, 5 for the contrast, 7 for the brightness).
+unsigned int GetCblockBitsSize(unsigned int dxy_bitssize)
+{
+    return 15+dxy_bitssize;
+}
+
+// Links every block to its four children inside one flat `blocks` array.
+//
+// The array holds the top level (nof_blocks entries) followed by each deeper
+// level, every level being four times larger than the previous one. Levels are
+// linked for as long as the block size is greater than 2 and even. Block j of a
+// level gets divided_into pointing at entries 4*j .. 4*j+3 of the next level.
+// w and h are not used.
+void SetBlocksPointers(image_rangeblock_type *blocks, unsigned int blocksize, unsigned int nof_blocks, unsigned int w, unsigned int h)
+{
+    image_rangeblock_type *parents = blocks;
+    image_rangeblock_type *children = blocks+nof_blocks;
+    unsigned int level_count = nof_blocks;
+    unsigned int size = blocksize;
+    unsigned int idx;
+
+    while(size > 2 && size%2 == 0) {
+        for(idx = 0; idx < level_count; idx++)
+            parents[idx].divided_into = children+idx*4;
+
+        // Descend one level: the children become the parents.
+        parents = children;
+        level_count *= 4;
+        children += level_count;
+        size /= 2;
+    }
+}
+
+// Downscales a w x h single-channel image to (w/2) x (h/2) by averaging each
+// 2x2 group of source pixels (integer division by 4). dst uses a row stride of w/2.
+void Scale2to1(unsigned char *src, unsigned char *dst, unsigned int w, unsigned int h)
+{
+    const unsigned int out_w = w/2;
+    const unsigned int out_h = h/2;
+    unsigned int row, col;
+
+    for(row = 0; row < out_h; row++) {
+        const unsigned char *top = src+(row*2)*w;
+        const unsigned char *bottom = src+(row*2+1)*w;
+        unsigned char *out = dst+row*out_w;
+
+        for(col = 0; col < out_w; col++) {
+            int sum = (int)top[col*2]+(int)top[col*2+1]+(int)bottom[col*2]+(int)bottom[col*2+1];
+
+            out[col] = sum/4;
+        }
+    }
+}
+
+// Writes into dst the blocksize x blocksize block src with the inverse of
+// transformation `tr` applied (e.g. for 90RIGHT the block is rotated 90 degrees
+// to the left, because the transformation is reversed). Both buffers are stored
+// row by row. An unknown `tr` leaves dst untouched.
+void ApplyReversedTransformToRangeBlock(unsigned char *src, unsigned char *dst, unsigned int tr, unsigned int blocksize)
+{
+    const unsigned int last = blocksize-1;
+    unsigned int row, col;
+
+    for(row = 0; row < blocksize; row++) {
+        for(col = 0; col < blocksize; col++) {
+            unsigned int src_row, src_col;
+
+            // Pick the source pixel that lands on (row, col) of the destination.
+            switch(tr) {
+                case IMAGE_RB_TRANSFORMATION_NONE:
+                    src_row = row; src_col = col;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_90RIGHT: // Rotate 90 to the left
+                    src_row = col; src_col = last-row;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_180RIGHT: // Rotate 180 to the left
+                    src_row = last-row; src_col = last-col;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_270RIGHT: // Rotate 270 to the left
+                    src_row = last-col; src_col = row;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_VFLIP:
+                    src_row = last-row; src_col = col;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_HFLIP:
+                    src_row = row; src_col = last-col;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP:
+                    src_row = col; src_col = row;
+                    break;
+                case IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP:
+                    src_row = last-col; src_col = last-row;
+                    break;
+                default:
+                    return;
+            }
+
+            dst[row*blocksize+col] = src[src_row*blocksize+src_col];
+        }
+    }
+}
+
+// Converts a packed-channel image into an unpacked one (one array per channel).
+//
+// The header fields of *out are filled first. Returns false when
+// in->alloc_memory does not equal w*h*nof_channels or when an allocation fails
+// (everything allocated so far is released); returns true on success, in which
+// case the caller owns out->data and each out->data[c].
+bool UnpackChannels(image_pc_type *in, image_uc_type *out)
+{
+    const unsigned int channels = in->nof_channels;
+    const unsigned int plane_size = in->w*in->h;
+    unsigned int c, p;
+
+    out->w = in->w;
+    out->h = in->h;
+    out->nof_channels = channels;
+    out->alloc_memory = plane_size;
+
+    if(channels*plane_size != in->alloc_memory)
+        return false;
+
+    out->data = malloc(channels*sizeof(void *));
+    if(!out->data)
+        return false;
+
+    for(c = 0; c < channels; c++) {
+        out->data[c] = malloc(plane_size);
+        if(out->data[c])
+            continue;
+
+        // Allocation failed: release the planes obtained so far, then the table.
+        while(c > 0)
+            free(out->data[--c]);
+        free(out->data);
+        return false;
+    }
+
+    // Pixel p of channel c sits at p*channels+c in the packed array.
+    for(c = 0; c < channels; c++)
+        for(p = 0; p < plane_size; p++)
+            out->data[c][p] = in->data[p*channels+c];
+
+    return true;
+}
+
+// Converts an unpacked-channel image into a packed one (channels interleaved per pixel).
+//
+// The header fields of *out are filled first. Returns false when
+// w*h*nof_channels does not equal nof_channels*in->alloc_memory or when the
+// allocation fails; returns true on success, in which case the caller owns out->data.
+bool PackChannels(image_uc_type *in, image_pc_type *out)
+{
+    const unsigned int channels = in->nof_channels;
+    const unsigned int plane_size = in->alloc_memory;
+    const unsigned int packed_size = in->w*in->h*channels;
+    unsigned int c, p;
+
+    out->w = in->w;
+    out->h = in->h;
+    out->nof_channels = channels;
+    out->alloc_memory = packed_size;
+
+    if(packed_size != channels*plane_size)
+        return false;
+
+    out->data = malloc(packed_size);
+    if(!out->data)
+        return false;
+
+    // Pixel p of channel c goes to p*channels+c in the packed array.
+    for(c = 0; c < channels; c++)
+        for(p = 0; p < plane_size; p++)
+            out->data[p*channels+c] = in->data[c][p];
+
+    return true;
+}
+
+// http://www.w3.org/Graphics/JPEG/jfif3.pdf
+// Converts the image in place from B, G, R planes (channels 0, 1, 2) to
+// Y, Cb, Cr planes (channels 0, 1, 2) using the JFIF formulas.
+// Results are clamped to [0, 255] and rounded to the nearest integer.
+// Images with fewer than 3 channels are left unchanged.
+void BGRtoYCBCR(image_uc_type *img)
+{
+    unsigned char *plane0, *plane1, *plane2;
+    unsigned int px;
+
+    if(img->nof_channels < 3)
+        return;
+
+    plane0 = img->data[0]; // B on input, Y on output
+    plane1 = img->data[1]; // G on input, Cb on output
+    plane2 = img->data[2]; // R on input, Cr on output
+
+    for(px = 0; px < img->w*img->h; px++) {
+        // Read all three inputs before any of them is overwritten.
+        const int blue = plane0[px];
+        const int green = plane1[px];
+        const int red = plane2[px];
+
+        const double y = 0.299*red + 0.587*green + 0.114*blue;
+        const double cb = -0.1687*red - 0.3313*green + 0.5*blue + 128;
+        const double cr = 0.5*red - 0.4187*green - 0.0813*blue + 128;
+
+        plane0[px] = (y < 0.0) ? 0 : (y > 255.0) ? 255 : (unsigned char)(y+0.5);
+        plane1[px] = (cb < 0.0) ? 0 : (cb > 255.0) ? 255 : (unsigned char)(cb+0.5);
+        plane2[px] = (cr < 0.0) ? 0 : (cr > 255.0) ? 255 : (unsigned char)(cr+0.5);
+    }
+}
+
+// http://www.w3.org/Graphics/JPEG/jfif3.pdf
+// Converts the image in place from Y, Cb, Cr planes (channels 0, 1, 2) to
+// B, G, R planes (channels 0, 1, 2) using the JFIF formulas.
+// Results are clamped to [0, 255] and rounded to the nearest integer.
+// Images with fewer than 3 channels are left unchanged.
+void YCBCRtoBGR(image_uc_type *img)
+{
+    unsigned char *plane0, *plane1, *plane2;
+    unsigned int px;
+
+    if(img->nof_channels < 3)
+        return;
+
+    plane0 = img->data[0]; // Y on input, B on output
+    plane1 = img->data[1]; // Cb on input, G on output
+    plane2 = img->data[2]; // Cr on input, R on output
+
+    for(px = 0; px < img->w*img->h; px++) {
+        // Read all three inputs before any of them is overwritten.
+        const int luma = plane0[px];
+        const int cb_off = (int)plane1[px]-128;
+        const int cr_off = (int)plane2[px]-128;
+
+        const double b = luma+1.772*cb_off;
+        const double g = luma-0.34414*cb_off-0.71414*cr_off;
+        const double r = luma+1.402*cr_off;
+
+        plane0[px] = (b < 0.0) ? 0 : (b > 255.0) ? 255 : (unsigned char)(b+0.5);
+        plane1[px] = (g < 0.0) ? 0 : (g > 255.0) ? 255 : (unsigned char)(g+0.5);
+        plane2[px] = (r < 0.0) ? 0 : (r > 255.0) ? 255 : (unsigned char)(r+0.5);
+    }
+}
diff --git a/src/image_misc.h b/src/image_misc.h
new file mode 100644
index 0000000..509707a
--- /dev/null
+++ b/src/image_misc.h
@@ -0,0 +1,19 @@
+
+#ifndef _IMAGE_MISC_H
+#define _IMAGE_MISC_H
+
+#include <stdbool.h>
+
+#include "image.h"
+
+extern void GetDxyBitsSize(unsigned int w, unsigned int h, unsigned int *dxy_bitssize);
+extern unsigned int GetCblockBitsSize(unsigned int dxy_bitssize);
+extern void SetBlocksPointers(image_rangeblock_type *blocks, unsigned int blocksize, unsigned int nof_blocks, unsigned int w, unsigned int h);
+extern void Scale2to1(unsigned char *src, unsigned char *dst, unsigned int w, unsigned int h);
+extern void ApplyReversedTransformToRangeBlock(unsigned char *src, unsigned char *dst, unsigned int tr, unsigned int blocksize);
+extern bool UnpackChannels(image_pc_type *in, image_uc_type *out); // presumably the inverse of PackChannels - TODO confirm against image_misc.c
+extern bool PackChannels(image_uc_type *in, image_pc_type *out); // interleaves the per-channel planes of in into a malloc'ed out->data; returns false on failure
+extern void BGRtoYCBCR(image_uc_type *img); // in place: planes 0, 1, 2 (B, G, R) become Y, Cb, Cr; no-op below 3 channels
+extern void YCBCRtoBGR(image_uc_type *img); // in place: planes 0, 1, 2 (Y, Cb, Cr) become B, G, R; no-op below 3 channels
+
+#endif
diff --git a/src/tga_file.h b/src/tga_file.h
new file mode 100644
index 0000000..3ad9660
--- /dev/null
+++ b/src/tga_file.h
@@ -0,0 +1,29 @@
+
+#ifndef _TGA_FILE_H
+#define _TGA_FILE_H
+
+#pragma pack (push, 1)
+typedef struct { // byte-packed: tgaLoad/tgaSave fread/fwrite this struct directly as the file header
+ unsigned char IdLeight; // Length of the information that follows the header
+ unsigned char ColorMap; // Colour-map presence flag (0 - absent, 1 - present)
+ unsigned char DataType; // Compression type
+ // 0 - No Image Data Included
+ // 1 - Uncompressed, Color-mapped Image
+ // 2 - Uncompressed, True-color Image
+ // 3 - Uncompressed, Black-and-white Image
+ // 9 - Run-length encoded, Color-mapped Image
+ // 10 - Run-length encoded, True-color Image
+ // 11 - Run-length encoded, Black-and-white Image
+ unsigned short CmapStart; // Start of the palette
+ unsigned short CmapLength; // Length of the palette
+ unsigned char CmapDepth; // Depth of the palette entries (15, 16, 24, 32)
+ unsigned short X_Origin; // Image origin along the X axis
+ unsigned short Y_Origin; // Image origin along the Y axis
+ unsigned short TGAWidth; // Image width
+ unsigned short TGAHeight; // Image height
+ unsigned char BitPerPel; // Number of bits per pixel (8, 16, 24, 32)
+ unsigned char Description; // Description (tgaLoad tests bit 0x20 to decide whether to flip the rows)
+} TGAHEADER;
+#pragma pack (pop)
+
+#endif
diff --git a/src/tga_load.c b/src/tga_load.c
new file mode 100644
index 0000000..4b69b6a
--- /dev/null
+++ b/src/tga_load.c
@@ -0,0 +1,237 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "tga_file.h"
+#include "tga_load.h"
+
+// Returns the length of stream f in bytes, or 0 if it cannot be determined; the read position is restored.
+static size_t GetFileLength(FILE *f)
+{
+	long cur_pos = ftell(f), end_pos = -1L;	// long, not size_t: ftell() reports failure as -1L
+
+	if(cur_pos >= 0 && fseek(f, 0, SEEK_END) == 0) {
+		end_pos = ftell(f);
+		fseek(f, cur_pos, SEEK_SET);
+	}
+	return (end_pos < 0) ? 0 : (size_t)end_pos;
+}
+
+// Loads the TGA file fname into *image as one contiguous pixel buffer.
+//
+// Accepted data types: 1/9 (colour-mapped: 8-bit indices, 24- or 32-bit palette
+// entries), 2/10 (true-colour, 24 or 32 bpp) and 3/11 (greyscale, 8 bpp); 9, 10
+// and 11 are the run-length encoded variants. Pixel bytes keep the order they
+// have in the file. Rows are mirrored vertically when bit 5 (0x20) of the
+// descriptor byte is set.
+//
+// Parameters:
+//   fname - path of the file to read.
+//   image - receives nof_channels, w, h, alloc_memory and a malloc'ed data buffer
+//           that the caller owns after TGALOAD_OKAY.
+//
+// Returns TGALOAD_OKAY on success, otherwise TGALOAD_CANTOPENFILE,
+// TGALOAD_DAMAGEDFILE (short header, zero-sized image, truncated or inconsistent
+// pixel/palette data), TGALOAD_UNSUPPORTEDFILETYPE or TGALOAD_MEMORYALLOCERROR.
+// Every failure path closes the file and frees all buffers; if the pixel buffer
+// had already been allocated, image->data is reset to 0.
+int tgaLoad(char *fname, image_pc_type *image)
+{
+	FILE *f;
+	TGAHEADER head;
+	unsigned int bpp = 0;		// bytes per decoded pixel
+	unsigned int src_bpp;		// bytes per encoded pixel inside RLE packets
+	unsigned int npixels, rowsize, count, needed, entry, i, j;
+	unsigned char *pal = 0, *databuf = 0;
+	unsigned char *p, *p2, *pdb, *pdb_end;
+	unsigned char b;
+	size_t file_len, data_len;
+	long pos;
+	int have_image = 0;			// set once image->data has been allocated
+	int result = TGALOAD_DAMAGEDFILE;	// outcome of a plain "goto done"
+
+	f = fopen(fname, "rb");
+	if(!f)
+		return TGALOAD_CANTOPENFILE;
+
+	file_len = GetFileLength(f);
+
+	if(fread(&head, sizeof(TGAHEADER), 1, f) != 1)
+		goto done;
+
+	switch(head.DataType) {	// check that the file is valid/supported and derive bpp
+	case 1:	// colour-mapped images
+	case 9:
+		if((head.ColorMap != 1) || (head.BitPerPel != 8))
+			goto done;
+		if((head.CmapDepth != 24) && (head.CmapDepth != 32)) {
+			result = TGALOAD_UNSUPPORTEDFILETYPE;
+			goto done;
+		}
+		bpp = head.CmapDepth/8;
+		break;
+	case 2:	// images without a palette (24, 32 bpp)
+	case 10:
+		if(head.ColorMap != 0)
+			goto done;
+		if((head.BitPerPel != 24) && (head.BitPerPel != 32)) {
+			result = TGALOAD_UNSUPPORTEDFILETYPE;
+			goto done;
+		}
+		bpp = head.BitPerPel/8;
+		break;
+	case 3:	// images without a palette (8 bpp, greyscale)
+	case 11:
+		if(head.ColorMap != 0)
+			goto done;
+		if(head.BitPerPel != 8) {
+			result = TGALOAD_UNSUPPORTEDFILETYPE;
+			goto done;
+		}
+		bpp = 1;
+		break;
+	default:
+		result = TGALOAD_UNSUPPORTEDFILETYPE;
+		goto done;
+	}
+
+	image->nof_channels = bpp;
+	image->w = head.TGAWidth;
+	image->h = head.TGAHeight;
+
+	npixels = image->w*image->h;
+	if(npixels == 0)	// nothing to decode; also keeps every malloc() size non-zero
+		goto done;
+	if(npixels > ((unsigned int)-1)/bpp) {	// w*h*bpp would wrap around
+		result = TGALOAD_MEMORYALLOCERROR;
+		goto done;
+	}
+
+	image->alloc_memory = npixels*bpp;
+	image->data = malloc(image->alloc_memory);
+	if(!image->data) {
+		result = TGALOAD_MEMORYALLOCERROR;
+		goto done;
+	}
+	have_image = 1;
+
+	// The image ID field follows the header and precedes the colour map, so it
+	// has to be skipped before the palette is read.
+	if(fseek(f, head.IdLeight, SEEK_CUR) != 0)
+		goto done;
+
+	if(head.ColorMap == 1) {
+		if(head.CmapLength == 0)
+			goto done;
+		pal = malloc(head.CmapLength*bpp);
+		if(!pal) {
+			result = TGALOAD_MEMORYALLOCERROR;
+			goto done;
+		}
+		if(fread(pal, 1, head.CmapLength*bpp, f) != head.CmapLength*bpp)
+			goto done;
+	}
+
+	switch(head.DataType) {	// read the pixel data
+	case 1:	// uncompressed palette indices, one byte per pixel
+		databuf = malloc(npixels);
+		if(!databuf) {
+			result = TGALOAD_MEMORYALLOCERROR;
+			goto done;
+		}
+		if(fread(databuf, 1, npixels, f) != npixels)
+			goto done;
+		p = image->data;
+		for(i = 0;i < npixels;i++) {
+			// NOTE(review): the original CmapStart+index mapping is kept (the TGA spec
+			// may intend index-CmapStart - confirm); out-of-palette entries are rejected.
+			entry = head.CmapStart+databuf[i];
+			if(entry >= head.CmapLength)
+				goto done;
+			memcpy(p, pal+entry*bpp, bpp);
+			p += bpp;
+		}
+		break;
+	case 2:	// uncompressed 8 (greyscale), 24 or 32 bit pixels
+	case 3:
+		if(fread(image->data, 1, image->alloc_memory, f) != image->alloc_memory)
+			goto done;
+		break;
+	case 9:	// run-length encoded palette indices
+	case 10:	// run-length encoded 24 or 32 bit pixels
+	case 11:	// run-length encoded greyscale pixels
+		src_bpp = pal ? 1 : bpp;
+		// Read everything that is left in the file and decode it from memory.
+		pos = ftell(f);
+		if((pos < 0) || ((size_t)pos >= file_len))
+			goto done;
+		data_len = file_len-(size_t)pos;
+		databuf = malloc(data_len);
+		if(!databuf) {
+			result = TGALOAD_MEMORYALLOCERROR;
+			goto done;
+		}
+		data_len = fread(databuf, 1, data_len, f);	// bytes actually available
+
+		pdb = databuf;
+		pdb_end = databuf+data_len;
+		p = image->data;
+		i = 0;
+		while(i < npixels) {
+			if(pdb == pdb_end)	// ran out of data before the image was complete
+				goto done;
+			b = *pdb; pdb++;
+			count = (b&0x7F)+1;	// pixels produced by this packet
+			// A run-length packet stores one pixel, a raw packet stores count pixels.
+			needed = (b&0x80) ? src_bpp : src_bpp*count;
+			if((count > npixels-i) || (needed > (size_t)(pdb_end-pdb)))
+				goto done;	// the packet overruns the image or the file
+
+			for(j = 0;j < count;j++) {
+				p2 = (b&0x80) ? pdb : pdb+j*src_bpp;
+				if(pal) {	// translate the index byte into a palette entry
+					entry = head.CmapStart+(*p2);
+					if(entry >= head.CmapLength)
+						goto done;
+					p2 = pal+entry*bpp;
+				}
+				memcpy(p, p2, bpp);
+				p += bpp;
+			}
+			pdb += needed;
+			i += count;
+		}
+		break;
+	}
+
+	// Flip along the y axis (if necessary; GIMP can save with this flag).
+	// Rows are swapped byte by byte, so no scratch buffer can fail to allocate.
+	if(head.Description & 0x20) {
+		rowsize = image->w*bpp;
+		p = image->data;
+		p2 = &image->data[rowsize*(image->h-1)];
+		for(i = 0;i < image->h/2;i++) {
+			for(j = 0;j < rowsize;j++) {
+				b = p[j];
+				p[j] = p2[j];
+				p2[j] = b;
+			}
+			p += rowsize;
+			p2 -= rowsize;
+		}
+	}
+
+	result = TGALOAD_OKAY;
+
+done:
+	if(have_image && (result != TGALOAD_OKAY)) {
+		free(image->data);
+		image->data = 0;
+	}
+	free(databuf);
+	free(pal);
+	fclose(f);
+
+	return result;
+}
diff --git a/src/tga_load.h b/src/tga_load.h
new file mode 100644
index 0000000..b956cb6
--- /dev/null
+++ b/src/tga_load.h
@@ -0,0 +1,15 @@
+
+#ifndef _TGA_LOAD_H
+#define _TGA_LOAD_H
+
+#include "image.h"
+
+#define TGALOAD_OKAY 0 // the image was loaded
+#define TGALOAD_CANTOPENFILE 1 // fopen() on the file failed
+#define TGALOAD_DAMAGEDFILE 2 // header unreadable or inconsistent with its data type
+#define TGALOAD_UNSUPPORTEDFILETYPE 3 // data type or bit depth that tgaLoad does not handle
+#define TGALOAD_MEMORYALLOCERROR 4 // a malloc() call failed
+
+extern int tgaLoad(char *fname, image_pc_type *image); // returns one of the TGALOAD_* codes above
+
+#endif
diff --git a/src/tga_save.c b/src/tga_save.c
new file mode 100644
index 0000000..e790cbf
--- /dev/null
+++ b/src/tga_save.c
@@ -0,0 +1,40 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "tga_file.h"
+#include "tga_save.h"
+
+// Writes image to fname as an uncompressed TGA file (data type 3 for one channel,
+// type 2 for three or four). The image is validated before the file is opened, so
+// a rejected image neither truncates an existing file nor leaks the stream.
+// Returns TGASAVE_OKAY, TGASAVE_DAMAGEDIMAGESTRUCT (size fields disagree or w/h
+// exceed the 16-bit header fields), TGASAVE_TOOMANYCHANNELS (0, 2 or more than 4
+// channels) or TGASAVE_CANTOPENFILE (file not created or not completely written).
+int tgaSave(char *fname, image_pc_type *image)
+{
+	FILE *f;
+	TGAHEADER head;
+	int complete;
+
+	if(image->w > 0xFFFF || image->h > 0xFFFF ||
+		image->w*image->h*image->nof_channels != image->alloc_memory)
+		return TGASAVE_DAMAGEDIMAGESTRUCT;
+	if(image->nof_channels > 4 || image->nof_channels == 2 || image->nof_channels == 0)
+		return TGASAVE_TOOMANYCHANNELS;
+	memset(&head, 0, sizeof(TGAHEADER));
+	head.TGAWidth = image->w;
+	head.TGAHeight = image->h;
+	head.DataType = (image->nof_channels == 1) ? 3 : 2;
+	head.BitPerPel = image->nof_channels*8;
+
+	f = fopen(fname, "wb");
+	if(!f)
+		return TGASAVE_CANTOPENFILE;
+	complete = fwrite(&head, sizeof(head), 1, f) == 1 &&
+		fwrite(image->data, 1, image->alloc_memory, f) == image->alloc_memory;
+	if(fclose(f) != 0)	// buffered data may only fail to reach the file here
+		complete = 0;
+	return complete ? TGASAVE_OKAY : TGASAVE_CANTOPENFILE;
+}
diff --git a/src/tga_save.h b/src/tga_save.h
new file mode 100644
index 0000000..b7b379d
--- /dev/null
+++ b/src/tga_save.h
@@ -0,0 +1,14 @@
+
+#ifndef _TGA_SAVE_H
+#define _TGA_SAVE_H
+
+#include "image.h"
+
+#define TGASAVE_OKAY 0 // the image was written
+#define TGASAVE_CANTOPENFILE 1 // the output file could not be opened for writing
+#define TGASAVE_DAMAGEDIMAGESTRUCT 2 // w*h*nof_channels does not match alloc_memory
+#define TGASAVE_TOOMANYCHANNELS 3 // nof_channels is 0, 2 or greater than 4
+
+extern int tgaSave(char *fname, image_pc_type *image); // returns one of the TGASAVE_* codes above
+
+#endif
diff --git a/test0.bat b/test0.bat
new file mode 100644
index 0000000..d2293d3
--- /dev/null
+++ b/test0.bat
@@ -0,0 +1,16 @@
+Release\compress.exe chihiro2.tga chihiro2_b8.fi 8
+Release\decompress.exe chihiro2_b8.fi chihiro2_b8.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b4.fi 4
+Release\decompress.exe chihiro2_b4.fi chihiro2_b4.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b2.fi 2
+Release\decompress.exe chihiro2_b2.fi chihiro2_b2.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b8_rms2.fi 8 4 2
+Release\decompress.exe chihiro2_b8_rms2.fi chihiro2_b8_rms2.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b8_rms4.fi 8 4 4
+Release\decompress.exe chihiro2_b8_rms4.fi chihiro2_b8_rms4.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b8_rms6.fi 8 4 6
+Release\decompress.exe chihiro2_b8_rms6.fi chihiro2_b8_rms6.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b8_rms10.fi 8 4 10
+Release\decompress.exe chihiro2_b8_rms10.fi chihiro2_b8_rms10.dec.tga
+Release\compress.exe chihiro2.tga chihiro2_b8_rms50.fi 8 4 50
+Release\decompress.exe chihiro2_b8_rms50.fi chihiro2_b8_rms50.dec.tga
\ No newline at end of file
diff --git a/test0.md5 b/test0.md5
new file mode 100644
index 0000000..7836c1d
--- /dev/null
+++ b/test0.md5
@@ -0,0 +1,16 @@
+dfe6421540d4ad169c23498bc5b4aa29 *chihiro2_b2.fi
+bac1dc505a5c9be23b27b8e840f5f8f9 *chihiro2_b2.dec.tga
+ce34f55126ee2f980868d08ac1239f68 *chihiro2_b4.fi
+e2037641abf7954b1513a5ebe53506eb *chihiro2_b4.dec.tga
+eb63ca99a084983b91f6414a7e3c241e *chihiro2_b8.fi
+25455b476b7f29b83e803e4a14ab3b19 *chihiro2_b8.dec.tga
+08b69485bfdaefe96c8cb478594f4a0f *chihiro2_b8_rms10.fi
+fe97f99bb573263f31498d3bdd3c19d3 *chihiro2_b8_rms10.dec.tga
+4584a2eb7932474a2b89978aa4b9dce5 *chihiro2_b8_rms2.fi
+1e242b76cc759de203f12ce3e2cca486 *chihiro2_b8_rms2.dec.tga
+d92c596ef1cec8f68616ef948284cae4 *chihiro2_b8_rms4.fi
+a4ab1ee520223fe616069e7e1a2aa3ba *chihiro2_b8_rms4.dec.tga
+035e900148cb8c6945a65cb1669c75af *chihiro2_b8_rms50.fi
+382986d755fad7ec1202776fd628c866 *chihiro2_b8_rms50.dec.tga
+f0515cd41769618ead8867b876772d9c *chihiro2_b8_rms6.fi
+8e84cc78593104f92dc4e00d7550e1f3 *chihiro2_b8_rms6.dec.tga
diff --git a/unix_makefiles/Release/do_not_delete.txt b/unix_makefiles/Release/do_not_delete.txt
new file mode 100644
index 0000000..e69de29
diff --git a/unix_makefiles/ReleaseMPI/do_not_delete.txt b/unix_makefiles/ReleaseMPI/do_not_delete.txt
new file mode 100644
index 0000000..e69de29
diff --git a/unix_makefiles/makefile b/unix_makefiles/makefile
new file mode 100644
index 0000000..c9e341c
--- /dev/null
+++ b/unix_makefiles/makefile
@@ -0,0 +1,29 @@
+CC=gcc
+CPP=g++
+CFLAGS=-c -Wall -O2
+CFLAGS_MPI=$(CFLAGS) -DI_USE_MPI -I/usr/include/mpich2/
+SOURCES_COMPRESS=../src/arrays.c ../src/compress.c ../src/fi_save.c ../src/fi_save_blocks_compression.c ../src/fi_save_blocks_search.c ../src/image_misc.c ../src/tga_load.c ../src/cpu_driven/fi_save_blocks_search_workers.c
+OBJECTS_COMPRESS=arrays.o compress.o fi_save.o fi_save_blocks_compression.o fi_save_blocks_search.o image_misc.o tga_load.o fi_save_blocks_search_workers.o
+SOURCES_DECOMPRESS=../src/arrays.c ../src/decompress.c ../src/fi_load.c ../src/image_misc.c ../src/tga_save.c
+OBJECTS_DECOMPRESS=arrays.o decompress.o fi_load.o image_misc.o tga_save.o
+
+# The targets are commands, not files, and their recipes rewrite the same *.o files here, so they must run serially.
+.PHONY: all release release_mpi compress compress_mpi decompress clean
+.NOTPARALLEL:
+
+all: release release_mpi
+release: compress decompress
+release_mpi: compress_mpi
+
+# Libraries are listed after the objects that use them so the linker can resolve their symbols.
+compress:
+	$(CC) $(CFLAGS) $(SOURCES_COMPRESS)
+	$(CPP) -o Release/compress $(OBJECTS_COMPRESS) -lpthread -s
+compress_mpi:
+	$(CC) $(CFLAGS_MPI) $(SOURCES_COMPRESS)
+	$(CPP) -o ReleaseMPI/compress $(OBJECTS_COMPRESS) -lpthread -lmpi -s
+decompress:
+	$(CC) $(CFLAGS) $(SOURCES_DECOMPRESS)
+	$(CPP) -o Release/decompress $(OBJECTS_DECOMPRESS) -s
+clean:
+	$(RM) *.o Release/compress Release/decompress ReleaseMPI/compress
diff --git a/unix_projects/compress/compress.cbp b/unix_projects/compress/compress.cbp
new file mode 100644
index 0000000..057809c
--- /dev/null
+++ b/unix_projects/compress/compress.cbp
@@ -0,0 +1,83 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/unix_projects/compress/compress.depend b/unix_projects/compress/compress.depend
new file mode 100644
index 0000000..782fb9a
--- /dev/null
+++ b/unix_projects/compress/compress.depend
@@ -0,0 +1,107 @@
+# depslib dependency file v1.0
+1447453270 source:/media/CC01-601D/fractal_image_compression/src/arrays.c
+ "arrays.h"
+
+1447378552 /media/CC01-601D/fractal_image_compression/src/arrays.h
+
+
+1456175332 source:/media/CC01-601D/fractal_image_compression/src/compress.c
+
+
+
+
+
+ "tga_load.h"
+ "image_misc.h"
+ "fi_save.h"
+
+1446493988 /media/CC01-601D/fractal_image_compression/src/tga_load.h
+ "image.h"
+
+1455143304 /media/CC01-601D/fractal_image_compression/src/image.h
+
+
+1454630460 /media/CC01-601D/fractal_image_compression/src/image_misc.h
+
+ "image.h"
+
+1447435442 /media/CC01-601D/fractal_image_compression/src/fi_save.h
+ "image.h"
+
+1457447856 source:/media/CC01-601D/fractal_image_compression/src/cpu_driven/fi_save_blocks_search_workers.c
+
+
+ "../image_misc.h"
+ "../fi_save_blocks_search_workers.h"
+
+1456176240 /media/CC01-601D/fractal_image_compression/src/fi_save_blocks_search_workers.h
+ "image.h"
+
+1454626304 source:/media/CC01-601D/fractal_image_compression/src/fi_save_blocks_compression.c
+
+
+
+ "arrays.h"
+ "fi_file.h"
+ "fi_save.h"
+ "image_misc.h"
+ "fi_save_blocks_compression.h"
+
+1446855158 /media/CC01-601D/fractal_image_compression/src/fi_file.h
+
+1454626284 /media/CC01-601D/fractal_image_compression/src/fi_save_blocks_compression.h
+
+1456175564 source:/media/CC01-601D/fractal_image_compression/src/fi_save.c
+
+
+
+
+
+ "arrays.h"
+ "fi_file.h"
+ "fi_save.h"
+ "fi_save_blocks_compression.h"
+ "fi_save_blocks_search.h"
+ "image_misc.h"
+
+1454624988 /media/CC01-601D/fractal_image_compression/src/fi_save_blocks_search.h
+ "image.h"
+
+1454630464 source:/media/CC01-601D/fractal_image_compression/src/image_misc.c
+
+
+
+ "image_misc.h"
+
+1446611444 source:/media/CC01-601D/fractal_image_compression/src/tga_load.c
+
+
+
+ "tga_file.h"
+ "tga_load.h"
+
+1446489332 /media/CC01-601D/fractal_image_compression/src/tga_file.h
+
+1456177190 source:/media/CC01-601D/fractal_image_compression/src/fi_save_blocks_search.c
+
+
+
+
+
+
+
+ "image_misc.h"
+ "fi_save.h"
+ "fi_save_blocks_search.h"
+ "fi_save_blocks_search_workers.h"
+
+1365770559 /usr/include/mpich2//mpi.h
+ "mpio.h"
+ "mpicxx.h"
+
+1365770561 /usr/include/mpich2//mpio.h
+ "mpi.h"
+
+1365770562 /usr/include/mpich2//mpicxx.h
+
+
diff --git a/unix_projects/compress/compress.layout b/unix_projects/compress/compress.layout
new file mode 100644
index 0000000..42ad001
--- /dev/null
+++ b/unix_projects/compress/compress.layout
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/unix_projects/decompress/decompress.cbp b/unix_projects/decompress/decompress.cbp
new file mode 100644
index 0000000..4a6c1c5
--- /dev/null
+++ b/unix_projects/decompress/decompress.cbp
@@ -0,0 +1,56 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/unix_projects/decompress/decompress.depend b/unix_projects/decompress/decompress.depend
new file mode 100644
index 0000000..c4ac310
--- /dev/null
+++ b/unix_projects/decompress/decompress.depend
@@ -0,0 +1 @@
+# depslib dependency file v1.0
diff --git a/unix_projects/decompress/decompress.layout b/unix_projects/decompress/decompress.layout
new file mode 100644
index 0000000..0e578cf
--- /dev/null
+++ b/unix_projects/decompress/decompress.layout
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/unix_projects/fractal_image_compression.workspace b/unix_projects/fractal_image_compression.workspace
new file mode 100644
index 0000000..a217cdb
--- /dev/null
+++ b/unix_projects/fractal_image_compression.workspace
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+