From 3f9778117f37f2023cecb2165c0d218253b64148 Mon Sep 17 00:00:00 2001 From: Mikhail Morozov Date: Mon, 15 Jan 2024 21:28:23 +0300 Subject: [PATCH] Restore repository from Software Heritage https://archive.softwareheritage.org/browse/origin/directory/?origin_url=https://github.com/plzombie/fractal_image_compression --- .gitattributes | 22 + .gitignore | 163 ++++ README.md | 53 ++ chihiro2.tga | Bin 0 -> 57618 bytes compress/compress.vcxproj | 128 +++ compress/compress.vcxproj.filters | 46 ++ cuda_compress/cuda_compress.vcxproj | 256 ++++++ cuda_compress/cuda_compress.vcxproj.filters | 25 + cudaold_compress/cudaold_compress.vcxproj | 256 ++++++ .../cudaold_compress.vcxproj.filters | 22 + decompress/decompress.vcxproj | 88 ++ decompress/decompress.vcxproj.filters | 34 + fractal_image_compression.sln | 70 ++ src/arrays.c | 86 ++ src/arrays.h | 18 + src/compress.c | 154 ++++ .../fi_save_blocks_search_workers.c | 204 +++++ src/decompress.c | 93 +++ src/fi_file.h | 15 + src/fi_load.c | 469 +++++++++++ src/fi_load.h | 14 + src/fi_save.c | 134 +++ src/fi_save.h | 15 + src/fi_save_blocks_compression.c | 111 +++ src/fi_save_blocks_compression.h | 12 + src/fi_save_blocks_search.c | 171 ++++ src/fi_save_blocks_search.h | 14 + src/fi_save_blocks_search_workers.h | 30 + .../fi_save_blocks_search_workers.cu | 767 ++++++++++++++++++ src/image.h | 49 ++ src/image_misc.c | 283 +++++++ src/image_misc.h | 19 + src/tga_file.h | 29 + src/tga_load.c | 237 ++++++ src/tga_load.h | 15 + src/tga_save.c | 40 + src/tga_save.h | 14 + test0.bat | 16 + test0.md5 | 16 + unix_makefiles/Release/do_not_delete.txt | 0 unix_makefiles/ReleaseMPI/do_not_delete.txt | 0 unix_makefiles/makefile | 29 + unix_projects/compress/compress.cbp | 83 ++ unix_projects/compress/compress.depend | 107 +++ unix_projects/compress/compress.layout | 22 + unix_projects/decompress/decompress.cbp | 56 ++ unix_projects/decompress/decompress.depend | 1 + unix_projects/decompress/decompress.layout | 13 + 
.../fractal_image_compression.workspace | 7 + 49 files changed, 4506 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 README.md create mode 100644 chihiro2.tga create mode 100644 compress/compress.vcxproj create mode 100644 compress/compress.vcxproj.filters create mode 100644 cuda_compress/cuda_compress.vcxproj create mode 100644 cuda_compress/cuda_compress.vcxproj.filters create mode 100644 cudaold_compress/cudaold_compress.vcxproj create mode 100644 cudaold_compress/cudaold_compress.vcxproj.filters create mode 100644 decompress/decompress.vcxproj create mode 100644 decompress/decompress.vcxproj.filters create mode 100644 fractal_image_compression.sln create mode 100644 src/arrays.c create mode 100644 src/arrays.h create mode 100644 src/compress.c create mode 100644 src/cpu_driven/fi_save_blocks_search_workers.c create mode 100644 src/decompress.c create mode 100644 src/fi_file.h create mode 100644 src/fi_load.c create mode 100644 src/fi_load.h create mode 100644 src/fi_save.c create mode 100644 src/fi_save.h create mode 100644 src/fi_save_blocks_compression.c create mode 100644 src/fi_save_blocks_compression.h create mode 100644 src/fi_save_blocks_search.c create mode 100644 src/fi_save_blocks_search.h create mode 100644 src/fi_save_blocks_search_workers.h create mode 100644 src/gpu_driven/fi_save_blocks_search_workers.cu create mode 100644 src/image.h create mode 100644 src/image_misc.c create mode 100644 src/image_misc.h create mode 100644 src/tga_file.h create mode 100644 src/tga_load.c create mode 100644 src/tga_load.h create mode 100644 src/tga_save.c create mode 100644 src/tga_save.h create mode 100644 test0.bat create mode 100644 test0.md5 create mode 100644 unix_makefiles/Release/do_not_delete.txt create mode 100644 unix_makefiles/ReleaseMPI/do_not_delete.txt create mode 100644 unix_makefiles/makefile create mode 100644 unix_projects/compress/compress.cbp create mode 100644 
unix_projects/compress/compress.depend create mode 100644 unix_projects/compress/compress.layout create mode 100644 unix_projects/decompress/decompress.cbp create mode 100644 unix_projects/decompress/decompress.depend create mode 100644 unix_projects/decompress/decompress.layout create mode 100644 unix_projects/fractal_image_compression.workspace diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..412eeda --- /dev/null +++ b/.gitattributes @@ -0,0 +1,22 @@ +# Auto detect text files and perform LF normalization +* text=auto + +# Custom for Visual Studio +*.cs diff=csharp +*.sln merge=union +*.csproj merge=union +*.vbproj merge=union +*.fsproj merge=union +*.dbproj merge=union + +# Standard to msysgit +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7cdee98 --- /dev/null +++ b/.gitignore @@ -0,0 +1,163 @@ +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# ========================= +# Operating System Files +# ========================= + +# OSX +# ========================= + +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear on external disk +.Spotlight-V100 +.Trashes + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# Object files +*.o +*.ko +*.obj +*.elf + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. 
Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +!unix_makefiles/Release/ +[Rr]eleases/ +x64/ +x86/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ + +# Visual Studo 2015 cache/options directory +.vs/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +#NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +*.VC.db + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +#OpenWatcom files +*.lk1 +*.map +*.mk +*.mk1 diff --git a/README.md b/README.md new file mode 100644 index 0000000..5705a3a --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +Репозиторий Fractal Image Compression +===================================== + +Описание +-------- + +Простой кодек для фрактального сжатия изображений. Основывается на описании из первой главы книги Yuval Fisher, Fractal Image compression: Theory and Application. + +Кодирование реализовано на CPU (compress.exe) и GPU (cuda_compress.exe - cuda 7.5, версии спецификации >=2.0, и cudaold_compress.exe - cuda 6.5, версии спецификации 1.1, 2.0). Имеется версия с поддержкой mpi. 
+ +Ссылки +------ + +[Репозиторий](https://github.com/ImageProcessing-ElectronicPublications/fractal_image_compression) + +[Скачать](https://github.com/ImageProcessing-ElectronicPublications/fractal_image_compression/releases) + +Для запуска под Windows необходим Windows XP и выше и Visual Studio 2013 runtime; для использования с mpi необходим mpich2 v1.4.1p1. + +Использование +------------- + +Как сжимать: + +>compress.exe input.tga output.fi 32 4 5 + +где 32 - размер рангового блока (ширина и высота изображения должны делиться на 32; чем больше размер рангового блока, тем лучше сжатие), 4 - количество потоков ЦПУ, 5 - максимальное значение погрешности для пикселя (чем больше число, тем меньше файл, но хуже качество получившегося изображения). + +Как разжимать: + +>decompress.exe output.fi decompressed.tga + +или + +>decompress.exe output.fi decompressed.tga 3 + +если вы хотите увеличить ширину и высоту изображения в 3 раза (можно использовать любое целое число). + +Сборка +------ + +Под Windows просто используйте Visual Studio 2013 и выше. + +Под unix просто войдите в unix_makefiles и введите в консоли: + +>make all + +Для сборки под юниксами также необходим gcc и mpi (тестировал под Debian 7 с mpich2 версии 1.4.1p1). + +Лицензия +-------- + +Нет её). Общественное достояние. 
diff --git a/chihiro2.tga b/chihiro2.tga new file mode 100644 index 0000000000000000000000000000000000000000..2a6e8500a025ef2766b24d390be3ec1c9890f03a GIT binary patch literal 57618 zcmeI52V4}__Q$D$EeawiqVx^|Dk6$V7pc4Sr6WZY6%i?l22?tTJ@!r&QK}`j#291D zd&Wc)ljyTVpDE_$pXBBL@?Pryw_F*<+1=T7lfQYh$Y(!#=FXknJKs5b?z!ildsj(G zMXB$vP-(x?7^PCB=lY5p>zNw+8T%_`S$l2IXOdoP4}Z)LILH1DlFkNsw?60l{j)wU zaip~*2>sDLDEK@!;|%_(E9B>#sAd10@AuF8xFt$Tv0?ACaE?xr!Z|5NPR?@$e@{~#{AbnxpMZTBNyu$-LvQ_AU2T;XN@lu}8YLj$_2IzDCLT+(no zBIu%|%h}6gOa%(hmVSfb4$CD0iHwzEG_oqYQ(_)Sx}3c{L1zlR8452)S!GB9lDJq3 zXStn?r0Bchsu~Kep>OXf(YU3NOcv^$^QB zJm|GG%{N&%R~)^=4>~FKTB(K8oL)B4no%;4{Eb8{Td`QeBc&LPh1ESNG55t@E49$h zIN3rsXwg98JXd3sSS*Q)q;Q7y7JH%8!i30AOmuv$$AqOQt17&?pl)!!vPyFFT?U+# zm`~wLuow-Tg2FSdlJU^TZ4V$@YfZ;!lxl{;3(J(0qC?+dAmRs}DnI%+7S1d8|0O!~ z3gapn4;{?rfirK=D1~QBzrkV*z5O~Fh@ujt7>%?dIh8qvchyXHj+M6Cs)3};1bC?H zW(Mb!d?}n`Qsm_9?pz0D4jH6Ff7Sy;j<8!o=NuRQ0ee`ZyzOuNpjYXa$SBVhJhB=e zWvT7Ha^NX5bcrz=%Mb0W>3YawoUiGTi!$RBk9UNwr83GeM`pVI zKAPhP2vxDSDKO*_ToJ(+RyTdX&N(zl9)0$3*<_*Pw``y)Gr^v%Iyjd99#DrxNHH2n zj0uyL7?TurCp_?Vf6pe5X`81xmN;21vNeh~((&{fY$*0A52(UO>r8N#YMiszpGc0r z$FLR2(f5OVJKUyJIaw{X7{@o(@ireFU_FK}9QXCT3`R^@IUzIf2ZMauF``e1Kmy)b zm=SyhLVY{K^u6;GU8`3pDJ^q#T`dqSc5)JhjFQa2WOm3n>o`eZ+K*RgBJ50(XFAvN z1J2N$Lxy1q^*>3?B=Wpkl81~H+W^HVT&$#Y^5n@&moAkiC5he<#%Q_gq{QgfwtQr! 
z=Lf@UGTPVPG}Y5}E4(!ZQb~saK5fZ;@oMzB;iH`uAg^GVmDL8o^z&O{*x$udoNMy) zMLEwiFsteQJG(xo7B|=!C(Lr$#Gb_@7BxKc0PlXs8*>C@4|cyZPAu+s3coO<*Tp00_k6V`6WkB zkF*?lR&8x(ZRj*D`b3-)Ib>d}bYGj6eEw02ic4&i*fu@TsD zmg3yp++0YzJM>2j3bsq-JU74Jb7rR}``O6gO9ix+Fm8bphs$kk3FpU;ALpp6(>b3# zdv*cs?sAlsUp{nADrdQ~HCV7?gidb>C%`fqnWL)u#TQ=?)Wy@L;ViUzb9FWB#)S(P z7HDcx^)n_;mkPP8^%g(y3=1*&PALFqp^0v5Bb@8Y$_Uj>m6hO(zxg9ZP@FR(7E0xu zQ39j!NIsY>S7GFyOZ#R~P2!fYGdr7bzIx>fnJS_pn&LeuALoqQ<>4(?3@ww;DvO_4U^@SL+~jbr=$u<(Vq1Li9pQZa zz4zwZ+Y`>~YF_~^%zDi6YT?g06FjGlA9f@nt~F+EYy5(vk(rVPH!rw z5WG??o?|l;oRNb z-OYaEa*+;x?CF;MmYcQ z!w-A+?gi%+g}cBfA>t$4epJZi*&g-&Ud?h3M*O{6#A1|Qs6L$QZ0YUK*3|s=+iwZ# zUAuOH^P;RZP(?Zi%vK}U1x`h;U(pS#E1C~OuTPh;br13f=p{1B>D(W56VAdIzM2~5 zEP5Im2n8w=!LS#$fCjO82a~)qw*>r z9~k7?`SV{+Nm-Jfj)@hZK6B;_IM=P)1wOLSV6Ow)?&H1}31gl!wzJn1J&S}Tj)J8( zyTH~KQ{Fp^iV(yoR8d)9QGp&1oUdQMj%=B(uCBcq z1Dj>PmedS!-%D>W7iAdaQ_aoaefQmBjVH_1bIi@RZQBOUh(+(*xf3cKFW3avmwl`! z799wqubl1KNF{ouCwB?LpV>b1oh!M>mfq$(1B3tk=RdEXIYZ@EA^y20FYos4+dy4k zUk}bn@#P3jz_bz{1KEb`!^8N9kI)su1H@3BQFCQsSFm5V*oVSW7(P)xr^{Dz%X20dtxp3c`Du9{41UGa?KVyYrT-Y8jl^8EP= zVjl!cA;#$ol~qJr$d!_OC8g$G{|&LwsFJE3?D})KZZt9G*_=uZy zAlU!b*L`1P@DuME9VMmq^lG`;jhi(|3Teof>X?^A`pcP~PI=IbwE-sHAJp$_2Pn}viY#4aV4op-M9+%6mm&W-QJz~|upvy3`Y4+RsRBsNeDi)L}I5LjA$x=th?=K8(VI$d9gGeIIYWvhdlmX!w3z)M^gu>2qBG-dB5vf$fcr zxxFJ7tddw>xK=c>5g*lk`}Uo#&a?5+O_GM!sIZM6 z*n!-bnDC~K_R}atlr=y105w$Kwc`vRKYaKwD)ch-4pIep-Dw(HV%wJK2-Pbe~R*B zM=_2245`tf?)$0emI8nky#=Uop^wkjEn5(*`|-yg>+2iiqx-!MI&(qTg`GRv30{yy zNM!%M&S)Opsd%(%#gioVqJn2L&anpmqHR;WlI)Qq)1t!e-MfcGj!!ZWrLOka`EwVtG7m9+R3bh32$|UZ^2;w{cyCZwNfly3-p*AKE|wVxFU$f&4;r*K zzX-aDjxRZuEny}$>^g&9O5AbSWL%5CXZ4&TtmX{_!t{HLFcp-oIe}s0g9i_j1hS^x zCJNfFU%yEpqi0M_5N~-HkA@t^gkH%}eQIz@Z;#^1*boUfg6A`;*7;-)L+&-UnFt|BV};2*RGhGV^C0P=F;rvfw3k5E?Z#pR}+yj%PQ) z+S%F)x;CmlT+26|N$P0^vum{MHi(RZ#~qeI3_dP4M*EyBdAE~79m>_)6&4YZ{K z_Zl@8Yz2cfvy_!BCb~n$%h73WT2c#HdjHm?za+>^AvM}=* zvIr3F3-DQ>cmr)w)WK@%WN45Jwl>oMHh8!eZ|DdD8ULDEPshkQExDOw@WQ)psP9>k 
z-`Ujol&t`5^nI&VG>Jw^84654%uH)5_Vg5OmEx659A}=SF6`(7UK&N%dTWny)ib0x z!*tB{IwVx!n3g+t?t-V-Ka5<83J)<7An8a&CsFW;U*_V~mh) zXWwg(fp_}o@fM~I6lVy|&N+k;kDQE^c8tS zQ7qB~Sg1{$x&|Dt@qF9)c3Nz+5+`RcwfrEqMsV@DF+_m)5RMk-8qoqekIz45Og;tvWGPj$-NXw54 zzaGUq+?$5?3}GT*M=r+<=c;$#eK$|soIAadT$iEAR>pQ7I^%F7js_Nlr~VjYI#^NJ zXVz{>$|++xqSlzhjR<-Jd8G5q&AE6mrQe! z_!~@8IU}m<@7Xw~@7LeJ8U2FPc5=N!l;%(Z&~3eN_L}HNh~!hje5r)v z!awx$ID|Aj`osae#04rstEJel*&|02p5u*dS->(*nOIGqzdnV3+Z#dou=}EShsnAl_DWzoj4{~$^ecI5w zLDj{oE?td@?B%vQP_=&xCQ0q&dbxa@jjfzbO#Be)bhKZLDL;?tTSWm%EkQ_Rvi-_Q zLlxH9$WT$GG6N~jX11Pz(9qmTTgSj-#uhi%a*Rs6XUgj4VUP&l+u!T( zl!+xK2Jwy~oEiHNa$Rrf?ax$Fwi{p(&8rM5Ct8=<#F6+4*|x6K3*zHA>Q6 zSP!h}4h}pVk=Puc-IQ5+RIqf5l~J~tzQ9jIpUflB+onh|)>WBUIeU#1=2OYR*}~q< z)Oy+|opECfEU-jk){Hv16AaF9l7r`TL?kvP7c^pD4*ISg86^$=n&Y^>xuT5vU~Oaz zT$!q>vvkH$20=!BHnNzEsNIMWIupzsktsUaX$gI=7%Y$F$JEa)YA$X4gpQYz7Ze|q zv5^W;ji~W7JtHq|VKAS(%L$j59QI(${=RcdWUCO`eNWg|lOE z?85pLd#_MP#R}}4GqH)c0*zTn$OsW3`%(IIn6jue>Oh|-m^zF%nW(F!XKd*lraB}_ zUE6!Gjwo2{0Jfm&X!_bF{F_T}<*B`GI`3NAM))(Hw5=g zMz-{745NWlWAsh!hiPS6IIZ0Irc|VoD&UI{5WWE2E50u#*^UfP7h??s5TC^S*9j1tjH;2YCf{Z1K=!)CHKKGapuh-xaQ1$PYSWDXorv=OPJWC-X?i=+(z zpOt*{lJzju)<$+D+kw-N(fHZJMjBg9r9+cNW;`0WANW1TXC=vAAJ)UsEyUEumD8~% zD&;jmRn6SqO%}FP9oTadYZvMe0-lr{jvPB@oFyuqtfI8&m5riu%qg$s# zlPbeXFuZQtDZWyn$x)%C@dorcbl6Dlaz@KzRn;a8*A7!vlM0qpFt?v+?=lAuMU?Yo zk8n9K%5sVQw1AnTh2um{ziftUTWSuKC7JUZM0O8Jhv&&@$Y^|++MtmmMl+0nSoIE` z`!X3;lpVQk;o#1&&r^K_6r!5uuPe6yt8BxMrIo*y*8Ebj{fFAN$JAN6u%PM~by#!k z%j)Ld=W^^z;iKt$I-zN+jV-2dee}IlNl9CK47>%gY^g$Ylx0V5EvS0(6Vf4Rs~P@A zzkTlbqH@RomX`e{e3c6SEvxz!*lRn!ppa4m85jx=yFaVg@h?1=vdUjdQGeUNstH~2y;)rU9znG1mGedxQ>do^0rM}l zy@8{ew#TK_zm%5$CVW+a+plGHKUW_3R;*2+fa58YZTLy}T*wPgXZ=sYr_=TMyt2da zqGPxpAe}yVI41b$&_rW#;R`AkRQLZLdeOdj7^VgjvL%hzh1}X63yD?yCd9kv`|8I3 zfkEi!*=Kb{ODYe1S62Ir(A40IQ1KfcR{6FcYCFGJvhftUj{xqEzQfhj8JyAO!JDGU zL!IVVw9z@|Z@<9Eb;MUYXVYmRz0S{T+P{#WFkcs0-(`j_$JoVRyTb}=ZyY~UZ2RRC5I?tf2@E& z2XeEy?sB00=1+45H+naR05!PLIcJnMi*k<1*+lSS%C%%ub94LYV?Ag0G9idZdB_1v$3(kraeG? 
z?bBd(B6*AT~;p4(J`}f7-B5heXCl&1>oT1g2 zS~3xrMT6c7Shh8uOiWA!t|3E)6c-nN{`u$Ne5Csf29gv~Q2{fyH*ek?Ja{lJ#8vI> z?YIgHJPS8fZAK2E>C?_Il&5#n@QD`*ngU}Kqk_wTRV^_nPKMoG!0rn0g!4Gj%Epc^-CfHQtD zu4rpFs+Au87!!pa+@JflpJ2tX1N6uzY{=LNR;!ynD6W4WlX_N@JYX#u^~qsE!49GN zt#3JW=n(co$A$*50EHXc7)Zq)BHFxuXA^dNAUOHbL3jy2ob^SAao)slxNbmlNSc^f)v z_6o0EwXZ)tk8pM{;Cv!_#Nc7V0M|T3F8vQV>q@INp`>BE*Z)8;; z>yIow{S`o*3VVQHnFe5-@a7NcWZP-X3?J-=`WU9@JR8%Zz zu_HQaI%j&hs2q`19h_P$BSuEGs*d{%qv65Nq&S<|&%kiW$kc)BoB@2!u#v)iiN36U zN|2qiZ*)#T(h6ubgCP#9J0G%7h*uoF8=h4qtl9N&)xoO}CQc7rzUOk$zAMDvAnj2A z&RBS5$qe`lS5nB%*~M27yKJkivtN1N4OO(=&`u&_`;e_xQ=s+58_|H*kvw00%ST-(+c5( z`!`yrj2`C%EZbxR@IlYg!Gj)-rauCP4F3cUgOJH2Z=_xT9XR9Kv^9;86AHHTGs_r| zafs24!;D}EHj{n);|oYI(Ahg0Mr^9D0Plv$al#podm+bNJ4z*-49pqya45U!;Lk57 zOoCEY0UbDFW}IYqV8R6|N<}*<;K6f?Q3v5E>&f0uGkLMOTkwg!>t#GGorGh2ugFEj z&q17=YA4r)XpJGihx07nJi-~@i&Y1%63%d~U+m3_WqxbBv^2&9L8y94Th!t&jh32OA4P+&X} zvF}DN#G+jV&|$_cyOE1T> zOIDPHDh$-=$cyfqpF&60)qQ++U?lzCe~(N&ki~U9GCn(dLuSsVEhq3A!u!@AI z*KGdwox2%!TW-|Q^JTBRHrXRW#&^KtO|dOhn8FNXM;n+6zy1#HYs)Gfota^389|9w zBSBZua?uhyLmRF~o@LPod||Oza=wz%qTa1X>GoEX$sb;VPBTS8Ei*fO*U<)lI82r| zoTa75-X29gD|AIHpGwMQq4YaX*%;asDVERtOj;2<ddxdcm?g9Z=Lj8hBU{m2G7**bG`5)R ztUErc_xH18{D>FT=mZdvNKsJ@8a6U$xR$q;uIH$+?mGI&t;NfEj1dm~hik>Dsv*mr be$@;41%Vd?UJ!Ue;01vf1YQsrumt`O!vzW5 literal 0 HcmV?d00001 diff --git a/compress/compress.vcxproj b/compress/compress.vcxproj new file mode 100644 index 0000000..5922b13 --- /dev/null +++ b/compress/compress.vcxproj @@ -0,0 +1,128 @@ + + + + + Debug + Win32 + + + Release (MPI) + Win32 + + + Release + Win32 + + + + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47} + Win32Proj + compress + + + + Application + true + v120_xp + Unicode + + + Application + false + v120_xp + true + Unicode + + + Application + false + v120_xp + true + Unicode + + + + + + + + + + + + + + + + true + + + false + + + false + $(ProgramFiles)\MPICH2\include;$(VC_IncludePath);$(WindowsSDK_IncludePath) + 
$(ProgramFiles)\MPICH2\lib;$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86); + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + I_USE_MPI;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + + + Console + true + true + true + mpi.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/compress/compress.vcxproj.filters b/compress/compress.vcxproj.filters new file mode 100644 index 0000000..55e1b33 --- /dev/null +++ b/compress/compress.vcxproj.filters @@ -0,0 +1,46 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + {9856afa4-a7d8-48a4-8188-a60d310d20fc} + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files\CPU driven + + + \ No newline at end of file diff --git a/cuda_compress/cuda_compress.vcxproj b/cuda_compress/cuda_compress.vcxproj new file mode 100644 index 0000000..da8a8af --- /dev/null +++ b/cuda_compress/cuda_compress.vcxproj @@ -0,0 +1,256 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release (MPI) + Win32 + + + Release (MPI) + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9} + cuda_compress + + + + Application + true + MultiByte + v120 + + + Application + true + MultiByte + v120 + + + Application + false 
+ true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + $(ProgramFiles)\MPICH2\include;$(VC_IncludePath);$(WindowsSDK_IncludePath); + $(ProgramFiles)\MPICH2\lib;$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86); + + + + Level3 + Disabled + SINGLE_THREAD_BY_DEFAULT;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 5.01 + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52; + + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + Level3 + MaxSpeed + true + true + SINGLE_THREAD_BY_DEFAULT;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 5.01 + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52; + + + + + Level3 + MaxSpeed + true + true + 
SINGLE_THREAD_BY_DEFAULT;I_USE_MPI;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + mpi.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 5.01 + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + compute_20,sm_20;compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52; + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + + + \ No newline at end of file diff --git a/cuda_compress/cuda_compress.vcxproj.filters b/cuda_compress/cuda_compress.vcxproj.filters new file mode 100644 index 0000000..6e3e19a --- /dev/null +++ b/cuda_compress/cuda_compress.vcxproj.filters @@ -0,0 +1,25 @@ + + + + + {e0e0e71a-6e32-4e79-b5c7-afd5b1a6e698} + + + {04d7c5b1-6017-4c20-9d85-3b4efa11ce02} + + + + + + + + + + + + + + GPU driven + + + \ No newline at end of file diff --git a/cudaold_compress/cudaold_compress.vcxproj b/cudaold_compress/cudaold_compress.vcxproj new file mode 100644 index 0000000..6194b84 
--- /dev/null +++ b/cudaold_compress/cudaold_compress.vcxproj @@ -0,0 +1,256 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release (MPI) + Win32 + + + Release (MPI) + x64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + {D38F1866-2265-48A0-AD93-3A5E05C69163} + cudaold_compress + + + + Application + true + MultiByte + v120 + + + Application + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + Application + false + true + MultiByte + v120 + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + true + + + $(ProgramFiles)\MPICH2\include;$(VC_IncludePath);$(WindowsSDK_IncludePath); + $(ProgramFiles)\MPICH2\lib;$(VC_LibraryPath_x86);$(WindowsSDK_LibraryPath_x86); + + + + Level3 + Disabled + SINGLE_THREAD_BY_DEFAULT;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 5.01 + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + compute_12,sm_12;compute_20,sm_20; + + + + + Level3 + Disabled + WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + Level3 + MaxSpeed + true + true + SINGLE_THREAD_BY_DEFAULT;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + 
cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 5.01 + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + compute_12,sm_12;compute_20,sm_20; + + + + + Level3 + MaxSpeed + true + true + SINGLE_THREAD_BY_DEFAULT;I_USE_MPI;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + mpi.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 5.01 + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + compute_12,sm_12;compute_20,sm_20; + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + Level3 + MaxSpeed + true + true + WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + + + true + true + true + Console + cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" +copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + 64 + + + + + + + \ No newline at end of file diff --git a/cudaold_compress/cudaold_compress.vcxproj.filters b/cudaold_compress/cudaold_compress.vcxproj.filters new file mode 100644 index 0000000..ec4a007 --- /dev/null +++ 
b/cudaold_compress/cudaold_compress.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {3e52e746-98ea-452d-89aa-d1f6432b29fa} + + + + + GPU driven + + + + + + + + + + + + \ No newline at end of file diff --git a/decompress/decompress.vcxproj b/decompress/decompress.vcxproj new file mode 100644 index 0000000..82a628c --- /dev/null +++ b/decompress/decompress.vcxproj @@ -0,0 +1,88 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {F97F886B-3E32-4790-B091-A4CBF1B91946} + Win32Proj + decompress + + + + Application + true + v120_xp + Unicode + + + Application + false + v120_xp + true + Unicode + + + + + + + + + + + + + true + + + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + + + Console + true + true + true + + + + + + + + + + + + + \ No newline at end of file diff --git a/decompress/decompress.vcxproj.filters b/decompress/decompress.vcxproj.filters new file mode 100644 index 0000000..a72b55f --- /dev/null +++ b/decompress/decompress.vcxproj.filters @@ -0,0 +1,34 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/fractal_image_compression.sln b/fractal_image_compression.sln new file mode 100644 index 0000000..116543e --- /dev/null +++ b/fractal_image_compression.sln @@ -0,0 +1,70 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.40629.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "compress", 
"compress\compress.vcxproj", "{65DA45D0-C86F-4A4A-8169-2AA90B68AA47}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "decompress", "decompress\decompress.vcxproj", "{F97F886B-3E32-4790-B091-A4CBF1B91946}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuda_compress", "cuda_compress\cuda_compress.vcxproj", "{AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cudaold_compress", "cudaold_compress\cudaold_compress.vcxproj", "{D38F1866-2265-48A0-AD93-3A5E05C69163}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release (MPI)|Win32 = Release (MPI)|Win32 + Release (MPI)|x64 = Release (MPI)|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Debug|Win32.ActiveCfg = Debug|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Debug|Win32.Build.0 = Debug|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Debug|x64.ActiveCfg = Debug|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release (MPI)|Win32.ActiveCfg = Release (MPI)|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release (MPI)|Win32.Build.0 = Release (MPI)|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release (MPI)|x64.ActiveCfg = Release (MPI)|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release|Win32.ActiveCfg = Release|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release|Win32.Build.0 = Release|Win32 + {65DA45D0-C86F-4A4A-8169-2AA90B68AA47}.Release|x64.ActiveCfg = Release|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Debug|Win32.ActiveCfg = Debug|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Debug|Win32.Build.0 = Debug|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Debug|x64.ActiveCfg = Debug|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release (MPI)|Win32.ActiveCfg = Release|Win32 + 
{F97F886B-3E32-4790-B091-A4CBF1B91946}.Release (MPI)|Win32.Build.0 = Release|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release (MPI)|x64.ActiveCfg = Release|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release|Win32.ActiveCfg = Release|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release|Win32.Build.0 = Release|Win32 + {F97F886B-3E32-4790-B091-A4CBF1B91946}.Release|x64.ActiveCfg = Release|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|Win32.ActiveCfg = Debug|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|Win32.Build.0 = Debug|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|x64.ActiveCfg = Debug|x64 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Debug|x64.Build.0 = Debug|x64 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|Win32.ActiveCfg = Release (MPI)|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|Win32.Build.0 = Release (MPI)|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|x64.ActiveCfg = Release|x64 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release (MPI)|x64.Build.0 = Release|x64 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|Win32.ActiveCfg = Release|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|Win32.Build.0 = Release|Win32 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|x64.ActiveCfg = Release|x64 + {AA1B258B-9E5E-478B-9D70-1B6EA5F7E8E9}.Release|x64.Build.0 = Release|x64 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|Win32.ActiveCfg = Debug|Win32 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|Win32.Build.0 = Debug|Win32 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|x64.ActiveCfg = Debug|x64 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Debug|x64.Build.0 = Debug|x64 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|Win32.ActiveCfg = Release (MPI)|Win32 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|Win32.Build.0 = Release (MPI)|Win32 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|x64.ActiveCfg = Release|x64 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release (MPI)|x64.Build.0 = 
Release|x64 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|Win32.ActiveCfg = Release|Win32 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|Win32.Build.0 = Release|Win32 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|x64.ActiveCfg = Release|x64 + {D38F1866-2265-48A0-AD93-3A5E05C69163}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/src/arrays.c b/src/arrays.c new file mode 100644 index 0000000..d7e1f77 --- /dev/null +++ b/src/arrays.c @@ -0,0 +1,86 @@ + +#include "arrays.h" + +bool arrayReadBits(array_type *a, unsigned int size, unsigned int *out) +{ + unsigned int output, bits_offset, bytes_offset; + unsigned char *bp; + + if(size > 32) return false; + + if((a->bytes_offset+(a->bits_offset+size)/8+((((a->bits_offset+size)%8)>0)?1:0)) > a->bytes_size) + return false; + + bp = a->bp; + bits_offset = a->bits_offset; + bytes_offset = a->bytes_offset; + output = 0; + + while(size > 0) { + unsigned int this_step; + + if(size > (8-bits_offset)) + this_step = 8-bits_offset; + else + this_step = size; + size -= this_step; + bits_offset += this_step; + + output = (output << this_step)+((unsigned int)(*bp) >> (8-bits_offset))%(1<bp = bp; + a->bits_offset = bits_offset; + a->bytes_offset = bytes_offset; + + return true; +} + +bool arrayWriteBits(array_type *a, unsigned int size, unsigned int out) +{ + unsigned int bits_offset, bytes_offset; + unsigned char *bp; + + if(size > 32) return false; + + if((a->bytes_offset+(a->bits_offset+size)/8+((((a->bits_offset+size)%8)>0)?1:0)) > a->bytes_size) + return false; + + bp = a->bp; + bits_offset = a->bits_offset; + bytes_offset = a->bytes_offset; + + while(size > 0) { + unsigned int this_step; + + if(size > (8-bits_offset)) + this_step = 8-bits_offset; + else + this_step = size; + size -= this_step; + bits_offset += this_step; + + *bp += ( (out>>size)%(1<> (8-bits_offset))%(1<bp = bp; + a->bits_offset = 
bits_offset; + a->bytes_offset = bytes_offset; + + return true; +} \ No newline at end of file diff --git a/src/arrays.h b/src/arrays.h new file mode 100644 index 0000000..cf4496c --- /dev/null +++ b/src/arrays.h @@ -0,0 +1,18 @@ + +#ifndef _ARRAYS_H +#define _ARRAYS_H + +#include + +typedef struct { + unsigned char *bytes; + unsigned char *bp; + unsigned int bytes_offset; + unsigned int bits_offset; + unsigned int bytes_size; +} array_type; + +extern bool arrayReadBits(array_type *a, unsigned int size, unsigned int *out); +extern bool arrayWriteBits(array_type *a, unsigned int size, unsigned int out); + +#endif diff --git a/src/compress.c b/src/compress.c new file mode 100644 index 0000000..174c0b2 --- /dev/null +++ b/src/compress.c @@ -0,0 +1,154 @@ + +#include +#include +#include +#include +#include + +#ifdef I_USE_MPI +#include +#define RETURN_FROM_MAIN(v) {MPI_Abort(MPI_COMM_WORLD,v);return(v);} +#else +#define RETURN_FROM_MAIN(v) {return(v);} +#endif + +#include "tga_load.h" +#include "image_misc.h" +#include "fi_save.h" + +int main(int argc, char **argv) +{ + char *input_filename, *output_filename; + image_pc_type image_pc; + image_uc_type image_uc; + unsigned int i, blocksize, nof_threads, rms_error, worst_diff; + char *program_name; +#ifdef I_USE_MPI + int my_rank, nof_procs; +#endif + +#ifdef I_USE_MPI + MPI_Init(0, 0); + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nof_procs); + + printf("Using MPI. 
Program rank is %d+1/%d\n", my_rank, nof_procs); +#endif + + program_name = strrchr(argv[0], '\\'); + + if(program_name == 0) + program_name = strrchr(argv[0], '/'); + + if(program_name == 0) + program_name = argv[0]; + else + program_name++; + + // Чтение параметров командной строки + if(argc < 3) { + printf("\t%s input.tga output.fi [block_size=8 [nof_threads=4 [rms_error=256]]]\n\n" + "\t\tYou must specify input and output filenames\n", program_name); + + RETURN_FROM_MAIN(0); + } + + input_filename = argv[1]; + output_filename = argv[2]; + if(argc > 3) { + blocksize = abs(atoi(argv[3])); + if(blocksize < 2) + blocksize = 2; + if(blocksize > IMAGE_MAXBLOCKSIZE) + blocksize = IMAGE_MAXBLOCKSIZE; + + printf("Default block size changed to %d\n", blocksize); + } else + blocksize = 8; + + if(argc > 4) { + nof_threads = abs(atoi(argv[4])); + if(nof_threads == 0) nof_threads = 1; + } else { +#ifdef SINGLE_THREAD_BY_DEFAULT + nof_threads = 1; +#else + nof_threads = 4; +#endif + } + printf("Number of threads set to %d\n", nof_threads); + + if(argc > 5) + rms_error = abs(atoi(argv[5])); + else + rms_error = 256; + worst_diff = rms_error*rms_error*blocksize*blocksize; + printf("RMS error %d, worst difference for first block size will be %d\n", rms_error, worst_diff); + + // Чтение исходного изображения + switch(tgaLoad(input_filename, &image_pc)) { + case TGALOAD_OKAY: + break; + case TGALOAD_CANTOPENFILE: + printf("Can\'t open %s\n", input_filename); + RETURN_FROM_MAIN(1); + case TGALOAD_DAMAGEDFILE: + printf("Can\'t open %s, file is damaged\n", input_filename); + RETURN_FROM_MAIN(1); + case TGALOAD_UNSUPPORTEDFILETYPE: + printf("Can\'t open %s, unsupported file type\n", input_filename); + RETURN_FROM_MAIN(1); + case TGALOAD_MEMORYALLOCERROR: + printf("Can\'t open %s, memory allocation error\n", input_filename); + RETURN_FROM_MAIN(1); + default: + printf("Undefined error while opening %s\n", input_filename); + RETURN_FROM_MAIN(1); + } + + if((image_pc.w%blocksize) != 0 || 
(image_pc.h%blocksize) != 0 || image_pc.w < (blocksize*2) || image_pc.h < (blocksize*2)) { + printf("Image sizes must be divisions of %d and greater than %d\n", blocksize, blocksize*2); + RETURN_FROM_MAIN(2); + } + + if(!UnpackChannels(&image_pc, &image_uc)) { + printf("Can\'t unpack image channels\n"); + RETURN_FROM_MAIN(3); + } + + free(image_pc.data); + + BGRtoYCBCR(&image_uc); + + // Сохранение результирующего изображения + switch(fiSave(output_filename, &image_uc, blocksize, nof_threads, worst_diff)) { + case FISAVE_OKAY: + break; + case FISAVE_CANTOPENFILE: + printf("Can\'t save %s\n", output_filename); + RETURN_FROM_MAIN(4); + case FISAVE_WRONGIMAGESIZE: + printf("Can\'t save %s, wrong image size\n", output_filename); + RETURN_FROM_MAIN(4); + case FISAVE_DAMAGEDIMAGESTRUCT: + printf("Can\'t save %s, internal error, damaged image structure\n", output_filename); + RETURN_FROM_MAIN(4); + default: + printf("Undefined error while saving %s\n", output_filename); + RETURN_FROM_MAIN(4); + } + + for(i = 0; i < image_uc.nof_channels; i++) + free(image_uc.data[i]); + free(image_uc.data); + +#ifdef I_USE_MPI + printf("===\n\trank %d finished\n===\n", my_rank); + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +#endif + + return 0; +} diff --git a/src/cpu_driven/fi_save_blocks_search_workers.c b/src/cpu_driven/fi_save_blocks_search_workers.c new file mode 100644 index 0000000..7e13028 --- /dev/null +++ b/src/cpu_driven/fi_save_blocks_search_workers.c @@ -0,0 +1,204 @@ + +#include +#include + +#include "../image_misc.h" +#include "../fi_save_blocks_search_workers.h" + +static void fiFindBestDomainBlock(unsigned int rx, unsigned int ry, + image_rangeblock_type *block_p, unsigned int blocksize, + unsigned int w, unsigned int h, + unsigned char *scaled_data_p, + unsigned char *data_p, + unsigned int worst_diff + ) +{ + unsigned int best_diff; + long long range_sum, domain_sum, rd_sum; // Сумма пикселей рангового блока, доменного блока, пикселя рангового на пиксель доменного 
блока + long long domain_disp; + unsigned int l, m, s, t, tr; + unsigned char rotated_data_p[8*IMAGE_MAXBLOCKSIZE*IMAGE_MAXBLOCKSIZE]; + + for(l = 0; l < blocksize; l++) { + memcpy(rotated_data_p+blocksize*l, data_p+w*(ry+l)+rx, blocksize); + } + for(l = 1; l < 8; l++) { + ApplyReversedTransformToRangeBlock(rotated_data_p, rotated_data_p+l*blocksize*blocksize, l, blocksize); + } + + range_sum = 0; + for(l = 0; l < blocksize; l++) + for(m = 0; m < blocksize; m++) { + range_sum += rotated_data_p[l*blocksize+m]; + } + block_p->u = 0; block_p->v = (short)( ((range_sum/(blocksize*blocksize))*63+128)/255 ); // Значения на случай, если совсем ни один блок не подойдёт + best_diff = 0; // Вообще, тут можно поставить любое число > 255*255*blocksize*blocksize=4161600(для 8), но мы найдём разницу для v и u выше + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = rotated_data_p[l*blocksize+m]; + int mult; + + mult = (block_p->v*255/63-br); + best_diff += mult*mult; + } + } + //printf("range_disp %d, range_sum %d\n", range_disp, range_sum); + + for(s = 0; s <= h/2-blocksize; s++) { + for(t = 0; t <= w/2-blocksize; t++) { + domain_disp = domain_sum = 0; + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int bd = scaled_data_p[(s+l)*w/2+t+m]; + + domain_sum += bd; + domain_disp += bd*bd; + } + } + domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; + + for(tr = 0; tr < 8; tr++) { + unsigned int cur_diff = 0; + int u_int, v_int; + + if(domain_disp == 0) { + u_int = 0; + v_int = (int)( ((range_sum/(blocksize*blocksize))*63+128)/255 ); + } else { + rd_sum = 0; + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = rotated_data_p[tr*blocksize*blocksize+l*blocksize+m]; + int bd = scaled_data_p[(s+l)*w/2+t+m]; + + rd_sum += br*bd; + } + } + + u_int = (int)( 32*((int)(blocksize*blocksize)*rd_sum-range_sum*domain_sum)/domain_disp ); // яркость в интервале [0, 1.0), т.е. 
от [0 до 32) + // av_u += u_int; nof_u++; + //if(u_int < 0) {/*printf("u = %f\n", (float)(u_int/51.0));*/u_int = -u_int;} if(u_int > 255) {/*printf("u = %f\n", (float)(u_int/51.0));*/u_int = 255;} + //if(u_int < 0 || u_int > 255) continue; + if(u_int < 0) u_int = 0; if(u_int > 31) u_int = 31; + v_int = (int)( (range_sum-domain_sum*u_int/32)/(int)(blocksize*blocksize) ); + if(v_int < -255 || v_int > 255) continue; + if(v_int < 0) + v_int = (v_int*63-128)/255; + else + v_int = (v_int*63+128)/255; + + //if(v_int < -255) v_int = -255; if(v_int > 255) v_int = 255; + //printf("resulted u %d v %d\n", u_int, v_int); + } + + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = rotated_data_p[tr*blocksize*blocksize+l*blocksize+m]; + int bd = scaled_data_p[(s+l)*w/2+t+m]; + int mult; + + mult = (bd*u_int/32+v_int*255/63-br); + cur_diff += mult*mult; + } + } + //printf("%d < %d\n", cur_diff, best_diff); + if(cur_diff < best_diff) { + best_diff = cur_diff; + block_p->dx = t; + block_p->dy = s; + block_p->tr = tr; + block_p->u = u_int; + block_p->v = v_int; + } + + if(domain_disp == 0 || best_diff == 0) + break; + } + /*if(t%10 == 0) + printf("- %d dbs ok\n", i*max_dby*max_dbx+s*max_dbx+t);*/ + if(best_diff == 0) + break; + } + + if(best_diff == 0) + break; + } + + if(best_diff > worst_diff && blocksize > 2 && blocksize%2 == 0) { + printf("best_diff %d/%d (=rms^2*blocksize^2), divided block size %d to %d\n", best_diff, worst_diff, blocksize, blocksize/2); + + fiFindBestDomainBlock(rx, ry, // Позиция нового блока блока + (image_rangeblock_type *)block_p->divided_into, // Указатель на новый блок + blocksize/2, // Уменьшаем размер блока на 2 + w, h, scaled_data_p, data_p, + worst_diff/4); // Уменьшаем разницу на 4 (т.к. 
новый блок будет в 4 раза меньше) + + fiFindBestDomainBlock(rx+blocksize/2, ry, // Позиция нового блока блока + (image_rangeblock_type *)block_p->divided_into+1, // Указатель на новый блок + blocksize/2, // Уменьшаем размер блока на 2 + w, h, scaled_data_p, data_p, + worst_diff/4); // Уменьшаем разницу на 4 (т.к. новый блок будет в 4 раза меньше) + + fiFindBestDomainBlock(rx, ry+blocksize/2, // Позиция нового блока блока + (image_rangeblock_type *)block_p->divided_into+2, // Указатель на новый блок + blocksize/2, // Уменьшаем размер блока на 2 + w, h, scaled_data_p, data_p, + worst_diff/4); // Уменьшаем разницу на 4 (т.к. новый блок будет в 4 раза меньше) + + fiFindBestDomainBlock(rx+blocksize/2, ry+blocksize/2, // Позиция нового блока блока + (image_rangeblock_type *)block_p->divided_into+3, // Указатель на новый блок + blocksize/2, // Уменьшаем размер блока на 2 + w, h, scaled_data_p, data_p, + worst_diff/4); // Уменьшаем разницу на 4 (т.к. новый блок будет в 4 раза меньше) + + block_p->is_divided = true; + } +} + +#ifdef WIN32 +unsigned int __stdcall fiWorkerFunc(void *arg) +#else +void *fiWorkerFunc(void *arg) +#endif +{ + unsigned int i; + unsigned int rx, ry, channel; + + unsigned char *scaled_data = ((fi_worker_arg_type *)arg)->scaled_data; + unsigned char **data = ((fi_worker_arg_type *)arg)->data; + image_rangeblock_type *block_start_p = ((fi_worker_arg_type *)arg)->block_p, *block_p; + unsigned int nof_blocks_per_channel = ((fi_worker_arg_type *)arg)->nof_blocks_per_channel; + unsigned int w = ((fi_worker_arg_type *)arg)->w; + unsigned int h = ((fi_worker_arg_type *)arg)->h; + unsigned int nof_channels = ((fi_worker_arg_type *)arg)->nof_channels; + unsigned int alloc_memory = ((fi_worker_arg_type *)arg)->alloc_memory; + unsigned int blocksize = ((fi_worker_arg_type *)arg)->blocksize; + unsigned int worker_start = ((fi_worker_arg_type *)arg)->worker_start; + unsigned int worker_step = ((fi_worker_arg_type *)arg)->worker_step; + unsigned int worst_diff = 
((fi_worker_arg_type *)arg)->worst_diff; + + unsigned int nof_blocks_per_image = nof_blocks_per_channel*nof_channels; + + block_p = block_start_p+worker_start; + + for(i = worker_start; i < nof_blocks_per_image; i += worker_step) { + channel = i / nof_blocks_per_channel; + rx = ((i % nof_blocks_per_channel) % (w/blocksize)) * blocksize; + ry = ((i % nof_blocks_per_channel) / (w/blocksize)) * blocksize; + + if(((i-worker_start)/worker_step)%10 == 0) + printf("worker %d %d rbs (%f) ok\n", worker_start+1, (i-worker_start)/worker_step, + (float)( 100.0 * (double)((i-worker_start)/worker_step) / (double)((nof_blocks_per_image-worker_start-1)/worker_step+1) )); + + fiFindBestDomainBlock(rx, ry, + block_p, blocksize, + w, h, + scaled_data+channel*alloc_memory/4, + data[channel], + worst_diff); + + block_p += worker_step; + } + + return 0; +} diff --git a/src/decompress.c b/src/decompress.c new file mode 100644 index 0000000..a6a1115 --- /dev/null +++ b/src/decompress.c @@ -0,0 +1,93 @@ + +#include +#include +#include + +#include "fi_load.h" +#include "image_misc.h" +#include "tga_save.h" + +int main(int argc, char **argv) +{ + char *input_filename, *output_filename; + image_pc_type image_pc; + image_uc_type image_uc; + unsigned int i, scale; + char *program_name; + + program_name = strrchr(argv[0], '\\'); + + if(program_name == 0) + program_name = strrchr(argv[0], '/'); + + if(program_name == 0) + program_name = argv[0]; + else + program_name++; + + if(argc < 3) { + printf("\t%s input.fi output.tga [scale=1]\n\n" + "\t\tYou must specify input and output filenames\n", program_name); + + return 0; + } + input_filename = argv[1]; + output_filename = argv[2]; + if(argc > 3) { + scale = abs(atoi(argv[3])); + if(scale < 1) + scale = 1; + + printf("Scale changed to %u\n", scale); + } else + scale = 1; + + switch(fiLoad(input_filename, &image_uc, scale)) { + case FILOAD_OKAY: + break; + case FILOAD_CANTOPENFILE: + printf("Can\'t open %s\n", input_filename); + return 1; + case 
FILOAD_DAMAGEDFILE: + printf("Can\'t open %s, file is damaged\n", input_filename); + return 1; + case FILOAD_MEMORYALLOCERROR: + printf("Can\'t open %s, memory allocation error\n", input_filename); + return 1; + default: + printf("Undefined error while opening %s\n", input_filename); + return 1; + } + + YCBCRtoBGR(&image_uc); + + if(!PackChannels(&image_uc, &image_pc)) { + printf("Can\'t unpack image channels\n"); + return 3; + } + + for(i = 0; i < image_uc.nof_channels; i++) + free(image_uc.data[i]); + free(image_uc.data); + + switch(tgaSave(output_filename, &image_pc)) { + case TGASAVE_OKAY: + break; + case TGASAVE_CANTOPENFILE: + printf("Can\'t save %s\n", output_filename); + return 4; + case TGASAVE_DAMAGEDIMAGESTRUCT: + printf("Can\'t save %s, internal error, damaged image structure\n", output_filename); + return 4; + case TGASAVE_TOOMANYCHANNELS: + printf("Can\'t save %s, too many channels in input file\n", output_filename); + return 4; + default: + printf("Undefined error while saving %s\n", output_filename); + return 4; + } + + free(image_pc.data); + + return 0; +} diff --git a/src/fi_file.h b/src/fi_file.h new file mode 100644 index 0000000..8faa637 --- /dev/null +++ b/src/fi_file.h @@ -0,0 +1,15 @@ + +#ifndef _FI_FILE_H +#define _FI_FILE_H + +#define FI_SIGN 0xC0F1 + +typedef struct { + unsigned short sign; // Сигнатура + unsigned short blocksize; // Размер блока. 
if(blocksize == 0) blocksize = 8; + unsigned int w; + unsigned int h; + unsigned int noc; // Количество каналов +} FI_HEADER; + +#endif diff --git a/src/fi_load.c b/src/fi_load.c new file mode 100644 index 0000000..f67f0be --- /dev/null +++ b/src/fi_load.c @@ -0,0 +1,469 @@ + +#include +#include +#include + +#include "fi_file.h" +#include "fi_load.h" +#include "image_misc.h" +#include "arrays.h" + +static size_t GetFileLength(FILE *f) +{ + size_t cur_pos, file_len; + + cur_pos = ftell(f); + fseek(f, 0, SEEK_END); + file_len = ftell(f); + fseek(f, cur_pos, SEEK_SET); + + return file_len; +} + +static bool fiDecompressBlocks(unsigned char *cb, unsigned int cb_size, image_rangeblock_type *b, unsigned int w, unsigned int h, unsigned int blocksize, unsigned int channels, unsigned int scale, unsigned int dxy_bitssize); +static unsigned int fiCoverRangeBlock(image_rangeblock_type *block_p, unsigned int blocksize, unsigned char *range_p, unsigned char *prev_data, unsigned int w, unsigned int h); + +int fiLoad(char *fname, image_uc_type *image, unsigned int scale) +{ + FILE *f; + FI_HEADER head; + size_t file_len; + unsigned int i, j, k, channels, alloc_memory, nof_blocks, blocksize, compressed_blocks_size; + unsigned char *prev_data; // Данные об изображении на предыдущей итерации + unsigned char *compressed_blocks; + unsigned int dxy_bitssize, cblock_bitssize; // Размеры dx, dy и всего блока в битах + image_rangeblock_type *blocks; + + if(scale < 1) + scale = 1; + + f = fopen(fname, "rb"); + + if(!f) + return FILOAD_CANTOPENFILE; + + file_len = GetFileLength(f); + + if(fread(&head, sizeof(FI_HEADER), 1, f) != 1) + return FILOAD_DAMAGEDFILE; + + if(head.sign != FI_SIGN) + return FILOAD_DAMAGEDFILE; + + if(head.blocksize < 2) + return FILOAD_DAMAGEDFILE; + + blocksize = head.blocksize; + + if((head.w%blocksize) != 0 || (head.h%blocksize) != 0 || head.w < (blocksize*2) || head.h < (blocksize*2)) { + return FILOAD_DAMAGEDFILE; + } + + GetDxyBitsSize(head.w, head.h, 
&dxy_bitssize); + cblock_bitssize = GetCblockBitsSize(dxy_bitssize); + + image->w = head.w*scale; + image->h = head.h*scale; + image->nof_channels = channels = head.noc; + image->alloc_memory = alloc_memory = image->w*image->h; + blocksize *= scale; + nof_blocks = image->w*image->h*image->nof_channels/(blocksize*blocksize); + + // Тут начинается код выделения памяти + compressed_blocks = malloc((cblock_bitssize*image->w*image->h*image->nof_channels/4+nof_blocks*(blocksize*blocksize-1)/3)/8+1); + printf("compressed_blocks = malloc(%u)\n", (cblock_bitssize*image->w*image->h*image->nof_channels/4+nof_blocks*(blocksize*blocksize-1)/3)/8+1); + // Из рассчёта, что все блоки 2х2 и имеют размер cblock_bitssize бит + // + добавляем дополнительные биты разбиения блоков + // Каждый блок может быть разбит на 4. Т.е. для блока размера n (если n - степень двойки) у нас есть s=1+4+16+32+...+pow(4, log2(n)-1) + // Т.е. s = 1*(1-pow(4,log2(n)))/(1-4) = (pow(2*2, log2(n))-1)/3 = (n*n-1)/3 + // Для n не в степени двойки (т.е. вида pow(2,x)*y) будем иметь s=1+4+16+32+...+pow(4, log2(n/y)) + // Т.е. s = 1*(1-pow(4,log2(n/y)))/(1-4) = (pow(2*2, log2(n/y))-1)/3 = ((n/y)*(n/y)-1)/3, что меньше чем (n*n-1)/3. + // Таким образом, достаточно рассмотреть случай, когда n в степени двойки. 
+ // В нашем случае получаем nof_blocks*(blocksize*blocksize-1)/3 бит + // + 1 байт на случай, если количество бит не кратно 8 + if(!compressed_blocks) + return FILOAD_MEMORYALLOCERROR; + + // Немного кода для чтения блоков из файла + compressed_blocks_size = file_len-sizeof(FI_HEADER); + printf("compressed_blocks_size %u\n", compressed_blocks_size); + if(fread(compressed_blocks, 1, compressed_blocks_size, f) != compressed_blocks_size) { + free(compressed_blocks); + + return FILOAD_DAMAGEDFILE; + } + fclose(f); + + // И снова начинается код выделения памяти + blocks = malloc(sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3); // (blocksize*blocksize-1)/3 - количество разбиений одного блока в худшем случае + printf("blocks = malloc(%u)\n", (unsigned int)(sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3)); + if(!blocks) { + free(compressed_blocks); + return FILOAD_MEMORYALLOCERROR; + } + SetBlocksPointers(blocks, blocksize, nof_blocks, image->w, image->h); + + prev_data = malloc(alloc_memory/4); + printf("prev_data = malloc(%u)\n", alloc_memory/4); + if(!prev_data) { + free(compressed_blocks); + free(blocks); + return FILOAD_MEMORYALLOCERROR; + } + + image->data = malloc(channels*sizeof(void *)); + printf("image->data = malloc(%u)\n", (unsigned int)(channels*sizeof(void *))); + if(!image->data) { + free(compressed_blocks); + free(blocks); + free(prev_data); + return FILOAD_MEMORYALLOCERROR; + } + + for(i = 0; i < channels; i++) { + image->data[i] = malloc(alloc_memory); + printf("image->data[%u] = malloc(%u)\n", i, alloc_memory); + if(!image->data[i]) { + unsigned int j; + + for(j = 0; j < i; j++) + free(image->data[j]); + free(image->data); + free(compressed_blocks); + free(blocks); + free(prev_data); + + return FILOAD_MEMORYALLOCERROR; + } + memset(image->data[i], 0, alloc_memory); + } + // А тут он заканчивается + + if(!fiDecompressBlocks(compressed_blocks, compressed_blocks_size, blocks, head.w, head.h, head.blocksize, head.noc, 
scale, dxy_bitssize)) { + free(compressed_blocks); + free(blocks); + free(prev_data); + for(i = 0; i < channels; i++) + free(image->data[i]); + free(image->data); + + return FILOAD_DAMAGEDFILE; + } + free(compressed_blocks); + + for(i = 0; i < channels; i++) { + unsigned char *data_p; + unsigned int nof_iterations = 0; + + data_p = image->data[i]; + + while(1) { + unsigned long long diff = 0; + image_rangeblock_type *block_p; + + block_p = blocks+i*nof_blocks/image->nof_channels; + Scale2to1(data_p, prev_data, image->w, image->h); + for(j = 0; j < image->h/blocksize; j++) { + for(k = 0; k < image->w/blocksize; k++) { + unsigned char *range_p; + + range_p = &data_p[j*blocksize*image->w+k*blocksize]; + + diff += fiCoverRangeBlock(block_p, blocksize, range_p, prev_data, image->w, image->h); + + block_p++; + } + } + + diff /= image->w*image->h; + printf("diff %llu\n", diff); + nof_iterations++; + if(nof_iterations >= 1024 || diff == 0) + break; + } + printf("\nchannel %d completed after %d iterations\n", i+1, nof_iterations); + } + + free(blocks); + free(prev_data); + + return FILOAD_OKAY; +} + +static unsigned int fiCoverRangeBlock(image_rangeblock_type *block_p, unsigned int blocksize, unsigned char *range_p, unsigned char *prev_data, unsigned int w, unsigned int h) +{ + unsigned int l, m; + unsigned int diff = 0; + + //printf("%d", block_p->tr); + if(block_p->is_divided) { + //printf("Quadtrees unimplemented in decoder!11\n"); + //range_p = &data_p[j*blocksize*image->w+k*blocksize]; + //domain_p = &prev_data[block_p->dy*image->w/2+block_p->dx]; + + diff += fiCoverRangeBlock((image_rangeblock_type *)block_p->divided_into, blocksize/2, range_p, prev_data, w, h); + diff += fiCoverRangeBlock((image_rangeblock_type *)block_p->divided_into+1, blocksize/2, range_p+blocksize/2, prev_data, w, h); + diff += fiCoverRangeBlock((image_rangeblock_type *)block_p->divided_into+2, blocksize/2, range_p+w*blocksize/2, prev_data, w, h); + diff += 
fiCoverRangeBlock((image_rangeblock_type *)block_p->divided_into+3, blocksize/2, range_p+(w+1)*blocksize/2, prev_data, w, h); + } else { + unsigned char *domain_p; + + domain_p = &prev_data[block_p->dy*w/2+block_p->dx]; + + switch(block_p->tr) { + case IMAGE_RB_TRANSFORMATION_NONE: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(*domain_p)*(int)block_p->u/32+(int)block_p->v; + + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + domain_p++; + } + + range_p += w-blocksize; + domain_p += w/2-blocksize; + } + + break; + case IMAGE_RB_TRANSFORMATION_90RIGHT: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(domain_p[(blocksize-1-m)*w/2+l])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + + break; + case IMAGE_RB_TRANSFORMATION_180RIGHT: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(domain_p[(blocksize-1-l)*w/2+blocksize-1-m])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + + break; + case IMAGE_RB_TRANSFORMATION_270RIGHT: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(domain_p[m*w/2+blocksize-1-l])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + break; + case IMAGE_RB_TRANSFORMATION_VFLIP: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + 
int loc_diff, res = (int)(domain_p[(blocksize-1-l)*w/2+m])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + break; + case IMAGE_RB_TRANSFORMATION_HFLIP: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(domain_p[l*w/2+blocksize-1-m])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + + break; + case IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(domain_p[m*w/2+l])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + + break; + case IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP: + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int loc_diff, res = (int)(domain_p[(blocksize-1-m)*w/2+blocksize-1-l])*(int)block_p->u/32+(int)block_p->v; + + if(res < 0) res = 0;if(res > 255) res = 255; + + loc_diff = (int)(*range_p)-res; + diff += loc_diff*loc_diff; + + *range_p = res; + + range_p++; + } + + range_p += w-blocksize; + } + + break; + } + } + + return diff; +} + +static bool fiDecompressOneBlock(array_type *cbarray, image_rangeblock_type *bp, unsigned int blocksize, unsigned int w, unsigned int h, unsigned int scale, unsigned int dxy_bitssize) +{ + unsigned int is_block_divided, dxy, tr, v, u; + + if(((blocksize % 2) != 1) && (blocksize > 3)) { + if(!arrayReadBits(cbarray, 1, &is_block_divided)) + return false; + } else + is_block_divided = 0; + + if(is_block_divided) { + if(!fiDecompressOneBlock(cbarray, 
(image_rangeblock_type *)bp->divided_into, blocksize/2, w, h, scale, dxy_bitssize)) + return false; + + if(!fiDecompressOneBlock(cbarray, (image_rangeblock_type *)bp->divided_into+1, blocksize/2, w, h, scale, dxy_bitssize)) + return false; + + if(!fiDecompressOneBlock(cbarray, (image_rangeblock_type *)bp->divided_into+2, blocksize/2, w, h, scale, dxy_bitssize)) + return false; + + if(!fiDecompressOneBlock(cbarray, (image_rangeblock_type *)bp->divided_into+3, blocksize/2, w, h, scale, dxy_bitssize)) + return false; + + bp->is_divided = true; + } else { + if(!arrayReadBits(cbarray, 5, &u)) + return false; + bp->u = u; + + if(bp->u > 0) { + if(!arrayReadBits(cbarray, 7, &v)) + return false; + if(v & 64) + bp->v = -1; + else + bp->v = 1; + bp->v *= (int)v&63; + bp->v = (bp->v*255)/63; + + if(!arrayReadBits(cbarray, dxy_bitssize, &dxy)) + return false; + + bp->dx = dxy%(w/2-1); + if(bp->dx > w/2-blocksize) + return false; + bp->dx *= scale; + + bp->dy = dxy/(w/2-1); + if(bp->dy > h/2-blocksize) + return false; + bp->dy *= scale; + + if(!arrayReadBits(cbarray, 3, &tr)) + return false; + bp->tr = tr; + } else { + if(!arrayReadBits(cbarray, 6, &v)) + return false; + bp->v = v&255; + bp->v = (bp->v*255)/63; + + bp->dx = 0; + bp->dy = 0; + bp->tr = 0; + } + + bp->is_divided = false; + } + + return true; +} + +static bool fiDecompressBlocks(unsigned char *cb, unsigned int cb_size, image_rangeblock_type *b, unsigned int w, unsigned int h, unsigned int blocksize, unsigned int channels, unsigned int scale, unsigned int dxy_bitssize) +{ + unsigned int i, j, k; + image_rangeblock_type *bp; + array_type cbarray; + + cbarray.bp = cbarray.bytes = cb; + cbarray.bytes_offset = cbarray.bits_offset = 0; + cbarray.bytes_size = cb_size; + + //memcpy(b, cb, nof_blocks*IMAGE_COMPRESSEDRB_SIZE); + + bp = b; + for(i = 0; i < channels; i++) { + for(j = 0; j < h/blocksize; j++) { + for(k = 0; k < w/blocksize; k++) { + if(!fiDecompressOneBlock(&cbarray, bp, blocksize, w, h, scale, dxy_bitssize)) 
+ return false; + bp++; + } + } + } + + return true; +} diff --git a/src/fi_load.h b/src/fi_load.h new file mode 100644 index 0000000..f1983f9 --- /dev/null +++ b/src/fi_load.h @@ -0,0 +1,14 @@ + +#ifndef _FI_LOAD_H +#define _FI_LOAD_H + +#include "image.h" + +#define FILOAD_OKAY 0 +#define FILOAD_CANTOPENFILE 1 +#define FILOAD_DAMAGEDFILE 2 +#define FILOAD_MEMORYALLOCERROR 3 + +extern int fiLoad(char *fname, image_uc_type *image, unsigned int scale); + +#endif diff --git a/src/fi_save.c b/src/fi_save.c new file mode 100644 index 0000000..7d564c6 --- /dev/null +++ b/src/fi_save.c @@ -0,0 +1,134 @@ + +#include +#include +#include +#include + +#ifdef I_USE_MPI +#include +#endif + +#include "arrays.h" +#include "fi_file.h" +#include "fi_save.h" +#include "fi_save_blocks_compression.h" +#include "fi_save_blocks_search.h" +#include "image_misc.h" + +static int fiConvertImage(image_uc_type *image, fi_compresseddata_type *cdat, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff); +static int fiSaveCompressedToFile(char *fname, fi_compresseddata_type *cdat); + +int fiSave(char *fname, image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff) +{ + int result; + fi_compresseddata_type cdat; + clock_t compression_time; +#ifdef I_USE_MPI + int my_rank; +#endif + + compression_time = clock(); + result = fiConvertImage(image, &cdat, blocksize, nof_threads, worst_diff); + compression_time = clock()-compression_time; + printf("compression time %d ms\n", (int)compression_time); + + if(result != FISAVE_OKAY) + return result; + +#ifdef I_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + + if(my_rank == 0) + return fiSaveCompressedToFile(fname, &cdat); + else + return FISAVE_OKAY; +#else + return fiSaveCompressedToFile(fname, &cdat); +#endif +} + +static int fiConvertImage(image_uc_type *image, fi_compresseddata_type *cdat, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff) +{ + FI_HEADER head; + 
unsigned int result; + fi_data_type data; +#ifdef I_USE_MPI + int my_rank, nof_procs; +#endif + + result = fiFindBestDomainBlocks(image, blocksize, nof_threads, worst_diff, &data); + if(result != FISAVE_OKAY) + return result; + +#ifdef I_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nof_procs); + if(nof_procs > 1) { + image_rangeblock_type *resulted_blocks = 0; + + if(my_rank == 0) { + resulted_blocks = malloc(sizeof(image_rangeblock_type)*data.nof_blocks*(blocksize*blocksize-1)/3); // Почему такой размер - см. fi_save_blocks_search.c + + if(!resulted_blocks) { + result = FISAVE_MEMORYALLOCERROR; + goto EXIT_STATE; + } + } + + MPI_Barrier(MPI_COMM_WORLD); + + // Склейка блоков. Почему такой размер - см. fi_save_blocks_search.c + MPI_Reduce(data.blocks, resulted_blocks, sizeof(image_rangeblock_type)*data.nof_blocks*(blocksize*blocksize-1)/3, MPI_BYTE, MPI_BOR, 0, MPI_COMM_WORLD); + + if(my_rank == 0) { + free(data.blocks); + data.blocks = resulted_blocks; + + // Пересчитываем указатели, так как при вызове MPI_Reduce + SetBlocksPointers(data.blocks, blocksize, data.nof_blocks, image->w, image->h); + } + } + + if(my_rank > 0) { + cdat->len = 0; + result = FISAVE_OKAY; + + goto EXIT_STATE; + } +#endif + + // Сжимаем полученные блоки для записи в файл + head.sign = FI_SIGN; + head.blocksize = blocksize; + head.noc = image->nof_channels; + head.w = image->w; + head.h = image->h; + result = fiCompressBlocksAndAddHeader(&head ,cdat, data.blocks, data.nof_blocks); + +#ifdef I_USE_MPI +EXIT_STATE: +#endif + + free(data.blocks); + + return result; +} + +static int fiSaveCompressedToFile(char *fname, fi_compresseddata_type *cdat) +{ + FILE *f; + + f = fopen(fname, "wb"); + if(!f) { + free(cdat->data); + return FISAVE_CANTOPENFILE; + } + + fwrite(cdat->data, 1, cdat->len, f); + + fclose(f); + free(cdat->data); + cdat->len = 0; + + return FISAVE_OKAY; +} diff --git a/src/fi_save.h b/src/fi_save.h new file mode 100644 index 0000000..0f44d51 
--- /dev/null +++ b/src/fi_save.h @@ -0,0 +1,15 @@ + +#ifndef _FI_SAVE_H +#define _FI_SAVE_H + +#include "image.h" + +#define FISAVE_OKAY 0 +#define FISAVE_CANTOPENFILE 1 +#define FISAVE_WRONGIMAGESIZE 2 +#define FISAVE_DAMAGEDIMAGESTRUCT 3 +#define FISAVE_MEMORYALLOCERROR 4 + +extern int fiSave(char *fname, image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff); + +#endif diff --git a/src/fi_save_blocks_compression.c b/src/fi_save_blocks_compression.c new file mode 100644 index 0000000..3589a0f --- /dev/null +++ b/src/fi_save_blocks_compression.c @@ -0,0 +1,111 @@ + +#include +#include +#include + +#include "arrays.h" +#include "fi_file.h" +#include "fi_save.h" +#include "image_misc.h" + +#include "fi_save_blocks_compression.h" + +static bool fiCompressOneBlock(array_type *cbarray, image_rangeblock_type *b, unsigned int blocksize, unsigned int w, unsigned int h, unsigned int dxy_bitssize); + +static unsigned int g_statistics_nofdivisions, g_statistics_nofblocks; + +int fiCompressBlocksAndAddHeader(FI_HEADER *head ,fi_compresseddata_type *cdat, image_rangeblock_type *blocks, unsigned int nof_blocks) +{ + unsigned int i; + unsigned int dxy_bitssize, cblock_bitssize; // Размеры dx, dy и всего блока в битах + array_type cbarray; + + GetDxyBitsSize(head->w, head->h, &dxy_bitssize); + cblock_bitssize = GetCblockBitsSize(dxy_bitssize); + + cbarray.bytes_offset = cbarray.bits_offset = 0; + cbarray.bytes_size = (cblock_bitssize*head->w*head->h*head->noc/4+nof_blocks*(head->blocksize*head->blocksize-1)/3)/8+1; + // Из рассчёта, что все блоки 2х2 и имеют размер cblock_bitssize бит + // + добавляем дополнительные биты разбиения блоков (см. 
fi_save_blocks_search.c) + // + 1 байт на случай, если количество бит не кратно 8 + + cdat->data = malloc(sizeof(FI_HEADER)+cbarray.bytes_size); + if(!cdat->data) { + return FISAVE_MEMORYALLOCERROR; + } + memcpy(cdat->data, head, sizeof(FI_HEADER)); + + cbarray.bp = cbarray.bytes = cdat->data+sizeof(FI_HEADER); + memset(cbarray.bytes, 0, cbarray.bytes_size); + + g_statistics_nofdivisions = 0; + g_statistics_nofblocks = 0; + + for(i = 0; i < nof_blocks; i++) { + if(!fiCompressOneBlock(&cbarray, blocks+i, head->blocksize, head->w, head->h, dxy_bitssize)) { + free(cdat->data); + return FISAVE_MEMORYALLOCERROR; + } + } + + cdat->len = sizeof(FI_HEADER)+cbarray.bytes_offset+((cbarray.bits_offset>0)?1:0); + + printf("statistics:\n\t%d blocks ~ %d bytes\n", g_statistics_nofblocks, (g_statistics_nofblocks*(cblock_bitssize+1)+7)/8); + printf("\t%d divisions - %d bytes\n", g_statistics_nofdivisions, (g_statistics_nofdivisions+7)/8); + printf("\ttotal ~ %d bytes\n", (g_statistics_nofblocks*(cblock_bitssize+1)+g_statistics_nofdivisions+7)/8); + printf("\tcbarray bytes_offset - %d bits_offset - %d\n", cbarray.bytes_offset, cbarray.bits_offset); + printf("\tcdat array size - %d bytes\n", (int)cdat->len); + + return FISAVE_OKAY; +} + +static bool fiCompressOneBlock(array_type *cbarray, image_rangeblock_type *b, unsigned int blocksize, unsigned int w, unsigned int h, unsigned int dxy_bitssize) +{ + if(b->is_divided) { + if(!arrayWriteBits(cbarray, 1, 1)) + return false; + + if(!fiCompressOneBlock(cbarray, (image_rangeblock_type *)b->divided_into, blocksize/2, w, h, dxy_bitssize)) + return false; + if(!fiCompressOneBlock(cbarray, (image_rangeblock_type *)b->divided_into+1, blocksize/2, w, h, dxy_bitssize)) + return false; + if(!fiCompressOneBlock(cbarray, (image_rangeblock_type *)b->divided_into+2, blocksize/2, w, h, dxy_bitssize)) + return false; + if(!fiCompressOneBlock(cbarray, (image_rangeblock_type *)b->divided_into+3, blocksize/2, w, h, dxy_bitssize)) + return false; + + 
g_statistics_nofdivisions++; + + return true; + } else { + if(((blocksize % 2) != 1) && (blocksize > 3)) { + if(!arrayWriteBits(cbarray, 1, 0)) + return false; + } + + if(!arrayWriteBits(cbarray, 5, (unsigned int)b->u)) + return false; + + if(b->u > 0) { + unsigned int dxy; + + if(!arrayWriteBits(cbarray, 7, (unsigned int)((b->v<0)?64-b->v:b->v))) + return false; + + dxy = (w/2-1)*((unsigned int)b->dy)+(unsigned int)b->dx; + + if(!arrayWriteBits(cbarray, dxy_bitssize, dxy)) + return false; + + if(!arrayWriteBits(cbarray, 3, (unsigned int)b->tr)) + return false; + } else { + if(!arrayWriteBits(cbarray, 6, (unsigned int)(b->v&63))) + return false; + } + + g_statistics_nofblocks++; + + return true; + } +} diff --git a/src/fi_save_blocks_compression.h b/src/fi_save_blocks_compression.h new file mode 100644 index 0000000..3dacbb4 --- /dev/null +++ b/src/fi_save_blocks_compression.h @@ -0,0 +1,12 @@ + +#ifndef _FI_SAVE_BLOCKS_COMPRESSION_H +#define _FI_SAVE_BLOCKS_COMPRESSION_H + +typedef struct { + unsigned char *data; + unsigned int len; +} fi_compresseddata_type; + +extern int fiCompressBlocksAndAddHeader(FI_HEADER *head ,fi_compresseddata_type *cdat, image_rangeblock_type *blocks, unsigned int nof_blocks); + +#endif diff --git a/src/fi_save_blocks_search.c b/src/fi_save_blocks_search.c new file mode 100644 index 0000000..0f03dc0 --- /dev/null +++ b/src/fi_save_blocks_search.c @@ -0,0 +1,171 @@ + +#include +#include +#include +#ifdef WIN32 + #include + #include +#else + #include +#endif + +#ifdef I_USE_MPI +#include +#endif + +#include "image_misc.h" +#include "fi_save.h" +#include "fi_save_blocks_search.h" +#include "fi_save_blocks_search_workers.h" + +int fiFindBestDomainBlocks(image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff, fi_data_type *data) +{ + unsigned int i, nof_blocks; + unsigned char *scaled_data; // Данные об изображении, уменьшенном в 2 раза (для поиска доменных блоков) (Данные идут последовательно для 
каждого из каналов) + image_rangeblock_type *blocks; + fi_worker_arg_type *worker_args; +#ifdef WIN32 + HANDLE *worker_thread_handles; +#else + pthread_t *worker_thread_handles; +#endif +#ifdef I_USE_MPI + int my_rank, nof_procs; +#endif + + if(blocksize < 2 || blocksize > IMAGE_MAXBLOCKSIZE) { + return FISAVE_WRONGIMAGESIZE; + } + + if((image->w%blocksize) != 0 || (image->h%blocksize) != 0 || image->w < (blocksize*2) || image->h < (blocksize*2)) { + return FISAVE_WRONGIMAGESIZE; + } + + if(image->w*image->h != image->alloc_memory) + return FISAVE_DAMAGEDIMAGESTRUCT; + +#ifdef I_USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + MPI_Comm_size(MPI_COMM_WORLD, &nof_procs); +#endif + + memset(data, 0, sizeof(fi_data_type)); + + nof_blocks = image->w*image->h*image->nof_channels/(blocksize*blocksize); + + // Тут начинается код выделения памяти + blocks = malloc(sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3); + printf("blocks = malloc(%u), sizeof(image_rangeblock_type)=%u\n", (unsigned int)(sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3), (unsigned int)(sizeof(image_rangeblock_type))); + // Каждый блок может быть разбит на 4. Т.е. для блока размера n (если n - степень двойки) у нас есть s=1+4+16+32+...+pow(4, log2(n)-1) + // Т.е. s = 1*(1-pow(4,log2(n)))/(1-4) = (pow(2*2, log2(n))-1)/3 = (n*n-1)/3 + // Для n не в степени двойки (т.е. вида pow(2,x)*y) будем иметь s=1+4+16+32+...+pow(4, log2(n/y)) + // Т.е. s = 1*(1-pow(4,log2(n/y)))/(1-4) = (pow(2*2, log2(n/y))-1)/3 = ((n/y)*(n/y)-1)/3, что меньше чем (n*n-1)/3. + // Таким образом, достаточно рассмотреть случай, когда n в степени двойки. 
+ // В нашем случае получаем s=(blocksize*blocksize-1)/3 + if(!blocks) + return FISAVE_MEMORYALLOCERROR; + memset(blocks, 0, sizeof(image_rangeblock_type)*nof_blocks*(blocksize*blocksize-1)/3); + SetBlocksPointers(blocks, blocksize, nof_blocks, image->w, image->h); + + scaled_data = malloc(image->nof_channels*image->alloc_memory/4); + printf("scaled_data = malloc(%u)\n", image->nof_channels*image->alloc_memory/4); + if(!scaled_data) { + free(blocks); + return FISAVE_MEMORYALLOCERROR; + } + for(i = 0; i < image->nof_channels; i++) + Scale2to1(image->data[i], &scaled_data[i*image->alloc_memory/4], image->w, image->h); + + worker_args = malloc(nof_threads*sizeof(fi_worker_arg_type)); + if(!worker_args) { + free(scaled_data); + free(blocks); + return FISAVE_MEMORYALLOCERROR; + } +#ifdef WIN32 + worker_thread_handles = malloc(nof_threads*sizeof(HANDLE)); +#else + worker_thread_handles = malloc(nof_threads*sizeof(pthread_t)); +#endif + if(!worker_thread_handles) { + free(worker_args); + free(scaled_data); + free(blocks); + return FISAVE_MEMORYALLOCERROR; + } + // А тут он заканчивается + + // Заполняем структуру 1-го "рабочего" + worker_args[0].scaled_data = scaled_data; + worker_args[0].data = image->data; + worker_args[0].block_p = blocks; + worker_args[0].nof_blocks_per_channel = nof_blocks/image->nof_channels; + worker_args[0].w = image->w; + worker_args[0].h = image->h; + worker_args[0].nof_channels = image->nof_channels; + worker_args[0].alloc_memory = image->alloc_memory; + worker_args[0].blocksize = blocksize; +#ifdef I_USE_MPI + worker_args[0].worker_start = nof_threads*my_rank; + worker_args[0].worker_step = nof_threads*nof_procs; +#else + worker_args[0].worker_start = 0; + worker_args[0].worker_step = nof_threads; +#endif + worker_args[0].worst_diff = worst_diff; + for(i = 1; i < nof_threads; i++) { // Заполняем структуры остальных + memcpy(worker_args+i, worker_args, sizeof(fi_worker_arg_type)); + worker_args[i].worker_start = worker_args[0].worker_start+i; + 
} + +#ifdef WIN32 + // Создаём рабочие потоки + for(i = 0; i < nof_threads; i++) { + + worker_thread_handles[i] = (HANDLE)_beginthreadex(NULL, 0, fiWorkerFunc, worker_args+i, CREATE_SUSPENDED, NULL);//CreateThread(NULL, 0, fiWorkerFunc, worker_args+i, 0, NULL); + + if(!worker_thread_handles[i]) { + unsigned int j; + + for(j = 0; j < i; j++) { + CloseHandle(worker_thread_handles[j]); + } + + free(worker_thread_handles); + free(worker_args); + free(scaled_data); + free(blocks); + return FISAVE_MEMORYALLOCERROR; + } + //fiWorkerFunc(worker_args+i); + + } + + // Вызываем рабочие потоки + for(i = 0; i < nof_threads; i++) + ResumeThread(worker_thread_handles[i]); +#else + // Создаём рабочие потоки + for(i = 0; i < nof_threads; i++) + pthread_create(worker_thread_handles+i, NULL, fiWorkerFunc, worker_args+i); +#endif + +#ifdef WIN32 + WaitForMultipleObjects(nof_threads, worker_thread_handles, TRUE, INFINITE); + + for(i = 0; i < nof_threads; i++) + CloseHandle(worker_thread_handles[i]); +#else + for(i = 0; i < nof_threads; i++) + pthread_join(worker_thread_handles[i], 0); +#endif + + free(scaled_data); + free(worker_args); + free(worker_thread_handles); + + data->blocks = blocks; + data->nof_blocks = nof_blocks; + + return FISAVE_OKAY; +} diff --git a/src/fi_save_blocks_search.h b/src/fi_save_blocks_search.h new file mode 100644 index 0000000..0dabfd6 --- /dev/null +++ b/src/fi_save_blocks_search.h @@ -0,0 +1,14 @@ + +#ifndef _FI_SAVE_BLOCKS_SEARCH_H +#define _FI_SAVE_BLOCKS_SEARCH_H + +#include "image.h" + +typedef struct { + image_rangeblock_type *blocks; + unsigned int nof_blocks; +} fi_data_type; + +extern int fiFindBestDomainBlocks(image_uc_type *image, unsigned int blocksize, unsigned int nof_threads, unsigned int worst_diff, fi_data_type *data); + +#endif diff --git a/src/fi_save_blocks_search_workers.h b/src/fi_save_blocks_search_workers.h new file mode 100644 index 0000000..65bd0c9 --- /dev/null +++ b/src/fi_save_blocks_search_workers.h @@ -0,0 +1,30 @@ + +#ifndef 
// Gathers the statistics of one range/domain block pair for the identity
// transformation (range pixels are read in their natural order).
//
// Outputs (all overwritten):
//   range_sum   - sum of the range block pixels
//   domain_sum  - sum of the domain block pixels
//   rd_sum      - sum of range pixel * domain pixel
//   domain_disp - blocksize^2 * sum(domain^2) - domain_sum^2
//   range_block - the blocksize*blocksize range pixels, row by row
//
// (rx, ry) is the range block origin in data_cuda (row length w);
// (dx, dy) is the domain block origin in scaled_data_cuda (indexed with
// row*w/2). The parameter h is not used.
__device__ void fiCalcReversedTransformationNone(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block,
	unsigned int dx, unsigned int dy,
	unsigned int rx, unsigned int ry, unsigned int blocksize,
	unsigned int w, unsigned int h,
	int *scaled_data_cuda,
	int *data_cuda
	)
{
	long long sum_r = 0, sum_d = 0, sum_rd = 0, sum_dd = 0;

	for(unsigned int row = 0; row < blocksize; row++) {
		// Row start offsets in the full-size and in the scaled image
		const unsigned int range_row = (ry+row)*w+rx;
		const unsigned int domain_row = (dy+row)*w/2+dx;
		int *out_row = range_block+row*blocksize;

		for(unsigned int col = 0; col < blocksize; col++) {
			const int range_px = data_cuda[range_row+col];
			const int domain_px = scaled_data_cuda[domain_row+col];

			out_row[col] = range_px;

			sum_r += range_px;
			sum_d += domain_px;
			sum_dd += domain_px*domain_px;
			sum_rd += range_px*domain_px;
		}
	}

	range_sum = sum_r;
	domain_sum = sum_d;
	rd_sum = sum_rd;
	domain_disp = sum_dd*(int)(blocksize*blocksize)-sum_d*sum_d;
}
int *range_block, + unsigned int dx, unsigned int dy, + unsigned int rx, unsigned int ry, unsigned int blocksize, + unsigned int w, unsigned int h, + int *scaled_data_cuda, + int *data_cuda + ) +{ + unsigned int l, m; + + rd_sum = domain_disp = domain_sum = range_sum = 0; + + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = data_cuda[(ry+blocksize-1-m)*w+rx+l]; + int bd = scaled_data_cuda[(dy+l)*w/2+dx+m]; + + range_block[l*blocksize+m] = br; + + range_sum += range_block[l*blocksize+m]; + + domain_sum += bd; + domain_disp += bd*bd; + rd_sum += br*bd; + } + } + + domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; +} + +__device__ void fiCalcReversedTransformationVFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block, + unsigned int dx, unsigned int dy, + unsigned int rx, unsigned int ry, unsigned int blocksize, + unsigned int w, unsigned int h, + int *scaled_data_cuda, + int *data_cuda + ) +{ + unsigned int l, m; + + rd_sum = domain_disp = domain_sum = range_sum = 0; + + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = data_cuda[(ry+blocksize-1-l)*w+rx+m]; + int bd = scaled_data_cuda[(dy+l)*w/2+dx+m]; + + range_block[l*blocksize+m] = br; + + range_sum += range_block[l*blocksize+m]; + + domain_sum += bd; + domain_disp += bd*bd; + rd_sum += br*bd; + } + } + + domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; +} + +__device__ void fiCalcReversedTransformationHFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block, + unsigned int dx, unsigned int dy, + unsigned int rx, unsigned int ry, unsigned int blocksize, + unsigned int w, unsigned int h, + int *scaled_data_cuda, + int *data_cuda + ) +{ + unsigned int l, m; + + rd_sum = domain_disp = domain_sum = range_sum = 0; + + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = 
data_cuda[(ry+l)*w+rx+blocksize-1-m]; + int bd = scaled_data_cuda[(dy+l)*w/2+dx+m]; + + range_block[l*blocksize+m] = br; + + range_sum += range_block[l*blocksize+m]; + + domain_sum += bd; + domain_disp += bd*bd; + rd_sum += br*bd; + } + } + + domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; +} + +__device__ void fiCalcReversedTransformationMainDiagFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block, + unsigned int dx, unsigned int dy, + unsigned int rx, unsigned int ry, unsigned int blocksize, + unsigned int w, unsigned int h, + int *scaled_data_cuda, + int *data_cuda + ) +{ + unsigned int l, m; + + rd_sum = domain_disp = domain_sum = range_sum = 0; + + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = data_cuda[(ry+m)*w+rx+l]; + int bd = scaled_data_cuda[(dy+l)*w/2+dx+m]; + + range_block[l*blocksize+m] = br; + + range_sum += range_block[l*blocksize+m]; + + domain_sum += bd; + domain_disp += bd*bd; + rd_sum += br*bd; + } + } + + domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; +} + +__device__ void fiCalcReversedTransformationAntiDiagFlip(long long &range_sum, long long &domain_sum, long long &rd_sum, long long &domain_disp, int *range_block, + unsigned int dx, unsigned int dy, + unsigned int rx, unsigned int ry, unsigned int blocksize, + unsigned int w, unsigned int h, + int *scaled_data_cuda, + int *data_cuda + ) +{ + unsigned int l, m; + + rd_sum = domain_disp = domain_sum = range_sum = 0; + + for(l = 0; l < blocksize; l++) { + for(m = 0; m < blocksize; m++) { + int br = data_cuda[(ry+blocksize-1-m)*w+rx+blocksize-1-l]; + int bd = scaled_data_cuda[(dy+l)*w/2+dx+m]; + + range_block[l*blocksize+m] = br; + + range_sum += range_block[l*blocksize+m]; + + domain_sum += bd; + domain_disp += bd*bd; + rd_sum += br*bd; + } + } + + domain_disp = domain_disp*(int)(blocksize*blocksize)-domain_sum*domain_sum; +} + +__global__ void 
fiFindBestDomainBlockKernel(unsigned int rx, unsigned int ry, + kernel_block_type *kernel_blocks_cuda, unsigned int blocksize, + unsigned int w, unsigned int h, + int *scaled_data_cuda, + int *data_cuda + ) +{ + /*__shared__ */int range_block[IMAGE_MAXCUDABLOCKSIZE*IMAGE_MAXCUDABLOCKSIZE]; + long long range_sum, domain_sum, rd_sum; // Сумма пикселей рангового блока, доменного блока, пикселя рангового на пиксель доменного блока + long long domain_disp; + unsigned int diff; + int u; int v; + unsigned int mono_diff; + int mono_u; int mono_v; + unsigned int local_diff; + __shared__ unsigned int best_diff; + int local_u; int local_v; + unsigned int dx, dy; // Координаты доменного блока + unsigned int tr; // Трансформация, вычисляемая в текущем потоке + kernel_block_type *kernel_block_cuda; // Текущий доменный блок для текущего блока cuda + + unsigned int l, m; + + atomicExch(&best_diff, 255*255*blocksize*blocksize*(blockDim.x*blockDim.y)+(blockDim.x*blockDim.y)); + __syncthreads(); + + kernel_block_cuda = kernel_blocks_cuda+blockIdx.z*gridDim.y*gridDim.x+blockIdx.y*gridDim.x+blockIdx.x; + + dx = (blockIdx.x*blockDim.x+threadIdx.x)%(w/2-blocksize+1); + dy = (blockIdx.y*blockDim.y+threadIdx.y)%(h/2-blocksize+1); + tr = blockIdx.z+threadIdx.z; + + switch(tr) { + case IMAGE_RB_TRANSFORMATION_NONE: + fiCalcReversedTransformationNone(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_90RIGHT: + fiCalcReversedTransformation90Right(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_180RIGHT: + fiCalcReversedTransformation180Right(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_270RIGHT: + 
fiCalcReversedTransformation270Right(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_VFLIP: + fiCalcReversedTransformationVFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_HFLIP: + fiCalcReversedTransformationHFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP: + fiCalcReversedTransformationMainDiagFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + case IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP: + fiCalcReversedTransformationAntiDiagFlip(range_sum, domain_sum, rd_sum, domain_disp, range_block, + dx, dy, + rx, ry, blocksize, + w, h, + scaled_data_cuda, + data_cuda + ); + break; + } + + mono_u = 0; mono_v = (int)( ((range_sum/(blocksize*blocksize))*63+128)/255 ); // Значения на случай, если совсем ни один блок не подойдёт + + if(domain_disp) { + u = (int)( 32*((int)(blocksize*blocksize)*rd_sum-range_sum*domain_sum)/domain_disp ); // яркость в интервале [0, 1.0), т.е. 
// Finds the best domain block for the range block at (rx, ry) and stores it
// in *block_p. The kernel writes one candidate per CUDA block into
// kernel_blocks_cuda; the candidates are copied back into kernel_blocks and
// the one with the smallest diff wins.
//
// If the winner is still worse than worst_diff and the block can be halved
// (blocksize even and > 2), the four quadrants are searched recursively with
// worst_diff/4 and block_p is marked as divided.
//
// threads/blocks are the kernel launch dimensions; kernel_blocks and
// kernel_blocks_cuda must each hold blocks.x*blocks.y*blocks.z entries.
static void fiFindBestDomainBlock(unsigned int rx, unsigned int ry,
	image_rangeblock_type *block_p, kernel_block_type *kernel_blocks_cuda, kernel_block_type *kernel_blocks, unsigned int blocksize,
	unsigned int w, unsigned int h,
	dim3 threads, dim3 blocks,
	int *scaled_data_cuda,
	int *data_cuda,
	unsigned int worst_diff
	)
{
	unsigned int best_diff, i;
	kernel_block_type *kernel_block;
	size_t nof_kernel_blocks = (size_t)blocks.x*blocks.y*blocks.z;
	cudaError_t err;

	memset(kernel_blocks, 0, sizeof(kernel_block_type)*nof_kernel_blocks);
	cudaMemcpy(kernel_blocks_cuda, kernel_blocks, sizeof(kernel_block_type)*nof_kernel_blocks, cudaMemcpyHostToDevice);

	fiFindBestDomainBlockKernel<<<blocks, threads>>>(rx, ry, kernel_blocks_cuda, blocksize, w, h, scaled_data_cuda, data_cuda);

	// A failed launch would otherwise go unnoticed: the zeroed candidates
	// would be copied back and used as if they were results.
	err = cudaGetLastError();
	if(err != cudaSuccess)
		printf("fiFindBestDomainBlockKernel launch failed: %s\n", cudaGetErrorString(err));

	cudaMemcpy(kernel_blocks, kernel_blocks_cuda, sizeof(kernel_block_type)*nof_kernel_blocks, cudaMemcpyDeviceToHost);

	// Pick the candidate with the smallest diff (the first one wins ties)
	best_diff = kernel_blocks->diff;
	kernel_block = kernel_blocks;
	for(i = 0; i < nof_kernel_blocks; i++) {
		if(kernel_blocks[i].diff < best_diff) {
			best_diff = kernel_blocks[i].diff;
			kernel_block = kernel_blocks+i;
		}
	}

	block_p->dx = kernel_block->dx;
	block_p->dy = kernel_block->dy;
	block_p->u = kernel_block->u;
	block_p->v = kernel_block->v;
	block_p->tr = kernel_block->tr;

	if(best_diff > worst_diff && blocksize > 2 && blocksize%2 == 0) {
		const unsigned int half = blocksize/2;
		image_rangeblock_type *children = (image_rangeblock_type *)block_p->divided_into;
		unsigned int q;

		printf("best_diff %u/%u (=rms^2*blocksize^2), divided block size %u to %u\n", best_diff, worst_diff, blocksize, half);

		// Quadrant order: top-left, top-right, bottom-left, bottom-right.
		// The allowed difference is divided by 4 because each new block
		// has a quarter of the pixels.
		for(q = 0; q < 4; q++) {
			fiFindBestDomainBlock(rx+(q&1)*half, ry+(q>>1)*half,
				children+q, kernel_blocks_cuda, kernel_blocks,
				half,
				w, h, threads, blocks, scaled_data_cuda, data_cuda,
				worst_diff/4);
		}

		block_p->is_divided = true;
	}
}

// Worker entry point (thread function). `arg` points to a fi_worker_arg_type.
// Processes the range blocks worker_start, worker_start+worker_step, ... of
// the whole image on the GPU and stores the results through block_p.
// Always returns 0; failures are reported with printf only.
#ifdef WIN32
unsigned int __stdcall fiWorkerFunc(void *arg)
#else
void *fiWorkerFunc(void *arg)
#endif
{
	// Everything is declared up front so that "goto cleanup" never jumps
	// over an initialisation.
	fi_worker_arg_type *wa = (fi_worker_arg_type *)arg;

	unsigned char *scaled_data = wa->scaled_data;
	unsigned char **data = wa->data;
	image_rangeblock_type *block_start_p = wa->block_p, *block_p;
	unsigned int nof_blocks_per_channel = wa->nof_blocks_per_channel;
	unsigned int w = wa->w;
	unsigned int h = wa->h;
	unsigned int nof_channels = wa->nof_channels;
	unsigned int alloc_memory = wa->alloc_memory;
	unsigned int blocksize = wa->blocksize;
	unsigned int worker_start = wa->worker_start;
	unsigned int worker_step = wa->worker_step;
	unsigned int worst_diff = wa->worst_diff;

	unsigned int nof_blocks_per_image = nof_blocks_per_channel*nof_channels;

	unsigned int i;
	unsigned int rx, ry, channel, copied_channel;
	unsigned int side;

	int *data_in_int = 0;                      // host staging buffer (bytes widened to int)
	int *scaled_data_cuda = 0;                 // device copy of one scaled channel
	int *data_cuda = 0;                        // device copy of one full-size channel
	kernel_block_type *kernel_blocks_cuda = 0; // per-CUDA-block candidates, device side
	kernel_block_type *kernel_blocks = 0;      // per-CUDA-block candidates, host side
	size_t kernel_blocks_bytes = 0;

	cudaEvent_t start, stop;
	int have_start = 0, have_stop = 0;
	int search_done = 0;
	double cuda_working_time = 0;

	cudaDeviceProp device_prop;
	dim3 threads, blocks;

	int number_of_working_threads;
	int number_of_working_threads_multiplier = 1;
	int threads_z = 1;

	ShowMeSomeCUDAInfo();

	if(blocksize > IMAGE_MAXCUDABLOCKSIZE) {
		printf("Sorry, but IMAGE_MAXCUDABLOCKSIZE=%d < blocksize=%u\n", IMAGE_MAXCUDABLOCKSIZE, blocksize);

		return 0;
	}

	printf("cudaMalloc(&scaled_data_cuda, alloc_memory/4=%llu)\n", (unsigned long long)(sizeof(int)*alloc_memory/4));
	if(cudaMalloc(&scaled_data_cuda, sizeof(int)*alloc_memory/4) != cudaSuccess) {
		printf("Can\'t allocate memory for scaled_data_cuda\n");
		scaled_data_cuda = 0;
		goto cleanup;
	}
	printf("cudaMalloc(&data_cuda, alloc_memory=%llu)\n", (unsigned long long)(sizeof(int)*alloc_memory));
	if(cudaMalloc(&data_cuda, sizeof(int)*alloc_memory) != cudaSuccess) {
		printf("Can\'t allocate memory for data_cuda_p\n");
		data_cuda = 0;
		goto cleanup;
	}

	if(cudaEventCreate(&start) != cudaSuccess) {
		printf("Can\'t create event \"start\"\n");
		goto cleanup;
	}
	have_start = 1;
	if(cudaEventCreate(&stop) != cudaSuccess) {
		printf("Can\'t create event \"stop\"\n");
		goto cleanup;
	}
	have_stop = 1;

	if(cudaGetDeviceProperties(&device_prop, 0) != cudaSuccess) {
		printf("Can\'t get properties of cuda device 0\n");
		goto cleanup;
	}

	// Thread geometry: start from half of the per-block thread limit
	// ("magic" in the original) and halve it until it is a perfect square;
	// every halving doubles the x dimension instead.
	number_of_working_threads = device_prop.maxThreadsPerBlock/2;

	// If the grid cannot be 8 deep in z, the 8 transformations are spread
	// over threadIdx.z instead of blockIdx.z.
	if(device_prop.maxGridSize[2] < 8) {
		number_of_working_threads /= 8;
		threads_z = 8;
	}

	side = (unsigned int)(sqrt((float)number_of_working_threads)+0.5);
	while(side*side != (unsigned int)number_of_working_threads) {
		number_of_working_threads /= 2;
		number_of_working_threads_multiplier *= 2;
		side = (unsigned int)(sqrt((float)number_of_working_threads)+0.5);
	}

	threads = dim3(side*number_of_working_threads_multiplier, side, threads_z);
	if(threads.x == 0 || threads.y == 0) {
		// Would divide by zero in the grid size computation below
		printf("Can\'t choose thread dimensions for %d threads per block\n", device_prop.maxThreadsPerBlock);
		goto cleanup;
	}
	blocks = dim3(((w/2-blocksize+1)+threads.x-1)/threads.x, ((h/2-blocksize+1)+threads.y-1)/threads.y, 8/threads.z);

	printf("threads(%u,%u,%u) blocks(%u,%u,%u)\n", threads.x, threads.y, threads.z, blocks.x, blocks.y, blocks.z);

	if((int)(threads.x*threads.y*threads.z) > device_prop.maxThreadsPerBlock) {
		printf("threads.x*threads.y*threads.z is greater than %d, exiting...\n", device_prop.maxThreadsPerBlock);
		goto cleanup;
	}

	kernel_blocks_bytes = sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z;

	printf("cudaMalloc(&kernel_blocks_cuda, sizeof(image_rangeblock_type)*blocks.x*blocks.y*blocks.z=%llu)\n", (unsigned long long)kernel_blocks_bytes);
	if(cudaMalloc(&kernel_blocks_cuda, kernel_blocks_bytes) != cudaSuccess) {
		printf("Can\'t allocate memory for block_start_cuda_p\n");
		kernel_blocks_cuda = 0;
		goto cleanup;
	}

	printf("kernel_blocks = malloc(sizeof(kernel_block_type)*blocks.x*blocks.y*blocks.z=%llu), sizeof(kernel_block_type)=%llu\n", (unsigned long long)kernel_blocks_bytes, (unsigned long long)sizeof(kernel_block_type));
	kernel_blocks = (kernel_block_type *)malloc(kernel_blocks_bytes);
	if(kernel_blocks == 0) {
		printf("Can\'t allocate memory for block_start_cuda_p\n");
		goto cleanup;
	}

	copied_channel = nof_channels+1; // no channel has been uploaded yet
	block_p = block_start_p+worker_start;

	for(i = worker_start; i < nof_blocks_per_image; i += worker_step) {
		float one_block_time;

		channel = i / nof_blocks_per_channel;
		rx = ((i % nof_blocks_per_channel) % (w/blocksize)) * blocksize;
		ry = ((i % nof_blocks_per_channel) / (w/blocksize)) * blocksize;

		// Upload the channel data only when the channel changes
		if(copied_channel != channel) {
			unsigned int j;

			if(!data_in_int)
				data_in_int = (int *)malloc(sizeof(int)*alloc_memory);

			if(!data_in_int) {
				printf("Can\'t allocate memory for data_in_int\n");

				break;
			}

			copied_channel = channel;

			for(j = 0; j < alloc_memory/4; j++) {
				data_in_int[j] = scaled_data[channel*alloc_memory/4+j];
			}
			cudaMemcpy(scaled_data_cuda, data_in_int, sizeof(int)*alloc_memory/4, cudaMemcpyHostToDevice);

			for(j = 0; j < alloc_memory; j++) {
				data_in_int[j] = data[channel][j];
			}
			cudaMemcpy(data_cuda, data_in_int, sizeof(int)*alloc_memory, cudaMemcpyHostToDevice);
		}

		// Progress report every 10th range block handled by this worker
		if(((i-worker_start)/worker_step)%10 == 0)
			printf("worker %u %u rbs (%f) ok\n", worker_start+1, (i-worker_start)/worker_step,
				(float)( 100.0 * (double)((i-worker_start)/worker_step) / (double)((nof_blocks_per_image-worker_start-1)/worker_step+1) ));

		cudaEventRecord(start, 0);

		fiFindBestDomainBlock(rx, ry,
			block_p, kernel_blocks_cuda, kernel_blocks, blocksize,
			w, h,
			threads, blocks,
			scaled_data_cuda,
			data_cuda,
			worst_diff);

		cudaEventRecord(stop, 0);
		cudaEventSynchronize(stop);
		cudaEventElapsedTime(&one_block_time, start, stop);
		cuda_working_time += one_block_time;

		block_p += worker_step;
	}

	search_done = 1;

cleanup:
	free(data_in_int);
	free(kernel_blocks);
	if(scaled_data_cuda)
		cudaFree(scaled_data_cuda);
	if(data_cuda)
		cudaFree(data_cuda);
	if(kernel_blocks_cuda)
		cudaFree(kernel_blocks_cuda);

	if(have_start)
		cudaEventDestroy(start);
	if(have_stop)
		cudaEventDestroy(stop);

	if(search_done) {
		if(worker_step == 1) { // reset the device only when this is the sole worker
			if(cudaDeviceReset() != cudaSuccess) {
				printf("Can\'t reset cuda device\n");
			}
		}

		printf("cuda_working_time %f ms\n", cuda_working_time);
	}

	return 0;
}
size: x = %d, y = %d, z = %d\n", + device_prop.maxGridSize[0], + device_prop.maxGridSize[1], + device_prop.maxGridSize[2]); + + printf("Max Surface1D: %d\n", device_prop.maxSurface1D); + printf("Max Surface2D: %d\n", device_prop.maxSurface2D); + + printf("Clock rate: %d\n", device_prop.clockRate); + printf("Total constant memory: %d\n", device_prop.totalConstMem); + printf("Compute capability: %d.%d\n", device_prop.major, device_prop.minor); + printf("Texture alignment: %d\n", device_prop.textureAlignment); + printf("Device overlap: %d\n", device_prop.deviceOverlap); + printf("Multiprocessor count: %d\n", device_prop.multiProcessorCount); + printf("Kernel execution timeout enabled: %s\n", + device_prop.kernelExecTimeoutEnabled ? "true" : "false"); + } + + printf("\n"); + +} diff --git a/src/image.h b/src/image.h new file mode 100644 index 0000000..ebbca84 --- /dev/null +++ b/src/image.h @@ -0,0 +1,49 @@ + +#ifndef _IMAGE_H +#define _IMAGE_H + +#include + +typedef struct { + unsigned int w; + unsigned int h; + unsigned int nof_channels; + unsigned int alloc_memory; // Общий размер массива data + unsigned char *data; +} image_pc_type; // pc == packed channels, каналы идут последовательно для одного пикселя, содержатся в одном массиве + +typedef struct { + unsigned int w; + unsigned int h; + unsigned int nof_channels; + unsigned int alloc_memory; // Размер массива, выделенного под каждый канал в data + unsigned char **data; +} image_uc_type; // uc == unpacked channels, отдельный массив под каждый канал (красный, зелёный, синий) + +typedef struct { + unsigned short dx; // Смещение по x доменного блока + unsigned short dy; // Смещение по у доменного блока + unsigned char tr; // Трансформация + unsigned char u; // Контрастность + short v; // Яркость + void *divided_into; // Указывает на массив из 4-х дочерних блоков + bool is_divided; // true, если блок поделён + bool must_process; +} image_rangeblock_type; + +#define IMAGE_MAXBLOCKSIZE 128 + +#define IMAGE_MAXDXY 1023 
/* ------------------------------------------------------------------------
 * image.h -- image containers and range-block description.
 * The complete header is given here (its guard makes a second inclusion
 * harmless) so that the image_misc.c code below builds as one unit.
 * ---------------------------------------------------------------------- */
#ifndef _IMAGE_H
#define _IMAGE_H

#include <stdbool.h>

// pc == packed channels: the channels of one pixel are stored consecutively,
// everything lives in a single array.
typedef struct {
	unsigned int w;
	unsigned int h;
	unsigned int nof_channels;
	unsigned int alloc_memory; // Total size of the data array
	unsigned char *data;
} image_pc_type;

// uc == unpacked channels: a separate array for every channel.
typedef struct {
	unsigned int w;
	unsigned int h;
	unsigned int nof_channels;
	unsigned int alloc_memory; // Size of the array allocated for each channel in data
	unsigned char **data;
} image_uc_type;

typedef struct {
	unsigned short dx;   // X offset of the domain block
	unsigned short dy;   // Y offset of the domain block
	unsigned char tr;    // Transformation (IMAGE_RB_TRANSFORMATION_*)
	unsigned char u;     // Contrast
	short v;             // Brightness
	void *divided_into;  // Points to an array of 4 child blocks
	bool is_divided;     // true if the block has been split
	bool must_process;
} image_rangeblock_type;

#define IMAGE_MAXBLOCKSIZE 128

#define IMAGE_MAXDXY 1023
#define IMAGE_LHALFDXY 512
#define IMAGE_HHALFDXY 511

#define IMAGE_RB_TRANSFORMATION_NONE 0
#define IMAGE_RB_TRANSFORMATION_90RIGHT 1
#define IMAGE_RB_TRANSFORMATION_180RIGHT 2
#define IMAGE_RB_TRANSFORMATION_270RIGHT 3
#define IMAGE_RB_TRANSFORMATION_VFLIP 4
#define IMAGE_RB_TRANSFORMATION_HFLIP 5
#define IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP 6
#define IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP 7

#endif

/* ------------------------------------------------------------------------
 * image_misc.h -- public interface of image_misc.c
 * ---------------------------------------------------------------------- */
#ifndef _IMAGE_MISC_H
#define _IMAGE_MISC_H

extern void GetDxyBitsSize(unsigned int w, unsigned int h, unsigned int *dxy_bitssize);
extern unsigned int GetCblockBitsSize(unsigned int dxy_bitssize);
extern void SetBlocksPointers(image_rangeblock_type *blocks, unsigned int blocksize, unsigned int nof_blocks, unsigned int w, unsigned int h);
extern void Scale2to1(unsigned char *src, unsigned char *dst, unsigned int w, unsigned int h);
extern void ApplyReversedTransformToRangeBlock(unsigned char *src, unsigned char *dst, unsigned int tr, unsigned int blocksize);
extern bool UnpackChannels(image_pc_type *in, image_uc_type *out);
extern bool PackChannels(image_uc_type *in, image_pc_type *out);
extern void BGRtoYCBCR(image_uc_type *img);
extern void YCBCRtoBGR(image_uc_type *img);

#endif

/* ------------------------------------------------------------------------
 * image_misc.c
 * ---------------------------------------------------------------------- */
#include <math.h>
#include <stdlib.h>
#include <string.h>

/*
 * Number of bits needed to store a domain-block position.
 *
 * Positions are numbered 0 .. (w/2-1)*(h/2-1)-1; the "-1" inside each factor
 * is (2-1), where 2 is the size of the smallest range block for which domain
 * blocks are searched.  The result is the bit length of the largest position
 * number, e.g. 3 bits are needed to store the value 4.
 *
 * The bit length is computed with integer arithmetic.  For every image that
 * has at least two domain positions this gives the same value as the former
 * (unsigned)log2(max*2) expression, without rounding concerns and without
 * evaluating log2(0).  If the image has one position or none, 0 is stored.
 */
void GetDxyBitsSize(unsigned int w, unsigned int h, unsigned int *dxy_bitssize)
{
	unsigned int max_position, bits;

	*dxy_bitssize = 0;

	// Too small for even a single 2x2 domain block in the half-size image
	if(w/2 < 2 || h/2 < 2)
		return;

	max_position = (w/2-1)*(h/2-1)-1;

	for(bits = 0; max_position != 0; max_position >>= 1)
		bits++;

	*dxy_bitssize = bits;
}

/*
 * Size in bits of one compressed block record:
 * 3 bits of transformation + 5 bits of contrast + 7 bits of brightness
 * plus the bits of the domain-block position.
 */
unsigned int GetCblockBitsSize(unsigned int dxy_bitssize)
{
	return 15+dxy_bitssize;
}

/*
 * Links every block of a level to its four children on the next level.
 *
 * blocks holds all levels back to back: nof_blocks blocks of size blocksize,
 * then 4*nof_blocks blocks of half that size, and so on while the block size
 * stays even and greater than 2.  Block j of a level gets divided_into set to
 * the j-th group of four blocks of the following level.
 * w and h are accepted for interface compatibility and are not used.
 */
void SetBlocksPointers(image_rangeblock_type *blocks, unsigned int blocksize, unsigned int nof_blocks, unsigned int w, unsigned int h)
{
	unsigned int size, j;
	unsigned int level_start = 0;          // index of the first block of the current level
	unsigned int next_start = nof_blocks;  // index of the first block of the next level

	(void)w;
	(void)h;

	for(size = blocksize; size > 2 && size%2 == 0; size /= 2) {
		for(j = 0; j < nof_blocks; j++)
			blocks[level_start+j].divided_into = blocks+next_start+j*4;

		level_start = next_start;
		nof_blocks *= 4;
		next_start += nof_blocks;
	}
}

/*
 * Shrinks a w x h single-channel image to (w/2) x (h/2) by averaging every
 * 2x2 group of pixels (integer division, rounding down).
 * dst must hold (w/2)*(h/2) bytes.
 */
void Scale2to1(unsigned char *src, unsigned char *dst, unsigned int w, unsigned int h)
{
	unsigned int row, col;
	unsigned int half_w = w/2, half_h = h/2;

	for(row = 0; row < half_h; row++) {
		const unsigned char *upper = src+(row*2)*w;
		const unsigned char *lower = upper+w;

		for(col = 0; col < half_w; col++) {
			int sum = (int)upper[col*2]+(int)upper[col*2+1]+(int)lower[col*2]+(int)lower[col*2+1];

			dst[row*half_w+col] = sum/4;
		}
	}
}

/*
 * Writes to dst the blocksize x blocksize block src with the inverse of
 * transformation tr applied (so a "90 right" code rotates 90 degrees left).
 * src and dst must not overlap.  An unknown tr leaves dst untouched.
 */
void ApplyReversedTransformToRangeBlock(unsigned char *src, unsigned char *dst, unsigned int tr, unsigned int blocksize)
{
	unsigned int k, l;
	const unsigned int last = blocksize-1;

	switch(tr) {
		case IMAGE_RB_TRANSFORMATION_NONE:
			memcpy(dst, src, blocksize*blocksize);
			break;
		case IMAGE_RB_TRANSFORMATION_90RIGHT: // Rotate 90 degrees left (reverse transformation)
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[l*blocksize+last-k];
			break;
		case IMAGE_RB_TRANSFORMATION_180RIGHT: // Rotate 180 degrees left
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[(last-k)*blocksize+last-l];
			break;
		case IMAGE_RB_TRANSFORMATION_270RIGHT: // Rotate 270 degrees left
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[(last-l)*blocksize+k];
			break;
		case IMAGE_RB_TRANSFORMATION_VFLIP:
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[(last-k)*blocksize+l];
			break;
		case IMAGE_RB_TRANSFORMATION_HFLIP:
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[k*blocksize+last-l];
			break;
		case IMAGE_RB_TRANSFORMATION_MAINDIAGFLIP:
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[l*blocksize+k];
			break;
		case IMAGE_RB_TRANSFORMATION_ANTIDIAGFLIP:
			for(k = 0; k < blocksize; k++)
				for(l = 0; l < blocksize; l++)
					dst[k*blocksize+l] = src[(last-l)*blocksize+last-k];
			break;
		default: // Unknown code: nothing is written
			break;
	}
}

/*
 * Splits a packed-channel image into one freshly allocated array per channel.
 *
 * Returns false if in->alloc_memory does not equal w*h*nof_channels or if an
 * allocation fails; in that case nothing stays allocated and out->data is
 * NULL.  On success the caller owns out->data and every out->data[i].
 */
bool UnpackChannels(image_pc_type *in, image_uc_type *out)
{
	unsigned int c, i, channels, plane_size;

	out->w = in->w;
	out->h = in->h;
	out->nof_channels = channels = in->nof_channels;
	out->alloc_memory = plane_size = in->w*in->h;
	out->data = NULL;

	if(channels*plane_size != in->alloc_memory)
		return false;

	out->data = malloc(channels*sizeof *out->data);
	if(!out->data)
		return false;

	for(c = 0; c < channels; c++) {
		out->data[c] = malloc(plane_size);
		if(!out->data[c]) {
			while(c > 0)
				free(out->data[--c]);
			free(out->data);
			out->data = NULL; // do not hand a dangling pointer back to the caller
			return false;
		}
	}

	for(c = 0; c < channels; c++) {
		const unsigned char *packed = in->data+c;
		unsigned char *plane = out->data[c];

		for(i = 0; i < plane_size; i++)
			plane[i] = packed[(size_t)i*channels];
	}

	return true;
}

/*
 * Interleaves the per-channel arrays of in into one freshly allocated packed
 * array.  Returns false if in->alloc_memory does not equal w*h or if the
 * allocation fails (out->data is NULL then).  On success the caller owns
 * out->data.
 */
bool PackChannels(image_uc_type *in, image_pc_type *out)
{
	unsigned int c, i, channels, packed_size, plane_size;

	out->w = in->w;
	out->h = in->h;
	out->nof_channels = channels = in->nof_channels;
	out->alloc_memory = packed_size = in->w*in->h*channels;
	out->data = NULL;
	plane_size = in->alloc_memory;

	if(packed_size != channels*plane_size)
		return false;

	out->data = malloc(packed_size);
	if(!out->data)
		return false;

	for(c = 0; c < channels; c++) {
		const unsigned char *plane = in->data[c];
		unsigned char *packed = out->data+c;

		for(i = 0; i < plane_size; i++)
			packed[(size_t)i*channels] = plane[i];
	}

	return true;
}

// Rounds v to the nearest integer and saturates it to 0..255.
static unsigned char ClampToByte(double v)
{
	if(v < 0.0)
		return 0;
	if(v > 255.0)
		return 255;
	return (unsigned char)(v+0.5);
}

/*
 * In-place BGR -> YCbCr conversion (JFIF coefficients,
 * http://www.w3.org/Graphics/JPEG/jfif3.pdf).
 * Channel 0 (B) receives Y, channel 1 (G) receives Cb, channel 2 (R) receives
 * Cr.  Images with fewer than 3 channels are left unchanged.
 */
void BGRtoYCBCR(image_uc_type *img)
{
	unsigned char *b, *g, *r;
	unsigned int i, nof_pixels;

	if(img->nof_channels < 3)
		return;

	b = img->data[0];
	g = img->data[1];
	r = img->data[2];
	nof_pixels = img->w*img->h;

	for(i = 0; i < nof_pixels; i++) {
		double y  =  0.299*r[i]  + 0.587*g[i]  + 0.114*b[i];
		double cb = -0.1687*r[i] - 0.3313*g[i] + 0.5*b[i]    + 128;
		double cr =  0.5*r[i]    - 0.4187*g[i] - 0.0813*b[i] + 128;

		b[i] = ClampToByte(y);
		g[i] = ClampToByte(cb);
		r[i] = ClampToByte(cr);
	}
}

/*
 * In-place YCbCr -> BGR conversion, the inverse of BGRtoYCBCR
 * (http://www.w3.org/Graphics/JPEG/jfif3.pdf).
 * Channel 0 (Y) receives B, channel 1 (Cb) receives G, channel 2 (Cr)
 * receives R.  Images with fewer than 3 channels are left unchanged.
 */
void YCBCRtoBGR(image_uc_type *img)
{
	unsigned char *y, *cb, *cr;
	unsigned int i, nof_pixels;

	if(img->nof_channels < 3)
		return;

	y = img->data[0];
	cb = img->data[1];
	cr = img->data[2];
	nof_pixels = img->w*img->h;

	for(i = 0; i < nof_pixels; i++) {
		int cb_c = (int)cb[i]-128; // centred chroma
		int cr_c = (int)cr[i]-128;
		double b = y[i]+1.772*cb_c;
		double g = y[i]-0.34414*cb_c-0.71414*cr_c;
		double r = y[i]+1.402*cr_c;

		y[i] = ClampToByte(b);
		cb[i] = ClampToByte(g);
		cr[i] = ClampToByte(r);
	}
}
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * tga_load.h and tga_save.h take their image type from image.h.  The
 * packed-channel descriptor is repeated here, under image.h's own guard, so
 * that the TGA reader/writer builds as one unit; it is skipped whenever
 * image.h has already been seen.
 */
#ifndef _IMAGE_H
typedef struct {
	unsigned int w;
	unsigned int h;
	unsigned int nof_channels;
	unsigned int alloc_memory; // Total size of the data array
	unsigned char *data;
} image_pc_type;
#endif

/* ------------------------------------------------------------------------
 * tga_file.h -- on-disk TGA header (18 bytes, no padding, host byte order)
 * ---------------------------------------------------------------------- */
#ifndef _TGA_FILE_H
#define _TGA_FILE_H

#pragma pack (push, 1)
typedef struct {
	unsigned char IdLeight;     // Length of the image-ID field that follows the header
	unsigned char ColorMap;     // Colour map present flag (0 - no, 1 - yes)
	unsigned char DataType;     // Image type / compression
	                            // 0  -  No Image Data Included
	                            // 1  -  Uncompressed, Color-mapped Image
	                            // 2  -  Uncompressed, True-color Image
	                            // 3  -  Uncompressed, Black-and-white Image
	                            // 9  -  Run-length encoded, Color-mapped Image
	                            // 10 -  Run-length encoded, True-color Image
	                            // 11 -  Run-length encoded, Black-and-white Image
	unsigned short CmapStart;   // Index of the first palette entry
	unsigned short CmapLength;  // Number of palette entries
	unsigned char CmapDepth;    // Bits per palette entry (15, 16, 24, 32)
	unsigned short X_Origin;    // Image origin along X
	unsigned short Y_Origin;    // Image origin along Y
	unsigned short TGAWidth;    // Image width
	unsigned short TGAHeight;   // Image height
	unsigned char BitPerPel;    // Bits per pixel (8, 16, 24, 32)
	unsigned char Description;  // Image descriptor
} TGAHEADER;
#pragma pack (pop)

#endif

/* ------------------------------------------------------------------------
 * tga_load.h
 * ---------------------------------------------------------------------- */
#ifndef _TGA_LOAD_H
#define _TGA_LOAD_H

#define TGALOAD_OKAY 0
#define TGALOAD_CANTOPENFILE 1
#define TGALOAD_DAMAGEDFILE 2
#define TGALOAD_UNSUPPORTEDFILETYPE 3
#define TGALOAD_MEMORYALLOCERROR 4

extern int tgaLoad(char *fname, image_pc_type *image);

#endif

/* ------------------------------------------------------------------------
 * tga_save.h
 * ---------------------------------------------------------------------- */
#ifndef _TGA_SAVE_H
#define _TGA_SAVE_H

#define TGASAVE_OKAY 0
#define TGASAVE_CANTOPENFILE 1
#define TGASAVE_DAMAGEDIMAGESTRUCT 2
#define TGASAVE_TOOMANYCHANNELS 3
#define TGASAVE_WRITEERROR 4

extern int tgaSave(char *fname, image_pc_type *image);

#endif

/* ------------------------------------------------------------------------
 * tga_load.c
 * ---------------------------------------------------------------------- */

// Number of bytes between the current position of f and its end, or -1 on error.
static long GetRemainingFileLength(FILE *f)
{
	long cur_pos, end_pos;

	cur_pos = ftell(f);
	if(cur_pos < 0 || fseek(f, 0, SEEK_END) != 0)
		return -1;

	end_pos = ftell(f);
	if(fseek(f, cur_pos, SEEK_SET) != 0 || end_pos < cur_pos)
		return -1;

	return end_pos-cur_pos;
}

// Checks that the header describes a file this loader can read; returns a TGALOAD_* code.
static int CheckHeader(const TGAHEADER *head)
{
	switch(head->DataType) {
		case 1: // Colour-mapped images
		case 9:
			if((head->ColorMap != 1) || (head->BitPerPel != 8))
				return TGALOAD_DAMAGEDFILE;
			if((head->CmapDepth != 24) && (head->CmapDepth != 32))
				return TGALOAD_UNSUPPORTEDFILETYPE;
			return TGALOAD_OKAY;
		case 2: // True-colour images (24, 32 bpp)
		case 10:
			if(head->ColorMap != 0)
				return TGALOAD_DAMAGEDFILE;
			if((head->BitPerPel != 24) && (head->BitPerPel != 32))
				return TGALOAD_UNSUPPORTEDFILETYPE;
			return TGALOAD_OKAY;
		case 3: // Greyscale images (8 bpp)
		case 11:
			if(head->ColorMap != 0)
				return TGALOAD_DAMAGEDFILE;
			if(head->BitPerPel != 8)
				return TGALOAD_UNSUPPORTEDFILETYPE;
			return TGALOAD_OKAY;
		default:
			return TGALOAD_UNSUPPORTEDFILETYPE;
	}
}

/*
 * Decodes TGA run-length packets from src (src_len bytes) into nof_elems
 * elements of elem_size bytes each at dst.  Returns false if a packet would
 * produce more elements than expected or if the input ends early.
 */
static bool DecodeRle(const unsigned char *src, size_t src_len, unsigned char *dst, size_t nof_elems, size_t elem_size)
{
	size_t done = 0;

	while(done < nof_elems) {
		unsigned char packet;
		size_t count, j;

		if(src_len < 1)
			return false;
		packet = *src++;
		src_len--;

		count = (size_t)(packet & 0x7F)+1;
		if(count > nof_elems-done)
			return false;

		if(packet & 0x80) { // Run-length packet: one element repeated count times
			if(src_len < elem_size)
				return false;
			for(j = 0; j < count; j++) {
				memcpy(dst, src, elem_size);
				dst += elem_size;
			}
			src += elem_size;
			src_len -= elem_size;
		} else { // Raw packet: count literal elements
			size_t raw_size = count*elem_size;

			if(src_len < raw_size)
				return false;
			memcpy(dst, src, raw_size);
			dst += raw_size;
			src += raw_size;
			src_len -= raw_size;
		}

		done += count;
	}

	return true;
}

/*
 * Replaces every palette index by its bpp-byte palette entry.  pal holds
 * nof_entries entries, the first of which has index first_entry.  Returns
 * false if an index refers to an entry that is not stored in the file.
 */
static bool ExpandPalette(const unsigned char *indices, size_t nof_pixels, const unsigned char *pal,
	unsigned int first_entry, unsigned int nof_entries, unsigned int bpp, unsigned char *dst)
{
	size_t i;

	for(i = 0; i < nof_pixels; i++) {
		unsigned int index = indices[i];

		if(index < first_entry || index-first_entry >= nof_entries)
			return false;

		memcpy(dst, pal+(size_t)(index-first_entry)*bpp, bpp);
		dst += bpp;
	}

	return true;
}

// Reverses the order of the h rows (row_size bytes each) of data in place.
static void FlipRows(unsigned char *data, size_t row_size, unsigned int h)
{
	unsigned char *upper, *lower;
	unsigned int row;
	size_t j;

	if(h < 2 || row_size == 0)
		return;

	upper = data;
	lower = data+row_size*(h-1);

	for(row = 0; row < h/2; row++) {
		for(j = 0; j < row_size; j++) {
			unsigned char tmp = upper[j];

			upper[j] = lower[j];
			lower[j] = tmp;
		}
		upper += row_size;
		lower -= row_size;
	}
}

/*
 * Loads a TGA file into a packed-channel image.
 *
 * Supported: colour-mapped (types 1, 9; 8-bit indices, 24/32-bit palette),
 * true-colour (types 2, 10; 24/32 bpp) and greyscale (types 3, 11; 8 bpp)
 * images, raw or run-length encoded.  Pixel bytes are stored in file order
 * (B, G, R[, A]); rows are reversed when descriptor bit 0x20 is set.
 *
 * Returns TGALOAD_OKAY and fills *image (the caller frees image->data), or
 *   TGALOAD_CANTOPENFILE         the file cannot be opened (*image untouched),
 *   TGALOAD_DAMAGEDFILE          inconsistent header, truncated data,
 *                                malformed RLE stream or bad palette index,
 *   TGALOAD_UNSUPPORTEDFILETYPE  valid TGA of a kind that is not handled,
 *   TGALOAD_MEMORYALLOCERROR     allocation failure or image too large.
 * After any failure other than TGALOAD_CANTOPENFILE image->data is NULL.
 */
int tgaLoad(char *fname, image_pc_type *image)
{
	FILE *f;
	TGAHEADER head;
	unsigned int bpp;
	size_t nof_pixels, image_size, elem_size, pal_size;
	unsigned char *pixels = NULL;   // decoded image, handed to the caller on success
	unsigned char *pal = NULL;      // colour map
	unsigned char *indices = NULL;  // palette indices of a colour-mapped image
	unsigned char *packed = NULL;   // undecoded RLE stream
	unsigned char *elems;           // where raw/RLE data is decoded to
	bool has_palette, is_rle;
	int result = TGALOAD_DAMAGEDFILE;

	f = fopen(fname, "rb");
	if(!f)
		return TGALOAD_CANTOPENFILE;

	if(fread(&head, sizeof(TGAHEADER), 1, f) != 1)
		goto done;

	result = CheckHeader(&head);
	if(result != TGALOAD_OKAY)
		goto done;

	has_palette = (head.ColorMap == 1);
	is_rle = (head.DataType >= 9);

	if((head.BitPerPel == 8) && (head.ColorMap == 0)) // Greyscale
		bpp = 1;
	else if((head.BitPerPel == 32) || ((head.BitPerPel == 8) && (head.CmapDepth == 32))) // 32 bpp, direct or via palette
		bpp = 4;
	else // 24 bpp, direct or via palette
		bpp = 3;

	image->nof_channels = bpp;
	image->w = head.TGAWidth;
	image->h = head.TGAHeight;

	// alloc_memory is an unsigned int: refuse images whose size does not fit
	nof_pixels = (size_t)image->w*image->h;
	result = TGALOAD_MEMORYALLOCERROR;
	if(nof_pixels > UINT_MAX/bpp)
		goto done;
	image_size = nof_pixels*bpp;
	image->alloc_memory = (unsigned int)image_size;

	pixels = malloc(image_size ? image_size : 1);
	if(!pixels)
		goto done;

	// File layout: header, image ID, colour map, image data
	result = TGALOAD_DAMAGEDFILE;
	if(fseek(f, head.IdLeight, SEEK_CUR) != 0)
		goto done;

	if(has_palette) {
		pal_size = (size_t)head.CmapLength*bpp;

		result = TGALOAD_MEMORYALLOCERROR;
		pal = malloc(pal_size ? pal_size : 1);
		indices = malloc(nof_pixels ? nof_pixels : 1);
		if(!pal || !indices)
			goto done;

		result = TGALOAD_DAMAGEDFILE;
		if(fread(pal, 1, pal_size, f) != pal_size)
			goto done;

		elems = indices;
		elem_size = 1;
	} else {
		elems = pixels;
		elem_size = bpp;
	}

	if(is_rle) {
		long packed_size = GetRemainingFileLength(f);

		if(packed_size < 0)
			goto done;

		packed = malloc(packed_size ? (size_t)packed_size : 1);
		if(!packed) {
			result = TGALOAD_MEMORYALLOCERROR;
			goto done;
		}

		if(fread(packed, 1, (size_t)packed_size, f) != (size_t)packed_size)
			goto done;
		if(!DecodeRle(packed, (size_t)packed_size, elems, nof_pixels, elem_size))
			goto done;
	} else {
		if(fread(elems, 1, nof_pixels*elem_size, f) != nof_pixels*elem_size)
			goto done;
	}

	if(has_palette && !ExpandPalette(indices, nof_pixels, pal, head.CmapStart, head.CmapLength, bpp, pixels))
		goto done;

	// Flip along Y if required (GIMP can save files with this flag set)
	if(head.Description & 0x20)
		FlipRows(pixels, (size_t)image->w*bpp, image->h);

	result = TGALOAD_OKAY;

done:
	if(result == TGALOAD_OKAY) {
		image->data = pixels;
	} else {
		free(pixels);
		image->data = NULL;
	}
	free(packed);
	free(indices);
	free(pal);
	fclose(f);

	return result;
}

/* ------------------------------------------------------------------------
 * tga_save.c
 * ---------------------------------------------------------------------- */

/*
 * Writes a packed-channel image as an uncompressed TGA file: type 3
 * (greyscale) for 1 channel, type 2 (true colour) for 3 or 4 channels.
 *
 * The image is validated before the file is opened, so an invalid image never
 * creates or truncates fname.
 *
 * Returns TGASAVE_OKAY, or
 *   TGASAVE_DAMAGEDIMAGESTRUCT  alloc_memory != w*h*nof_channels, or a
 *                               dimension does not fit the 16-bit header field,
 *   TGASAVE_TOOMANYCHANNELS     channel count is not 1, 3 or 4,
 *   TGASAVE_CANTOPENFILE        the file cannot be opened for writing,
 *   TGASAVE_WRITEERROR          writing or closing the file failed.
 */
int tgaSave(char *fname, image_pc_type *image)
{
	FILE *f;
	TGAHEADER head;
	bool written;

	if(image->w > 0xFFFF || image->h > 0xFFFF)
		return TGASAVE_DAMAGEDIMAGESTRUCT;

	if((unsigned long long)image->w*image->h*image->nof_channels != image->alloc_memory)
		return TGASAVE_DAMAGEDIMAGESTRUCT;

	if(image->nof_channels > 4 || image->nof_channels == 2 || image->nof_channels == 0)
		return TGASAVE_TOOMANYCHANNELS;

	f = fopen(fname, "wb");
	if(!f)
		return TGASAVE_CANTOPENFILE;

	memset(&head, 0, sizeof(TGAHEADER));

	head.TGAWidth = (unsigned short)image->w;
	head.TGAHeight = (unsigned short)image->h;
	head.DataType = (image->nof_channels == 1) ? 3 : 2;
	head.BitPerPel = (unsigned char)(image->nof_channels*8);

	written = fwrite(&head, sizeof(head), 1, f) == 1
		&& fwrite(image->data, 1, image->alloc_memory, f) == image->alloc_memory;

	// fclose flushes buffered data, so its result is part of the write status
	if(fclose(f) != 0)
		written = false;

	return written ? TGASAVE_OKAY : TGASAVE_WRITEERROR;
}

/*
 * Non-C content that shares these source lines, kept verbatim.
 *
 * test0.bat:
 *   Release\compress.exe chihiro2.tga chihiro2_b8.fi 8
 *   Release\decompress.exe chihiro2_b8.fi chihiro2_b8.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b4.fi 4
 *   Release\decompress.exe chihiro2_b4.fi chihiro2_b4.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b2.fi 2
 *   Release\decompress.exe chihiro2_b2.fi chihiro2_b2.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b8_rms2.fi 8 4 2
 *   Release\decompress.exe chihiro2_b8_rms2.fi chihiro2_b8_rms2.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b8_rms4.fi 8 4 4
 *   Release\decompress.exe chihiro2_b8_rms4.fi chihiro2_b8_rms4.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b8_rms6.fi 8 4 6
 *   Release\decompress.exe chihiro2_b8_rms6.fi chihiro2_b8_rms6.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b8_rms10.fi 8 4 10
 *   Release\decompress.exe chihiro2_b8_rms10.fi chihiro2_b8_rms10.dec.tga
 *   Release\compress.exe chihiro2.tga chihiro2_b8_rms50.fi 8 4 50
 *   Release\decompress.exe chihiro2_b8_rms50.fi chihiro2_b8_rms50.dec.tga
 *
 * test0.md5 (first entries; the list continues after this block):
 *   dfe6421540d4ad169c23498bc5b4aa29 *chihiro2_b2.fi
 *   bac1dc505a5c9be23b27b8e840f5f8f9 *chihiro2_b2.dec.tga
 *   ce34f55126ee2f980868d08ac1239f68 *chihiro2_b4.fi
 */
+e2037641abf7954b1513a5ebe53506eb *chihiro2_b4.dec.tga +eb63ca99a084983b91f6414a7e3c241e *chihiro2_b8.fi +25455b476b7f29b83e803e4a14ab3b19 *chihiro2_b8.dec.tga +08b69485bfdaefe96c8cb478594f4a0f *chihiro2_b8_rms10.fi +fe97f99bb573263f31498d3bdd3c19d3 *chihiro2_b8_rms10.dec.tga +4584a2eb7932474a2b89978aa4b9dce5 *chihiro2_b8_rms2.fi +1e242b76cc759de203f12ce3e2cca486 *chihiro2_b8_rms2.dec.tga +d92c596ef1cec8f68616ef948284cae4 *chihiro2_b8_rms4.fi +a4ab1ee520223fe616069e7e1a2aa3ba *chihiro2_b8_rms4.dec.tga +035e900148cb8c6945a65cb1669c75af *chihiro2_b8_rms50.fi +382986d755fad7ec1202776fd628c866 *chihiro2_b8_rms50.dec.tga +f0515cd41769618ead8867b876772d9c *chihiro2_b8_rms6.fi +8e84cc78593104f92dc4e00d7550e1f3 *chihiro2_b8_rms6.dec.tga diff --git a/unix_makefiles/Release/do_not_delete.txt b/unix_makefiles/Release/do_not_delete.txt new file mode 100644 index 0000000..e69de29 diff --git a/unix_makefiles/ReleaseMPI/do_not_delete.txt b/unix_makefiles/ReleaseMPI/do_not_delete.txt new file mode 100644 index 0000000..e69de29 diff --git a/unix_makefiles/makefile b/unix_makefiles/makefile new file mode 100644 index 0000000..c9e341c --- /dev/null +++ b/unix_makefiles/makefile @@ -0,0 +1,29 @@ +CC=gcc +CPP=g++ +CFLAGS=-c -Wall -O2 +CFLAGS_MPI=$(CFLAGS) -DI_USE_MPI -I/usr/include/mpich2/ +SOURCES_COMPRESS=../src/arrays.c ../src/compress.c ../src/fi_save.c ../src/fi_save_blocks_compression.c ../src/fi_save_blocks_search.c ../src/image_misc.c ../src/tga_load.c ../src/cpu_driven/fi_save_blocks_search_workers.c +OBJECTS_COMPRESS=arrays.o compress.o fi_save.o fi_save_blocks_compression.o fi_save_blocks_search.o image_misc.o tga_load.o fi_save_blocks_search_workers.o +SOURCES_DECOMPRESS=../src/arrays.c ../src/decompress.c ../src/fi_load.c ../src/image_misc.c ../src/tga_save.c +OBJECTS_DECOMPRESS=arrays.o decompress.o fi_load.o image_misc.o tga_save.o + +all: release release_mpi + +release: compress decompress + +release_mpi: compress_mpi + +compress: + $(CC) $(CFLAGS) $(SOURCES_COMPRESS) + 
$(CPP) -o Release/compress -lpthread $(OBJECTS_COMPRESS) -s + +compress_mpi: + $(CC) $(CFLAGS_MPI) $(SOURCES_COMPRESS) + $(CPP) -o ReleaseMPI/compress -lpthread -lmpi $(OBJECTS_COMPRESS) -s + +decompress: + $(CC) $(CFLAGS) $(SOURCES_DECOMPRESS) + $(CPP) -o Release/decompress $(OBJECTS_DECOMPRESS) -s + +clean: + rm -rf *.o Release/compress Release/decompress ReleaseMPI/compress diff --git a/unix_projects/compress/compress.cbp b/unix_projects/compress/compress.cbp new file mode 100644 index 0000000..057809c --- /dev/null +++ b/unix_projects/compress/compress.cbp @@ -0,0 +1,83 @@ + + + + + + diff --git a/unix_projects/compress/compress.depend b/unix_projects/compress/compress.depend new file mode 100644 index 0000000..782fb9a --- /dev/null +++ b/unix_projects/compress/compress.depend @@ -0,0 +1,107 @@ +# depslib dependency file v1.0 +1447453270 source:/media/CC01-601D/fractal_image_compression/src/arrays.c + "arrays.h" + +1447378552 /media/CC01-601D/fractal_image_compression/src/arrays.h + + +1456175332 source:/media/CC01-601D/fractal_image_compression/src/compress.c + + + + + + "tga_load.h" + "image_misc.h" + "fi_save.h" + +1446493988 /media/CC01-601D/fractal_image_compression/src/tga_load.h + "image.h" + +1455143304 /media/CC01-601D/fractal_image_compression/src/image.h + + +1454630460 /media/CC01-601D/fractal_image_compression/src/image_misc.h + + "image.h" + +1447435442 /media/CC01-601D/fractal_image_compression/src/fi_save.h + "image.h" + +1457447856 source:/media/CC01-601D/fractal_image_compression/src/cpu_driven/fi_save_blocks_search_workers.c + + + "../image_misc.h" + "../fi_save_blocks_search_workers.h" + +1456176240 /media/CC01-601D/fractal_image_compression/src/fi_save_blocks_search_workers.h + "image.h" + +1454626304 source:/media/CC01-601D/fractal_image_compression/src/fi_save_blocks_compression.c + + + + "arrays.h" + "fi_file.h" + "fi_save.h" + "image_misc.h" + "fi_save_blocks_compression.h" + +1446855158 
/media/CC01-601D/fractal_image_compression/src/fi_file.h + +1454626284 /media/CC01-601D/fractal_image_compression/src/fi_save_blocks_compression.h + +1456175564 source:/media/CC01-601D/fractal_image_compression/src/fi_save.c + + + + + + "arrays.h" + "fi_file.h" + "fi_save.h" + "fi_save_blocks_compression.h" + "fi_save_blocks_search.h" + "image_misc.h" + +1454624988 /media/CC01-601D/fractal_image_compression/src/fi_save_blocks_search.h + "image.h" + +1454630464 source:/media/CC01-601D/fractal_image_compression/src/image_misc.c + + + + "image_misc.h" + +1446611444 source:/media/CC01-601D/fractal_image_compression/src/tga_load.c + + + + "tga_file.h" + "tga_load.h" + +1446489332 /media/CC01-601D/fractal_image_compression/src/tga_file.h + +1456177190 source:/media/CC01-601D/fractal_image_compression/src/fi_save_blocks_search.c + + + + + + + + "image_misc.h" + "fi_save.h" + "fi_save_blocks_search.h" + "fi_save_blocks_search_workers.h" + +1365770559 /usr/include/mpich2//mpi.h + "mpio.h" + "mpicxx.h" + +1365770561 /usr/include/mpich2//mpio.h + "mpi.h" + +1365770562 /usr/include/mpich2//mpicxx.h + + diff --git a/unix_projects/compress/compress.layout b/unix_projects/compress/compress.layout new file mode 100644 index 0000000..42ad001 --- /dev/null +++ b/unix_projects/compress/compress.layout @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/unix_projects/decompress/decompress.cbp b/unix_projects/decompress/decompress.cbp new file mode 100644 index 0000000..4a6c1c5 --- /dev/null +++ b/unix_projects/decompress/decompress.cbp @@ -0,0 +1,56 @@ + + + + + + diff --git a/unix_projects/decompress/decompress.depend b/unix_projects/decompress/decompress.depend new file mode 100644 index 0000000..c4ac310 --- /dev/null +++ b/unix_projects/decompress/decompress.depend @@ -0,0 +1 @@ +# depslib dependency file v1.0 diff --git a/unix_projects/decompress/decompress.layout b/unix_projects/decompress/decompress.layout new file mode 100644 index 0000000..0e578cf --- 
/dev/null +++ b/unix_projects/decompress/decompress.layout @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/unix_projects/fractal_image_compression.workspace b/unix_projects/fractal_image_compression.workspace new file mode 100644 index 0000000..a217cdb --- /dev/null +++ b/unix_projects/fractal_image_compression.workspace @@ -0,0 +1,7 @@ + + + + + + +