From 510e29288eea68ca7611945ee060fad04794507c Mon Sep 17 00:00:00 2001 From: Michael M <12465142+Mike15678@users.noreply.github.com> Date: Tue, 29 May 2018 00:29:22 -0400 Subject: [PATCH] Major Improvements (#5) * Changes to code and build system for Linux support. - Makefile was altered to support the dynamic OpenCL lib with the default path for Ubuntu's ocl-icd-opencl-dev apt package. - Various changes in the source code to resolve -Wall -Werror build errors using cc 7.2.0. * Remove beginning newlines from affected .c and .cl source files * Include stdlib.h instead of malloc.h in ocl_util.c This allows compilation on macOS * Implement full macOS support * Include better compiling instructions and add some comments to the Makefile * Forgot to add an extra newline in the README * Fix two comments inside of the Makefile * Edit comments in Makefile * Edit comments in Makefile... again * Edit Makefile to provide proper static linking instructions * Minor edits to utils.c * Match wording in README * Better comments in Makefile * Small fix-ups in Makefile * Spelling correction * Update README.md * Edit some Makefile comments * Oops * Improve compiling instructions * Simple fix * Make sure output buffers are flushed --- Makefile | 36 +++++++++++++++++++++++-------- README.md | 43 +++++++++++++++++++++++++++++++------ aes_128.c | 2 -- bfcl.c | 2 +- cl/aes_128.cl | 1 - cl/aes_tables.cl | 2 -- cl/bcd.h | 1 - cl/common.h | 2 -- cl/dsi.h | 2 -- cl/kernel_console_id.cl | 2 -- cl/kernel_console_id_ds3.cl | 1 - cl/kernel_emmc_cid.cl | 2 -- cl/kernel_lfcs.cl | 2 -- cl/kernel_msky.cl | 2 -- cl/kernel_tests.cl | 2 -- cl/sha1_16.cl | 2 -- crypto.h | 1 - dsi.h | 2 -- ocl.h | 5 ++++- ocl_brute.c | 15 +++++++------ ocl_test.c | 4 ++-- ocl_util.c | 12 ++++++++--- sha1_16.c | 2 -- utils.c | 13 +++++------ utils.h | 2 -- 25 files changed, 94 insertions(+), 66 deletions(-) diff --git a/Makefile b/Makefile index 757ed6b..0b22218 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,32 @@ -# only tested in mingw PNAME = bfcl OBJS = $(PNAME).o ocl_util.o utils.o sha1_16.o aes_128.o ocl_test.o ocl_brute.o -CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd -I$(INTELOCLSDKROOT)/include -LDFLAGS += -L$(INTELOCLSDKROOT)/lib/x64 +ifdef SYSTEMROOT + # Intel's OpenCL SDK installer sets an environmental variable on Windows. + CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd -I$(INTELOCLSDKROOT)\include + LDFLAGS += -L$(INTELOCLSDKROOT)\lib\x64 +else + ifeq ($(shell uname), Linux) + # Intel's OpenCL SDK installer doesn't set an environmenr variable on Linux, so we'll have to specify its default installation location instead. + CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd -I/opt/intel/opencl-sdk/include + LDFLAGS += -L/opt/intel/opencl-sdk/lib64 + endif + ifeq ($(shell uname), Darwin) + # macOS's "ld" likes to warn you about library dirs not being found. That being said, macOS includes its own implementation of OpenCL. + CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd + endif +endif -all : $(PNAME) +all: $(PNAME) -$(PNAME) : $(OBJS) - $(CC) $(LDFLAGS) -o $@ $^ -lOpenCL -static -lmbedcrypto - -clean : - rm $(PNAME) *.o +$(PNAME): $(OBJS) +ifeq ($(shell uname), Darwin) + $(CC) -o $@ $^ -framework OpenCL -lmbedcrypto +# If you want to use the mbedcrypto static library instead (on macOS), change "-lmbedcrypto" to "/usr/local/lib/libmbedcrypto.a" (or wherever else it may be) with the quotes. +else + $(CC) $(LDFLAGS) -o $@ $^ -lOpenCL -lmbedcrypto +# If you want to use the mbedcrypto static library instead (whether you're using MSYS2 or are on Linux), change "-lmbedcrypto" to "-l:libmbedcrypto.a" without the quotes. +# Note: Ubuntu (probably Debian as well) doesn't install "libmbedcrypto.a" through apt-get, thus you would have to compile mbedtls yourself. +endif +clean: + rm -f $(PNAME) *.o diff --git a/README.md b/README.md index 9124d46..fd09e9a 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,42 @@ # bfCL -This is an experimental port of [TWLbf](https://github.com/Jimmy-Z/TWLbf) to OpenCL. +This is an experimental port of [TWLbf](https://github.com/Jimmy-Z/TWLbf/) to OpenCL. -### Compile -Only tested with [mingw-w64-x86_64](https://mingw-w64.org/) -/[MSYS2](http://www.msys2.org/) -(and occasionally Visual Studio 2017 Community) and Intel OpenCL SDK. +## Compile +### Windows +Note: If you really want to use Virtual Studio 2017, you're going to probably have to change the Makefile a bit and compile [mbedtls](https://github.com/ARMmbed/mbedtls/) from source. +#### Requirements for compiling with MSYS2 +* **A 64-bit computer** +* [MSYS2](http://www.msys2.org/) (the x86_64 executable; **read its instructions on installing and setting up**) +* An `OpenCL.dll` or `OpenCL.lib` 64-bit library -### License +Note: `OpenCL.dll` can be found inside of your `C:\Windows\System32\` folder, but you may have to install your graphics card's drivers from your graphics card's vendor if it's not there. +You can alternatively install [Intel's OpenCL SDK](https://software.intel.com/intel-opencl/), but this requires you to agree to their TOS and takes up more space on your computer. +#### Instructions for compiling with MSYS2 +1. Close any open instances of MSYS2 (if applicable), then launch the `MSYS2 MinGW 64-bit` shortcut from the Windows Start Menu. +1. In the MSYS2 bash shell that appears, execute `pacman -Syu --needed mingw-w64-x86_64-gcc mingw-w64-x86_64-make mingw-w64-x86_64-mbedtls git` to download and install required packages. +1. If you're going to use the `OpenCL.dll` 64-bit library from your `C:\Windows\System32\` folder (in contrast to installing Intel's OpenCL SDK), copy it into your `msys64/mingw64/lib/` folder (your `msys64` folder is by default installed onto the root of your "C:" drive during the installation of MSYS2). Additionally, if you're going to use `OpenCL.dll`, in MSYS2, execute `git clone https://github.com/KhronosGroup/OpenCL-Headers.git && mv OpenCL-Headers/CL /mingw64/include/` to download and move the required OpenCL C headers folder. +1. In MSYS2, execute `git clone https://github.com/zoogie/bfCL.git && cd bfCL` to download bfCL and change your current directory to it. +1. In MSYS2, execute `mingw32-make` to compile bfCL (**OpenCL and mbedcrypto will both be dynamically linked!** Refer to the Makefile if you want to statically link mbedcrypto instead). +### Linux +#### Requirements for compiling on all Linux distros +* **A 64-bit computer** +#### Instructions for compiling on Debian-based Linux distros +Note: the **concept** is still applicable for all other Linux distros; e.g., some packages may have different names. +1. Open up the "Terminal" application. +1. In "Terminal", execute `sudo apt-get update && sudo apt-get install gcc git libmbedtls-dev make ocl-icd-opencl-dev` to download and install required packages (note that the "ocl-icd-opencl-dev" package includes both the OpenCL C headers and the OpenCL ICD Loader library). +1. After all of the packages have finished installing, in "Terminal", execute `git clone https://github.com/zoogie/bfCL.git && cd bfCL` to download bfCL and change your current directory to it. +1. In "Terminal", execute `make` to compile bfCL (**OpenCL and mbedcrypto will both be dynamically linked!** Refer to the Makefile if you want to statically link mbedcrypto instead). +### macOS +#### Requirements for compiling on macOS +* **An Intel-based 64-bit computer** +* [Homebrew](https://brew.sh/) (**Read its instructions on installing**; installing Homebrew also installs Xcode command-line tools, which is also needed) +#### Instructions for compiling on macOS +1. Open up the "Terminal" application through Launchpad. +1. In "Terminal", execute `brew update && brew install git mbedtls` to download and install required packages. +1. In "Terminal", execute `git clone https://github.com/zoogie/bfCL.git && cd bfCL` to download bfCL and change your current directory to it. +1. In "Terminal", execute `make` to compile bfCL (**OpenCL and mbedcrypto will both be dynamically linked!** Refer to the Makefile if you want to statically link mbedcrypto instead). + +## License AES and SHA1 code from [mbed TLS](https://github.com/ARMmbed/mbedtls/) which is Apache 2.0 license, so I guess this project becomes Apache 2.0 licensed automatically? or only related files are Apache 2.0? I'm not sure. - diff --git a/aes_128.c b/aes_128.c index 3466b3d..c237329 100644 --- a/aes_128.c +++ b/aes_128.c @@ -1,4 +1,3 @@ - #include #include #include @@ -88,4 +87,3 @@ void aes_encrypt_128_bulk(const unsigned char *in, unsigned char *out, unsigned out += AES_BLOCK_SIZE; } } - diff --git a/bfcl.c b/bfcl.c index 9c550e0..4d784cf 100644 --- a/bfcl.c +++ b/bfcl.c @@ -1,4 +1,4 @@ - +#include #include #include #include "utils.h" diff --git a/cl/aes_128.cl b/cl/aes_128.cl index fd1f62d..908d8c7 100644 --- a/cl/aes_128.cl +++ b/cl/aes_128.cl @@ -1,4 +1,3 @@ - /* AES 128 ECB dug out from mbed TLS 2.5.1 * https://github.com/ARMmbed/mbedtls/blob/development/include/mbedtls/aes.h * https://github.com/ARMmbed/mbedtls/blob/development/library/aes.c diff --git a/cl/aes_tables.cl b/cl/aes_tables.cl index 4d77134..07d75a1 100644 --- a/cl/aes_tables.cl +++ b/cl/aes_tables.cl @@ -1,4 +1,3 @@ - // I'm obsessed with the idea of generating this file in memory // on the other hand I know that's pointless... @@ -265,4 +264,3 @@ __constant static const uint32_t RCON[10] = 0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036 }; - diff --git a/cl/bcd.h b/cl/bcd.h index dc513f7..6aaf043 100644 --- a/cl/bcd.h +++ b/cl/bcd.h @@ -1,4 +1,3 @@ - #ifdef BCD inline u64 to_dsi_bcd(u64 i) { diff --git a/cl/common.h b/cl/common.h index 38ebfbd..e567d49 100644 --- a/cl/common.h +++ b/cl/common.h @@ -1,4 +1,3 @@ - typedef unsigned int uint32_t; typedef unsigned char u8; @@ -25,4 +24,3 @@ typedef unsigned long u64; (b)[(i) + 3] = (unsigned char) ( (n) ); \ } #endif - diff --git a/cl/dsi.h b/cl/dsi.h index 416b517..e314544 100644 --- a/cl/dsi.h +++ b/cl/dsi.h @@ -1,4 +1,3 @@ - // more about this: https://github.com/Jimmy-Z/TWLbf/blob/master/dsi.c __constant static const u64 DSi_KEY_Y[2] = @@ -70,4 +69,3 @@ inline void dsi_make_key(u64 *key, u64 console_id){ } rol42_128(key); } - diff --git a/cl/kernel_console_id.cl b/cl/kernel_console_id.cl index a070ce3..da45955 100644 --- a/cl/kernel_console_id.cl +++ b/cl/kernel_console_id.cl @@ -1,4 +1,3 @@ - // the caller should feed the target xor pad byte reversed as two uint64_t // the ctr from emmc_cid_sha1 byte reversed as 4 uint32_t __kernel void test_console_id( @@ -29,4 +28,3 @@ __kernel void test_console_id( *out = console_id; } } - diff --git a/cl/kernel_console_id_ds3.cl b/cl/kernel_console_id_ds3.cl index 343f06b..9ea98e5 100644 --- a/cl/kernel_console_id_ds3.cl +++ b/cl/kernel_console_id_ds3.cl @@ -1,4 +1,3 @@ - // dark_samus3's brilliant idea, doesn't need the EMMC CID beforehand // instead, use two known sectors to verify against each other // https://gbatemp.net/threads/twlbf-a-tool-to-brute-force-dsi-console-id-or-emmc-cid.481732/page-4#post-7661355 diff --git a/cl/kernel_emmc_cid.cl b/cl/kernel_emmc_cid.cl index 2ab0eaf..169ad5e 100644 --- a/cl/kernel_emmc_cid.cl +++ b/cl/kernel_emmc_cid.cl @@ -1,4 +1,3 @@ - __kernel void test_emmc_cid( u64 emmc_cid_l, u64 emmc_cid_h, u64 sha1_16_l, u64 sha1_16_h, @@ -16,4 +15,3 @@ __kernel void test_emmc_cid( *out = get_global_id(0); } } - diff --git a/cl/kernel_lfcs.cl b/cl/kernel_lfcs.cl index 8e427f4..dd748f7 100644 --- a/cl/kernel_lfcs.cl +++ b/cl/kernel_lfcs.cl @@ -1,4 +1,3 @@ - __kernel void test_lfcs( u32 lfcs, u16 newflag, u32 v0, u32 v1, @@ -23,4 +22,3 @@ __kernel void test_lfcs( *out = gid; } } - diff --git a/cl/kernel_msky.cl b/cl/kernel_msky.cl index 008a540..689aa14 100644 --- a/cl/kernel_msky.cl +++ b/cl/kernel_msky.cl @@ -1,4 +1,3 @@ - __kernel void test_msky( u32 k0, u32 k1, u32 k2, u32 k3, u32 v0, u32 v1, u32 v2, u32 v3, @@ -17,4 +16,3 @@ __kernel void test_msky( *out = k2; } } - diff --git a/cl/kernel_tests.cl b/cl/kernel_tests.cl index 05b68f4..aeee79f 100644 --- a/cl/kernel_tests.cl +++ b/cl/kernel_tests.cl @@ -1,4 +1,3 @@ - __kernel void sha1_16_test( __global const uint32_t *in, __global uint32_t *out) @@ -70,4 +69,3 @@ __kernel void aes_dec_128_test( } #endif } - diff --git a/cl/sha1_16.cl b/cl/sha1_16.cl index 7ec97be..8ca8f99 100644 --- a/cl/sha1_16.cl +++ b/cl/sha1_16.cl @@ -1,4 +1,3 @@ - // sha1_16 adapted for OpenCL, see "sha1_16.c" for more information void sha1_16(unsigned char *io) @@ -153,4 +152,3 @@ void sha1_16(unsigned char *io) PUT_UINT32_BE(C, io, 8); PUT_UINT32_BE(D, io, 12); } - diff --git a/crypto.h b/crypto.h index 42445d9..eec2349 100644 --- a/crypto.h +++ b/crypto.h @@ -1,4 +1,3 @@ - #pragma once // definition in sha1_16.c diff --git a/dsi.h b/dsi.h index f6cbb25..de2a273 100644 --- a/dsi.h +++ b/dsi.h @@ -1,4 +1,3 @@ - #include typedef uint8_t u8; @@ -116,4 +115,3 @@ static inline void dsi_make_xor(u8 *xor, const u8 *src, const u8 *ver) { xor_128((u64*)target_xor, (u64*)src, (u64*)ver); byte_reverse_16(xor, target_xor); } - diff --git a/ocl.h b/ocl.h index 9dfed73..a708ae3 100644 --- a/ocl.h +++ b/ocl.h @@ -1,7 +1,11 @@ #pragma once #define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#ifdef __APPLE__ +#include +#else #include +#endif typedef struct{ cl_device_id id; @@ -50,4 +54,3 @@ void ocl_get_device(cl_platform_id *p_platform_id, cl_device_id *p_device_id); cl_program ocl_build_from_sources( unsigned num_sources, const char *source_names[], cl_context context, cl_device_id device_id, const char * options); - diff --git a/ocl_brute.c b/ocl_brute.c index 84412e8..bbb610b 100644 --- a/ocl_brute.c +++ b/ocl_brute.c @@ -1,4 +1,4 @@ - +#include #include #include #include "utils.h" @@ -170,7 +170,7 @@ int ocl_brute_console_id(const cl_uchar *console_id, const cl_uchar *emmc_cid, } else { console_id |= (u64)i << group_bits; } - printf("%016"LL"x\n", console_id); + printf("%016"LL"x\n", (unsigned long long) console_id); OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_ulong), &console_id)); OCL_ASSERT(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &num_items, &local, 0, NULL, NULL)); @@ -179,7 +179,7 @@ int ocl_brute_console_id(const cl_uchar *console_id, const cl_uchar *emmc_cid, OCL_ASSERT(clEnqueueReadBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_ulong), &out, 0, NULL, NULL)); if (out) { get_hp_time(&t1); td = hp_time_diff(&t0, &t1); - printf("got a hit: %016"LL"x\n", out); + printf("got a hit: %016"LL"x\n", (unsigned long long) out); // also write to a file dump_to_file(emmc_cid ? hexdump(emmc_cid, 16, 0) : hexdump(src0, 16, 0), &out, 8); break; @@ -372,6 +372,7 @@ int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver, cl_uint msky_offset) int msky3_offset = (j & 1 ? 1 : -1) * ((j + 1) >> 1); cl_uint msky3 = msky[3] + msky3_offset; printf("msed3:%08x offset:%d \r", msky3, msky3_offset); + fflush(stdout); for (i = 0; i < loops; ++i) { cl_uint msky2 = i << group_bits; OCL_ASSERT(clSetKernelArg(kernel, 2, sizeof(cl_uint), &msky2)); @@ -493,7 +494,7 @@ int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, int fan = (j & 1 ? 1 : -1) * ((j + 1) >> 1); if(fan > 0){ //check to see if bf exhausted in both directions, quit if so - if( lfcs_block + fan > upper_bound && (int)lfcs_block - fan < lower_bound){ + if( lfcs_block + fan > upper_bound && (int)lfcs_block - fan < lower_bound){ printf("Exhausted all possible lfcs combinations, exiting ...\n\n"); break; } @@ -504,6 +505,7 @@ int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, } printf("%d \r", fan); + fflush(stdout); for (i = 0; i < loops; ++i) { cl_uint lfcs = lfcs_template + fan * 0x10000 + (i << (group_bits - 16)); OCL_ASSERT(clSetKernelArg(kernel, 0, sizeof(cl_uint), &lfcs)); @@ -531,13 +533,13 @@ int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, printf("movable_part1.sed dumped to file\n"); printf("don't you dare forget to add the id0 to it!\n"); } - + printf("done.\n\n"); break; } } - + if (out) { break; } @@ -559,4 +561,3 @@ int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, clReleaseContext(context); return !out; } - diff --git a/ocl_test.c b/ocl_test.c index 8f45c2a..bb37b9c 100644 --- a/ocl_test.c +++ b/ocl_test.c @@ -1,4 +1,4 @@ - +#include #include #include "utils.h" #include "ocl.h" @@ -86,7 +86,7 @@ int ocl_test() { if(cpu_has_rdrand()){ // ~190 MB/s @ X230, ~200 without the success check printf("randomize source buffer using RDRAND\n"); - if (!rdrand_fill((cl_ulong*)buf_in, BUF_SIZE >> 3)) { + if (!rdrand_fill((unsigned long long*)buf_in, BUF_SIZE >> 3)) { printf("RDRND failed\n"); exit(-1); } diff --git a/ocl_util.c b/ocl_util.c index 4d6cae8..3e2a3c9 100644 --- a/ocl_util.c +++ b/ocl_util.c @@ -1,8 +1,12 @@ - +#include #include #include -#include +#include +#ifdef __APPLE__ +#include +#else #include +#endif #include "ocl.h" #include "utils.h" @@ -28,8 +32,11 @@ const char * ocl_err_msg(cl_int error_code) { return "out of resources"; case CL_OUT_OF_HOST_MEMORY: return "out of host memory"; + /* Apple's OpenCL implementation for some reason doesn't have the following identifier declared. */ + #ifndef __APPLE__ case CL_PLATFORM_NOT_FOUND_KHR: return "platform not found"; + #endif case CL_INVALID_WORK_GROUP_SIZE: return "invalid work group size"; default: @@ -243,4 +250,3 @@ cl_program ocl_build_from_sources( free(source_sizes); return program; } - diff --git a/sha1_16.c b/sha1_16.c index 9e062d7..4c608e5 100644 --- a/sha1_16.c +++ b/sha1_16.c @@ -1,4 +1,3 @@ - #include /* sha1_16 @@ -184,4 +183,3 @@ void sha1_16(const unsigned char in[16], unsigned char out[16]) { PUT_UINT32_BE(C, out, 8); PUT_UINT32_BE(D, out, 12); } - diff --git a/utils.c b/utils.c index 44b6d34..99f7a76 100644 --- a/utils.c +++ b/utils.c @@ -1,4 +1,3 @@ - #include #include #include @@ -81,9 +80,9 @@ void get_hp_time(struct timeval *pt) { } long long hp_time_diff(struct timeval *pt0, struct timeval *pt1) { - long long diff = pt1.tv_sec - pt0.tv_sec; + long long diff = pt1->tv_sec - pt0->tv_sec; diff *= 1000000; - diff += pt1.tv_usec - pt0.tv_usec; + diff += pt1->tv_usec - pt0->tv_usec; return diff; } @@ -93,14 +92,17 @@ long long hp_time_diff(struct timeval *pt0, struct timeval *pt1) { char * read_file(const char *file_name, size_t *p_size) { FILE * f = fopen(file_name, "rb"); if (f == NULL) { - fprintf(stderr, "can't read file: %s", file_name); + fprintf(stderr, "can't read file: %s\n", file_name); exit(-1); } fseek(f, 0, SEEK_END); *p_size = ftell(f); char * buf = malloc(*p_size); fseek(f, 0, SEEK_SET); - fread(buf, *p_size, 1, f); + if (fread(buf, *p_size, 1, f) != 1) { + fprintf(stderr, "error durring fread\n"); + exit(-1); + } fclose(f); return buf; } @@ -176,4 +178,3 @@ char * trim(char *in) { } return first_non_ws; } - diff --git a/utils.h b/utils.h index 5abdc96..5df05de 100644 --- a/utils.h +++ b/utils.h @@ -1,4 +1,3 @@ - #pragma once // a crude cross Windows/POSIX high precision timer @@ -37,4 +36,3 @@ int cpu_has_rdrand(); int rdrand_fill(unsigned long long *p, size_t size); char * trim(char *in); -