diff --git a/Makefile b/Makefile index 0b22218..6796569 100644 --- a/Makefile +++ b/Makefile @@ -6,12 +6,12 @@ ifdef SYSTEMROOT LDFLAGS += -L$(INTELOCLSDKROOT)\lib\x64 else ifeq ($(shell uname), Linux) - # Intel's OpenCL SDK installer doesn't set an environmenr variable on Linux, so we'll have to specify its default installation location instead. + # Intel's OpenCL SDK installer doesn't set an environment variable on Linux, so we'll have to specify its default installation location instead. CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd -I/opt/intel/opencl-sdk/include LDFLAGS += -L/opt/intel/opencl-sdk/lib64 endif ifeq ($(shell uname), Darwin) - # macOS's "ld" likes to warn you about library dirs not being found. That being said, macOS includes its own implementation of OpenCL. + # macOS's linker likes to warn you about library dirs not being found. That being said, macOS includes its own implementation of OpenCL, so no extra include (-I) or library (-L) search paths are needed here. CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd endif endif @@ -21,7 +21,7 @@ all: $(PNAME) $(PNAME): $(OBJS) ifeq ($(shell uname), Darwin) $(CC) -o $@ $^ -framework OpenCL -lmbedcrypto -# If you want to use the mbedcrypto static library instead (on macOS), change "-lmbedcrypto" to "/usr/local/lib/libmbedcrypto.a" (or wherever else it may be) with the quotes. +# If you want to use the mbedcrypto static library instead (on macOS), change "-lmbedcrypto" to "/usr/local/lib/libmbedcrypto.a" (if you downloaded mbedtls through Homebrew) with the quotes. else $(CC) $(LDFLAGS) -o $@ $^ -lOpenCL -lmbedcrypto # If you want to use the mbedcrypto static library instead (whether you're using MSYS2 or are on Linux), change "-lmbedcrypto" to "-l:libmbedcrypto.a" without the quotes. 
@@ -29,4 +29,4 @@ else endif clean: - rm -f $(PNAME) *.o + rm -f $(PNAME) *.o \ No newline at end of file diff --git a/bfcl.c b/bfcl.c index 4d784cf..6224ebe 100644 --- a/bfcl.c +++ b/bfcl.c @@ -49,7 +49,7 @@ int main(int argc, const char *argv[]) { puts(invalid_parameters); ret = -1; } - } else if (argc == 7) { + } else if (argc == 7 && strcmp(argv[1], "lfcs")) { unsigned char console_id[8], emmc_cid[16], offset[2], src[16], ver[16]; hex2bytes(console_id, 8, argv[2], 1); hex2bytes(emmc_cid, 16, argv[3], 1); @@ -69,19 +69,53 @@ int main(int argc, const char *argv[]) { puts(invalid_parameters); ret = -1; } - } else if(argc == 5 && !strcmp(argv[1], "msky")){ + } else if(argc == 6 && !strcmp(argv[1], "msky") && !strcmp(argv[5], "sws")){ // "sws" is standard work size uint32_t msky[4], ver[4], msky_offset; hex2bytes((unsigned char*)msky, 16, argv[2], 1); hex2bytes((unsigned char*)ver, 16, argv[3], 1); hex2bytes((unsigned char*)&msky_offset, 4, argv[4], 1); + group_bits = 28; ret = ocl_brute_msky(msky, ver, msky_offset); - } else if(argc == 6 && !strcmp(argv[1], "lfcs")){ + } else if(argc == 6 && !strcmp(argv[1], "msky") && !strcmp(argv[5], "rws")){ // "rws" is reduced work size + uint32_t msky[4], ver[4], msky_offset; + hex2bytes((unsigned char*)msky, 16, argv[2], 1); + hex2bytes((unsigned char*)ver, 16, argv[3], 1); + hex2bytes((unsigned char*)&msky_offset, 4, argv[4], 1); + group_bits = 20; + ret = ocl_brute_msky(msky, ver, msky_offset); + } else if(argc == 5 && !strcmp(argv[1], "msky")){ // In the event a work size argument isn't provided, the standard work size shall be used. 
+ uint32_t msky[4], ver[4], msky_offset; + hex2bytes((unsigned char*)msky, 16, argv[2], 1); + hex2bytes((unsigned char*)ver, 16, argv[3], 1); + hex2bytes((unsigned char*)&msky_offset, 4, argv[4], 1); + group_bits = 28; + ret = ocl_brute_msky(msky, ver, msky_offset); + } else if(argc == 7 && !strcmp(argv[1], "lfcs") && !strcmp(argv[6], "sws")){ // "sws" is standard work size + uint32_t lfcs, ver[2], lfcs_offset; + uint16_t newflag; + hex2bytes((unsigned char*)&lfcs, 4, argv[2], 1); + hex2bytes((unsigned char*)&newflag, 2, argv[3], 1); + hex2bytes((unsigned char*)ver, 8, argv[4], 1); + hex2bytes((unsigned char*)&lfcs_offset, 4, argv[5], 1); + group_bits = 28; + ret = ocl_brute_lfcs(lfcs, newflag, ver, lfcs_offset); + } else if(argc == 7 && !strcmp(argv[1], "lfcs") && !strcmp(argv[6], "rws")){ // "rws" is reduced work size + uint32_t lfcs, ver[2], lfcs_offset; + uint16_t newflag; + hex2bytes((unsigned char*)&lfcs, 4, argv[2], 1); + hex2bytes((unsigned char*)&newflag, 2, argv[3], 1); + hex2bytes((unsigned char*)ver, 8, argv[4], 1); + hex2bytes((unsigned char*)&lfcs_offset, 4, argv[5], 1); + group_bits = 20; + ret = ocl_brute_lfcs(lfcs, newflag, ver, lfcs_offset); + } else if(argc == 6 && !strcmp(argv[1], "lfcs")){ // In the event a work size argument isn't provided, the standard work size shall be used. 
uint32_t lfcs, ver[2], lfcs_offset; uint16_t newflag; hex2bytes((unsigned char*)&lfcs, 4, argv[2], 1); hex2bytes((unsigned char*)&newflag, 2, argv[3], 1); hex2bytes((unsigned char*)ver, 8, argv[4], 1); hex2bytes((unsigned char*)&lfcs_offset, 4, argv[5], 1); + group_bits = 28; ret = ocl_brute_lfcs(lfcs, newflag, ver, lfcs_offset); } else { printf(invalid_parameters); diff --git a/cl/dsi.h b/cl/dsi.h index e314544..86fdddd 100644 --- a/cl/dsi.h +++ b/cl/dsi.h @@ -1,10 +1,10 @@ // more about this: https://github.com/Jimmy-Z/TWLbf/blob/master/dsi.c __constant static const u64 DSi_KEY_Y[2] = - {0xbd4dc4d30ab9dc76ull, 0xe1a00005202ddd1dull}; + {0xbd4dc4d30ab9dc76ul, 0xe1a00005202ddd1dul}; __constant static const u64 DSi_KEY_MAGIC[2] = - {0x2a680f5f1a4f3e79ull, 0xfffefb4e29590258ull}; + {0x2a680f5f1a4f3e79ul, 0xfffefb4e29590258ul}; // CAUTION this one doesn't work in-place inline void byte_reverse_16(u8 *out, const u8 *in){ diff --git a/ocl_brute.c b/ocl_brute.c index bbb610b..0f7bc09 100644 --- a/ocl_brute.c +++ b/ocl_brute.c @@ -130,7 +130,8 @@ int ocl_brute_console_id(const cl_uchar *console_id, const cl_uchar *emmc_cid, // I wish we could use 1e10 in C, counting 0 is not good to your eye total = from_bcd(1ull << 40); // work items variations on lower bits per enqueue, 8 + 1 digits, including the known digit - group_bits = 36; + // reduced from 36 to 28 bits (i.e. 6 + 1 digits instead of the 8 + 1 noted above) to make the NVIDIA runtime happy + group_bits = 28; // work items per enqueue, don't count the known digit here num_items = from_bcd(1ull << (group_bits - 4)); // between the template bits and group bits, it's the loop bits @@ -350,7 +351,6 @@ int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver, cl_uint msky_offset) OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL)); unsigned brute_bits = 32; - unsigned group_bits = 28; unsigned loop_bits = brute_bits - group_bits; unsigned loops = 1ull << loop_bits; size_t num_items = 1ull << group_bits; @@ -471,7 +471,6 @@ int 
ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL)); unsigned brute_bits = 32; - unsigned group_bits = 28; unsigned loop_bits = brute_bits - group_bits; unsigned loops = 1ull << loop_bits; size_t num_items = 1ull << group_bits; diff --git a/ocl_brute.h b/ocl_brute.h index 3cf4da0..a583993 100644 --- a/ocl_brute.h +++ b/ocl_brute.h @@ -18,3 +18,4 @@ int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver, cl_uint msky_offset) int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, cl_uint lfcs_offset); +unsigned group_bits; \ No newline at end of file diff --git a/ocl_util.c b/ocl_util.c index 3e2a3c9..3c98bd1 100644 --- a/ocl_util.c +++ b/ocl_util.c @@ -189,8 +189,10 @@ void ocl_get_device(cl_platform_id *p_platform_id, cl_device_id *p_device_id) { && devices[j].c_avail == CL_TRUE){ cl_ulong cap = 1ull * devices[j].max_compute_units * devices[j].freq; // unfortunately that metric is not comparable between different vendors - if (strstr((const char*)devices[j].name, "Intel") == 0) { + if (strstr((const char*)devices[j].vendor, "Advanced Micro Devices") != 0) { cap *= 64; + } else if(strstr((const char*)devices[j].vendor, "NVIDIA") != 0) { + cap *= 128; } if (cap > maximum) { maximum = cap;