Skip to content

Commit

Permalink
Add work size argument (zoogie#6)
Browse files Browse the repository at this point in the history
* Reduce work size to make the NVIDIA runtime happy
Give NVIDIA GPUs more weight

* Minor fixes to Makefile comments

* Add arguments that specify type of work size
Applicable to LFCS and MSKY mining

* Allow work size argument to be optional
I still highly advise that one be provided
  • Loading branch information
Mike15678 authored and zoogie committed Jun 12, 2018
1 parent 510e292 commit be32f01
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 13 deletions.
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ ifdef SYSTEMROOT
LDFLAGS += -L$(INTELOCLSDKROOT)\lib\x64
else
ifeq ($(shell uname), Linux)
# Intel's OpenCL SDK installer doesn't set an environmenr variable on Linux, so we'll have to specify its default installation location instead.
# Intel's OpenCL SDK installer doesn't set an environment variable on Linux, so we'll have to specify its default installation location instead.
CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd -I/opt/intel/opencl-sdk/include
LDFLAGS += -L/opt/intel/opencl-sdk/lib64
endif
ifeq ($(shell uname), Darwin)
# macOS's "ld" likes to warn you about library dirs not being found. That being said, macOS includes its own implementation of OpenCL.
# macOS's linker likes to warn you about library dirs not being found. That being said, macOS includes its own implementation of OpenCL, so CFLAGS and LDFLAGS are not needed.
CFLAGS += -std=c11 -Wall -Werror -O2 -mrdrnd
endif
endif
Expand All @@ -21,12 +21,12 @@ all: $(PNAME)
$(PNAME): $(OBJS)
ifeq ($(shell uname), Darwin)
$(CC) -o $@ $^ -framework OpenCL -lmbedcrypto
# If you want to use the mbedcrypto static library instead (on macOS), change "-lmbedcrypto" to "/usr/local/lib/libmbedcrypto.a" (or wherever else it may be) with the quotes.
# If you want to use the mbedcrypto static library instead (on macOS), change "-lmbedcrypto" to "/usr/local/lib/libmbedcrypto.a" (if you downloaded mbedtls through Homebrew) with the quotes.
else
$(CC) $(LDFLAGS) -o $@ $^ -lOpenCL -lmbedcrypto
# If you want to use the mbedcrypto static library instead (whether you're using MSYS2 or are on Linux), change "-lmbedcrypto" to "-l:libmbedcrypto.a" without the quotes.
# Note: Ubuntu (probably Debian as well) doesn't install "libmbedcrypto.a" through apt-get, thus you would have to compile mbedtls yourself.
endif

clean:
rm -f $(PNAME) *.o
rm -f $(PNAME) *.o
40 changes: 37 additions & 3 deletions bfcl.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ int main(int argc, const char *argv[]) {
puts(invalid_parameters);
ret = -1;
}
} else if (argc == 7) {
} else if (argc == 7 && strcmp(argv[1], "lfcs")) {
unsigned char console_id[8], emmc_cid[16], offset[2], src[16], ver[16];
hex2bytes(console_id, 8, argv[2], 1);
hex2bytes(emmc_cid, 16, argv[3], 1);
Expand All @@ -69,19 +69,53 @@ int main(int argc, const char *argv[]) {
puts(invalid_parameters);
ret = -1;
}
} else if(argc == 5 && !strcmp(argv[1], "msky")){
} else if(argc == 6 && !strcmp(argv[1], "msky") && !strcmp(argv[5], "sws")){ // "sws" is standard work size
uint32_t msky[4], ver[4], msky_offset;
hex2bytes((unsigned char*)msky, 16, argv[2], 1);
hex2bytes((unsigned char*)ver, 16, argv[3], 1);
hex2bytes((unsigned char*)&msky_offset, 4, argv[4], 1);
group_bits = 28;
ret = ocl_brute_msky(msky, ver, msky_offset);
} else if(argc == 6 && !strcmp(argv[1], "lfcs")){
} else if(argc == 6 && !strcmp(argv[1], "msky") && !strcmp(argv[5], "rws")){ // "rws" is reduced work size
uint32_t msky[4], ver[4], msky_offset;
hex2bytes((unsigned char*)msky, 16, argv[2], 1);
hex2bytes((unsigned char*)ver, 16, argv[3], 1);
hex2bytes((unsigned char*)&msky_offset, 4, argv[4], 1);
group_bits = 20;
ret = ocl_brute_msky(msky, ver, msky_offset);
} else if(argc == 5 && !strcmp(argv[1], "msky")){ // In the event a work size argument isn't provided, the standard work size shall be used.
uint32_t msky[4], ver[4], msky_offset;
hex2bytes((unsigned char*)msky, 16, argv[2], 1);
hex2bytes((unsigned char*)ver, 16, argv[3], 1);
hex2bytes((unsigned char*)&msky_offset, 4, argv[4], 1);
group_bits = 28;
ret = ocl_brute_msky(msky, ver, msky_offset);
} else if(argc == 7 && !strcmp(argv[1], "lfcs") && !strcmp(argv[6], "sws")){ // "sws" is standard work size
uint32_t lfcs, ver[2], lfcs_offset;
uint16_t newflag;
hex2bytes((unsigned char*)&lfcs, 4, argv[2], 1);
hex2bytes((unsigned char*)&newflag, 2, argv[3], 1);
hex2bytes((unsigned char*)ver, 8, argv[4], 1);
hex2bytes((unsigned char*)&lfcs_offset, 4, argv[5], 1);
group_bits = 28;
ret = ocl_brute_lfcs(lfcs, newflag, ver, lfcs_offset);
} else if(argc == 7 && !strcmp(argv[1], "lfcs") && !strcmp(argv[6], "rws")){ // "rws" is reduced work size
uint32_t lfcs, ver[2], lfcs_offset;
uint16_t newflag;
hex2bytes((unsigned char*)&lfcs, 4, argv[2], 1);
hex2bytes((unsigned char*)&newflag, 2, argv[3], 1);
hex2bytes((unsigned char*)ver, 8, argv[4], 1);
hex2bytes((unsigned char*)&lfcs_offset, 4, argv[5], 1);
group_bits = 20;
ret = ocl_brute_lfcs(lfcs, newflag, ver, lfcs_offset);
} else if(argc == 6 && !strcmp(argv[1], "lfcs")){ // In the event a work size argument isn't provided, the standard work size shall be used.
uint32_t lfcs, ver[2], lfcs_offset;
uint16_t newflag;
hex2bytes((unsigned char*)&lfcs, 4, argv[2], 1);
hex2bytes((unsigned char*)&newflag, 2, argv[3], 1);
hex2bytes((unsigned char*)ver, 8, argv[4], 1);
hex2bytes((unsigned char*)&lfcs_offset, 4, argv[5], 1);
group_bits = 28;
ret = ocl_brute_lfcs(lfcs, newflag, ver, lfcs_offset);
} else {
printf(invalid_parameters);
Expand Down
4 changes: 2 additions & 2 deletions cl/dsi.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// more about this: https://github.com/Jimmy-Z/TWLbf/blob/master/dsi.c

__constant static const u64 DSi_KEY_Y[2] =
{0xbd4dc4d30ab9dc76ull, 0xe1a00005202ddd1dull};
{0xbd4dc4d30ab9dc76ul, 0xe1a00005202ddd1dul};

__constant static const u64 DSi_KEY_MAGIC[2] =
{0x2a680f5f1a4f3e79ull, 0xfffefb4e29590258ull};
{0x2a680f5f1a4f3e79ul, 0xfffefb4e29590258ul};

// CAUTION this one doesn't work in-place
inline void byte_reverse_16(u8 *out, const u8 *in){
Expand Down
5 changes: 2 additions & 3 deletions ocl_brute.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ int ocl_brute_console_id(const cl_uchar *console_id, const cl_uchar *emmc_cid,
// I wish we could use 1e10 in C, counting 0 is not good to your eye
total = from_bcd(1ull << 40);
// work items variations on lower bits per enqueue, 8 + 1 digits, including the known digit
group_bits = 36;
// reduced from 36 to 28 to make nvidia runtime happy
group_bits = 28;
// work items per enqueue, don't count the known digit here
num_items = from_bcd(1ull << (group_bits - 4));
// between the template bits and group bits, it's the loop bits
Expand Down Expand Up @@ -350,7 +351,6 @@ int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver, cl_uint msky_offset)
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));

unsigned brute_bits = 32;
unsigned group_bits = 28;
unsigned loop_bits = brute_bits - group_bits;
unsigned loops = 1ull << loop_bits;
size_t num_items = 1ull << group_bits;
Expand Down Expand Up @@ -471,7 +471,6 @@ int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver,
OCL_ASSERT(clEnqueueWriteBuffer(command_queue, mem_out, CL_TRUE, 0, sizeof(cl_uint), &out, 0, NULL, NULL));

unsigned brute_bits = 32;
unsigned group_bits = 28;
unsigned loop_bits = brute_bits - group_bits;
unsigned loops = 1ull << loop_bits;
size_t num_items = 1ull << group_bits;
Expand Down
1 change: 1 addition & 0 deletions ocl_brute.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ int ocl_brute_msky(const cl_uint *msky, const cl_uint *ver, cl_uint msky_offset)

int ocl_brute_lfcs(cl_uint lfcs_template, cl_ushort newflag, const cl_uint *ver, cl_uint lfcs_offset);

unsigned group_bits;
4 changes: 3 additions & 1 deletion ocl_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,10 @@ void ocl_get_device(cl_platform_id *p_platform_id, cl_device_id *p_device_id) {
&& devices[j].c_avail == CL_TRUE){
cl_ulong cap = 1ull * devices[j].max_compute_units * devices[j].freq;
// unfortunately that metric is not comparable between different vendors
if (strstr((const char*)devices[j].name, "Intel") == 0) {
if (strstr((const char*)devices[j].vendor, "Advanced Micro Devices") != 0) {
cap *= 64;
} else if(strstr((const char*)devices[j].vendor, "NVIDIA") != 0) {
cap *= 128;
}
if (cap > maximum) {
maximum = cap;
Expand Down

0 comments on commit be32f01

Please sign in to comment.