Skip to content

Commit ae5d6fe

Browse files
calc84maniac authored and mateoconlechuga committed
Re-optimize count-trailing-zero libcalls based on PR feedback
1 parent 6e9a4c7 commit ae5d6fe

File tree

1 file changed

+41
-33
lines changed

1 file changed

+41
-33
lines changed

src/crt/cttz.src

+41 -33
Original file line number | Diff line number | Diff line change
@@ -3,47 +3,53 @@
33
section .text
44
public __bcttz
55
__bcttz:
6-
tst a, 0Fh
6+
cp a, 1
7+
require __cttz_common
8+
9+
section .text
10+
private __cttz_common
11+
; Input: A=byte, CF=(A==0)
12+
; Output: A=cttz(A)
13+
__cttz_common:
14+
adc a, a
15+
add a, a
16+
jr z, .high2
17+
add a, a
18+
add a, a
719
jr z, .high4
8-
tst a, 3
20+
add a, a
21+
add a, a
922
jr z, .high6
10-
cpl
11-
and a, 1
23+
add a, a
24+
add a, a
25+
sbc a, -1
26+
ret p
27+
ld a, 8
1228
ret
1329
.high6:
14-
dec a
15-
and a, 6
16-
ret po
17-
rrca
30+
sbc a, -3
1831
ret
1932
.high4:
20-
tst a, 030h
21-
jr z, .high2
22-
dec a
23-
and a, 014h
24-
ret po
25-
ld a, 5
33+
sbc a, -5
2634
ret
2735
.high2:
28-
add a, a
29-
sbc a, -8
30-
ret p
31-
ld a, 6
36+
sbc a, -7
3237
ret
3338

3439
section .text
3540
public __scttz
3641
__scttz:
37-
ld a, l
38-
or a, a
39-
jr nz, __bcttz
40-
ld a, h
42+
xor a, a
43+
or a, l
44+
jr nz, __cttz_common
45+
sub a, h
46+
ccf
4147
require __scttz.hijack
4248

4349
section .text
4450
private __scttz.hijack
4551
__scttz.hijack:
46-
call __bcttz
52+
call __cttz_common
4753
add a, 8
4854
ret
4955

@@ -57,7 +63,7 @@ __icttz:
5763
private __icttz.hijack
5864
__icttz.hijack:
5965
or a, l
60-
jr nz, __bcttz
66+
jr nz, __cttz_common
6167
or a, h
6268
jr nz, __scttz.hijack
6369
dec sp
@@ -75,8 +81,9 @@ __lcttz:
7581
xor a, a
7682
sbc hl, de
7783
jr nz, __icttz.hijack
78-
ld a, e
79-
call __bcttz
84+
sub a, e
85+
ccf
86+
call __cttz_common
8087
add a, 24
8188
ret
8289

@@ -91,15 +98,16 @@ __llcttz:
9198
sbc hl, de
9299
jr nz, __i48cttz.hijack
93100
or a, c
94-
jr z, .high
95-
call __bcttz
96-
add a, 48
97-
ret
98-
.high:
99-
ld a, b
100-
call __bcttz
101+
jr nz, .low
102+
sub a, b
103+
ccf
104+
call __cttz_common
101105
add a, 56
102106
ret
107+
.low:
108+
call __cttz_common
109+
add a, 48
110+
ret
103111

104112
section .text
105113
public __i48cttz

0 commit comments

Comments
 (0)