@@ -228,7 +228,27 @@ extern uint64 XIANG_Mult[90];
228
228
229
229
#endif
230
230
231
+ #if __arm64__
232
+ #define m128_from_2u64 (data0 ,data1 ) (vcombine_u64(vcreate_u64(data1), vcreate_u64(data0)))
231
233
234
+ #define m_not (bb ) vandq_u64(~bb, ALL_B90)
235
+
236
+ #define m_xor (xmm1 ,xmm2 ) veorq_u64(xmm1, xmm2)
237
+
238
+ #define m_and (xmm1 ,xmm2 ) vandq_u64(xmm1, xmm2)
239
+
240
+ #define m_or (xmm1 ,xmm2 ) vorrq_u64(xmm1, xmm2)
241
+
242
+
243
+ #define BB_u64 (bb ,i ) vgetq_lane_u64(bb, i)
244
+ #define BB_i64 (bb ,i ) vgetq_lane_s64(bb, i)
245
+
246
+ #define set_mask_bb (s ) SetMaskBB[s]
247
+ #define clear_mask_bb (s ) ClearMaskBB[s]
248
+
249
+
250
+ #define M128_get_Or64 (bb ) (BB_u64(bb, 1) | BB_u64(bb, 0))
251
+ #else
232
252
#define m128_from_2u64 (bb ,data0 ,data1 ) (bb = _mm_set_epi64x(data0,data1))
233
253
234
254
#define m_not (bb ) _mm_andnot_si128(bb,ALL_B90)
@@ -248,6 +268,7 @@ extern uint64 XIANG_Mult[90];
248
268
249
269
250
270
#define M128_get_Or64 (bb ) (_mm_extract_epi64(bb,1) | _mm_extract_epi64(bb,0))
271
+ #endif
251
272
//////////////////////////////////////////////////////////////////////////
252
273
#define transform_bbm (bb ,magic64 ,bits ) ((M128_get_Or64(bb) * (magic64)) >> (bits))
253
274
#define transform_mul (bb ,magic64 ,bits ) ((M128_get_Or64(bb) * (magic64)) >> (64 - (bits)))
@@ -264,7 +285,16 @@ extern uint64 XIANG_Mult[90];
264
285
/* FIX(review): dropped the stray trailing ';' from the expansion — it
 * made the macro unusable in expressions and broke braceless if/else.
 * Statement-context callers already supply their own semicolon. */
#define transform_bba(bb,magic64,bits) ((M128_get_Or64(bb) * (magic64)) >> (bits))
265
286
#define get_transfrom_u64 (bb ,magic64 ) (M128_get_Or64(bb) * (magic64))
266
287
288
+ #ifdef __arm64__
289
+ __inline int count_1s (Bitboard bb ){
290
+ return (int ) vaddvq_u8 (vcntq_u8 (bb ));
291
+ }
267
292
293
+ __inline int count_1s (Bitboard b1 , Bitboard b2 ){
294
+ Bitboard bb = m_and (b1 , b2 );
295
+ return (int ) vaddvq_u8 (vcntq_u8 (bb ));
296
+ }
297
+ #else
268
298
/* Popcount of the whole 128-bit board: popcount each 64-bit half and
 * sum the two counts. */
__inline int count_1s(Bitboard bb){
    uint64 low_half  = _mm_extract_epi64(bb, 0);
    uint64 high_half = _mm_extract_epi64(bb, 1);
    return (int)(_mm_popcnt_u64(low_half) + _mm_popcnt_u64(high_half));
}
@@ -273,9 +303,117 @@ __inline int count_1s(Bitboard b1, Bitboard b2){
273
303
Bitboard bb = m_and (b1 , b2 );
274
304
return (_mm_popcnt_u64 (_mm_extract_epi64 (bb , 0 )) + _mm_popcnt_u64 (_mm_extract_epi64 (bb , 1 )));
275
305
}
306
+ #endif
276
307
277
308
278
309
//__
310
+ #ifdef __arm64__
311
+
312
+ __inline uint64 m_have_bit (Bitboard bb ) {
313
+ return (BB_u64 (bb , 0 ) | BB_u64 (bb , 1 ));
314
+ }
315
+
316
+ FORCE_INLINE bool m128_is_same (Bitboard & b1 , Bitboard & b2 ) {
317
+ uint64x2_t cmp = vceqq_u64 (b1 , b2 );
318
+ return (BB_u64 (cmp , 0 ) == 0xFFFFFFFFFFFFFFFF ) && (BB_u64 (cmp , 1 ) == 0xFFFFFFFFFFFFFFFF );
319
+ }
320
+
321
+ __inline uint64 have_bit (Bitboard b1 , Bitboard b2 ){
322
+ Bitboard bb = vandq_u64 (b1 , b2 );
323
+ return (BB_u64 (bb , 0 ) | BB_u64 (bb , 1 ));
324
+ }
325
+
326
+ __inline uint64 bit_is_set (Bitboard bb , Square sq ){
327
+ return ((BB_u64 (bb , 0 ) & BB_u64 (SetMaskBB [sq ], 0 )) | (BB_u64 (bb , 1 ) & BB_u64 (SetMaskBB [sq ], 1 )));
328
+ }
329
+
330
+ #define set_bit (bb ,sq ) (bb = vorrq_u64(bb,SetMaskBB[sq]))
331
+ #define clear_bit (bb ,sq ) (bb = vandq_u64(bb,ClearMaskBB[sq]))
332
+
333
+ FORCE_INLINE bool pop_1st_bit_sq (Bitboard & b , Square & sq ) {
334
+
335
+ uint64x2_t board_empty_v = vceqzq_u64 (b );
336
+ uint64_t board_empty = board_empty_v [0 ] & board_empty_v [1 ];
337
+
338
+ if (!!board_empty ) {
339
+ return false;
340
+ }
341
+
342
+ uint64_t lo ;
343
+ uint64_t hi ;
344
+ unsigned long index ;
345
+
346
+ lo = BB_u64 (b , 0 );
347
+
348
+ // Count leading zeros of the reverse of lo
349
+ index = __builtin_clzll (__builtin_arm_rbit64 (lo ));
350
+
351
+ if (index < 64 ) {
352
+ // First bit is in the bottom 64 bits
353
+ // Clear the bit to be popped
354
+ lo = lo & (lo - 1 );
355
+ b [0 ] = lo ;
356
+ } else {
357
+ hi = BB_u64 (b , 1 );
358
+ index = 64 + __builtin_clzll (__builtin_arm_rbit64 (hi ));
359
+
360
+ hi = hi & (hi - 1 );
361
+ b [1 ] = hi ;
362
+ }
363
+
364
+ sq = (Square ) index ;
365
+ return true;
366
+ }
367
+
368
+ FORCE_INLINE Square pop_1st_bit_sq (Bitboard & b ) {
369
+ Square sq ;
370
+
371
+ pop_1st_bit_sq (b , sq );
372
+
373
+ return sq ;
374
+ }
375
+
376
/* 128-bit logical left shift of bb by `count` (compile-time constant,
 * 0 < count < 64): the top `count` bits of the low half carry into
 * the high half.
 * FIX(review): wrapped in do { } while (0) — the bare { } block broke
 * braceless if/else and produced a stray empty statement after the
 * caller's semicolon. */
#define m_Lsf(bb,count) do {\
    Bitboard sltmp; \
    sltmp = vcombine_u64(vcreate_u64(0), vcreate_u64(vgetq_lane_u64(bb, 0)));\
    sltmp = vshrq_n_u64(sltmp, 64-(count));\
    bb = vshlq_n_u64(bb, count);\
    bb = vorrq_u64(bb, sltmp);\
} while (0)

/* 128-bit logical right shift of bb by `count` (compile-time
 * constant, 0 < count < 64): the bottom `count` bits of the high
 * half carry into the low half. */
#define m_Rsf(bb,count) do {\
    Bitboard sltmp; \
    sltmp = vcombine_u64(vcreate_u64(vgetq_lane_u64(bb, 1)), vcreate_u64(0));\
    sltmp = vshlq_n_u64(sltmp, 64-(count));\
    bb = vshrq_n_u64(bb, count);\
    bb = vorrq_u64(bb, sltmp);\
} while (0)
391
+
392
+ inline uint32 msb (uint64 b ) {
393
+ return 63 - __builtin_clzll (b );
394
+ }
395
+
396
+ __inline Square first_1 (Bitboard b ){
397
+ uint64_t lo ;
398
+ uint64_t hi ;
399
+ unsigned long index ;
400
+
401
+ lo = BB_u64 (b , 0 );
402
+
403
+ // Count leading zeros of the reverse of lo
404
+ index = __builtin_clzll (__builtin_arm_rbit64 (lo ));
405
+
406
+ if (index < 64 ) {
407
+ // First bit is in the bottom 64 bits
408
+ } else {
409
+ hi = BB_u64 (b , 1 );
410
+ index = 64 + __builtin_clzll (__builtin_arm_rbit64 (hi ));
411
+ }
412
+
413
+ return (Square ) index ;
414
+ }
415
+
416
+ #else
279
417
280
418
#define USE_SSE_BIT_OPERATION
281
419
@@ -496,16 +634,6 @@ inline uint32 msb(uint64 b) {
496
634
return index ;
497
635
}
498
636
499
-
500
- #define one_rpawn_rk_attacks (sq ) OneRpawnOrRking_AttackBB[sq]
501
- #define one_bpawn_bk_attacks (sq ) OneBpawnOrBking_AttackBB[sq]
502
-
503
- #define attacks_by_rpawn_rk (sq ) Attack_By_Rpawn_Rking[sq]
504
- #define attacks_by_bpawn_bk (sq ) Attack_By_Bpawn_Bking[sq]
505
-
506
- #define shi_attacks (sq ) ShiAttackBB[sq]
507
-
508
-
509
637
__inline Square first_1 (Bitboard b ){
510
638
unsigned long index ;
511
639
if (_mm_extract_epi64 (b ,0 )){
@@ -518,6 +646,17 @@ __inline Square first_1(Bitboard b){
518
646
return (Square )index ;
519
647
}
520
648
649
+ #endif
650
+
651
+
652
+ #define one_rpawn_rk_attacks (sq ) OneRpawnOrRking_AttackBB[sq]
653
+ #define one_bpawn_bk_attacks (sq ) OneBpawnOrBking_AttackBB[sq]
654
+
655
+ #define attacks_by_rpawn_rk (sq ) Attack_By_Rpawn_Rking[sq]
656
+ #define attacks_by_bpawn_bk (sq ) Attack_By_Bpawn_Bking[sq]
657
+
658
+ #define shi_attacks (sq ) ShiAttackBB[sq]
659
+
521
660
/// squares_between returns a bitboard representing all squares between
522
661
/// two squares. For instance, squares_between(SQ_C4, SQ_F7) returns a
523
662
/// bitboard with the bits for square d5 and e6 set. If s1 and s2 are not
0 commit comments