Skip to content

Commit

Permalink
a6.0: --check-sex/--impute-sex
Browse files Browse the repository at this point in the history
  • Loading branch information
chrchang committed Nov 11, 2024
1 parent f0578b6 commit 39d4f67
Show file tree
Hide file tree
Showing 26 changed files with 1,577 additions and 725 deletions.
45 changes: 1 addition & 44 deletions 2.0/include/plink2_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2871,7 +2871,7 @@ char* lntoa_g(double ln_val, char* start) {
// makes sense to just promote to double like printf does.


CXXCONST_CP ScanForDuplicateIds(const char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen) {
CXXCONST_CP FindSortedStrboxDuplicate(const char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen) {
--id_ct;
for (uintptr_t id_idx = 0; id_idx != id_ct; ++id_idx) {
if (strequal_overread(&(sorted_ids[id_idx * max_id_blen]), &(sorted_ids[(id_idx + 1) * max_id_blen]))) {
Expand All @@ -2881,49 +2881,6 @@ CXXCONST_CP ScanForDuplicateIds(const char* sorted_ids, uintptr_t id_ct, uintptr
return nullptr;
}

uint32_t CollapseDuplicateIds(uintptr_t id_ct, uintptr_t max_id_blen, char* sorted_ids, uint32_t* id_starts) {
  // In-place deduplication of a sorted string box: entry i lives at
  // sorted_ids[i * max_id_blen], and each run of equal adjacent entries is
  // reduced to its first member.
  // If id_starts is non-null, id_starts[k] receives the pre-collapse index of
  // the k-th surviving entry (so e.g. the duplication count of an ID can be
  // obtained by subtracting consecutive id_starts values).
  // Returns the number of entries remaining after the collapse.
  if (id_ct == 0) {
    return 0;
  }
  if (id_starts) {
    id_starts[0] = 0;
  }

  // Phase 1: skip over the leading stretch that contains no duplicates.
  // Nothing needs to move here; only id_starts (when requested) is filled.
  uintptr_t src_idx = 1;
  const char* prev_id = sorted_ids;
  for (; src_idx != id_ct; ++src_idx) {
    const char* cur_id = &(prev_id[max_id_blen]);
    if (strequal_overread(prev_id, cur_id)) {
      break;
    }
    if (id_starts) {
      id_starts[src_idx] = src_idx;
    }
    prev_id = cur_id;
  }

  // Phase 2: src_idx now sits on the first duplicate (or on id_ct when there
  // is none).  Every later entry that differs from the most recently kept one
  // is copied down into the next free slot.
  uintptr_t dst_idx = src_idx;
  for (++src_idx; src_idx < id_ct; ++src_idx) {
    // this loop can probably be improved with string-length tracking...
    const char* candidate = &(sorted_ids[src_idx * max_id_blen]);
    if (strequal_overread(&(sorted_ids[(dst_idx - 1) * max_id_blen]), candidate)) {
      continue;
    }
    strcpy(&(sorted_ids[dst_idx * max_id_blen]), candidate);
    if (id_starts) {
      id_starts[dst_idx] = src_idx;
    }
    ++dst_idx;
  }
  return dst_idx;
}


int32_t bsearch_strbox(const char* idbuf, const char* sorted_strbox, uintptr_t cur_id_slen, uintptr_t max_id_blen, uintptr_t end_idx) {
// does not assume null-terminated idbuf, or nonempty array.
Expand Down
12 changes: 8 additions & 4 deletions 2.0/include/plink2_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,10 @@ void SortStrbox64bFinish(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_
// Must be ok to overread.
void SortStrboxIndexed2(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, char* strbox, uint32_t* id_map, void* sort_wkspace);
#else // !__cplusplus
// Less-than predicate built on strcmp_overread(): yields 1 when
// strcmp_overread(s1, s2) is negative, 0 otherwise.
// NOTE(review): presumably inherits strcmp_overread()'s requirement that
// reading past the end of either string is safe -- confirm against its
// declaration.
HEADER_INLINE uint32_t strcmp_overread_lt(const char* s1, const char* s2) {
  return (strcmp_overread(s1, s2) < 0)? 1 : 0;
}

// Sorts the ct string pointers in strptr_arr in place via qsort(), using
// strcmp_deref as the comparator.
// NOTE(review): strcmp_deref presumably dereferences each slot and compares
// the pointed-to strings -- confirm against its definition.
HEADER_INLINE void StrptrArrSort(uintptr_t ct, const char** strptr_arr) {
  // Element width is expressed as sizeof(intptr_t), i.e. one pointer-sized
  // slot per array entry.
  const size_t slot_size = sizeof(intptr_t);
  qsort(strptr_arr, ct, slot_size, strcmp_deref);
}
Expand Down Expand Up @@ -1513,11 +1517,11 @@ HEADER_INLINE char* i32toa_x(int32_t ii, char extra_char, char* start) {


// overread must be ok.
CXXCONST_CP ScanForDuplicateIds(const char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen);
CXXCONST_CP FindSortedStrboxDuplicate(const char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen);

#ifdef __cplusplus
HEADER_INLINE char* ScanForDuplicateIds(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen) {
return const_cast<char*>(ScanForDuplicateIds(const_cast<const char*>(sorted_ids), id_ct, max_id_blen));
// Non-const convenience overload: forwards to the const char* version and
// strips const from the result, so callers holding a mutable buffer get a
// mutable pointer back.
HEADER_INLINE char* FindSortedStrboxDuplicate(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_blen) {
  // Binding to a const pointer first selects the const overload.
  const char* sorted_ids_ro = sorted_ids;
  return const_cast<char*>(FindSortedStrboxDuplicate(sorted_ids_ro, id_ct, max_id_blen));
}
#endif

Expand All @@ -1526,7 +1530,7 @@ HEADER_INLINE char* ScanForDuplicateIds(char* sorted_ids, uintptr_t id_ct, uintp
// determined via subtraction) if it isn't nullptr.
// Overread must be ok.
// Returns id_ct of collapsed array.
uint32_t CollapseDuplicateIds(uintptr_t id_ct, uintptr_t max_id_blen, char* sorted_ids, uint32_t* id_starts);
// uint32_t CollapseSortedStrbox(uintptr_t id_ct, uintptr_t max_id_blen, char* sorted_ids, uint32_t* id_starts);


// returns position of string, or -1 if not found.
Expand Down
9 changes: 9 additions & 0 deletions 2.0/include/plink2_thread.cc
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,15 @@ void UpdateU64IfSmaller(uint64_t newval, uint64_t* oldval_ptr) {
}
}

void UpdateU32IfSmaller(uint32_t newval, uint32_t* oldval_ptr) {
  // Lowers *oldval_ptr to newval if newval is strictly smaller, using a
  // compare-exchange retry loop; does nothing once the observed value is
  // already <= newval.
  // NOTE(review): ATOMIC_COMPARE_EXCHANGE_N_U32 is presumably a wrapper for
  // __atomic_compare_exchange_n (weak variant, relaxed ordering) that
  // refreshes 'observed' with the current value on failure -- confirm against
  // the macro definition.
  uint32_t observed = *oldval_ptr;
  while (1) {
    if (observed <= newval) {
      return;
    }
    if (ATOMIC_COMPARE_EXCHANGE_N_U32(oldval_ptr, &observed, newval, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
      return;
    }
  }
}

#ifdef __cplusplus
} // namespace plink2
#endif
2 changes: 2 additions & 0 deletions 2.0/include/plink2_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,8 @@ HEADER_INLINE void JoinThreads0(ThreadGroup* tg_ptr) {
// deterministic behavior is desired.
void UpdateU64IfSmaller(uint64_t newval, uint64_t* oldval_ptr);

void UpdateU32IfSmaller(uint32_t newval, uint32_t* oldval_ptr);

#ifdef __cplusplus
} // namespace plink2
#endif
Expand Down
Loading

0 comments on commit 39d4f67

Please sign in to comment.