From 7c586b62459a79698e656f435b32a2664a6cc773 Mon Sep 17 00:00:00 2001 From: Christopher Chang Date: Sun, 28 Apr 2024 21:39:59 -0700 Subject: [PATCH] VCF-import sanity checks --- 2.0/plink2_import.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/2.0/plink2_import.cc b/2.0/plink2_import.cc index b442b11e..02cc2c88 100644 --- a/2.0/plink2_import.cc +++ b/2.0/plink2_import.cc @@ -3083,9 +3083,8 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch goto VcfToPgen_ret_MISSING_TOKENS; } if (unlikely(S_CAST(uintptr_t, id_end - pos_end) > kMaxIdBlen)) { - putc_unlocked('\n', stdout); snprintf(g_logbuf, kLogbufSize, "Error: Invalid ID on line %" PRIuPTR " of --vcf file (max " MAX_ID_SLEN_STR " chars).\n", line_idx); - goto VcfToPgen_ret_MALFORMED_INPUT_WW; + goto VcfToPgen_ret_MALFORMED_INPUT_WWN; } // note REF length @@ -3095,6 +3094,10 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch goto VcfToPgen_ret_MISSING_TOKENS; } uint32_t cur_max_allele_slen = linebuf_iter - ref_allele_start; + if (unlikely(memchr(ref_allele_start, ',', cur_max_allele_slen) != nullptr)) { + snprintf(g_logbuf, kLogbufSize, "Error: Invalid REF allele on line %" PRIuPTR " of --vcf file.\n", line_idx); + goto VcfToPgen_ret_MALFORMED_INPUT_WWN; + } uint32_t alt_ct = 1; unsigned char ucc; @@ -3812,6 +3815,10 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch goto VcfToPgen_load_start; } if ((!vic.vibc.gt_exists) && (!format_dosage_relevant) && (!format_hds_search)) { + line_iter = AdvToDelim(format_start, '\n'); + if (unlikely(CountByte(format_start, '\t', line_iter - format_start) != sample_ct)) { + goto VcfToPgen_ret_MISSING_TOKENS; + } gparse_flags = kfGparseNull; genotext_byte_ct = 1; } else {