Skip to content

Commit

Permalink
Created CreateExtendedIlluminaManifest tool (#1667)
Browse files Browse the repository at this point in the history
Added a new Build37ExtendedIlluminaManifestRecordCreator class to manage the creation of the Build37ExtendedIlluminaManifestRecord.
Simplified the two-pass algorithm and made it less memory intensive.
Improved the report file so that it shows the number of duplicates.
Added a CLP flag to do duplicate flagging (true by default)
Make CLP parameters for FLAG_DUPS and CLUSTER_FILE co-required.
Made support for other references (by liftover) optional.
Make the writing of the bad_assays_file optional.
Change version to 2.0
Respond to code review feedback.
Added additional usage statement for liftover case
  • Loading branch information
gbggrant authored May 17, 2021
1 parent 69e63f1 commit 6ac2bc3
Show file tree
Hide file tree
Showing 20 changed files with 1,938 additions and 104 deletions.
4 changes: 2 additions & 2 deletions src/main/java/picard/arrays/GtcToVcf.java
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ private void fillContexts(final SortingCollection<VariantContext> contexts, fina
while (iterator.hasNext()) {
final Build37ExtendedIlluminaManifestRecord record = iterator.next();

if (!record.isBad()) {
if (!record.isFail()) {
InfiniumGTCRecord gtcRecord = gtcFile.getRecord(gtcIndex);
VariantContext context = makeVariantContext(record, gtcRecord, egtFile, progressLogger);
numVariantsWritten++;
Expand All @@ -300,7 +300,7 @@ private void fillContexts(final SortingCollection<VariantContext> contexts, fina
}

log.info(numVariantsWritten + " Variants were written to file");
log.info(gtcFile.getNumberOfSnps() + " SNPs in the GTC file");
log.info(gtcFile.getNumberOfSnps() + " Variants in the GTC file");
log.info(manifest.getNumAssays() + " Variants on the " + manifest.getDescriptorFileName() + " genotyping array manifest file");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ protected int doWork() {
joiner.add("" + locusEntry.mapInfo);
joiner.add(df.format(genTrainScore));
joiner.add(locusEntry.snp);
joiner.add(locusEntry.ilmnStrand);
joiner.add(locusEntry.ilmnStrand.toString());
joiner.add(locusEntry.customerStrand);
joiner.add("" + locusEntry.normalizationId);
writer.write(joiner.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

package picard.arrays.illumina;

import htsjdk.samtools.util.Log;
import htsjdk.tribble.annotation.Strand;
import htsjdk.variant.variantcontext.Allele;
import org.apache.commons.lang.StringUtils;

Expand All @@ -37,32 +37,46 @@
*/
public class Build37ExtendedIlluminaManifestRecord extends IlluminaManifestRecord {
protected enum Flag {
/** The record in the manifest passes validation and will be used in VCF generation */
PASS,

/** The record passes validation but is a duplicate (another assay at the same locus with the same alleles) */
DUPE,

/** Flagged by Illumina as a bad assay */
ILLUMINA_FLAGGED,

LIFTOVER_FAILED,

UNSUPPORTED_GENOME_BUILD,

/** Probe sequence not found in reference. */
PROBE_SEQUENCE_MISMATCH,

/** Probe sequence is on unexpected strand. */
PROBE_SEQUENCE_STRAND_INVALID,

/** Source sequence not found in reference. */
SOURCE_SEQUENCE_MISMATCH,
/** Manifest contained no allele B probe sequence */
MISSING_ALLELE_B_PROBESEQ,

/** Source sequence is invalid (contains invalid character). */
SOURCE_SEQUENCE_INVALID,

/** Source sequence is on unexpected strand. */
SOURCE_SEQUENCE_STRAND_INVALID,

/** Neither insertion nor deletion sequence found in reference. */
INDEL_NOT_FOUND,

/** Both insertion and deletion sequence found in reference. */
INDEL_CONFLICT,

/** @deprecated - but used in existing extended manifest files. */
@Deprecated
PROBE_SEQUENCE_STRAND_INVALID,

/** @deprecated - but used in existing extended manifest files. */
@Deprecated
SOURCE_SEQUENCE_MISMATCH,

/** @deprecated - but used in existing extended manifest files. */
@Deprecated
SOURCE_SEQUENCE_STRAND_INVALID,

/** @deprecated - but used in existing extended manifest files. */
@Deprecated
SEQUENCE_MISMATCH,
Expand All @@ -73,71 +87,100 @@ protected enum Flag {

/** @deprecated - but used in existing extended manifest files. */
@Deprecated
INDEL_EXTENSION_ERROR,
DUPE,
PASS,
INDEL_EXTENSION_ERROR
}

private String b37Chr;
private Integer b37Pos;
private String snpRefAllele;
private String snpAlleleA;
private String snpAlleleB;
private String rsId;
private Flag flag = Flag.PASS;
String b37Chr;
Integer b37Pos;
String snpRefAllele;
String snpAlleleA;
String snpAlleleB;
String rsId;
Flag flag = Flag.PASS;

Allele aAllele = null;
Allele bAllele = null;
Allele refAllele = null;

private Allele A;
private Allele B;
private Allele ref;
private final Log log = Log.getInstance(Build37ExtendedIlluminaManifestRecord.class);
// The refStrand if provided in the Illumina manifest, otherwise calculated
Strand referenceStrand = null;

/**
* This constructor is used to read records from an already created Build37ExtendedIlluminaManifestRecord file.
* It does not work to set the Extended-specific fields
*/
Build37ExtendedIlluminaManifestRecord(final Map<String, Integer> columnNameToIndex, final String[] line, final int index) {
super(columnNameToIndex, line, index);

final int end = line.length;
flag = Flag.valueOf(line[end - 1]);

if (!isBad()) {
if (!isFail()) {
b37Chr = line[end - 7];
b37Pos = parseIntOrNull(line[end - 6]);
snpRefAllele = line[end - 5];
snpAlleleA = line[end - 4];
snpAlleleB = line[end - 3];
rsId = line[end - 2];

A = Allele.create(snpAlleleA, snpAlleleA.equals(snpRefAllele));
B = Allele.create(snpAlleleB, snpAlleleB.equals(snpRefAllele));
ref = Allele.create(snpRefAllele, true);
} else {
b37Chr = "0";
b37Pos = 0;
snpRefAllele = "";
snpAlleleA = "";
snpAlleleB = "";
rsId = "";

A = Allele.NO_CALL;
B = Allele.NO_CALL;
ref = Allele.NO_CALL;
}
}

/**
 * Builds an extended record from an existing manifest record plus explicitly
 * supplied values for the extended fields.
 *
 * The base record is handed to the superclass constructor; every other argument
 * is stored as-is in the field of the same name. No Allele objects are created here.
 *
 * @param record       the manifest record passed to the superclass constructor
 * @param flag         the flag to store on this record
 * @param b37Chr       value stored in {@code b37Chr}
 * @param b37Pos       value stored in {@code b37Pos}
 * @param snpRefAllele value stored in {@code snpRefAllele}
 * @param snpAlleleA   value stored in {@code snpAlleleA}
 * @param snpAlleleB   value stored in {@code snpAlleleB}
 * @param rsId         value stored in {@code rsId}
 */
Build37ExtendedIlluminaManifestRecord(final IlluminaManifestRecord record,
                                      final Flag flag,
                                      final String b37Chr,
                                      final Integer b37Pos,
                                      final String snpRefAllele,
                                      final String snpAlleleA,
                                      final String snpAlleleB,
                                      final String rsId) {
    super(record);

    // Locus coordinates.
    this.b37Pos = b37Pos;
    this.b37Chr = b37Chr;

    // Allele strings.
    this.snpAlleleB = snpAlleleB;
    this.snpAlleleA = snpAlleleA;
    this.snpRefAllele = snpRefAllele;

    // Identifier and validation status.
    this.rsId = rsId;
    this.flag = flag;
}

public Allele getAlleleA() {
return A;
if (aAllele == null) {
aAllele = Allele.NO_CALL;
if (!isFail() && !StringUtils.isEmpty(snpAlleleA)) {
aAllele = Allele.create(snpAlleleA, snpAlleleA.equals(snpRefAllele));
}
}
return aAllele;
}

public Allele getAlleleB() {
return B;
if (bAllele == null) {
bAllele = Allele.NO_CALL;
if (!isFail() && !StringUtils.isEmpty(snpAlleleB)) {
bAllele = Allele.create(snpAlleleB, snpAlleleB.equals(snpRefAllele));
}
}
return bAllele;
}

public Allele getRefAllele() {
return ref;
if (refAllele == null) {
refAllele = Allele.NO_CALL;
if (!isFail() && !StringUtils.isEmpty(snpRefAllele)) {
refAllele = Allele.create(snpRefAllele, true);
}
}
return refAllele;
}

public Strand getReferenceStrand() { return referenceStrand; }

public String getB37Chr() {
return b37Chr;
}
Expand All @@ -146,9 +189,21 @@ public Integer getB37Pos() {
return b37Pos;
}

/** @return the reference allele string stored in {@code snpRefAllele} */
public String getSnpRefAllele() {
return snpRefAllele;
}

/** @return the allele A string stored in {@code snpAlleleA} */
public String getSnpAlleleA() {
return snpAlleleA;
}

/** @return the allele B string stored in {@code snpAlleleB} */
public String getSnpAlleleB() {
return snpAlleleB;
}

public String getRsId() { return rsId; }

public Boolean isBad() {
public Boolean isFail() {
return flag != Flag.DUPE && flag != Flag.PASS;
}

Expand All @@ -160,6 +215,18 @@ public Flag getFlag() {
return flag;
}

/**
 * Replaces the rsId stored on this record.
 *
 * @param rsId the new value for the {@code rsId} field
 */
public void setRsId(final String rsId) {
    this.rsId = rsId;
}

/**
 * Flags this record as a duplicate.
 *
 * The flag is changed to {@code Flag.DUPE} only when {@code isDupe} is true and
 * the record is not a failure according to {@link #isFail()}. Passing
 * {@code false} leaves the flag untouched (it does not clear an existing DUPE flag).
 *
 * @param isDupe whether the record should be marked as a duplicate
 */
public void setDupe(boolean isDupe) {
    // Failed records keep their failure flag; a false argument is a no-op.
    if (isFail() || !isDupe) {
        return;
    }
    flag = Flag.DUPE;
}

@Override
public String getLine() {
final String originalLine = super.getLine();
Expand All @@ -175,5 +242,4 @@ public String getLine() {

return originalLine + "," + StringUtils.join(extensions, ",");
}

}
}
Loading

0 comments on commit 6ac2bc3

Please sign in to comment.