Skip to content

Commit

Permalink
Merge pull request #26 from Novartis/develop
Browse files Browse the repository at this point in the history
Add user defined GTF tag names, default disable_infer_features
  • Loading branch information
mdshw5 authored Nov 17, 2021
2 parents a74909c + 14d0c22 commit 07e29c1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 26 deletions.
12 changes: 9 additions & 3 deletions pisces/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
"extra_fastas": [],
"options": {
"-k": 31,
"infer_features": false
"infer_features": false,
"gtf_name_tag": "gene_name",
"gtf_type_tag": "gene_type"
}
}
}
Expand All @@ -26,7 +28,9 @@
"extra_fastas": [],
"options": {
"-k": 31,
"infer_features": false
"infer_features": false,
"gtf_name_tag": "gene_name",
"gtf_type_tag": "gene_type"
}
}
}
Expand All @@ -44,7 +48,9 @@
"extra_fastas": [],
"options": {
"-k": 31,
"infer_features": false
"infer_features": false,
"gtf_name_tag": "gene_name",
"gtf_type_tag": "gene_type"
}
}
}
Expand Down
56 changes: 33 additions & 23 deletions pisces/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,40 +297,53 @@ def features_to_string(features, fasta_in, masked=True, strand=True):
total=db.count_features_of_type('gene'),
unit='gene') as pbar:
for gene in db.features_of_type('gene'):
first_exon = next(
db.children(
gene,
featuretype='exon',
order_by='start'))
try:
if options["gene_type"] == True:
type_tag = "gene_type"
else:
type_tag = options["gene_type"]
gene_type = first_exon[type_tag][0]
except KeyError:
logging.info("No gene type tag found for %s", gene['gene_id'][0])
gene_type = 'NA'
try:
if options["gene_name"] == True:
name_tag = "gene_name"
else:
name_tag = options["gene_name"]
gene_name = first_exon[name_tag][0]
except KeyError:
logging.info("No gene name tag found for %s", gene['gene_id'][0])
gene_name = 'NA'

transcripts = db.children(gene, featuretype='transcript', order_by='start')
for transcript in transcripts:
# Write entry in the transcripts to genes table
gene2tx.write("{txp}\t{gene}\n".format(
gene=gene['gene_id'][0],
txp=transcript['transcript_id'][0]))
# Construct the transcript sequences and write them to the FASTA
fa_seq, frac_masked = features_to_string(db.children(transcript,
featuretype='exon',
order_by='start'),
reference,
masked=options["masked"])
transcripts_fasta.write('>' + transcript['transcript_id'][0] + '\n')
transcripts_fasta.write(fa_seq + '\n')

exons = db.children(gene, featuretype='exon', order_by='start')
merged_exons = db.merge(exons, merge_criteria=(mc.seqid, mc.feature_type, mc.overlap_any_inclusive))
if options["unprocessed_transcripts"]:
exons = db.children(gene, featuretype='exon', order_by='start')
merged_exons = db.merge(exons, merge_criteria=(mc.seqid, mc.feature_type, mc.overlap_any_inclusive))
introns = db.interfeatures(merged_exons, new_featuretype='intron')
transcripts_fasta.write('>' + "intronic_" + gene['gene_id'][0] + '\n')
fa_seq, _ = features_to_string(introns, reference, masked=options["masked"])
transcripts_fasta.write(fa_seq + '\n')

first_transcript = next(
db.children(
gene,
featuretype='transcript',
order_by='start'))
try:
gene_type = first_transcript['gene_type'][0]
except KeyError:
gene_type = 'NA'
try:
gene_name = first_transcript['gene_name'][0]
except KeyError:
gene_name = 'NA'

exons = db.children(gene, featuretype='exon', order_by='start')
merged_exons = db.merge(exons, merge_criteria=(mc.seqid, mc.feature_type, mc.overlap_any_inclusive))

annotation.write(
"{gene}\t{type}\t{name}\t{chrom}\t{start}\t{stop}\t{length}\t{frac_masked}\n".
format(
Expand All @@ -342,14 +355,11 @@ def features_to_string(features, fasta_in, masked=True, strand=True):
chrom=gene.chrom,
length=sum(len(exon) for exon in merged_exons),
frac_masked=str(frac_masked)))

transcripts = db.children(
gene,
featuretype='transcript',
order_by='start')
for transcript in transcripts:
gene2tx.write("{txp}\t{gene}\n".format(
gene=gene['gene_id'][0],
txp=transcript['transcript_id'][0]))
pbar.update(1)

if options["intergenes"]:
Expand Down

0 comments on commit 07e29c1

Please sign in to comment.