Skip to content

Commit

Permalink
Display CIGAR ins / del
Browse files Browse the repository at this point in the history
  • Loading branch information
veghp committed Nov 21, 2024
1 parent f677258 commit 28537d9
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
47 changes: 45 additions & 2 deletions ediacara/Comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def compute_feature_color(self, feature):
ediacara_qualifier = "ediacara"
if feature.qualifiers[ediacara_qualifier] == "assembly":
return "#d3d3d3"
elif feature.qualifiers[ediacara_qualifier] == "deletion":
return "#f5685e"
elif feature.qualifiers[ediacara_qualifier] == "insertion":
return "#2a9d8f"
else:
return "#7245dc" # default dna_features_viewer colour

Expand Down Expand Up @@ -384,6 +388,7 @@ def interpret_asm(self):
self.wrong_length = False

part_type = "assembly" # all entries should be the assembly
# This section adds both a generic alignment feature, and one for each ins/del:
for index, row in self.aln.iterrows():

location = SeqFeature.FeatureLocation(
Expand All @@ -395,13 +400,51 @@ def interpret_asm(self):
id=row["query_name"],
qualifiers={"label": row["query_name"], "ediacara": part_type},
)

self.asm.features.append(feature)

cigar = row["CIGAR"].replace("cg:Z:", "") # paf custom tag label
matches = re.findall(r"(\d+)([A-Z]{1})", cigar)
current_position = row["target_start"] # in the ref
for length, change in matches:
if change == "M":
current_position += int(length)
elif change == "D":
new_position = current_position + int(length)
feat_loc = SeqFeature.FeatureLocation(
current_position, new_position
)
feat_id = "del" + length
feature = SeqFeature.SeqFeature(
location=feat_loc,
type="misc_feature",
id=feat_id,
qualifiers={"label": feat_id, "ediacara": "deletion"},
)
current_position = new_position # step forward after annotation
self.asm.features.append(feature)

elif change == "I":
# do not change position, insert is in query (asm)
feat_loc = SeqFeature.FeatureLocation(
current_position, current_position
)
feat_id = "ins" + length
feature = SeqFeature.SeqFeature(
location=feat_loc,
type="misc_feature",
id=feat_id,
qualifiers={"label": feat_id, "ediacara": "insertion"},
)
self.asm.features.append(feature)
else:
raise ValueError("Incorrect symbol in CIGAR")

# This should come after all feature additions:
self.asm_figure = self.plot_aln()

self.cigar_str = ""
self.cigar_dict = {"M": [], "D": [], "I": []} # see cigar definition
for entry in self.aln["CIGAR"]: # conncatenate any entries
for entry in self.aln["CIGAR"]: # concatenate any entries
self.cigar_str += entry.replace("cg:Z:", "") # paf custom tag label
matches = re.findall(r"(\d+)([A-Z]{1})", self.cigar_str)
for length, change in matches:
Expand Down
2 changes: 1 addition & 1 deletion ediacara/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def tr_modifier(tr):
comparatorgroup.fastq_plot, fmt="svg", size=[8, 3]
)
comparatorgroup.asm_figure_data = pdf_tools.figure_data(
comparatorgroup.asm_figure, fmt="svg", size=[8, 1]
comparatorgroup.asm_figure, fmt="svg", size=[8, 2]
)

comparatorgroup.aln_table = dataframe_to_html(
Expand Down

0 comments on commit 28537d9

Please sign in to comment.