Skip to content

Commit

Permalink
Merge pull request #6 from Edinburgh-Genome-Foundry/dev
Browse files Browse the repository at this point in the history
PDF report
  • Loading branch information
veghp authored Oct 18, 2020
2 parents 962822c + 8f45aa4 commit 5208b96
Show file tree
Hide file tree
Showing 12 changed files with 230 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ python:
- "3.6"
# command to install dependencies
install:
- pip install coveralls geneblocks pytest-cov==2.6 pytest==3.2.3
- pip install coveralls geneblocks pdf_reports pytest-cov==2.6 pytest==3.2.3
- pip install -e .
- sudo apt-get install ncbi-blast+
# command to run tests
Expand Down
8 changes: 5 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,22 +142,24 @@ The simulation and reporting on an assembly plan is very similar to that of a si
# Write a detailed report on each assembly and on the plan as a whole
plan_simulation.write_report("my_assembly_simulation.zip")
Installation
-------------

You can install DnaCauldron through PIP

You can install DnaCauldron through PIP:

.. code:: shell
sudo pip install dnacauldron
Alternatively, you can unzip the sources in a folder and type
Report generation requires the full installation, obtained by installing ``dnacauldron[reports]``.
Alternatively, you can unzip the sources in a folder and type:

.. code:: shell
sudo python setup.py install
How it works
------------

Expand Down
57 changes: 23 additions & 34 deletions dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@


class AssemblyReportWriter(AssemblyReportPlotsMixin):
"""Class to configure assembly simulation reports writing.
"""Class to configure assembly simulation report writing.
Responsible to write the final sequence(s) of the assembly in Genbank
Responsible for writing the final sequence(s) of the assembly in Genbank
format as well as a .csv report on all assemblies produced and PDF figures
to allow a quick overview or diagnostic.
Expand All @@ -28,32 +28,34 @@ class AssemblyReportWriter(AssemblyReportPlotsMixin):
include_part_plots
Either True/False/"on_error" to plot schemas of the parts used, possibly
with restriction sites relevant to the AssemblyMix.
include_mix_graphs
Either True/False/"on_error" to plot representations of fragments
Either True/False/"on_error" to plot representations of fragment
connectivity in the AssemblyMix created during the simulation.
include_part_records
True/False to include the parts records in the simulation results (makes
for larger folders and zips, but is better for traceability)
for larger folders and zips, but is better for traceability).
include_assembly_plots
True/False to include assembly schemas in the reports (makes the
report generation slower, but makes it easier to check assemblies at a
glance)
glance).
show_overhangs_in_graph
If true, the AssemblyMix graph representations will display the sequence
of all fragments overhangs.
of all fragment overhangs.
include_errors_spreadsheet
If true and there are errors, an errors spreadsheet will be added to the
report
report.
include_warnings_spreadsheet
If true and there are warnings, a warnings spreadsheet will be added to
the report
the report.
include_pdf_report
If true, a PDF report file is also generated.
"""

def __init__(
Expand All @@ -67,6 +69,7 @@ def __init__(
annotate_parts_homologies=True,
include_errors_spreadsheet=True,
include_warnings_spreadsheet=True,
include_pdf_report=False,
):
self.include_fragment_plots = include_fragment_plots
self.include_part_plots = include_part_plots
Expand All @@ -77,6 +80,7 @@ def __init__(
self.annotate_parts_homologies = annotate_parts_homologies
self.include_errors_spreadsheet = include_errors_spreadsheet
self.include_warnings_spreadsheet = include_warnings_spreadsheet
self.include_pdf_report = include_pdf_report

def _write_constructs_spreadsheet(self, simulation, report_root):
dataframe = simulation.compute_summary_dataframe()
Expand Down Expand Up @@ -114,20 +118,12 @@ def _write_records_plots(self, assembly_simulation, report_root):
construct_record = construct_record.as_biopython_record()
self.plot_construct(construct_record, plots_dir)

def _write_errors_spreadsheet(
self, simulation, report_root, error_type="error"
):
errors = (
simulation.errors if error_type == "error" else simulation.warnings
)
def _write_errors_spreadsheet(self, simulation, report_root, error_type="error"):
errors = simulation.errors if error_type == "error" else simulation.warnings
if len(errors) > 0:
columns = ";".join(
["assembly_name", "message", "suggestion", "data"]
)
columns = ";".join(["assembly_name", "message", "suggestion", "data"])
all_error_rows = [
";".join(
[err.assembly.name, err.message, err.data_as_string(),]
)
";".join([err.assembly.name, err.message, err.data_as_string(),])
for err in errors
]
filename = "%s.csv" % error_type
Expand All @@ -153,9 +149,7 @@ def write_report(self, assembly_simulation, target):

self._write_records(assembly_simulation, report_root)
if self.include_part_records:
self._write_part_records(
assembly_simulation, part_records, report_root
)
self._write_part_records(assembly_simulation, part_records, report_root)
if self.include_assembly_plots:
self._write_records_plots(assembly_simulation, report_root)

Expand All @@ -165,9 +159,7 @@ def write_report(self, assembly_simulation, target):
if plot_options["parts_plots"]:
enzymes = assembly.enzymes if hasattr(assembly, "enzymes") else []
self.plot_provided_parts(
report_root=report_root,
parts_records=part_records,
enzymes=enzymes,
report_root=report_root, parts_records=part_records, enzymes=enzymes,
)
if plot_options["fragment_plots"]:
for mix in assembly_simulation.mixes:
Expand All @@ -180,9 +172,7 @@ def write_report(self, assembly_simulation, target):
with_overhangs=self.show_overhangs_in_graph,
)
if len(assembly_simulation.construct_records):
self._write_constructs_spreadsheet(
assembly_simulation, report_root
)
self._write_constructs_spreadsheet(assembly_simulation, report_root)
if self.include_errors_spreadsheet:
self._write_errors_spreadsheet(
assembly_simulation, report_root, error_type="error"
Expand All @@ -191,7 +181,6 @@ def write_report(self, assembly_simulation, target):
self._write_errors_spreadsheet(
assembly_simulation, report_root, error_type="warnings"
)


if target == "@memory":
return report_root._close()
82 changes: 53 additions & 29 deletions dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
from flametree import file_tree
import proglog
import pandas
from ..tools import (
format_data_dicts_records_for_spreadsheet
)
from ..tools import format_data_dicts_records_for_spreadsheet
from ..biotools import write_record
from ..Assembly.AssemblyReportWriter import AssemblyReportWriter
from .plot_leveled_graph import plot_leveled_graph
import matplotlib.pyplot as plt

try:
import pdf_reports

PDF_REPORTS_AVAILABLE = True
except ImportError:
PDF_REPORTS_AVAILABLE = False
from ..reports import write_simulation_pdf_report


class AssemblyPlanSimulation:
def __init__(
Expand Down Expand Up @@ -73,7 +79,7 @@ def compute_summary_dataframe(self):
return pandas.DataFrame(data, columns=columns)

def compute_stats(self):
"""Return a dictionnary of stats.
"""Return a dictionary of stats.
For instance {"cancelled_assemblies": 2, "errored_assemblies": 1,
"valid_assemblies": 5}.
Expand All @@ -94,41 +100,37 @@ def write_report(
logger="bar",
include_original_parts_records=True,
):
"""Write a comprehensive report to a folder or zip file
"""Write a comprehensive report to a folder or zip file.
Parameters
----------
target
Either a path to a folder, to a zip file, or ``"@memory"`` to write
into a virtual zip file whose raw data is then returned.
folder_name
Name of the folder created inside the target to host the report (yes,
it is a folder inside a folder, which can be very practical).
assembly_report_writer
Either the "default" or any AssemblyReportWriter instance.
logger
Either "bar" for a progress bar, or None, or any Proglog logger.
include_original_parts_records
If true, the original provided part records will be included in the
report (creates larger file sizes, but better for traceability).
"""
report (creates larger file sizes, but better for traceability).
"""
if assembly_report_writer == "default":
# We'll write all records into one folder for the whole plan
assembly_report_writer = AssemblyReportWriter(
include_part_records=False
)
assembly_report_writer = AssemblyReportWriter(include_part_records=False)
logger = proglog.default_bar_logger(logger)
if folder_name == "auto":
folder_name = self.assembly_plan.name + "_simulation"
report_root = file_tree(target)._dir(folder_name, replace=True)
self._write_assembly_reports(
report_root, assembly_report_writer, logger=logger
)
self._write_assembly_reports(report_root, assembly_report_writer, logger=logger)
self._write_errors_spreadsheet(report_root, error_type="error")
self._write_errors_spreadsheet(report_root, error_type="warning")

Expand All @@ -142,6 +144,19 @@ def write_report(
self._write_all_required_parts_records(report_root)
if not self.has_single_level:
self._plot_assembly_graph(report_root)

if assembly_report_writer.include_pdf_report:
if not PDF_REPORTS_AVAILABLE:
raise ImportError(
"Could not load PDF Reports. Install with `pip install pdf_reports`"
" to generate a PDF report."
)

simulation_info = self._calculate_simulation_info()
write_simulation_pdf_report(
report_root._file("Report.pdf"), simulation_info
)

if target == "@memory":
return report_root._close()

Expand All @@ -164,8 +179,7 @@ def _write_cancelled_assemblies(self, report_root):
filename = self._get_file_name("cancelled_assemblies.csv")
columns = ",".join(["cancelled_assembly", "failed_parent_assembly"])
cancelled = [
",".join([c.assembly_name, c.failed_dependency])
for c in self.cancelled
",".join([c.assembly_name, c.failed_dependency]) for c in self.cancelled
]
report_root._file(filename).write("\n".join([columns] + cancelled))

Expand All @@ -179,9 +193,7 @@ def parts_sort_key(name):
return 1000000
return indices[0]

all_parts = (
self.list_all_original_parts_used() + self.assembly_plan.all_parts
)
all_parts = self.list_all_original_parts_used() + self.assembly_plan.all_parts
all_parts = sorted(set(all_parts), key=parts_sort_key)

def sort_key(name):
Expand All @@ -199,9 +211,7 @@ def draw_node(x, y, node, ax):
text = node.replace("_", " ")
ax.text(x, y, text, bbox={"facecolor": "white"})

_, ax = plot_leveled_graph(
levels=levels, edges=edges, draw_node=draw_node
)
_, ax = plot_leveled_graph(levels=levels, edges=edges, draw_node=draw_node)
target = report_root._file("assembly_plan_graph.pdf")
ax.figure.savefig(target.open("wb"), format="pdf")
plt.close(ax.figure)
Expand All @@ -211,9 +221,7 @@ def _write_errors_spreadsheet(self, report_root, error_type="error"):
error
for simulation in self.assembly_simulations
for error in (
simulation.errors
if error_type == "error"
else simulation.warnings
simulation.errors if error_type == "error" else simulation.warnings
)
]
if len(all_errors) > 0:
Expand Down Expand Up @@ -260,8 +268,7 @@ def list_all_original_parts_used(self):
for part in simulation.list_all_parts_used()
]
assemblies = [
simulation.assembly.name
for simulation in self.assembly_simulations
simulation.assembly.name for simulation in self.assembly_simulations
]
parts_that_arent_assembled = set(all_parts).difference(set(assemblies))
return sorted(parts_that_arent_assembled)
Expand Down Expand Up @@ -295,3 +302,20 @@ def _write_assembly_plan_spreadsheets(self, report_root):
f = report_root._file(file_name)
lines = [",".join([c] + parts) for c, parts in construct_parts]
f.write("\n".join(["construct, parts"] + lines))

def _calculate_simulation_info(self):
stats_dict = self.compute_stats()
stats_dict_series = {
"Outcome": pandas.Series(["Valid", "Cancelled", "Errored"]),
"Number of assemblies": pandas.Series(
[
stats_dict["valid_assemblies"],
stats_dict["cancelled_assemblies"],
stats_dict["errored_assemblies"],
]
),
}

stats_df = pandas.DataFrame(stats_dict_series)

return stats_df
3 changes: 3 additions & 0 deletions dnacauldron/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
write_record,
autoselect_enzyme,
)

from .reports import write_simulation_pdf_report

from .utils import (
swap_donor_vector_part,
insert_parts_on_backbones,
Expand Down
34 changes: 34 additions & 0 deletions dnacauldron/report_assets/domestication_report.pug
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#sidebar: p {{sidebar_text}}

.logos
img(src="file:///{{ dc_logo_url }}")
img(src="file:///{{ egf_logo_url }}")

hr
h1 DNA assembly simulation report
hr

p.
The 'all_construct_records' folder contains the final assemblies and 'part_records'
(if generated) contains the original input Genbank files of all parts provided for
the assembly. There is one folder for each assembly, which contains:
ul
li The Genbank file of the assembly (.gb)
li A CSV file about the assembly
li PDF files with schematic views of how the parts assemble together (if generated)
li Genbank files of the parts, in the 'provided_parts_records' folder

p.
In addition, various summary text and CSV files about the simulation are provided.


h2 Summary table

{{ summary_table }}

//- h2 Domesticators
//- each domesticator in domesticators
//- .ui.segment.raised
//- .ui.title.ribbon.label.teal {{domesticator.name}}
//- .description {{ domesticator.html_details() }}
Binary file added dnacauldron/report_assets/imgs/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 5208b96

Please sign in to comment.