diff --git a/.travis.yml b/.travis.yml index e425cd7..d8dfd98 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ python: - "3.6" # command to install dependencies install: - - pip install coveralls geneblocks pytest-cov==2.6 pytest==3.2.3 + - pip install coveralls geneblocks pdf_reports pytest-cov==2.6 pytest==3.2.3 - pip install -e . - sudo apt-get install ncbi-blast+ # command to run tests diff --git a/README.rst b/README.rst index a25fd4f..3bdfeed 100644 --- a/README.rst +++ b/README.rst @@ -142,22 +142,24 @@ The simulation and reporting on an assembly plan is very similar to that of a si # Write a detailed report on each assembly and on the plan as a whole plan_simulation.write_report("my_assembly_simulation.zip") + Installation ------------- -You can install DnaCauldron through PIP - +You can install DnaCauldron through PIP: .. code:: shell sudo pip install dnacauldron -Alternatively, you can unzip the sources in a folder and type +PDF report generation requires the optional ``pdf_reports`` dependency, which is installed with ``pip install dnacauldron[reports]``. +Alternatively, you can unzip the sources in a folder and type: .. code:: shell sudo python setup.py install + How it works ------------ diff --git a/dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py b/dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py index 520f77f..0210047 100644 --- a/dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py +++ b/dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py @@ -6,9 +6,9 @@ class AssemblyReportWriter(AssemblyReportPlotsMixin): - """Class to configure assembly simulation reports writing. + """Class to configure assembly simulation report writing. - Responsible to write the final sequence(s) of the assembly in Genbank + Responsible for writing the final sequence(s) of the assembly in Genbank format as well as a .csv report on all assemblies produced and PDF figures to allow a quick overview or diagnostic. 
@@ -28,32 +28,34 @@ class AssemblyReportWriter(AssemblyReportPlotsMixin): include_part_plots Either True/False/"on_error" to plot schemas of the parts used, possibly with restriction sites relevant to the AssemblyMix. + include_mix_graphs - Either True/False/"on_error" to plot representations of fragments + Either True/False/"on_error" to plot representations of fragment connectivity in the AssemblyMix created during the simulation. include_part_records True/False to include the parts records in the simulation results (makes - for larger folders and zips, but is better for traceability) + for larger folders and zips, but is better for traceability). include_assembly_plots True/False to include assembly schemas in the reports (makes the report generation slower, but makes it easier to check assemblies at a - glance) - + glance). + show_overhangs_in_graph If true, the AssemblyMix graph representations will display the sequence - of all fragments overhangs. - + of all fragment overhangs. + include_errors_spreadsheet If true and there are errors, an errors spreadsheet will be added to the - report - + report. + include_warnings_spreadsheet If true and there are warnings, a warnings spreadsheet will be added to - the report - + the report. + include_pdf_report + If true, a PDF report file is also generated. 
""" def __init__( @@ -67,6 +69,7 @@ def __init__( annotate_parts_homologies=True, include_errors_spreadsheet=True, include_warnings_spreadsheet=True, + include_pdf_report=False, ): self.include_fragment_plots = include_fragment_plots self.include_part_plots = include_part_plots @@ -77,6 +80,7 @@ def __init__( self.annotate_parts_homologies = annotate_parts_homologies self.include_errors_spreadsheet = include_errors_spreadsheet self.include_warnings_spreadsheet = include_warnings_spreadsheet + self.include_pdf_report = include_pdf_report def _write_constructs_spreadsheet(self, simulation, report_root): dataframe = simulation.compute_summary_dataframe() @@ -114,20 +118,12 @@ def _write_records_plots(self, assembly_simulation, report_root): construct_record = construct_record.as_biopython_record() self.plot_construct(construct_record, plots_dir) - def _write_errors_spreadsheet( - self, simulation, report_root, error_type="error" - ): - errors = ( - simulation.errors if error_type == "error" else simulation.warnings - ) + def _write_errors_spreadsheet(self, simulation, report_root, error_type="error"): + errors = simulation.errors if error_type == "error" else simulation.warnings if len(errors) > 0: - columns = ";".join( - ["assembly_name", "message", "suggestion", "data"] - ) + columns = ";".join(["assembly_name", "message", "suggestion", "data"]) all_error_rows = [ - ";".join( - [err.assembly.name, err.message, err.data_as_string(),] - ) + ";".join([err.assembly.name, err.message, err.data_as_string(),]) for err in errors ] filename = "%s.csv" % error_type @@ -153,9 +149,7 @@ def write_report(self, assembly_simulation, target): self._write_records(assembly_simulation, report_root) if self.include_part_records: - self._write_part_records( - assembly_simulation, part_records, report_root - ) + self._write_part_records(assembly_simulation, part_records, report_root) if self.include_assembly_plots: self._write_records_plots(assembly_simulation, report_root) @@ -165,9 
+159,7 @@ def write_report(self, assembly_simulation, target): if plot_options["parts_plots"]: enzymes = assembly.enzymes if hasattr(assembly, "enzymes") else [] self.plot_provided_parts( - report_root=report_root, - parts_records=part_records, - enzymes=enzymes, + report_root=report_root, parts_records=part_records, enzymes=enzymes, ) if plot_options["fragment_plots"]: for mix in assembly_simulation.mixes: @@ -180,9 +172,7 @@ def write_report(self, assembly_simulation, target): with_overhangs=self.show_overhangs_in_graph, ) if len(assembly_simulation.construct_records): - self._write_constructs_spreadsheet( - assembly_simulation, report_root - ) + self._write_constructs_spreadsheet(assembly_simulation, report_root) if self.include_errors_spreadsheet: self._write_errors_spreadsheet( assembly_simulation, report_root, error_type="error" @@ -191,7 +181,6 @@ def write_report(self, assembly_simulation, target): self._write_errors_spreadsheet( assembly_simulation, report_root, error_type="warnings" ) - if target == "@memory": return report_root._close() diff --git a/dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py b/dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py index 521189d..d39e22f 100644 --- a/dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py +++ b/dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py @@ -1,14 +1,20 @@ from flametree import file_tree import proglog import pandas -from ..tools import ( - format_data_dicts_records_for_spreadsheet -) +from ..tools import format_data_dicts_records_for_spreadsheet from ..biotools import write_record from ..Assembly.AssemblyReportWriter import AssemblyReportWriter from .plot_leveled_graph import plot_leveled_graph import matplotlib.pyplot as plt +try: + import pdf_reports + + PDF_REPORTS_AVAILABLE = True +except ImportError: + PDF_REPORTS_AVAILABLE = False +from ..reports import write_simulation_pdf_report + class AssemblyPlanSimulation: def __init__( @@ -73,7 +79,7 @@ def compute_summary_dataframe(self): return 
pandas.DataFrame(data, columns=columns) def compute_stats(self): - """Return a dictionnary of stats. + """Return a dictionary of stats. For instance {"cancelled_assemblies": 2, "errored_assemblies": 1, "valid_assemblies": 5}. @@ -94,7 +100,7 @@ def write_report( logger="bar", include_original_parts_records=True, ): - """Write a comprehensive report to a folder or zip file + """Write a comprehensive report to a folder or zip file. Parameters ---------- @@ -102,33 +108,29 @@ def write_report( target Either a path to a folder, to a zip file, or ``"@memory"`` to write into a virtual zip file whose raw data is then returned. - + folder_name Name of the folder created inside the target to host the report (yes, it is a folder inside a folder, which can be very practical). - + assembly_report_writer Either the "default" or any AssemblyReportWriter instance. - + logger Either "bar" for a progress bar, or None, or any Proglog logger. include_original_parts_records If true, the original provided part records will be included in the - report (creates larger file sizes, but better for traceability). - """ + report (creates larger file sizes, but better for traceability). 
+ """ if assembly_report_writer == "default": # We'll write all records into one folder for the whole plan - assembly_report_writer = AssemblyReportWriter( - include_part_records=False - ) + assembly_report_writer = AssemblyReportWriter(include_part_records=False) logger = proglog.default_bar_logger(logger) if folder_name == "auto": folder_name = self.assembly_plan.name + "_simulation" report_root = file_tree(target)._dir(folder_name, replace=True) - self._write_assembly_reports( - report_root, assembly_report_writer, logger=logger - ) + self._write_assembly_reports(report_root, assembly_report_writer, logger=logger) self._write_errors_spreadsheet(report_root, error_type="error") self._write_errors_spreadsheet(report_root, error_type="warning") @@ -142,6 +144,19 @@ def write_report( self._write_all_required_parts_records(report_root) if not self.has_single_level: self._plot_assembly_graph(report_root) + + if assembly_report_writer.include_pdf_report: + if not PDF_REPORTS_AVAILABLE: + raise ImportError( + "Could not load PDF Reports. Install with `pip install pdf_reports`" + " to generate a PDF report." 
+ ) + + simulation_info = self._calculate_simulation_info() + write_simulation_pdf_report( + report_root._file("Report.pdf"), simulation_info + ) + if target == "@memory": return report_root._close() @@ -164,8 +179,7 @@ def _write_cancelled_assemblies(self, report_root): filename = self._get_file_name("cancelled_assemblies.csv") columns = ",".join(["cancelled_assembly", "failed_parent_assembly"]) cancelled = [ - ",".join([c.assembly_name, c.failed_dependency]) - for c in self.cancelled + ",".join([c.assembly_name, c.failed_dependency]) for c in self.cancelled ] report_root._file(filename).write("\n".join([columns] + cancelled)) @@ -179,9 +193,7 @@ def parts_sort_key(name): return 1000000 return indices[0] - all_parts = ( - self.list_all_original_parts_used() + self.assembly_plan.all_parts - ) + all_parts = self.list_all_original_parts_used() + self.assembly_plan.all_parts all_parts = sorted(set(all_parts), key=parts_sort_key) def sort_key(name): @@ -199,9 +211,7 @@ def draw_node(x, y, node, ax): text = node.replace("_", " ") ax.text(x, y, text, bbox={"facecolor": "white"}) - _, ax = plot_leveled_graph( - levels=levels, edges=edges, draw_node=draw_node - ) + _, ax = plot_leveled_graph(levels=levels, edges=edges, draw_node=draw_node) target = report_root._file("assembly_plan_graph.pdf") ax.figure.savefig(target.open("wb"), format="pdf") plt.close(ax.figure) @@ -211,9 +221,7 @@ def _write_errors_spreadsheet(self, report_root, error_type="error"): error for simulation in self.assembly_simulations for error in ( - simulation.errors - if error_type == "error" - else simulation.warnings + simulation.errors if error_type == "error" else simulation.warnings ) ] if len(all_errors) > 0: @@ -260,8 +268,7 @@ def list_all_original_parts_used(self): for part in simulation.list_all_parts_used() ] assemblies = [ - simulation.assembly.name - for simulation in self.assembly_simulations + simulation.assembly.name for simulation in self.assembly_simulations ] parts_that_arent_assembled 
= set(all_parts).difference(set(assemblies)) return sorted(parts_that_arent_assembled) @@ -295,3 +302,20 @@ def _write_assembly_plan_spreadsheets(self, report_root): f = report_root._file(file_name) lines = [",".join([c] + parts) for c, parts in construct_parts] f.write("\n".join(["construct, parts"] + lines))
+
+    def _calculate_simulation_info(self):
+        """Return a dataframe summarizing the outcomes of the plan's assemblies.
+
+        The dataframe has one row per outcome (Valid, Cancelled, Errored) and
+        two columns, "Outcome" and "Number of assemblies", filled from
+        ``compute_stats()``.
+        """
+        stats = self.compute_stats()
+        outcome_keys = [
+            ("Valid", "valid_assemblies"),
+            ("Cancelled", "cancelled_assemblies"),
+            ("Errored", "errored_assemblies"),
+        ]
+        labels = pandas.Series([label for label, _ in outcome_keys])
+        counts = pandas.Series([stats[key] for _, key in outcome_keys])
+        return pandas.DataFrame({"Outcome": labels, "Number of assemblies": counts})
diff --git a/dnacauldron/__init__.py b/dnacauldron/__init__.py index 2b9a805..b1b4453 100644 --- a/dnacauldron/__init__.py +++ b/dnacauldron/__init__.py @@ -46,6 +46,9 @@ write_record, autoselect_enzyme, ) + +from .reports import write_simulation_pdf_report + from .utils import ( swap_donor_vector_part, insert_parts_on_backbones, diff --git a/dnacauldron/report_assets/domestication_report.pug b/dnacauldron/report_assets/domestication_report.pug new file mode 100644 index 0000000..a83584f --- /dev/null +++ b/dnacauldron/report_assets/domestication_report.pug @@ -0,0 +1,34 @@ +#sidebar: p {{sidebar_text}} + +.logos + img(src="file:///{{ dc_logo_url }}") + img(src="file:///{{ egf_logo_url }}") + +hr +h1 DNA assembly simulation report +hr + +p. + The 'all_construct_records' folder contains the final assemblies and 'part_records' + (if generated) contains the original input Genbank files of all parts provided for + the assembly. There is one folder for each assembly, which contains: +ul + li The Genbank file of the assembly (.gb) + li A CSV file about the assembly + li PDF files with schematic views of how the parts assemble together (if generated) + li Genbank files of the parts, in the 'provided_parts_records' folder + +p. 
+ In addition, various summary text and csv files are provided about the simulation. + + +h2 Summary table + +{{ summary_table }} + +//- h2 Domesticators + +//- each domesticator in domesticators +//- .ui.segment.raised +//- .ui.title.ribbon.label.teal {{domesticator.name}} +//- .description {{ domesticator.html_details() }} diff --git a/dnacauldron/report_assets/imgs/logo.png b/dnacauldron/report_assets/imgs/logo.png new file mode 100644 index 0000000..28207d6 Binary files /dev/null and b/dnacauldron/report_assets/imgs/logo.png differ diff --git a/dnacauldron/report_assets/report_style.css b/dnacauldron/report_assets/report_style.css new file mode 100644 index 0000000..2186452 --- /dev/null +++ b/dnacauldron/report_assets/report_style.css @@ -0,0 +1,29 @@ +.logos { + margin: 0 auto; +} + +h1.appendix { + page-break-before: always; +} + +h1 { + text-align: center; +} + +.ribbon { + margin-left: -2.3em !important; +} + +.description { + margin-top: 1em; +} + +table { + font-size: 0.6em !important; +} + +table img { + width: 1.5em; + /* height: 1em; */ + margin-right: 1em +}
diff --git a/dnacauldron/reports.py b/dnacauldron/reports.py
new file mode 100644
index 0000000..57463a9
--- /dev/null
+++ b/dnacauldron/reports.py
@@ -0,0 +1,105 @@
+from datetime import datetime
+import os
+
+try:
+    from pdf_reports import (
+        dataframe_to_html,
+        style_table_rows,
+        add_css_class,
+        pug_to_html,
+        write_report,
+    )
+
+    PDF_REPORTS_AVAILABLE = True
+except ImportError:
+    # pdf_reports is an optional dependency (the "reports" extra). This module
+    # is imported unconditionally by the package, so a missing pdf_reports
+    # must only fail when a PDF report is actually requested.
+    PDF_REPORTS_AVAILABLE = False
+
+from .version import __version__
+
+THIS_PATH = os.path.dirname(os.path.realpath(__file__))
+ASSETS_PATH = os.path.join(THIS_PATH, "report_assets")
+DOMESTICATION_REPORT_TEMPLATE = os.path.join(ASSETS_PATH, "domestication_report.pug")
+STYLESHEET = os.path.join(ASSETS_PATH, "report_style.css")
+
+
+def _require_pdf_reports():
+    """Raise an ImportError with install instructions if pdf_reports is missing."""
+    if not PDF_REPORTS_AVAILABLE:
+        raise ImportError(
+            "Could not load PDF Reports. Install with `pip install pdf_reports`"
+            " to generate a PDF report."
+        )
+
+
+def dnacauldron_pug_to_html(template, **context):
+    """Render a Pug template to HTML using DNA Cauldron's default context.
+
+    Parameters
+    ----------
+
+    template
+      Pug template, passed as-is to ``pdf_reports.pug_to_html``.
+
+    context
+      Template variables. ``sidebar_text`` and ``dc_logo_url`` get default
+      values when not provided.
+    """
+    _require_pdf_reports()
+    now = datetime.now().strftime("%Y-%m-%d")
+    defaults = {
+        "sidebar_text": "Generated on %s by DNA Cauldron version %s"
+        % (now, __version__),
+        "dc_logo_url": os.path.join(ASSETS_PATH, "imgs", "logo.png"),
+    }
+    # NOTE(review): domestication_report.pug also references ``egf_logo_url``,
+    # which has no default here -- confirm how it should be supplied.
+    for key, value in defaults.items():
+        context.setdefault(key, value)
+    return pug_to_html(template, **context)
+
+
+def write_simulation_pdf_report(target, simulation_info):
+    """Write a PDF report summarizing an assembly plan simulation.
+
+    Parameters
+    ----------
+
+    target
+      Where to write the PDF; passed as-is to ``pdf_reports.write_report``.
+
+    simulation_info
+      Dataframe with one row per outcome and two columns (outcome name, number
+      of assemblies), rendered as the report's summary table.
+
+    Raises
+    ------
+
+    ImportError
+      If the optional ``pdf_reports`` dependency is not installed.
+    """
+    _require_pdf_reports()
+    summary_table = dataframe_to_html(simulation_info, extra_classes=("definition",))
+
+    def tr_modifier(tr):
+        # Tag each row "positive" or "negative": a "Valid" row is negative when
+        # its count is "0"; any other row is negative when its count is not "0".
+        tds = list(tr.find_all("td"))
+        if len(tds) == 0:
+            return
+        outcome, number = tds
+        if outcome.text == "Valid":
+            if number.text == "0":
+                add_css_class(tr, "negative")
+            else:
+                add_css_class(tr, "positive")
+        elif number.text != "0":
+            add_css_class(tr, "negative")
+
+    summary_table = style_table_rows(summary_table, tr_modifier)
+    html = dnacauldron_pug_to_html(
+        DOMESTICATION_REPORT_TEMPLATE, summary_table=summary_table
+    )
+    write_report(html, target, extra_stylesheets=(STYLESHEET,))
diff --git a/dnacauldron/version.py b/dnacauldron/version.py index 0309ae2..5fa9130 100644 --- a/dnacauldron/version.py +++ b/dnacauldron/version.py @@ -1 +1 @@ -__version__ = "2.0.2" +__version__ = "2.0.3" diff --git a/setup.py b/setup.py index 1e7cf88..b91c084 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ url="https://github.com/Edinburgh-Genome-Foundry/DnaCauldron", description="Cloning simulation for DNA assembly (Golden Gate, Gibson...)", long_description=open("pypi-readme.rst").read(), - license="see LICENSE.txt", + license="MIT", keywords="DNA assembly cloning simulator synthetic biology", scripts=["scripts/dnacauldron"], packages=find_packages(exclude="docs"), @@ -34,4 +34,5 @@ "python-Levenshtein", "xlrd", ], + extras_require={"reports": ["pdf_reports"]}, ) diff --git a/tests/test_reports.py b/tests/test_reports.py new file mode 100644 index 0000000..214ad23 --- /dev/null +++ b/tests/test_reports.py @@ -0,0 +1,26 @@ +import os +import dnacauldron as dc + +this_directory = os.path.join("tests", "test_hierarchical_type2s") +parts_folder = os.path.join(this_directory, "parts") + + +def test_single_assembly(tmpdir): + repository = dc.SequenceRepository() +
repository.import_records(folder=parts_folder, use_file_names_as_ids=True) + assembly_plan = dc.AssemblyPlan.from_spreadsheet( + assembly_class=dc.Type2sRestrictionAssembly, + path=os.path.join(this_directory, "type2s_two-level.csv"), + ) + plan_simulation = assembly_plan.simulate(sequence_repository=repository) + stats = plan_simulation.compute_stats() + report_writer = dc.AssemblyReportWriter( + include_fragment_plots=False, + include_part_plots=False, + include_mix_graphs=False, + include_assembly_plots=False, + show_overhangs_in_graph=False, + annotate_parts_homologies=False, + include_pdf_report=True, + ) + plan_simulation.write_report(target="@memory", assembly_report_writer=report_writer)