Merge pull request #6 from Edinburgh-Genome-Foundry/dev

PDF report
Edinburgh-Genome-Foundry · Oct 18, 2020 · 5208b96
2 parents 962822c + 8f45aa4
commit 5208b96
Show file tree

Hide file tree

Showing 12 changed files with 230 additions and 69 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -3,7 +3,7 @@ python:
   - "3.6"
 # command to install dependencies
 install:
-  - pip install coveralls geneblocks pytest-cov==2.6 pytest==3.2.3
+  - pip install coveralls geneblocks pdf_reports pytest-cov==2.6 pytest==3.2.3
   - pip install -e .
   - sudo apt-get install ncbi-blast+
 # command to run tests

diff --git a/README.rst b/README.rst
@@ -142,22 +142,24 @@ The simulation and reporting on an assembly plan is very similar to that of a si
    # Write a detailed report on each assembly and on the plan as a whole
    plan_simulation.write_report("my_assembly_simulation.zip")
 
+
 Installation
 -------------
 
-You can install DnaCauldron through PIP
-
+You can install DnaCauldron through PIP:
 
 .. code:: shell
 
     sudo pip install dnacauldron
 
-Alternatively, you can unzip the sources in a folder and type
+The full installation using `dnacauldron[reports]` is required for report generation.
+Alternatively, you can unzip the sources in a folder and type:
 
 .. code:: shell
 
     sudo python setup.py install
 
+
 How it works
 ------------
 

diff --git a/dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py b/dnacauldron/Assembly/AssemblyReportWriter/AssemblyReportWriter.py
@@ -6,9 +6,9 @@
 
 
 class AssemblyReportWriter(AssemblyReportPlotsMixin):
-    """Class to configure assembly simulation reports writing.
+    """Class to configure assembly simulation report writing.
 
-    Responsible to write the final sequence(s) of the assembly in Genbank
+    Responsible for writing the final sequence(s) of the assembly in Genbank
     format as well as a .csv report on all assemblies produced and PDF figures
     to allow a quick overview or diagnostic.
 
@@ -28,32 +28,34 @@ class AssemblyReportWriter(AssemblyReportPlotsMixin):
     include_part_plots
       Either True/False/"on_error" to plot schemas of the parts used, possibly
       with restriction sites relevant to the AssemblyMix.
+
     include_mix_graphs
-      Either True/False/"on_error" to plot representations of fragments
+      Either True/False/"on_error" to plot representations of fragment
       connectivity in the AssemblyMix created during the simulation.
 
     include_part_records
       True/False to include the parts records in the simulation results (makes
-      for larger folders and zips, but is better for traceability)
+      for larger folders and zips, but is better for traceability).
 
     include_assembly_plots
       True/False to include assembly schemas in the reports (makes the
       report generation slower, but makes it easier to check assemblies at a
-      glance)
-    
+      glance).
+
     show_overhangs_in_graph
       If true, the AssemblyMix graph representations will display the sequence
-      of all fragments overhangs.
-    
+      of all fragment overhangs.
+
     include_errors_spreadsheet
       If true and there are errors, an errors spreadsheet will be added to the
-      report
-    
+      report.
+
     include_warnings_spreadsheet
       If true and there are warnings, a warnings spreadsheet will be added to
-      the report
-
+      the report.
 
+    include_pdf_report
+      If true, a PDF report file is also generated.
     """
 
     def __init__(
@@ -67,6 +69,7 @@ def __init__(
         annotate_parts_homologies=True,
         include_errors_spreadsheet=True,
         include_warnings_spreadsheet=True,
+        include_pdf_report=False,
     ):
         self.include_fragment_plots = include_fragment_plots
         self.include_part_plots = include_part_plots
@@ -77,6 +80,7 @@ def __init__(
         self.annotate_parts_homologies = annotate_parts_homologies
         self.include_errors_spreadsheet = include_errors_spreadsheet
         self.include_warnings_spreadsheet = include_warnings_spreadsheet
+        self.include_pdf_report = include_pdf_report
 
     def _write_constructs_spreadsheet(self, simulation, report_root):
         dataframe = simulation.compute_summary_dataframe()
@@ -114,20 +118,12 @@ def _write_records_plots(self, assembly_simulation, report_root):
                 construct_record = construct_record.as_biopython_record()
             self.plot_construct(construct_record, plots_dir)
 
-    def _write_errors_spreadsheet(
-        self, simulation, report_root, error_type="error"
-    ):
-        errors = (
-            simulation.errors if error_type == "error" else simulation.warnings
-        )
+    def _write_errors_spreadsheet(self, simulation, report_root, error_type="error"):
+        errors = simulation.errors if error_type == "error" else simulation.warnings
         if len(errors) > 0:
-            columns = ";".join(
-                ["assembly_name", "message", "suggestion", "data"]
-            )
+            columns = ";".join(["assembly_name", "message", "suggestion", "data"])
             all_error_rows = [
-                ";".join(
-                    [err.assembly.name, err.message, err.data_as_string(),]
-                )
+                ";".join([err.assembly.name, err.message, err.data_as_string(),])
                 for err in errors
             ]
             filename = "%s.csv" % error_type
@@ -153,9 +149,7 @@ def write_report(self, assembly_simulation, target):
 
         self._write_records(assembly_simulation, report_root)
         if self.include_part_records:
-            self._write_part_records(
-                assembly_simulation, part_records, report_root
-            )
+            self._write_part_records(assembly_simulation, part_records, report_root)
         if self.include_assembly_plots:
             self._write_records_plots(assembly_simulation, report_root)
 
@@ -165,9 +159,7 @@ def write_report(self, assembly_simulation, target):
         if plot_options["parts_plots"]:
             enzymes = assembly.enzymes if hasattr(assembly, "enzymes") else []
             self.plot_provided_parts(
-                report_root=report_root,
-                parts_records=part_records,
-                enzymes=enzymes,
+                report_root=report_root, parts_records=part_records, enzymes=enzymes,
             )
         if plot_options["fragment_plots"]:
             for mix in assembly_simulation.mixes:
@@ -180,9 +172,7 @@ def write_report(self, assembly_simulation, target):
                     with_overhangs=self.show_overhangs_in_graph,
                 )
         if len(assembly_simulation.construct_records):
-            self._write_constructs_spreadsheet(
-                assembly_simulation, report_root
-            )
+            self._write_constructs_spreadsheet(assembly_simulation, report_root)
         if self.include_errors_spreadsheet:
             self._write_errors_spreadsheet(
                 assembly_simulation, report_root, error_type="error"
@@ -191,7 +181,6 @@ def write_report(self, assembly_simulation, target):
             self._write_errors_spreadsheet(
                 assembly_simulation, report_root, error_type="warnings"
             )
-
 
         if target == "@memory":
             return report_root._close()
diff --git a/dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py b/dnacauldron/AssemblyPlan/AssemblyPlanSimulation.py
@@ -1,14 +1,20 @@
 from flametree import file_tree
 import proglog
 import pandas
-from ..tools import (
-    format_data_dicts_records_for_spreadsheet
-)
+from ..tools import format_data_dicts_records_for_spreadsheet
 from ..biotools import write_record
 from ..Assembly.AssemblyReportWriter import AssemblyReportWriter
 from .plot_leveled_graph import plot_leveled_graph
 import matplotlib.pyplot as plt
 
+try:
+    import pdf_reports
+
+    PDF_REPORTS_AVAILABLE = True
+except ImportError:
+    PDF_REPORTS_AVAILABLE = False
+from ..reports import write_simulation_pdf_report
+
 
 class AssemblyPlanSimulation:
     def __init__(
@@ -73,7 +79,7 @@ def compute_summary_dataframe(self):
         return pandas.DataFrame(data, columns=columns)
 
     def compute_stats(self):
-        """Return a dictionnary of stats.
+        """Return a dictionary of stats.
 
         For instance {"cancelled_assemblies": 2, "errored_assemblies": 1,
         "valid_assemblies": 5}.
@@ -94,41 +100,37 @@ def write_report(
         logger="bar",
         include_original_parts_records=True,
     ):
-        """Write a comprehensive report to a folder or zip file
+        """Write a comprehensive report to a folder or zip file.
 
         Parameters
         ----------
 
         target
           Either a path to a folder, to a zip file, or ``"@memory"`` to write
           into a virtual zip file whose raw data is then returned.
-        
+
         folder_name
           Name of the folder created inside the target to host the report (yes,
           it is a folder inside a folder, which can be very practical).
-        
+
         assembly_report_writer
           Either the "default" or any AssemblyReportWriter instance.
-        
+
         logger
           Either "bar" for a progress bar, or None, or any Proglog logger.
 
         include_original_parts_records
           If true, the original provided part records will be included in the
-          report (creates larger file sizes, but better for traceability). 
-        """ 
+          report (creates larger file sizes, but better for traceability).
+        """
         if assembly_report_writer == "default":
             # We'll write all records into one folder for the whole plan
-            assembly_report_writer = AssemblyReportWriter(
-                include_part_records=False
-            )
+            assembly_report_writer = AssemblyReportWriter(include_part_records=False)
         logger = proglog.default_bar_logger(logger)
         if folder_name == "auto":
             folder_name = self.assembly_plan.name + "_simulation"
         report_root = file_tree(target)._dir(folder_name, replace=True)
-        self._write_assembly_reports(
-            report_root, assembly_report_writer, logger=logger
-        )
+        self._write_assembly_reports(report_root, assembly_report_writer, logger=logger)
         self._write_errors_spreadsheet(report_root, error_type="error")
         self._write_errors_spreadsheet(report_root, error_type="warning")
 
@@ -142,6 +144,19 @@ def write_report(
             self._write_all_required_parts_records(report_root)
         if not self.has_single_level:
             self._plot_assembly_graph(report_root)
+
+        if assembly_report_writer.include_pdf_report:
+            if not PDF_REPORTS_AVAILABLE:
+                raise ImportError(
+                    "Could not load PDF Reports. Install with `pip install pdf_reports`"
+                    " to generate a PDF report."
+                )
+
+            simulation_info = self._calculate_simulation_info()
+            write_simulation_pdf_report(
+                report_root._file("Report.pdf"), simulation_info
+            )
+
         if target == "@memory":
             return report_root._close()
 
@@ -164,8 +179,7 @@ def _write_cancelled_assemblies(self, report_root):
         filename = self._get_file_name("cancelled_assemblies.csv")
         columns = ",".join(["cancelled_assembly", "failed_parent_assembly"])
         cancelled = [
-            ",".join([c.assembly_name, c.failed_dependency])
-            for c in self.cancelled
+            ",".join([c.assembly_name, c.failed_dependency]) for c in self.cancelled
         ]
         report_root._file(filename).write("\n".join([columns] + cancelled))
 
@@ -179,9 +193,7 @@ def parts_sort_key(name):
                 return 1000000
             return indices[0]
 
-        all_parts = (
-            self.list_all_original_parts_used() + self.assembly_plan.all_parts
-        )
+        all_parts = self.list_all_original_parts_used() + self.assembly_plan.all_parts
         all_parts = sorted(set(all_parts), key=parts_sort_key)
 
         def sort_key(name):
@@ -199,9 +211,7 @@ def draw_node(x, y, node, ax):
             text = node.replace("_", " ")
             ax.text(x, y, text, bbox={"facecolor": "white"})
 
-        _, ax = plot_leveled_graph(
-            levels=levels, edges=edges, draw_node=draw_node
-        )
+        _, ax = plot_leveled_graph(levels=levels, edges=edges, draw_node=draw_node)
         target = report_root._file("assembly_plan_graph.pdf")
         ax.figure.savefig(target.open("wb"), format="pdf")
         plt.close(ax.figure)
@@ -211,9 +221,7 @@ def _write_errors_spreadsheet(self, report_root, error_type="error"):
             error
             for simulation in self.assembly_simulations
             for error in (
-                simulation.errors
-                if error_type == "error"
-                else simulation.warnings
+                simulation.errors if error_type == "error" else simulation.warnings
             )
         ]
         if len(all_errors) > 0:
@@ -260,8 +268,7 @@ def list_all_original_parts_used(self):
             for part in simulation.list_all_parts_used()
         ]
         assemblies = [
-            simulation.assembly.name
-            for simulation in self.assembly_simulations
+            simulation.assembly.name for simulation in self.assembly_simulations
         ]
         parts_that_arent_assembled = set(all_parts).difference(set(assemblies))
         return sorted(parts_that_arent_assembled)
@@ -295,3 +302,20 @@ def _write_assembly_plan_spreadsheets(self, report_root):
             f = report_root._file(file_name)
             lines = [",".join([c] + parts) for c, parts in construct_parts]
             f.write("\n".join(["construct, parts"] + lines))
+
+    def _calculate_simulation_info(self):
+        stats_dict = self.compute_stats()
+        stats_dict_series = {
+            "Outcome": pandas.Series(["Valid", "Cancelled", "Errored"]),
+            "Number of assemblies": pandas.Series(
+                [
+                    stats_dict["valid_assemblies"],
+                    stats_dict["cancelled_assemblies"],
+                    stats_dict["errored_assemblies"],
+                ]
+            ),
+        }
+
+        stats_df = pandas.DataFrame(stats_dict_series)
+
+        return stats_df
diff --git a/dnacauldron/__init__.py b/dnacauldron/__init__.py
@@ -46,6 +46,9 @@
     write_record,
     autoselect_enzyme,
 )
+
+from .reports import write_simulation_pdf_report
+
 from .utils import (
     swap_donor_vector_part,
     insert_parts_on_backbones,

diff --git a/dnacauldron/report_assets/domestication_report.pug b/dnacauldron/report_assets/domestication_report.pug
@@ -0,0 +1,34 @@
+#sidebar: p {{sidebar_text}}
+
+.logos
+  img(src="file:///{{ dc_logo_url }}")
+  img(src="file:///{{ egf_logo_url }}")
+
+hr
+h1 DNA assembly simulation report
+hr
+
+p.
+  The 'all_construct_records' folder contains the final assemblies and 'part_records'
+  (if generated) contains the original input Genbank files of all parts provided for
+  the assembly. There is one folder for each assembly, which contains:
+ul
+  li The Genbank file of the assembly (.gb)
+  li A CSV file about the assembly
+  li PDF files with schematic views of how the parts assemble together (if generated)
+  li Genbank files of the parts, in the 'provided_parts_records' folder
+
+p.
+  In addition, various summary text and csv files are provided about the simulation.
+
+
+h2  Summary table
+
+{{ summary_table }}
+
+//- h2 Domesticators
+
+//- each domesticator in domesticators
+//-   .ui.segment.raised
+//-     .ui.title.ribbon.label.teal {{domesticator.name}}
+//-     .description {{ domesticator.html_details() }}
diff --git a/dnacauldron/report_assets/imgs/logo.png b/dnacauldron/report_assets/imgs/logo.png