From 890e3fdfe1620f8af757090ec0e07d20c69fb989 Mon Sep 17 00:00:00 2001
From: NJ-Thomson <contactneilt@icloud.com>
Date: Wed, 4 Dec 2024 20:37:17 +0000
Subject: [PATCH] Updated and tested all .rst tutorial files.

---
 docs/tut-2-preprocessing.rst  | 12 +++++++++-
 docs/tut-3-featurization.rst  | 20 ++++++++---------
 docs/tut-4-comparison.rst     |  9 +++-----
 docs/tut-5-dimensionality.rst | 12 ++++------
 docs/tut-6-ssi.rst            | 16 ++++---------
 scripts/density_featurizer.py | 42 +++++++++++++++++------------------
 6 files changed, 52 insertions(+), 59 deletions(-)

diff --git a/docs/tut-2-preprocessing.rst b/docs/tut-2-preprocessing.rst
index e7f7fbd..30c7dde 100644
--- a/docs/tut-2-preprocessing.rst
+++ b/docs/tut-2-preprocessing.rst
@@ -141,9 +141,17 @@ can use the ``_combined`` version of the extraction function:
 ``extract_coordinates_combined()``. It takes lists as arguments for the
 topology files, too. To use the same selection, “multiply” a list of one
 string, as demonstrated below. For this to work, the two selections need
-to have the exactly same atoms.
+to have exactly the same atoms, so we make a new selection below that removes the additional hydrogen (HD2) present in simulation B.
 
 .. code:: python
+
+    # Residue numbers (same in both simulations)
+    resnums = "76:98 105:133 138:173 182:208 226:264 270:308 315:354"
+    # Generate the selection strings
+    sel_string_a = "not name HD2 and protein and resnum "+resnums
+    print('Selection A:\n', sel_string_a, '\n')
+    sel_string_b = "not name HD2 and protein and resnum "+resnums
+    print('Selection B:\n', sel_string_b, '\n')
 
     all_refs = [ref_file_a]*3 + [ref_file_b]*3
     all_trjs = trj_file_a + trj_file_b
@@ -186,6 +194,8 @@ densities from a smaller selection.
 
 .. code:: python
 
+    from pensa.preprocessing import *
+
     # Base for the selection string for protein and all waters (OH2)
     sel_base_water = "protein or byres name OH2"
     # Names of the output files
diff --git a/docs/tut-3-featurization.rst b/docs/tut-3-featurization.rst
index 0c47feb..4b42489 100644
--- a/docs/tut-3-featurization.rst
+++ b/docs/tut-3-featurization.rst
@@ -22,7 +22,7 @@ For example, we can read protein backbone torsions.
 
 .. code:: python
 
-    from pensa.features import read_protein_backbone_torsions
+    from pensa.features import *
 
     bbtors_feat, bbtors_data = read_protein_backbone_torsions(
         "traj/condition-a_receptor.gro", "traj/condition-a_receptor.xtc",
@@ -72,7 +72,7 @@ It was modeled after the feature loader in PyEMMA.
 
 .. code:: python
 
-    from pensa.features import read_structure_features
+    from pensa.features import *
 
 
 .. code:: python
@@ -147,7 +147,7 @@ by a water molecule, and what that water molecule's orientation (polarisation) i
 
 .. code:: python
 
-    from pensa.features import read_water_features
+    from pensa.features import *
 
 For the pdb visualisation, the trajectory needs to be fit to the first frame of the simulation
 so that the density and protein align with each other.
@@ -155,9 +155,9 @@ so that the density and protein align with each other.
 Here we featurize the top 2 most probable water sites (top_waters = 2).
 Orientation of the waters (water_data - spherical coordinates [radians]) is a 
 timeseries distribution. When water is not present at the site, the orientation 
-is recorded as 10000.0 to represent an empty state. If write=True, we can 
-visualise the pocket occupancies on the reference structure in a pdb file with 
-pocket occupancy saved as b_factors. 
+is recorded as 10000.0 to represent an empty state. By passing a name for the
+output files via the ``out_name`` argument, we can visualise the pocket
+occupancies on the reference structure in a pdb file with pocket occupancy saved as b_factors.
 
 You must specify the water model for writing out the grid.
 options include:
@@ -169,13 +169,12 @@ water
 .. code:: python
     
     struc = "traj/condition-a_water.gro"
-    xtc = "traj/condition-a_water.gro"
+    xtc = "traj/condition-a_water_aligned.xtc"
     water_feat, water_data = read_water_features(
         structure_input = struc, 
         xtc_input = xtc,
         top_waters = 2,
         atomgroup = "OH2",
-        write = True,
         write_grid_as="TIP3P",
         out_name = "features/11426_dyn_151_water"
     )
@@ -206,7 +205,7 @@ but ignores orientations as atoms are considered spherically symmetric.
 
 .. code:: python
 
-    from pensa.features import read_atom_features
+    from pensa.features import *
 
 Here we locate the sodium site which has the highest probability. The density is 
 written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis.
@@ -218,10 +217,9 @@ written (write=True) using the default density conversion "Angstrom^{-3}" in MDA
     atom_feat, atom_data = read_atom_features(
         structure_input = struc,
         xtc_input = xtc,
-        top_atoms = 2,
+        top_atoms = 1,
         atomgroup = "SOD",
         element = "Na",
-        write = True,
         out_name = "features/11426_dyn_151_sodium"
     )
                                             
diff --git a/docs/tut-4-comparison.rst b/docs/tut-4-comparison.rst
index da789ef..417d084 100644
--- a/docs/tut-4-comparison.rst
+++ b/docs/tut-4-comparison.rst
@@ -8,12 +8,9 @@ First we import the necessary modules.
 
 .. code:: python
 
-    from pensa.comparison import \
-        relative_entropy_analysis, ssi_ensemble_analysis, \
-        residue_visualization, distances_visualization
-    from pensa.features import read_structure_features, \
-        sort_features, get_multivar_res
-    from pensa.statesinfo import get_discrete_states
+    from pensa.comparison import *
+    from pensa.features import *
+    from pensa.statesinfo import *
     import numpy as np
 
 
diff --git a/docs/tut-5-dimensionality.rst b/docs/tut-5-dimensionality.rst
index 9333d9c..94319ec 100644
--- a/docs/tut-5-dimensionality.rst
+++ b/docs/tut-5-dimensionality.rst
@@ -19,14 +19,10 @@ First we import the necessary modules and functions.
 
 .. code:: python
     
-    from pensa.features import read_structure_features
-    from pensa.dimensionality import \
-        calculate_pca, get_components_pca, pca_eigenvalues_plot, \
-        compare_projections, sort_trajs_along_common_pc, sort_traj_along_pc
-    from pensa.comparison import pca_feature_correlation
-    from pensa.clusters import \
-        obtain_clusters, wss_over_number_of_clusters, write_cluster_traj, \
-        obtain_combined_clusters, wss_over_number_of_combined_clusters
+    from pensa.features import *
+    from pensa.dimensionality import *
+    from pensa.comparison import *
+    from pensa.clusters import *
     import numpy as np
 
 
diff --git a/docs/tut-6-ssi.rst b/docs/tut-6-ssi.rst
index de0c732..c4e58bb 100644
--- a/docs/tut-6-ssi.rst
+++ b/docs/tut-6-ssi.rst
@@ -20,17 +20,9 @@ Features and States
 
     import os
     import numpy as np
-    from pensa.features import \
-        read_structure_features, \
-        read_water_features, \
-        get_multivar_res, \
-        sort_features
-    from pensa.statesinfo import \
-        get_discrete_states
-    from pensa.comparison import \
-        ssi_ensemble_analysis, \
-        ssi_feature_analysis, \
-        cossi_featens_analysis
+    from pensa.features import *
+    from pensa.statesinfo import *
+    from pensa.comparison import *
 
 First, load the structural features as described in the previous tutorial:
 
@@ -75,7 +67,7 @@ the preprocessing tutorial.
 
 .. code:: python
 
-    grid = "traj/water_grid_ab_OH2_density.dx"
+    grid = "ab_grid_OH2_density.dx"
     water_feat_a, water_data_a = read_water_features(
         "traj/condition-a_water.gro", "traj/condition-a_water_aligned.xtc",
         top_waters = 5, atomgroup = "OH2", grid_input = grid
diff --git a/scripts/density_featurizer.py b/scripts/density_featurizer.py
index 64fc185..e86f051 100755
--- a/scripts/density_featurizer.py
+++ b/scripts/density_featurizer.py
@@ -20,38 +20,38 @@
  - TIP4P
  - water
 
- """
+  """
 
-# struc = "mor-data/11426_dyn_151.pdb"
-# xtc = "mor-data/11423_trj_151.xtc"
-# water_feat, water_data = read_water_features(
-#     structure_input=struc,
-#     xtc_input=xtc,
-#     top_waters=1,
-#     atomgroup="OH2",
-#     write_grid_as="TIP3P",
-#     out_name="11426_dyn_151"
-# )
+struc = "mor-data/11426_dyn_151.pdb"
+xtc = "mor-data/11423_trj_151.xtc"
+water_feat, water_data = read_water_features(
+    structure_input=struc,
+    xtc_input=xtc,
+    top_waters=1,
+    atomgroup="OH2",
+    write_grid_as="TIP3P",
+    out_name="11426_dyn_151"
+)
 
 
 # # We can use the get_atom_features, which provides the same
 # # functionality but ignores orientations as atoms are considered spherically symmetric.
 
-# struc = "mor-data/11426_dyn_151.pdb"
+struc = "mor-data/11426_dyn_151.pdb"
 
-# xtc = "mor-data/11423_trj_151.xtc"
+xtc = "mor-data/11423_trj_151.xtc"
 
 # # Here we locate the sodium site which has the highest probability
 # # The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis
 
-# atom_feat, atom_data = read_atom_features(
-#     structure_input=struc,
-#     xtc_input=xtc,
-#     top_atoms=1,
-#     atomgroup="SOD",
-#     element="Na",
-#     out_name="11426_dyn_151"
-# )
+atom_feat, atom_data = read_atom_features(
+    structure_input=struc,
+    xtc_input=xtc,
+    top_atoms=1,
+    atomgroup="SOD",
+    element="Na",
+    out_name="11426_dyn_151"
+)
 
 
 # If we have already obtained the grid, we can speed up featurization by reading it in.