Merge pull request #66 from drorlab/TutorialUpdates

Updated and tested all .rst tutorial files.
drorlab · Dec 6, 2024 · ca93519 · ca93519
2 parents 7ff7a71 + 890e3fd
commit ca93519
Show file tree

Hide file tree

Showing 6 changed files with 52 additions and 59 deletions.
diff --git a/docs/tut-2-preprocessing.rst b/docs/tut-2-preprocessing.rst
@@ -141,9 +141,17 @@ can use the ``_combined`` version of the extraction function:
 ``extract_coordinates_combined()``. It takes lists as arguments for the
 topology files, too. To use the same selection, “multiply” a list of one
 string, as demonstrated below. For this to work, the two selections need
-to have the exactly same atoms.
+to have exactly the same atoms, so we make a new selection below that removes the additional hydrogen in simulation B.
 
 .. code:: python
+
+    # # Residue numbers (same in both simulations)
+    resnums = "76:98 105:133 138:173 182:208 226:264 270:308 315:354"
+    # # Generate the selection strings
+    sel_string_a = "not name HD2 and protein and resnum "+resnums
+    print('Selection A:\n', sel_string_a, '\n')
+    sel_string_b = "not name HD2 and protein and resnum "+resnums
+    print('Selection B:\n', sel_string_b, '\n')
+
 
     all_refs = [ref_file_a]*3 + [ref_file_b]*3
     all_trjs = trj_file_a + trj_file_b
@@ -186,6 +194,8 @@ densities from a smaller selection.
 
 .. code:: python
 
+    from pensa.preprocessing import *
+
     # Base for the selection string for protein and all waters (OH2)
     sel_base_water = "protein or byres name OH2"
     # Names of the output files

diff --git a/docs/tut-3-featurization.rst b/docs/tut-3-featurization.rst
@@ -22,7 +22,7 @@ For example, we can read protein backbone torsions.
 
 .. code:: python
 
-    from pensa.features import read_protein_backbone_torsions
+    from pensa.features import *
 
     bbtors_feat, bbtors_data = read_protein_backbone_torsions(
         "traj/condition-a_receptor.gro", "traj/condition-a_receptor.xtc",
@@ -72,7 +72,7 @@ It was modeled after the feature loader in PyEMMA.
 
 .. code:: python
 
-    from pensa.features import read_structure_features
+    from pensa.features import *
 
 
 .. code:: python
@@ -147,17 +147,17 @@ by a water molecule, and what that water molecule's orientation (polarisation) i
 
 .. code:: python
 
-    from pensa.features import read_water_features
+    from pensa.features import *
 
 For the pdb visualisation, the trajectory needs to be fit to the first frame of the simulation
 so that the density and protein align with each other.
 
 Here we featurize the top 2 most probable water sites (top_waters = 2).
 Orientation of the waters (water_data - spherical coordinates [radians]) is a 
 timeseries distribution. When water is not present at the site, the orientation 
-is recorded as 10000.0 to represent an empty state. If write=True, we can 
-visualise the pocket occupancies on the reference structure in a pdb file with 
-pocket occupancy saved as b_factors. 
+is recorded as 10000.0 to represent an empty state. By specifying a name for
+the output files via the ``out_name`` argument, we can visualise the pocket
+occupancies on the reference structure in a pdb file with pocket occupancy saved as b_factors.
 
 You must specify the water model for writing out the grid.
 options include:
@@ -169,13 +169,12 @@ water
 .. code:: python
     
     struc = "traj/condition-a_water.gro"
-    xtc = "traj/condition-a_water.gro"
+    xtc = "traj/condition-a_water_aligned.xtc"
     water_feat, water_data = read_water_features(
         structure_input = struc, 
         xtc_input = xtc,
         top_waters = 2,
         atomgroup = "OH2",
-        write = True,
         write_grid_as="TIP3P",
         out_name = "features/11426_dyn_151_water"
     )
@@ -206,7 +205,7 @@ but ignores orientations as atoms are considered spherically symmetric.
 
 .. code:: python
 
-    from pensa.features import read_atom_features
+    from pensa.features import *
 
 Here we locate the sodium site which has the highest probability. The density is 
 written (to files named with the ``out_name`` prefix) using the default density conversion "Angstrom^{-3}" in MDAnalysis.
@@ -218,10 +217,9 @@ written (write=True) using the default density conversion "Angstrom^{-3}" in MDA
     atom_feat, atom_data = read_atom_features(
         structure_input = struc,
         xtc_input = xtc,
-        top_atoms = 2,
+        top_atoms = 1,
         atomgroup = "SOD",
         element = "Na",
-        write = True,
         out_name = "features/11426_dyn_151_sodium"
     )
                                             
diff --git a/docs/tut-4-comparison.rst b/docs/tut-4-comparison.rst
@@ -8,12 +8,9 @@ First we import the necessary modules.
 
 .. code:: python
 
-    from pensa.comparison import \
-        relative_entropy_analysis, ssi_ensemble_analysis, \
-        residue_visualization, distances_visualization
-    from pensa.features import read_structure_features, \
-        sort_features, get_multivar_res
-    from pensa.statesinfo import get_discrete_states
+    from pensa.comparison import *
+    from pensa.features import *
+    from pensa.statesinfo import *
     import numpy as np
 
 

diff --git a/docs/tut-5-dimensionality.rst b/docs/tut-5-dimensionality.rst
@@ -19,14 +19,10 @@ First we import the necessary modules and functions.
 
 .. code:: python
     
-    from pensa.features import read_structure_features
-    from pensa.dimensionality import \
-        calculate_pca, get_components_pca, pca_eigenvalues_plot, \
-        compare_projections, sort_trajs_along_common_pc, sort_traj_along_pc
-    from pensa.comparison import pca_feature_correlation
-    from pensa.clusters import \
-        obtain_clusters, wss_over_number_of_clusters, write_cluster_traj, \
-        obtain_combined_clusters, wss_over_number_of_combined_clusters
+    from pensa.features import *
+    from pensa.dimensionality import *
+    from pensa.comparison import *
+    from pensa.clusters import *
     import numpy as np
 
 

diff --git a/docs/tut-6-ssi.rst b/docs/tut-6-ssi.rst
@@ -20,17 +20,9 @@ Features and States
 
     import os
     import numpy as np
-    from pensa.features import \
-        read_structure_features, \
-        read_water_features, \
-        get_multivar_res, \
-        sort_features
-    from pensa.statesinfo import \
-        get_discrete_states
-    from pensa.comparison import \
-        ssi_ensemble_analysis, \
-        ssi_feature_analysis, \
-        cossi_featens_analysis
+    from pensa.features import *
+    from pensa.statesinfo import *
+    from pensa.comparison import *
 
 First, load the structural features as described in the previous tutorial:
 
@@ -75,7 +67,7 @@ the preprocessing tutorial.
 
 .. code:: python
 
-    grid = "traj/water_grid_ab_OH2_density.dx"
+    grid = "ab_grid_OH2_density.dx"
     water_feat_a, water_data_a = read_water_features(
         "traj/condition-a_water.gro", "traj/condition-a_water_aligned.xtc",
         top_waters = 5, atomgroup = "OH2", grid_input = grid

diff --git a/scripts/density_featurizer.py b/scripts/density_featurizer.py
@@ -20,38 +20,38 @@
  - TIP4P
  - water
 
- """
+  """
 
-# struc = "mor-data/11426_dyn_151.pdb"
-# xtc = "mor-data/11423_trj_151.xtc"
-# water_feat, water_data = read_water_features(
-#     structure_input=struc,
-#     xtc_input=xtc,
-#     top_waters=1,
-#     atomgroup="OH2",
-#     write_grid_as="TIP3P",
-#     out_name="11426_dyn_151"
-# )
+struc = "mor-data/11426_dyn_151.pdb"
+xtc = "mor-data/11423_trj_151.xtc"
+water_feat, water_data = read_water_features(
+    structure_input=struc,
+    xtc_input=xtc,
+    top_waters=1,
+    atomgroup="OH2",
+    write_grid_as="TIP3P",
+    out_name="11426_dyn_151"
+)
 
 
 # # We can use the get_atom_features, which provides the same
 # # functionality but ignores orientations as atoms are considered spherically symmetric.
 
-# struc = "mor-data/11426_dyn_151.pdb"
+struc = "mor-data/11426_dyn_151.pdb"
 
-# xtc = "mor-data/11423_trj_151.xtc"
+xtc = "mor-data/11423_trj_151.xtc"
 
 # # Here we locate the sodium site which has the highest probability
 # # The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis
 
-# atom_feat, atom_data = read_atom_features(
-#     structure_input=struc,
-#     xtc_input=xtc,
-#     top_atoms=1,
-#     atomgroup="SOD",
-#     element="Na",
-#     out_name="11426_dyn_151"
-# )
+atom_feat, atom_data = read_atom_features(
+    structure_input=struc,
+    xtc_input=xtc,
+    top_atoms=1,
+    atomgroup="SOD",
+    element="Na",
+    out_name="11426_dyn_151"
+)
 
 
 # If we have already obtained the grid, we can speed up featurization by reading it in.