From 890e3fdfe1620f8af757090ec0e07d20c69fb989 Mon Sep 17 00:00:00 2001 From: NJ-Thomson Date: Wed, 4 Dec 2024 20:37:17 +0000 Subject: [PATCH] Updated and tested all .rst tutorial files. --- docs/tut-2-preprocessing.rst | 12 +++++++++- docs/tut-3-featurization.rst | 20 ++++++++--------- docs/tut-4-comparison.rst | 9 +++----- docs/tut-5-dimensionality.rst | 12 ++++------ docs/tut-6-ssi.rst | 16 ++++--------- scripts/density_featurizer.py | 42 +++++++++++++++++------------------ 6 files changed, 52 insertions(+), 59 deletions(-) diff --git a/docs/tut-2-preprocessing.rst b/docs/tut-2-preprocessing.rst index e7f7fbd..30c7dde 100644 --- a/docs/tut-2-preprocessing.rst +++ b/docs/tut-2-preprocessing.rst @@ -141,9 +141,17 @@ can use the ``_combined`` version of the extraction function: ``extract_coordinates_combined()``. It takes lists as arguments for the topology files, too. To use the same selection, “multiply” a list of one string, as demonstrated below. For this to work, the two selections need -to have the exactly same atoms. +to have exactly the same atoms, so we make a new selection below removing the additional hydrogen in simulation B. .. code:: python + # # Residue numbers (same in both simulations) + resnums = "76:98 105:133 138:173 182:208 226:264 270:308 315:354" + # # Generate the selection strings + sel_string_a = "not name HD2 and protein and resnum "+resnums + print('Selection A:\n', sel_string_a, '\n') + sel_string_b = "not name HD2 and protein and resnum "+resnums + print('Selection B:\n', sel_string_b, '\n') + all_refs = [ref_file_a]*3 + [ref_file_b]*3 all_trjs = trj_file_a + trj_file_b @@ -186,6 +194,8 @@ densities from a smaller selection. .. 
code:: python + from pensa.preprocessing import * + # Base for the selection string for protein and all waters (OH2) sel_base_water = "protein or byres name OH2" # Names of the output files diff --git a/docs/tut-3-featurization.rst b/docs/tut-3-featurization.rst index 0c47feb..4b42489 100644 --- a/docs/tut-3-featurization.rst +++ b/docs/tut-3-featurization.rst @@ -22,7 +22,7 @@ For example, we can read protein backbone torsions. .. code:: python - from pensa.features import read_protein_backbone_torsions + from pensa.features import * bbtors_feat, bbtors_data = read_protein_backbone_torsions( "traj/condition-a_receptor.gro", "traj/condition-a_receptor.xtc", @@ -72,7 +72,7 @@ It was modeled after the feature loader in PyEMMA. .. code:: python - from pensa.features import read_structure_features + from pensa.features import * .. code:: python @@ -147,7 +147,7 @@ by a water molecule, and what that water molecule's orientation (polarisation) i .. code:: python - from pensa.features import read_water_features + from pensa.features import * For the pdb visualisation, the trajectory needs to be fit to the first frame of the simulation so that the density and protein align with each other. @@ -155,9 +155,9 @@ so that the density and protein align with each other. Here we featurize the top 2 most probable water sites (top_waters = 2). Orientation of the waters (water_data - spherical coordinates [radians]) is a timeseries distribution. When water is not present at the site, the orientation -is recorded as 10000.0 to represent an empty state. If write=True, we can -visualise the pocket occupancies on the reference structure in a pdb file with -pocket occupancy saved as b_factors. +is recorded as 10000.0 to represent an empty state. By specifying a name +for the output files via the out_name argument, we can visualise the pocket +occupancies on the reference structure in a pdb file with pocket occupancy saved as b_factors. 
You must specify the water model for writing out the grid. options include: @@ -169,13 +169,12 @@ water .. code:: python struc = "traj/condition-a_water.gro" - xtc = "traj/condition-a_water.gro" + xtc = "traj/condition-a_water_aligned.xtc" water_feat, water_data = read_water_features( structure_input = struc, xtc_input = xtc, top_waters = 2, atomgroup = "OH2", - write = True, write_grid_as="TIP3P", out_name = "features/11426_dyn_151_water" ) @@ -206,7 +205,7 @@ but ignores orientations as atoms are considered spherically symmetric. .. code:: python - from pensa.features import read_atom_features + from pensa.features import * Here we locate the sodium site which has the highest probability. The density is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis. @@ -218,10 +217,9 @@ written (write=True) using the default density conversion "Angstrom^{-3}" in MDA atom_feat, atom_data = read_atom_features( structure_input = struc, xtc_input = xtc, - top_atoms = 2, + top_atoms = 1, atomgroup = "SOD", element = "Na", - write = True, out_name = "features/11426_dyn_151_sodium" ) diff --git a/docs/tut-4-comparison.rst b/docs/tut-4-comparison.rst index da789ef..417d084 100644 --- a/docs/tut-4-comparison.rst +++ b/docs/tut-4-comparison.rst @@ -8,12 +8,9 @@ First we import the necessary modules. .. code:: python - from pensa.comparison import \ - relative_entropy_analysis, ssi_ensemble_analysis, \ - residue_visualization, distances_visualization - from pensa.features import read_structure_features, \ - sort_features, get_multivar_res - from pensa.statesinfo import get_discrete_states + from pensa.comparison import * + from pensa.features import * + from pensa.statesinfo import * import numpy as np diff --git a/docs/tut-5-dimensionality.rst b/docs/tut-5-dimensionality.rst index 9333d9c..94319ec 100644 --- a/docs/tut-5-dimensionality.rst +++ b/docs/tut-5-dimensionality.rst @@ -19,14 +19,10 @@ First we import the necessary modules and functions. 
.. code:: python - from pensa.features import read_structure_features - from pensa.dimensionality import \ - calculate_pca, get_components_pca, pca_eigenvalues_plot, \ - compare_projections, sort_trajs_along_common_pc, sort_traj_along_pc - from pensa.comparison import pca_feature_correlation - from pensa.clusters import \ - obtain_clusters, wss_over_number_of_clusters, write_cluster_traj, \ - obtain_combined_clusters, wss_over_number_of_combined_clusters + from pensa.features import * + from pensa.dimensionality import * + from pensa.comparison import * + from pensa.clusters import * import numpy as np diff --git a/docs/tut-6-ssi.rst b/docs/tut-6-ssi.rst index de0c732..c4e58bb 100644 --- a/docs/tut-6-ssi.rst +++ b/docs/tut-6-ssi.rst @@ -20,17 +20,9 @@ Features and States import os import numpy as np - from pensa.features import \ - read_structure_features, \ - read_water_features, \ - get_multivar_res, \ - sort_features - from pensa.statesinfo import \ - get_discrete_states - from pensa.comparison import \ - ssi_ensemble_analysis, \ - ssi_feature_analysis, \ - cossi_featens_analysis + from pensa.features import * + from pensa.statesinfo import * + from pensa.comparison import * First, load the structural features as described in the previous tutorial: @@ -75,7 +67,7 @@ the preprocessing tutorial. .. 
code:: python - grid = "traj/water_grid_ab_OH2_density.dx" + grid = "ab_grid_OH2_density.dx" water_feat_a, water_data_a = read_water_features( "traj/condition-a_water.gro", "traj/condition-a_water_aligned.xtc", top_waters = 5, atomgroup = "OH2", grid_input = grid diff --git a/scripts/density_featurizer.py b/scripts/density_featurizer.py index 64fc185..e86f051 100755 --- a/scripts/density_featurizer.py +++ b/scripts/density_featurizer.py @@ -20,38 +20,38 @@ - TIP4P - water - """ + """ -# struc = "mor-data/11426_dyn_151.pdb" -# xtc = "mor-data/11423_trj_151.xtc" -# water_feat, water_data = read_water_features( -# structure_input=struc, -# xtc_input=xtc, -# top_waters=1, -# atomgroup="OH2", -# write_grid_as="TIP3P", -# out_name="11426_dyn_151" -# ) +struc = "mor-data/11426_dyn_151.pdb" +xtc = "mor-data/11423_trj_151.xtc" +water_feat, water_data = read_water_features( + structure_input=struc, + xtc_input=xtc, + top_waters=1, + atomgroup="OH2", + write_grid_as="TIP3P", + out_name="11426_dyn_151" +) # # We can use the get_atom_features, which provides the same # # functionality but ignores orientations as atoms are considered spherically symmetric. -# struc = "mor-data/11426_dyn_151.pdb" +struc = "mor-data/11426_dyn_151.pdb" -# xtc = "mor-data/11423_trj_151.xtc" +xtc = "mor-data/11423_trj_151.xtc" # # Here we locate the sodium site which has the highest probability # # The density grid is written (write=True) using the default density conversion "Angstrom^{-3}" in MDAnalysis -# atom_feat, atom_data = read_atom_features( -# structure_input=struc, -# xtc_input=xtc, -# top_atoms=1, -# atomgroup="SOD", -# element="Na", -# out_name="11426_dyn_151" -# ) +atom_feat, atom_data = read_atom_features( + structure_input=struc, + xtc_input=xtc, + top_atoms=1, + atomgroup="SOD", + element="Na", + out_name="11426_dyn_151" +) # If we have already obtained the grid, we can speed up featurization by reading it in.