Add ppi embedding api

anton-bushuiev · Dec 28, 2024 · 5d23afa · 5d23afa
1 parent 0b49195
commit 5d23afa
Show file tree

Hide file tree

Showing 4 changed files with 255 additions and 122 deletions.
diff --git a/README.md b/README.md
@@ -51,12 +51,18 @@ pip install -U torch --index-url https://download.pytorch.org/whl/rocm6.0
 ```python
 import torch
 from ppiformer.tasks.node import DDGPPIformer
-from ppiformer.utils.api import download_from_zenodo, predict_ddg
+from ppiformer.utils.api import download_from_zenodo, predict_ddg, embed
 from ppiformer.definitions import PPIFORMER_WEIGHTS_DIR, PPIFORMER_TEST_DATA_DIR
 
 # Download the weights
 download_from_zenodo('weights.zip')
+```
+
+### Predict ddG for a PPI upon mutation
 
+PPIformer was fine-tuned on the SKEMPI v2.0 dataset via log odds. The fine-tuned models can be used to predict the binding energy changes (ddG) for a PPI upon mutation.
+
+```python
 # Load the ensamble of fine-tuned models
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 models = [DDGPPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / f'ddg_regression/{i}.ckpt', map_location=torch.device('cpu')).eval() for i in range(3)]
@@ -72,6 +78,25 @@ ddg
 > tensor([-0.3708,  1.5188,  1.1482])
 ```
 
+### Embed PPI
+
+PPIformer was pre-trained using structural masked modeling. The pre-trained model can be used to obtain PPI embeddings, similar to [BERT embeddings](https://arxiv.org/abs/1810.04805) in natural language processing.
+
+```python
+# Load the pre-trained model
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model = PPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / 'masked_modeling.ckpt', map_location=torch.device('cpu'))
+model = model.to(device).eval()
+
+# Specify input
+ppi_path = PPIFORMER_TEST_DATA_DIR / '1bui_A_C.pdb'  # PDB or PPIRef file (see https://ppiref.readthedocs.io/en/latest/extracting_ppis.html)
+
+# Embed (get the final type-0 features). Here, 128-dimensional embedding for each of 124 amino acids in the PPI
+embedding = embed(model, ppi_path)
+embedding.shape
+> torch.Size([124, 128])
+```
+
 ## Training and testing
 
 To train and validate PPIformer, please see `PPIformer/scripts/README.md`. To test the model and reproduce the results from the paper, please see `PPIformer/notebooks/test.ipynb`.

diff --git a/notebooks/demo_ddg_minimal.ipynb b/notebooks/demo_ddg_minimal.ipynb
diff --git a/notebooks/demo_readme.ipynb b/notebooks/demo_readme.ipynb
@@ -0,0 +1,162 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "import torch\n",
+    "from ppiformer.tasks.node import DDGPPIformer\n",
+    "from ppiformer.model.ppiformer import PPIformer\n",
+    "from ppiformer.utils.api import download_from_zenodo, predict_ddg, embed\n",
+    "from ppiformer.definitions import PPIFORMER_WEIGHTS_DIR, PPIFORMER_TEST_DATA_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Downloading to /Users/anton/dev/PPIformer/weights: 100%|██████████| 535M/535M [00:30<00:00, 17.5MiB/s] \n",
+      "Extracting: 100%|██████████| 5/5 [00:03<00:00,  1.54files/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Download the weights\n",
+    "download_from_zenodo('weights.zip')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Process 29715 preparing data: 100%|██████████| 1/1 [00:00<00:00,  6.79it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 PPIs loaded: PPIInMemoryDataset(, n_muts=3)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "tensor([-0.3708,  1.5188,  1.1482])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the ensamble of fine-tuned models\n",
+    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
+    "models = [DDGPPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / f'ddg_regression/{i}.ckpt', map_location=torch.device('cpu')).eval() for i in range(3)]\n",
+    "models = [model.to(device) for model in models]\n",
+    "\n",
+    "# Specify input\n",
+    "ppi_path = PPIFORMER_TEST_DATA_DIR / '1bui_A_C.pdb'  # PDB or PPIRef file (see https://ppiref.readthedocs.io/en/latest/extracting_ppis.html)\n",
+    "muts = ['SC16A', 'FC47A', 'SC16A,FC47A']  # List of single- or multi-point mutations\n",
+    "\n",
+    "# Predict\n",
+    "ddg = predict_ddg(models, ppi_path, muts)\n",
+    "ddg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/anton/miniconda3/envs/ppiformer/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:197: UserWarning: Attribute 'encoder' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['encoder'])`.\n",
+      "  rank_zero_warn(\n",
+      "/Users/anton/miniconda3/envs/ppiformer/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:197: UserWarning: Attribute 'classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['classifier'])`.\n",
+      "  rank_zero_warn(\n",
+      "Process 29715 preparing data: 100%|██████████| 1/1 [00:00<00:00,  7.44it/s]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 PPIs loaded: PPIInMemoryDataset()\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([124, 128])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the pre-trained model\n",
+    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
+    "model = PPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / 'masked_modeling.ckpt', map_location=torch.device('cpu'))\n",
+    "model = model.to(device).eval()\n",
+    "\n",
+    "# Specify input\n",
+    "ppi_path = PPIFORMER_TEST_DATA_DIR / '1bui_A_C.pdb'  # PDB or PPIRef file (see https://ppiref.readthedocs.io/en/latest/extracting_ppis.html)\n",
+    "\n",
+    "# Embed (get the final type-0 features)\n",
+    "h = embed(model, ppi_path)\n",
+    "h.shape\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ppiformer",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}