Skip to content

Commit

Permalink
Add ppi embedding api
Browse files Browse the repository at this point in the history
  • Loading branch information
anton-bushuiev committed Dec 28, 2024
1 parent 0b49195 commit 5d23afa
Show file tree
Hide file tree
Showing 4 changed files with 255 additions and 122 deletions.
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,18 @@ pip install -U torch --index-url https://download.pytorch.org/whl/rocm6.0
```python
import torch
from ppiformer.tasks.node import DDGPPIformer
from ppiformer.utils.api import download_from_zenodo, predict_ddg
from ppiformer.utils.api import download_from_zenodo, predict_ddg, embed
from ppiformer.definitions import PPIFORMER_WEIGHTS_DIR, PPIFORMER_TEST_DATA_DIR

# Download the weights
download_from_zenodo('weights.zip')
```

### Predict ddG for a PPI upon mutation

PPIformer was fine-tuned on the SKEMPI v2.0 dataset via log odds. The fine-tuned models can be used to predict the binding energy changes (ddG) for a PPI upon mutation.

```python
# Load the ensemble of fine-tuned models
device = 'cuda' if torch.cuda.is_available() else 'cpu'
models = [DDGPPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / f'ddg_regression/{i}.ckpt', map_location=torch.device('cpu')).eval() for i in range(3)]
Expand All @@ -72,6 +78,25 @@ ddg
> tensor([-0.3708, 1.5188, 1.1482])
```

### Embed PPI

PPIformer was pre-trained using structural masked modeling. The pre-trained model can be used to obtain PPI embeddings, similar to [BERT embeddings](https://arxiv.org/abs/1810.04805) in natural language processing.

```python
from ppiformer.model.ppiformer import PPIformer

# Load the pre-trained model
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = PPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / 'masked_modeling.ckpt', map_location=torch.device('cpu'))
model = model.to(device).eval()

# Specify input
ppi_path = PPIFORMER_TEST_DATA_DIR / '1bui_A_C.pdb' # PDB or PPIRef file (see https://ppiref.readthedocs.io/en/latest/extracting_ppis.html)

# Embed (get the final type-0 features). Here, this yields a 128-dimensional embedding for each of the 124 amino acids in the PPI
embedding = embed(model, ppi_path)
embedding.shape
> torch.Size([124, 128])
```

## Training and testing

To train and validate PPIformer, please see `PPIformer/scripts/README.md`. To test the model and reproduce the results from the paper, please see `PPIformer/notebooks/test.ipynb`.
Expand Down
119 changes: 0 additions & 119 deletions notebooks/demo_ddg_minimal.ipynb

This file was deleted.

162 changes: 162 additions & 0 deletions notebooks/demo_readme.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import torch\n",
"from ppiformer.tasks.node import DDGPPIformer\n",
"from ppiformer.model.ppiformer import PPIformer\n",
"from ppiformer.utils.api import download_from_zenodo, predict_ddg, embed\n",
"from ppiformer.definitions import PPIFORMER_WEIGHTS_DIR, PPIFORMER_TEST_DATA_DIR"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading to /Users/anton/dev/PPIformer/weights: 100%|██████████| 535M/535M [00:30<00:00, 17.5MiB/s] \n",
"Extracting: 100%|██████████| 5/5 [00:03<00:00, 1.54files/s]\n"
]
}
],
"source": [
"# Download the weights\n",
"download_from_zenodo('weights.zip')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Process 29715 preparing data: 100%|██████████| 1/1 [00:00<00:00, 6.79it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 PPIs loaded: PPIInMemoryDataset(, n_muts=3)\n"
]
},
{
"data": {
"text/plain": [
"tensor([-0.3708, 1.5188, 1.1482])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the ensamble of fine-tuned models\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"models = [DDGPPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / f'ddg_regression/{i}.ckpt', map_location=torch.device('cpu')).eval() for i in range(3)]\n",
"models = [model.to(device) for model in models]\n",
"\n",
"# Specify input\n",
"ppi_path = PPIFORMER_TEST_DATA_DIR / '1bui_A_C.pdb' # PDB or PPIRef file (see https://ppiref.readthedocs.io/en/latest/extracting_ppis.html)\n",
"muts = ['SC16A', 'FC47A', 'SC16A,FC47A'] # List of single- or multi-point mutations\n",
"\n",
"# Predict\n",
"ddg = predict_ddg(models, ppi_path, muts)\n",
"ddg"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/anton/miniconda3/envs/ppiformer/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:197: UserWarning: Attribute 'encoder' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['encoder'])`.\n",
" rank_zero_warn(\n",
"/Users/anton/miniconda3/envs/ppiformer/lib/python3.10/site-packages/pytorch_lightning/utilities/parsing.py:197: UserWarning: Attribute 'classifier' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['classifier'])`.\n",
" rank_zero_warn(\n",
"Process 29715 preparing data: 100%|██████████| 1/1 [00:00<00:00, 7.44it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 PPIs loaded: PPIInMemoryDataset()\n"
]
},
{
"data": {
"text/plain": [
"torch.Size([124, 128])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the pre-trained model\n",
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
"model = PPIformer.load_from_checkpoint(PPIFORMER_WEIGHTS_DIR / 'masked_modeling.ckpt', map_location=torch.device('cpu'))\n",
"model = model.to(device).eval()\n",
"\n",
"# Specify input\n",
"ppi_path = PPIFORMER_TEST_DATA_DIR / '1bui_A_C.pdb' # PDB or PPIRef file (see https://ppiref.readthedocs.io/en/latest/extracting_ppis.html)\n",
"\n",
"# Embed (get the final type-0 features)\n",
"h = embed(model, ppi_path)\n",
"h.shape\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ppiformer",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 5d23afa

Please sign in to comment.