Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Openvino #228

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Bonito

[![PyPI version](https://badge.fury.io/py/ont-bonito.svg)](https://badge.fury.io/py/ont-bonito)
[![PyPI version](https://badge.fury.io/py/ont-bonito.svg)](https://badge.fury.io/py/ont-bonito)
[![py36](https://img.shields.io/badge/python-3.6-brightgreen.svg)](https://img.shields.io/badge/python-3.6-brightgreen.svg)
[![py37](https://img.shields.io/badge/python-3.7-brightgreen.svg)](https://img.shields.io/badge/python-3.7-brightgreen.svg)
[![py38](https://img.shields.io/badge/python-3.8-brightgreen.svg)](https://img.shields.io/badge/python-3.8-brightgreen.svg)
Expand Down Expand Up @@ -36,6 +36,12 @@ The default `ont-bonito` package is built against CUDA 10.2 however CUDA 11.1 an
$ pip install -f https://download.pytorch.org/whl/torch_stable.html ont-bonito-cuda111
```

To optimize inference on CPU with Intel OpenVINO, use the `--use_openvino` flag:

```bash
$ bonito basecaller dna_r9.4.1 --reference reference.mmi --use_openvino --device=cpu /data/reads > basecalls.sam
```

## Modified Bases

Modified base calling is handled by [Remora](https://github.com/nanoporetech/remora).
Expand All @@ -55,7 +61,7 @@ $ bonito basecaller dna_r9.4.1 --save-ctc --reference reference.mmi /data/reads
$ bonito train --directory /data/training/ctc-data /data/training/model-dir
```

In addition to training a new model from scratch you can also easily fine tune one of the pretrained models.
In addition to training a new model from scratch you can also easily fine tune one of the pretrained models.

```bash
bonito train --epochs 1 --lr 5e-4 --pretrained [email protected] --directory /data/training/ctc-data /data/training/fine-tuned-model
Expand All @@ -68,7 +74,7 @@ $ bonito download --training
$ bonito train /data/training/model-dir
```

All training calls use Automatic Mixed Precision to speed up training. To disable this, set the `--no-amp` flag to True.
All training calls use Automatic Mixed Precision to speed up training. To disable this, set the `--no-amp` flag to True.

## Developer Quickstart

Expand All @@ -82,6 +88,11 @@ $ source venv3/bin/activate
(venv3) $ python setup.py develop
```

To build with OpenVINO backend:
```bash
(venv3) $ pip install -e .[openvino]
```

## Interface

- `bonito view` - view a model architecture for a given `.toml` file and the number of parameters in the network.
Expand Down
8 changes: 7 additions & 1 deletion bonito/cli/basecaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from itertools import islice as take
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

import bonito.openvino.basecall
from bonito.aligner import align_map, Aligner
from bonito.io import CTCWriter, Writer, biofmt
from bonito.mod_util import call_mods, load_mods_model
Expand Down Expand Up @@ -40,6 +41,7 @@ def main(args):
batchsize=args.batchsize,
quantize=args.quantize,
use_koi=True,
use_openvino=args.use_openvino,
)
except FileNotFoundError:
sys.stderr.write(f"> error: failed to load {args.model_directory}\n")
Expand All @@ -50,7 +52,10 @@ def main(args):
if args.verbose:
sys.stderr.write(f"> model basecaller params: {model.config['basecaller']}\n")

basecall = load_symbol(args.model_directory, "basecall")
if args.use_openvino:
basecall = bonito.openvino.basecall.basecall
else:
basecall = load_symbol(args.model_directory, "basecall")

mods_model = None
if args.modified_base_model is not None or args.modified_bases is not None:
Expand Down Expand Up @@ -173,4 +178,5 @@ def argparser():
parser.add_argument("--max-reads", default=0, type=int)
parser.add_argument("--alignment-threads", default=8, type=int)
parser.add_argument('-v', '--verbose', action='count', default=0)
parser.add_argument("--use_openvino", action="store_true", default=False)
return parser
3 changes: 2 additions & 1 deletion bonito/cli/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def main(args):
seqs = []

print("* loading model", w)
model = load_model(args.model_directory, args.device, weights=w)
model = load_model(args.model_directory, args.device, weights=w, use_openvino=args.use_openvino)

print("* calling")
t0 = time.perf_counter()
Expand Down Expand Up @@ -109,4 +109,5 @@ def argparser():
parser.add_argument("--beamsize", default=5, type=int)
parser.add_argument("--poa", action="store_true", default=False)
parser.add_argument("--min-coverage", default=0.5, type=float)
parser.add_argument("--use_openvino", action="store_true", default=False)
return parser
5 changes: 3 additions & 2 deletions bonito/crf/basecall.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ def compute_scores(model, batch, beam_width=32, beam_cut=100.0, scale=1.0, offse
"""
with torch.inference_mode():
device = next(model.parameters()).device
dtype = torch.float16 if half_supported() else torch.float32
dtype = torch.float16 if device != torch.device('cpu') and half_supported() else torch.float32
scores = model(batch.to(dtype).to(device))
if reverse:
scores = model.seqdist.reverse_complement(scores)
# beam_search expects scores in FP16 precision
sequence, qstring, moves = beam_search(
scores, beam_width=beam_width, beam_cut=beam_cut,
scores.to(torch.float16), beam_width=beam_width, beam_cut=beam_cut,
scale=scale, offset=offset, blank_score=blank_score
)
return {
Expand Down
1 change: 1 addition & 0 deletions bonito/crf/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def decode(self, x):
def loss(self, scores, targets, target_lengths, **kwargs):
return self.seqdist.ctc_loss(scores.to(torch.float32), targets, target_lengths, **kwargs)


class Model(SeqdistModel):
iiSeymour marked this conversation as resolved.
Show resolved Hide resolved

def __init__(self, config):
Expand Down
3 changes: 2 additions & 1 deletion bonito/ctc/basecall.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def compute_scores(model, batch):
"""
with torch.no_grad():
device = next(model.parameters()).device
chunks = batch.to(torch.half).to(device)
chunks = batch.to(torch.half) if device != torch.device('cpu') and half_supported() else batch
chunks = chunks.to(device)
probs = permute(model(chunks), 'TNC', 'NTC')
return probs.cpu().to(torch.float32)

Expand Down
2 changes: 1 addition & 1 deletion bonito/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def forward(self, x):
if self.blank_score is not None and self.expand_blanks:
T, N, C = scores.shape
scores = torch.nn.functional.pad(
scores.view(T, N, C // self.n_base, self.n_base),
scores.view(T, N, -1, self.n_base),
(1, 0, 0, 0, 0, 0, 0, 0),
value=self.blank_score
).view(T, N, -1)
Expand Down
47 changes: 47 additions & 0 deletions bonito/openvino/basecall.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import torch
from crf_beam import beam_search
from bonito.crf.basecall import stitch_results
from bonito.multiprocessing import thread_iter, thread_map
from bonito.util import chunk, stitch, batchify, unbatchify


def compute_scores(model, batch):
    """Run the network on a batch and derive decoding inputs.

    Computes the raw network scores plus the forward/backward scores from
    the model's sequence distribution, and the posterior probabilities as
    a softmax over their sum.

    Returns a dict with keys 'scores', 'bwd' and 'posts', each transposed
    so the batch dimension comes first.
    """
    seqdist = model.seqdist
    raw = model(batch)
    forward = seqdist.forward_scores(raw)
    backward = seqdist.backward_scores(raw)
    posteriors = torch.softmax(forward + backward, dim=-1)
    return dict(
        scores=raw.transpose(0, 1),
        bwd=backward.transpose(0, 1),
        posts=posteriors.transpose(0, 1),
    )


def decode(x, beam_width=32, beam_cut=100.0, scale=1.0, offset=0.0, blank_score=2.0):
    """Beam-search decode one read's score tensors into a called sequence.

    NOTE(review): beam_width / beam_cut / scale / offset / blank_score are
    accepted for signature parity with the CRF decoder but are not forwarded
    to beam_search here -- confirm this is intentional.
    """
    called_seq, called_qual, move_table = beam_search(x['scores'], x['bwd'], x['posts'])
    return dict(sequence=called_seq, qstring=called_qual, moves=move_table)


def basecall(model, reads, chunksize=4000, overlap=100, batchsize=32, reverse=False):
    """Basecall raw reads with the OpenVINO-backed model.

    Builds a lazy, threaded pipeline: reads are split into overlapping
    signal chunks, batched, scored by the network, stitched back into
    per-read score tensors, and finally beam-search decoded.

    NOTE(review): `reverse` is accepted for interface parity with the CRF
    basecaller but is not used here -- confirm this is intentional.
    """
    chunks = thread_iter(
        ((read, 0, len(read.signal)), chunk(torch.from_numpy(read.signal), chunksize, overlap))
        for read in reads
    )

    batches = thread_iter(batchify(chunks, batchsize=batchsize))

    scores = thread_iter(
        (read, compute_scores(model, batch)) for read, batch in batches
    )

    # The inner loop variable is named `read_scores` (not `scores`) so it does
    # not shadow the `scores` pipeline above; the original relied on the
    # outermost iterable of a genexpr being evaluated eagerly, which worked
    # but was easy to break on refactor.
    results = thread_iter(
        (read, stitch_results(read_scores, end - start, chunksize, overlap, model.stride))
        for ((read, start, end), read_scores) in unbatchify(scores)
    )

    # n_thread=48 is a hard-coded decoder thread count inherited from the
    # CUDA basecaller; decoding is CPU-bound here.
    return thread_map(decode, results, n_thread=48)
Loading