Skip to content

Preetha/optimize bin file #618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: ovep-develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 58 additions & 2 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,20 @@ BackendManager::BackendManager(SessionContext& session_context,
ptr_stream_t model_stream;
std::unique_ptr<onnx::ModelProto> model_proto;
if (subgraph_context_.is_ep_ctx_graph) {
model_stream = ep_ctx_handle_.GetModelBlobStream(session_context_.so_context_file_path, subgraph);
std::string filename;
if (!session_context_.so_context_file_path.empty()) {
filename = session_context_.so_context_file_path.filename().string();
} else if (!session_context_.onnx_model_path_name.empty()) {
filename = session_context_.onnx_model_path_name.filename().string();
} else {
ORT_THROW("Either Session_options ep.context_file_path or model path must be specified");
}
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(filename);
auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name;
model_stream = ep_ctx_handle_.GetModelBlobStream(shared_context_,
session_context_.so_context_file_path,
subgraph_name,
subgraph);
} else {
model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
}
Expand All @@ -98,6 +111,8 @@ BackendManager::BackendManager(SessionContext& session_context,
sw.mapped_weights = std::make_unique<SharedContext::SharedWeights::WeightsFile>(weight_filename);
}
backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights);
} else {
ORT_THROW(" External weight file is not found ");
}
}

Expand Down Expand Up @@ -198,6 +213,19 @@ BackendManager::BackendManager(SessionContext& session_context,
}
}

// Derives a base name from `filename` by cutting it at whichever marker occurs
// first: the first '.' or the first occurrence of "_ctx". The marker and
// everything after it are dropped. When neither marker is present the name is
// returned unchanged.
std::string BackendManager::stripAfterFirstDot(std::string filename) {
  const size_t first_dot = filename.find('.');
  const size_t first_ctx = filename.find("_ctx");

  // npos is the largest size_t value, so the smaller of the two results is the
  // earliest marker actually found, or npos when neither was found.
  const size_t cut = first_dot < first_ctx ? first_dot : first_ctx;
  if (cut != std::string::npos) {
    filename.resize(cut);  // keep only the characters before the marker
  }
  return filename;
}

// Call EPContext model exporter here if the provider option for exporting
// precompiled blob is set. If that's the case:
// By default, create model in embed mode where the blob stream is exported as data within
Expand All @@ -215,7 +243,35 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
// If not embed_mode, dump the blob here and only pass on the path to the blob
std::string model_blob_str;
auto compiled_model = concrete_backend_->GetOVCompiledModel();
if (session_context_.so_context_embed_mode) { // Internal blob
if (session_context_.so_share_ep_contexts) {
std::ostringstream model_blob_stream;
compiled_model.export_model(model_blob_stream);

auto& subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
std::string filename = "";
if (!session_context_.so_context_file_path.empty()) {
filename = session_context_.so_context_file_path.filename().string();
} else if (!session_context_.onnx_model_path_name.empty()) {
filename = session_context_.onnx_model_path_name.filename().string();
} else {
ORT_THROW("Either Session_options ep.context_file_path or model path must be specified");
}
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(filename);
auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name;
sw::SubgraphMetadata::Map::key_type key{subgraph_name};
sw::SubgraphMetadata::Map::mapped_type value{};

auto& bin_file = shared_context_.shared_weights.shared_bin_file.bin_file_;
if (!subgraph_metadata.contains(key) && bin_file.is_open()) {
value.epctx_offset = static_cast<uint64_t>(bin_file.tellp());
bin_file << model_blob_stream.str();
value.epctx_length = static_cast<size_t>(static_cast<uint64_t>(bin_file.tellp()) - value.epctx_offset);
subgraph_metadata.emplace(key, std::move(value));
}

model_blob_str = shared_context_.shared_weights.shared_bin_file.shared_bin_filename.filename().string();
} else if (session_context_.so_context_embed_mode) {
// Internal blob
std::ostringstream model_blob_stream;
compiled_model.export_model(model_blob_stream);
model_blob_str = std::move(model_blob_stream).str();
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ class BackendManager {
ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto,
const std::vector<std::vector<int64_t>>& input_shapes);

std::string stripAfterFirstDot(std::string filename);

std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_;
std::shared_ptr<IBackend> concrete_backend_;
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
Expand Down
104 changes: 3 additions & 101 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,105 +19,6 @@ using Exception = ov::Exception;

namespace onnxruntime {
namespace openvino_ep {

// Opens `filename` for binary reading and stores its total size in weights_size_.
// A stream failure while enabling exceptions or seeking is reported through
// ORT_THROW, with the file name and the stream's error text.
SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename)
    : file_(filename, std::ios::in | std::ios::binary) {
  try {
    // From here on failbit/badbit raise std::ifstream::failure.
    file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
    // Seek to the end; the position reported there is the file size.
    file_.seekg(0, std::ios::end);
    weights_size_ = file_.tellg();
  } catch (std::ifstream::failure& stream_error) {
    ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", stream_error.what());
  }
}

// Reads `size` bytes starting `file_offset` bytes into the weights file and
// stores them at `data`. The requested range is checked against weights_size_
// (the size recorded by the constructor) before anything is read.
void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset, void* data, size_t size) {
  // The last clause subtracts instead of adding so the range test cannot wrap around.
  ORT_ENFORCE(file_offset < weights_size_ && size <= weights_size_ && (file_offset <= weights_size_ - size), "Error: File offset is out of bounds.");

  char* const destination = reinterpret_cast<char*>(data);
  file_.seekg(file_offset);
  file_.read(destination, size);
}

// Writes `metadata` to `stream` as text. The entry count comes first; then, for
// every entry: name, location, data offset, size, number of dimensions, each
// dimension, and the element type. Every value after the count is preceded by a
// line break so that each one sits on its own line.
// Throws via ORT_THROW / ORT_ENFORCE if writing fails.
std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
  // Starts a new line and writes a single value on it.
  const auto put_on_new_line = [&stream](const auto& item) {
    stream << std::endl
           << item;
  };

  try {
    stream << metadata.size();

    for (const auto& entry : metadata) {
      const auto& tensor_key = entry.first;
      const auto& tensor_info = entry.second;

      put_on_new_line(tensor_key.name);
      put_on_new_line(tensor_info.location);
      put_on_new_line(tensor_info.data_offset);
      put_on_new_line(tensor_info.size);

      // The dimension count precedes the dimensions so the reader knows how many follow.
      put_on_new_line(tensor_info.dimensions.size());
      for (const auto& extent : tensor_info.dimensions) {
        put_on_new_line(extent);
      }

      put_on_new_line(tensor_info.element_type);
    }
  } catch (const Exception& e) {
    ORT_THROW("Error: Failed to write map data.", e.what());
  } catch (...) {
    ORT_THROW("Error: Failed to write map data.");
  }

  ORT_ENFORCE(stream.good(), "Error: Failed to write map data.");
  return stream;
}

std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::Metadata::Map& metadata) {
size_t map_size{0};
try {
stream >> map_size;

while (!stream.eof()) {
SharedContext::SharedWeights::Metadata::Key key;
SharedContext::SharedWeights::Metadata::Value value;
stream >> key.name;
stream >> value.location;
stream >> value.data_offset;
stream >> value.size;
size_t num_dimensions;
stream >> num_dimensions;

if (stream.fail()) {
ORT_THROW("Error: Failed to read num_dimensions from stream.");
}

constexpr size_t MAX_SAFE_DIMENSIONS = 1024;

size_t safe_num_dimensions = num_dimensions;

if (num_dimensions == 0 || safe_num_dimensions > MAX_SAFE_DIMENSIONS) {
ORT_THROW("Invalid number of dimensions provided.");
}
try {
value.dimensions.resize(safe_num_dimensions);
} catch (const std::bad_alloc&) {
ORT_THROW("Error: Memory allocation failed while resizing dimensions.");
}

for (auto& dim : value.dimensions) {
stream >> dim;
}
stream >> value.element_type;
metadata.emplace(key, value);
}
} catch (const Exception& e) {
ORT_THROW("Error: Failed to read map data.", e.what());
} catch (...) {
ORT_THROW("Error: Failed to read map data.");
}

ORT_ENFORCE(metadata.size() == map_size, "Error: Inconsistent map data.");

return stream;
}

namespace backend_utils {

bool IsDebugEnabled() {
Expand Down Expand Up @@ -402,7 +303,9 @@ void CreateOVTensors(const std::string& device_name,
SharedContext::SharedWeights::Metadata::Map& metadata_map,
SharedContext::SharedWeights::WeightsFile& weights) {
for (auto& [key, value] : metadata_map) {
if (value.tensor) continue;
if (value.tensor) {
continue;
}

// Get element data type
auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
Expand All @@ -414,7 +317,6 @@ void CreateOVTensors(const std::string& device_name,
// Use remote tensors
auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);

// Copy data to remote tensor
weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
Expand Down
88 changes: 83 additions & 5 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <filesystem>
#include <memory>
#include "core/common/common.h"
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/ov_interface.h"

namespace onnxruntime {
Expand All @@ -25,6 +26,17 @@ class SharedContext : public WeakSingleton<SharedContext> {
public:
SharedContext() : OVCore_(OVCore::Get()) {}
struct SharedWeights {
// NOTE(review): presumably the record stored at the start of the shared bin file — confirm against the reader/writer.
struct Header {
  uint32_t bin_version{1};    // defaults to 1
  uint64_t footer_offset{0};  // defaults to 0; presumably where the footer starts in the file — confirm
} header_;
// Offsets and lengths of two regions: the subgraph data and the metadata.
// NOTE(review): presumably these locate the regions inside the shared bin file — confirm against the reader/writer.
// Every field starts at 0 so a default-constructed Footer never holds
// indeterminate values, matching how Header provides defaults.
struct Footer {
  uint64_t subgraph_offset = 0;
  size_t subgraph_length = 0;
  uint64_t metadata_offset = 0;
  size_t metadata_length = 0;
} footer_;

struct Metadata {
struct Key {
std::string name;
Expand All @@ -37,16 +49,37 @@ class SharedContext : public WeakSingleton<SharedContext> {
};
struct Value {
std::string location;
unsigned int data_offset;
unsigned int size;
uint32_t data_offset;
uint32_t size;
std::vector<size_t> dimensions;
std::int32_t element_type;
std::shared_ptr<ov::Tensor> tensor;
};
using Map = std::unordered_map<Key, Value, Hash>;
friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata);
friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata);
};
void writeMetadataToBinaryFile(SharedContext& shared_context, const Metadata::Map& metadata);
void readMetadataFromBinaryFile(SharedContext& shared_context, Metadata::Map& metadata);
} metadata_;

// Lookup table types describing, per subgraph name, an offset/length pair
// (epctx_offset / epctx_length).
// NOTE(review): presumably the pair locates that subgraph's compiled blob inside the shared bin file — confirm.
struct SubgraphMetadata {
  // Map key: the subgraph name. Two keys are equal when their names are equal.
  struct Key {
    std::string name;
    bool operator==(const Key&) const = default;
  };

  // Hashes a Key by hashing its name.
  struct Hash {
    std::size_t operator()(const Key& key) const noexcept {
      return std::hash<std::string>{}(key.name);
    }
  };

  // Mapped value: an offset and a length.
  struct Value {
    uint64_t epctx_offset;
    size_t epctx_length;
  };

  using Map = std::unordered_map<Key, Value, Hash>;

  // Declared here, defined elsewhere.
  void writeSubgraphDataToBinaryFile(SharedContext& shared_context,
                                     const SubgraphMetadata::Map& subgraph_metadata);
  void readSubgraphDataFromBinaryFile(SharedContext& shared_context,
                                      SubgraphMetadata::Map& subgraph_metadata);
} subgraph_metadata_;

struct WeightsFile {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(WeightsFile);
Expand All @@ -60,9 +93,54 @@ class SharedContext : public WeakSingleton<SharedContext> {
size_t weights_size_;
};

// Read/write handle on the shared bin file, together with the file size
// measured when it was opened.
struct SharedBinFile {
  // NOTE(review): openBinFile does not assign this member (its parameter has the
  // same name and shadows it) — presumably it is set by the caller; confirm.
  fs::path shared_bin_filename;
  std::fstream bin_file_;
  size_t bin_size_ = 0;  // file size in bytes as measured by openBinFile; 0 until then

  SharedBinFile() = default;
  ~SharedBinFile() {
    if (bin_file_.is_open()) {
      bin_file_.close();
    }
  }

  // Opens `shared_bin_filename` for binary reading and writing and records its
  // size in bin_size_. An empty file is created first when the path does not
  // exist. If the stream is already open, only the existence and permission
  // checks run and the stream is left untouched.
  //
  // Throws via ORT_THROW when the file cannot be created, when the owner lacks
  // read or write permission, or when opening or measuring the file fails.
  void openBinFile(const fs::path shared_bin_filename) {
    if (!fs::exists(shared_bin_filename)) {
      std::ofstream createFile(shared_bin_filename, std::ios::binary);
      if (!createFile) {
        ORT_THROW("Failed to create the shared bin file!");
      }
      createFile.close();
    }

    // Both owner-read and owner-write are required because the stream is opened in/out.
    const fs::perms file_perms = fs::status(shared_bin_filename).permissions();
    if ((file_perms & fs::perms::owner_read) == fs::perms::none ||
        (file_perms & fs::perms::owner_write) == fs::perms::none) {
      // Pass the pieces as separate arguments: fs::path has no operator+ taking a string literal.
      ORT_THROW("Failed to open shared bin file! Insufficient permissions for file ",
                shared_bin_filename.string(), ".");
    }

    if (bin_file_.is_open()) {
      return;  // already open: do not reopen
    }

    bin_file_.open(shared_bin_filename, std::ios::in | std::ios::out | std::ios::binary);
    if (!bin_file_) {
      ORT_THROW("Failed to open shared bin file!");
    }

    // Measure the size by seeking to the end, then rewind for subsequent reads.
    const std::streamoff end_pos = bin_file_.seekg(0, std::ios::end).tellg();
    bin_file_.seekg(0, std::ios::beg);

    // Validate before storing: tellg() reports -1 on failure, which would
    // otherwise become a huge unsigned size.
    if (!bin_file_ || end_pos < 0) {
      ORT_THROW("Failed to open shared bin file!");
    }
    bin_size_ = static_cast<size_t>(end_pos);
  }

  // Declared here, defined elsewhere.
  void readBinFile(SharedContext& shared_context_);
  void dumpBinFile(SharedContext& shared_context_);
} shared_bin_file;

fs::path external_weight_filename;
std::unique_ptr<WeightsFile> mapped_weights;
Metadata::Map metadata;
SubgraphMetadata::Map subgraph_metadata;
} shared_weights;
};

Expand Down
32 changes: 30 additions & 2 deletions onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@
return Status::OK();
}

std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(SharedContext& shared_context_,
const std::filesystem::path& so_context_file_path,
const std::string& subgraph_name,
const GraphViewer& graph_viewer) const {
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
auto node = graph_viewer.GetNode(first_index);
ORT_ENFORCE(node != nullptr);
Expand All @@ -121,7 +124,32 @@
}
blob_filepath = blob_filepath.parent_path() / ep_cache_context;
ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string());
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
if (blob_filepath == shared_context_.shared_weights.shared_bin_file.shared_bin_filename) {
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from Shared bin file - " << blob_filepath;
auto& sb = shared_context_.shared_weights.shared_bin_file;
// check if size of bin file is greater than the header as it gets written at the beginning

Check notice on line 130 in onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

View workflow job for this annotation

GitHub Actions / misspell

[misspell] onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc#L130

"begining" is a misspelling of "beginning"
Raw output
./onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc:130:88: "begining" is a misspelling of "beginning"
ORT_ENFORCE(sb.bin_size_ > 8, " Bin file is empty. Regenerate the epctx model. Bin file path : ", blob_filepath.string());
auto subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
using Key = SharedContext::SharedWeights::SubgraphMetadata::Key;
const auto subgraph_key = Key{subgraph_name};
auto it = subgraph_metadata.find(subgraph_key);
if (it != subgraph_metadata.end()) {
auto& value = it->second;
if (value.epctx_offset < sb.bin_size_ && value.epctx_length <= sb.bin_size_ &&
(value.epctx_offset <= sb.bin_size_ - value.epctx_length)) {
sb.bin_file_.seekg(value.epctx_offset); // Move to the specified offset
std::string buffer(value.epctx_length, '\0'); // preallocate space
sb.bin_file_.read(&buffer[0], value.epctx_length); // Read the specified length
// Adjust string size in case of a short read
buffer.resize(sb.bin_file_.gcount());
result.reset((std::istream*)new std::istringstream(buffer));
}
}
ORT_ENFORCE(result != nullptr, " Epctx blob is not read. Check bin file correctness from Bin path: ",
blob_filepath.string());
} else {
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
}
}
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
return result;
Expand Down
Loading
Loading