Cherry-pick memory usage improvements PRs #27700

Merged
92 changes: 14 additions & 78 deletions src/bindings/python/src/pyopenvino/core/core.cpp
@@ -496,97 +496,33 @@ void regclass_Core(py::module m) {
:rtype: openvino.runtime.Model
)");

cls.def(
"import_model",
[](ov::Core& self,
const std::string& model_stream,
const std::string& device_name,
const std::map<std::string, py::object>& properties) {
auto _properties = Common::utils::properties_to_any_map(properties);
py::gil_scoped_release release;
std::stringstream _stream;
_stream << model_stream;
return self.import_model(_stream, device_name, _properties);
},
py::arg("model_stream"),
py::arg("device_name"),
py::arg("properties"),
R"(
Imports a compiled model from a previously exported one.

GIL is released while running this function.

:param model_stream: Input stream, containing a model previously exported, using export_model method.
:type model_stream: bytes
:param device_name: Name of device to which compiled model is imported.
Note: if device_name is not used to compile the original model, an exception is thrown.
:type device_name: str
:param properties: Optional map of pairs: (property name, property value) relevant only for this load operation.
:type properties: dict, optional
:return: A compiled model.
:rtype: openvino.runtime.CompiledModel

:Example:
.. code-block:: python

user_stream = compiled.export_model()

with open('./my_model', 'wb') as f:
f.write(user_stream)

# ...

new_compiled = core.import_model(user_stream, "CPU")
)");

// keep as second one to solve overload resolution problem
cls.def(
"import_model",
[](ov::Core& self,
const py::object& model_stream,
const std::string& device_name,
const std::map<std::string, py::object>& properties) {
const auto _properties = Common::utils::properties_to_any_map(properties);
if (!(py::isinstance(model_stream, pybind11::module::import("io").attr("BytesIO")))) {
if (!(py::isinstance(model_stream, pybind11::module::import("io").attr("BytesIO"))) &&
!py::isinstance<py::bytes>(model_stream)) {
throw py::type_error("CompiledModel.import_model(model_stream) incompatible function argument: "
"`model_stream` must be an io.BytesIO object but " +
"`model_stream` must be an io.BytesIO object or bytes but " +
(std::string)(py::repr(model_stream)) + "` provided");
}
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> distr(1000, 9999);
std::string filename = "model_stream_" + std::to_string(distr(gen)) + ".txt";
std::fstream _stream(filename, std::ios::out | std::ios::binary);
model_stream.attr("seek")(0); // Always rewind stream!
if (_stream.is_open()) {
const py::bytes data = model_stream.attr("read")();
// convert the Python bytes object to C++ string
char* buffer;
Py_ssize_t length;
PYBIND11_BYTES_AS_STRING_AND_SIZE(data.ptr(), &buffer, &length);
_stream.write(buffer, length);
_stream.close();
} else {
OPENVINO_THROW("Failed to open temporary file for model stream");
}
py::buffer_info info;

ov::CompiledModel result;
std::fstream _fstream(filename, std::ios::in | std::ios::binary);
if (_fstream.is_open()) {
py::gil_scoped_release release;
result = self.import_model(_fstream, device_name, _properties);
_fstream.close();
if (std::remove(filename.c_str()) != 0) {
const std::string abs_path =
py::module_::import("os").attr("getcwd")().cast<std::string>() + "/" + filename;
const std::string warning_message = "Temporary file " + abs_path + " failed to delete!";
PyErr_WarnEx(PyExc_RuntimeWarning, warning_message.c_str(), 1);
}
if (py::isinstance(model_stream, pybind11::module::import("io").attr("BytesIO"))) {
model_stream.attr("seek")(0);
info = py::buffer(model_stream.attr("getbuffer")()).request();
} else {
OPENVINO_THROW("Failed to open temporary file for model stream");
info = py::buffer(model_stream).request();
}

return result;
Common::utils::MemoryBuffer mb(reinterpret_cast<char*>(info.ptr), info.size);
std::istream stream(&mb);

py::gil_scoped_release release;
return self.import_model(stream, device_name, _properties);
},
py::arg("model_stream"),
py::arg("device_name"),
@@ -601,7 +537,7 @@ void regclass_Core(py::module m) {


:param model_stream: Input stream, containing a model previously exported, using export_model method.
:type model_stream: io.BytesIO
:type model_stream: Union[io.BytesIO, bytes]
:param device_name: Name of device to which compiled model is imported.
Note: if device_name is not used to compile the original model, an exception is thrown.
:type device_name: str
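For context, the C++ API that this binding wraps, `ov::Core::import_model`, takes a `std::istream`, which is why the temporary-file round trip can be dropped once the Python buffer is exposed as an in-memory stream. A minimal C++ sketch of the same flow (file name `model.blob` is an assumption, not part of this PR):

```cpp
#include <fstream>
#include <iterator>
#include <sstream>
#include <string>

#include <openvino/runtime/core.hpp>

int main() {
    // Read a blob previously written by ov::CompiledModel::export_model().
    std::ifstream file("model.blob", std::ios::binary);
    std::string blob((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

    // Wrap the bytes in an in-memory, seekable stream and import directly,
    // mirroring what the Python binding now does instead of writing a temp file.
    ov::Core core;
    std::istringstream stream(blob);
    ov::CompiledModel compiled = core.import_model(stream, "CPU");
    return 0;
}
```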
9 changes: 1 addition & 8 deletions src/bindings/python/src/pyopenvino/frontend/frontend.cpp
@@ -20,13 +20,6 @@ namespace py = pybind11;

using namespace ov::frontend;

class MemoryBuffer : public std::streambuf {
public:
MemoryBuffer(char* data, std::size_t size) {
setg(data, data, data + size);
}
};

void regclass_frontend_FrontEnd(py::module m) {
py::class_<FrontEnd, std::shared_ptr<FrontEnd>> fem(m, "FrontEnd", py::dynamic_attr(), py::module_local());
fem.doc() = "openvino.frontend.FrontEnd wraps ov::frontend::FrontEnd";
@@ -57,7 +50,7 @@ void regclass_frontend_FrontEnd(py::module m) {
} else if (py::isinstance(py_obj, pybind11::module::import("io").attr("BytesIO"))) {
// support of BytesIO
py::buffer_info info = py::buffer(py_obj.attr("getbuffer")()).request();
MemoryBuffer mb(reinterpret_cast<char*>(info.ptr), info.size);
Common::utils::MemoryBuffer mb(reinterpret_cast<char*>(info.ptr), info.size);
std::istream _istream(&mb);
return self.load(&_istream, enable_mmap);
} else {
31 changes: 31 additions & 0 deletions src/bindings/python/src/pyopenvino/utils/utils.hpp
@@ -32,6 +32,37 @@ namespace py = pybind11;

namespace Common {
namespace utils {
class MemoryBuffer : public std::streambuf {
public:
MemoryBuffer(char* data, std::size_t size) {
setg(data, data, data + size);
}

protected:
pos_type seekoff(off_type off,
std::ios_base::seekdir dir,
std::ios_base::openmode which = std::ios_base::in) override {
switch (dir) {
case std::ios_base::beg:
setg(eback(), eback() + off, egptr());
break;
case std::ios_base::end:
setg(eback(), egptr() + off, egptr());
break;
case std::ios_base::cur:
setg(eback(), gptr() + off, egptr());
break;
default:
return pos_type(off_type(-1));
}
return (gptr() < eback() || gptr() > egptr()) ? pos_type(off_type(-1)) : pos_type(gptr() - eback());
}

pos_type seekpos(pos_type pos, std::ios_base::openmode which) override {
return seekoff(pos, std::ios_base::beg, which);
}
};

enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL, PARTIAL_SHAPE };

struct EmptyList {};
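The shared `MemoryBuffer` above only exposes an existing byte range as a readable, seekable `std::istream`; it neither owns nor copies the data, which is what removes the extra allocations and the temporary file. A small usage sketch (the include path is an assumption based on this repository's layout):

```cpp
#include <cassert>
#include <istream>
#include <vector>

#include "pyopenvino/utils/utils.hpp"  // assumed include path for the shared MemoryBuffer

int main() {
    std::vector<char> data = {'O', 'p', 'e', 'n', 'V', 'I', 'N', 'O'};

    // Expose the vector's memory as a stream without copying it.
    Common::utils::MemoryBuffer buffer(data.data(), data.size());
    std::istream stream(&buffer);

    char head[4];
    stream.read(head, 4);   // reads "Open"
    stream.seekg(0);        // rewinding works thanks to seekoff/seekpos
    assert(stream.tellg() == 0);
    return 0;
}
```

Consumers of the stream may seek within it while importing, which is presumably why `seekoff` and `seekpos` are implemented here rather than relying on the default `std::streambuf` behaviour.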
10 changes: 10 additions & 0 deletions src/inference/src/cpp/compiled_model.cpp
@@ -8,6 +8,10 @@
#include "openvino/runtime/icompiled_model.hpp"
#include "openvino/runtime/properties.hpp"

#if defined(OPENVINO_GNU_LIBC) && !defined(__ANDROID__)
# include <malloc.h>
#endif

#define OV_COMPILED_MODEL_CALL_STATEMENT(...) \
if (_impl == nullptr) \
OPENVINO_THROW("CompiledModel was not initialized."); \
@@ -23,6 +27,12 @@ namespace ov {

CompiledModel::~CompiledModel() {
_impl = {};
#if defined(OPENVINO_GNU_LIBC) && !defined(__ANDROID__)
// Linux memory manager doesn't return system memory immediately after release.
// It depends on memory chunk size and allocation history.
// Try to return memory from the process to the system now, to reduce memory usage instead of waiting for the end of the process.
malloc_trim(0);
#endif
}

CompiledModel::CompiledModel(const std::shared_ptr<ov::ICompiledModel>& impl, const std::shared_ptr<void>& so)
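The destructor comment summarizes the glibc behaviour this change works around: freed heap memory is not necessarily handed back to the OS right away. A standalone sketch of the same pattern (using the public `__GLIBC__` macro as a stand-in for OpenVINO's internal `OPENVINO_GNU_LIBC` guard):

```cpp
#include <cstdlib>
#include <vector>

#if defined(__GLIBC__) && !defined(__ANDROID__)
#    include <malloc.h>
#endif

int main() {
    // Many heap-sized allocations land in glibc's arenas; after free()
    // the pages often stay assigned to the process.
    std::vector<void*> blocks;
    for (int i = 0; i < 4096; ++i)
        blocks.push_back(std::malloc(64 * 1024));
    for (void* p : blocks)
        std::free(p);

#if defined(__GLIBC__) && !defined(__ANDROID__)
    // Ask the allocator to return unused heap memory to the kernel now,
    // instead of waiting until the process exits.
    malloc_trim(0);
#endif
    return 0;
}
```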