-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'feature/identity_cpu' of https://github.com/PiotrKrzem/…
…openvino into feature/identity_cpu
- Loading branch information
Showing
94 changed files
with
2,211 additions
and
1,022 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Stub workflow for the merge queue: it is triggered only by `merge_group`
# events and publishes a check named "ci/jenkins" whose single step just
# prints a message.
on: merge_group

jobs:
  merge_group_stub_check:
    # Check name reported for this job.
    name: ci/jenkins
    # Guard: run only for merge queue events.
    if: github.event_name == 'merge_group'
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
    steps:
      - run: echo "Just a stub check to keep Jenkins running in pre-commits but not in merge queue"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright (C) 2018-2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
#! [dataset]
import nncf
import torch

# Placeholder: pass a real dataset and loader arguments here.
calibration_loader = torch.utils.data.DataLoader(...)


def transform_fn(data_item):
    """Unpack a two-element loader item and return its first element (the images)."""
    images, _ = data_item
    return images


calibration_dataset = nncf.Dataset(calibration_loader, transform_fn)
#! [dataset]

#! [quantization]
import torchvision
from nncf.torch import disable_patching

input_fp32 = torch.ones((1, 3, 224, 224))  # FP32 model input
# The boolean `pretrained` flag is deprecated in torchvision; the weights enum
# below names the checkpoint that `pretrained=True` used to select.
model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1)

with disable_patching():
    exported_model = torch.export.export_for_training(model, args=(input_fp32,)).module()
    quantized_model = nncf.quantize(exported_model, calibration_dataset)
#! [quantization]

#! [inference]
import openvino.torch

input_fp32 = ...  # FP32 model input

# compile quantized model using torch.compile API
with disable_patching():
    compiled_model_int8 = torch.compile(quantized_model, backend="openvino")
    # OpenVINO backend compiles the model during the first call,
    # so the first call is expected to be slower than the following calls
    res = compiled_model_int8(input_fp32)

    # save the model
    exported_program = torch.export.export(quantized_model, args=(input_fp32,))
    torch.export.save(exported_program, 'exported_program.pt2')
#! [inference]
53 changes: 53 additions & 0 deletions
53
src/common/snippets/include/snippets/lowered/pass/mha_parallel_wa_optimizer.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// Copyright (C) 2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "snippets/lowered/linear_ir.hpp" | ||
#include "snippets/lowered/loop_info.hpp" | ||
#include "snippets/lowered/pass/runtime_optimizer.hpp" | ||
|
||
namespace ov { | ||
namespace snippets { | ||
namespace lowered { | ||
namespace pass { | ||
/** | ||
* @class MHAParallelWAOptimizer | ||
* @brief Optimizes the dynamic MHA execution increasing parallel work amount dy dividing Brgemm's "M" dimension to "parallel_m" | ||
* and "kernel_m". Uses heuristics from snippets::pass::SplitDimensionM for dimension splitting. | ||
* The optimizer performs the following steps: | ||
* - Identifies applicable Brgemm operations within the LinearIR. | ||
* - Finds parameters whose shapes and layouts need to be adjusted after the split. | ||
* - Determines loops that should be adjusted. | ||
*/ | ||
class MHAParallelWAOptimizer : public lowered::pass::RuntimeOptimizer { | ||
public: | ||
MHAParallelWAOptimizer() = default; | ||
MHAParallelWAOptimizer(const lowered::LinearIRCPtr& linear_ir, const RuntimeConfigurator* configurator); | ||
|
||
bool run(const lowered::LinearIR& linear_ir) override; | ||
bool applicable() const override { return !m_loops_to_split.empty(); } | ||
|
||
private: | ||
static std::unordered_set<lowered::ExpressionPtr> find_applicable_brgemms(const lowered::LinearIRCPtr& linear_ir); | ||
static std::unordered_set<size_t> find_unsqueezed_params( | ||
const lowered::LinearIRCPtr& linear_ir, | ||
const std::unordered_set<lowered::ExpressionPtr>& brgemms); | ||
static std::vector<lowered::ExpandedLoopInfoPtr> find_loops_to_split( | ||
const lowered::LinearIRCPtr& linear_ir, | ||
const std::unordered_set<size_t>& unsqueezed_params); | ||
|
||
std::vector<lowered::ExpandedLoopInfoPtr> m_loops_to_split{}; | ||
std::unordered_set<size_t> m_unsqueezed_params{}; | ||
std::vector<std::vector<size_t>> m_optimized_layouts{}; | ||
std::vector<size_t> m_dim_M_idces{}; | ||
size_t m_concurrency = 0; | ||
|
||
static const size_t m_dim_M_idx; | ||
}; | ||
|
||
} // namespace pass | ||
} // namespace lowered | ||
} // namespace snippets | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
src/common/snippets/include/snippets/lowered/pass/runtime_optimizer.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (C) 2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "snippets/lowered/linear_ir.hpp" | ||
#include "snippets/lowered/pass/pass.hpp" | ||
#include "snippets/runtime_configurator.hpp" | ||
|
||
namespace ov { | ||
namespace snippets { | ||
namespace lowered { | ||
namespace pass { | ||
/** | ||
* @class RuntimeOptimizer | ||
* @brief Base class for runtime optimizers that operate on LinearIR and RuntimeConfigurator during | ||
* RuntimeConfigurator::update stage. | ||
*/ | ||
class RuntimeOptimizer : public ConstPass { | ||
public: | ||
RuntimeOptimizer() = default; | ||
RuntimeOptimizer(const RuntimeConfigurator* configurator) : m_configurator(configurator) { | ||
OPENVINO_ASSERT(configurator, "RuntimeConfigurator musn't be nullptr"); | ||
} | ||
/** | ||
* @brief Defines if this pass is applicable. If it is not applicable, its registration in pass pipeline can be skipped. | ||
*/ | ||
virtual bool applicable() const = 0; | ||
|
||
/** | ||
* @brief Creates an instance of the specified pass type and checks if it is applicable. | ||
* If the pass is applicable, it is registered in the provided pipeline. | ||
* @param pipeline The pipeline in which the pass should be registered. | ||
* @param args The arguments to be forwarded to the pass constructor. | ||
*/ | ||
template <typename OptimizerType, typename... Args, typename = std::enable_if<std::is_base_of<RuntimeOptimizer, OptimizerType>::value>> | ||
static void register_if_applicable(PassPipeline& pipeline, Args&&... args) { | ||
auto pass = std::make_shared<OptimizerType>(std::forward<Args>(args)...); | ||
if (pass->applicable()) { | ||
pipeline.register_pass(pass); | ||
} | ||
} | ||
|
||
protected: | ||
const RuntimeConfigurator* m_configurator = nullptr; | ||
}; | ||
|
||
} // namespace pass | ||
} // namespace lowered | ||
} // namespace snippets | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.