Skip to content

Commit

Permalink
[Conformance][TorchFX] GPU quantization support
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Nov 22, 2024
1 parent dc9f5cb commit 2f47307
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
class FXSQMultiply(torch.nn.Module):
    """
    Module that multiplies its input by a fixed scale tensor.

    The scale is stored as a registered buffer named ``_scale_value``, so it is
    reported by ``named_buffers()`` / ``state_dict()`` and is moved together
    with the module by ``.to()``, ``.cpu()`` and ``.cuda()``.
    """

    def __init__(self, scale: torch.Tensor):
        """
        :param scale: Tensor the input is multiplied by in ``forward``.
        """
        super().__init__()
        # The scale must be registered as a buffer *only*. Assigning it as a
        # plain attribute first (``self._scale_value = scale``) makes
        # ``register_buffer`` raise ``KeyError`` because the attribute already
        # exists, and a plain tensor attribute would not follow the module
        # across devices.
        self.register_buffer("_scale_value", scale)
        # Bare annotation: no runtime effect, it only tells type checkers the
        # type of the buffer accessed through ``self._scale_value``.
        self._scale_value: torch.Tensor

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        :param x: Input tensor.
        :return: ``torch.mul(x, scale)`` using the registered scale buffer.
        """
        return torch.mul(x, self._scale_value)
Expand Down
5 changes: 5 additions & 0 deletions tests/post_training/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ def pytest_addoption(parser):
parser.addoption("--fp32", action="store_true", help="Test original model")
parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend")
parser.addoption("--benchmark", action="store_true", help="Run benchmark_app")
parser.addoption(
"--validate-in-backend",
action="store_true",
help="Validate quantized model in native backend, not in openvino.",
)
parser.addoption(
"--extra-columns",
action="store_true",
Expand Down
8 changes: 8 additions & 0 deletions tests/post_training/data/ptq_reference_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ torchvision/resnet18_backend_CUDA_TORCH:
metric_value: 0.69152
torchvision/resnet18_backend_FX_TORCH:
metric_value: 0.6946
torchvision/resnet18_backend_CUDA_FX_TORCH:
metric_value: 0.6946
torchvision/mobilenet_v3_small_BC_backend_FP32:
metric_value: 0.6766
torchvision/mobilenet_v3_small_BC_backend_OV:
Expand All @@ -46,18 +48,24 @@ torchvision/mobilenet_v3_small_BC_backend_ONNX:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_FX_TORCH:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_CUDA_FX_TORCH:
metric_value: 0.6679
torchvision/vit_b_16_backend_FP32:
metric_value: 0.8107
torchvision/vit_b_16_backend_OV:
metric_value: 0.80948
torchvision/vit_b_16_backend_FX_TORCH:
metric_value: 0.80922
torchvision/vit_b_16_backend_CUDA_FX_TORCH:
metric_value: 0.80922
torchvision/swin_v2_s_backend_FP32:
metric_value: 0.83712
torchvision/swin_v2_s_backend_OV:
metric_value: 0.83638
torchvision/swin_v2_s_backend_FX_TORCH:
metric_value: 0.8360
torchvision/swin_v2_s_backend_CUDA_FX_TORCH:
metric_value: 0.8360
timm/crossvit_9_240_backend_CUDA_TORCH:
metric_value: 0.7275
timm/crossvit_9_240_backend_FP32:
Expand Down
15 changes: 11 additions & 4 deletions tests/post_training/model_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,14 @@
"model_id": "resnet18",
"pipeline_cls": ImageClassificationTorchvision,
"compression_params": {},
"backends": [BackendType.FX_TORCH, BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.OV, BackendType.ONNX],
"backends": [
BackendType.FX_TORCH,
BackendType.CUDA_FX_TORCH,
BackendType.TORCH,
BackendType.CUDA_TORCH,
BackendType.OV,
BackendType.ONNX,
],
"batch_size": 128,
},
{
Expand All @@ -98,7 +105,7 @@
"fast_bias_correction": False,
"preset": QuantizationPreset.MIXED,
},
"backends": [BackendType.FX_TORCH, BackendType.OV, BackendType.ONNX],
"backends": [BackendType.FX_TORCH, BackendType.CUDA_FX_TORCH, BackendType.OV, BackendType.ONNX],
"batch_size": 128,
},
{
Expand All @@ -109,7 +116,7 @@
"model_type": ModelType.TRANSFORMER,
"advanced_parameters": AdvancedQuantizationParameters(smooth_quant_alpha=0.15),
},
"backends": [BackendType.FX_TORCH, BackendType.OV],
"backends": [BackendType.FX_TORCH, BackendType.CUDA_FX_TORCH, BackendType.OV],
"batch_size": 1,
},
{
Expand All @@ -120,7 +127,7 @@
"model_type": ModelType.TRANSFORMER,
"advanced_parameters": AdvancedQuantizationParameters(smooth_quant_alpha=0.5),
},
"backends": [BackendType.FX_TORCH, BackendType.OV],
"backends": [BackendType.FX_TORCH, BackendType.CUDA_FX_TORCH, BackendType.OV],
"batch_size": 1,
},
# Timm models
Expand Down
13 changes: 11 additions & 2 deletions tests/post_training/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class BackendType(Enum):
TORCH = "TORCH"
CUDA_TORCH = "CUDA_TORCH"
FX_TORCH = "FX_TORCH"
CUDA_FX_TORCH = "CUDA_FX_TORCH"
ONNX = "ONNX"
OV = "OV"
OPTIMUM = "OPTIMUM"
Expand All @@ -52,6 +53,7 @@ class BackendType(Enum):
# Backend groups used for membership checks such as ``self.backend in FX_BACKENDS``.
NNCF_PTQ_BACKENDS = [
    BackendType.TORCH,
    BackendType.CUDA_TORCH,
    BackendType.ONNX,
    BackendType.OV,
]
# NOTE: this is an alias of the very same list object as NNCF_PTQ_BACKENDS,
# not a copy; mutating one mutates the other.
ALL_PTQ_BACKENDS = NNCF_PTQ_BACKENDS
PT_BACKENDS = [
    BackendType.TORCH,
    BackendType.CUDA_TORCH,
]
# Kept separate from PT_BACKENDS; callers combine them explicitly
# (``FX_BACKENDS + PT_BACKENDS``) when both groups are needed.
FX_BACKENDS = [
    BackendType.FX_TORCH,
    BackendType.CUDA_FX_TORCH,
]
OV_BACKENDS = [
    BackendType.OV,
    BackendType.OPTIMUM,
]

LIMIT_LENGTH_OF_STATUS = 120
Expand Down Expand Up @@ -211,6 +213,7 @@ def __init__(
reference_data: dict,
no_eval: bool,
run_benchmark_app: bool,
validate_in_backend: bool = False,
params: dict = None,
batch_size: int = 1,
memory_monitor: bool = False,
Expand All @@ -227,6 +230,7 @@ def __init__(
self.memory_monitor = memory_monitor
self.no_eval = no_eval
self.run_benchmark_app = run_benchmark_app
self.validate_in_backend = validate_in_backend
self.output_model_dir: Path = self.output_dir / self.reported_name / self.backend.value
self.output_model_dir.mkdir(parents=True, exist_ok=True)
self.model_name = f"{self.reported_name}_{self.backend.value}"
Expand Down Expand Up @@ -405,11 +409,16 @@ def save_compressed_model(self) -> None:
)
self.path_compressed_ir = self.output_model_dir / "model.xml"
ov.serialize(ov_model, self.path_compressed_ir)
elif self.backend == BackendType.FX_TORCH:
exported_model = torch.export.export(self.compressed_model, (self.dummy_tensor,))
elif self.backend in FX_BACKENDS:
exported_model = torch.export.export(self.compressed_model.cpu(), (self.dummy_tensor.cpu(),))
ov_model = ov.convert_model(exported_model, example_input=self.dummy_tensor.cpu(), input=self.input_size)
self.path_compressed_ir = self.output_model_dir / "model.xml"
ov.serialize(ov_model, self.path_compressed_ir)

if BackendType.CUDA_FX_TORCH:
self.model = self.model.cuda()
self.dummy_tensor = self.dummy_tensor.cuda()

elif self.backend == BackendType.ONNX:
onnx_path = self.output_model_dir / "model.onnx"
onnx.save(self.compressed_model, str(onnx_path))
Expand Down
46 changes: 36 additions & 10 deletions tests/post_training/pipelines/image_classification_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import nncf
from nncf.common.logging.track_progress import track
from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import PTQTestPipeline


Expand All @@ -33,18 +34,15 @@ def prepare_calibration_dataset(self):

self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())

def _validate(self):
val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)

dataset_size = len(val_loader)

# Initialize result tensors for async inference support.
predictions = np.zeros((dataset_size))
references = -1 * np.ones((dataset_size))
def _validate_ov(
self,
val_loader: torch.utils.data.DataLoader,
predictions: np.ndarray,
references: np.ndarray,
dataset_size: int,
):

core = ov.Core()

if os.environ.get("INFERENCE_NUM_THREADS"):
# Set CPU_THREADS_NUM for OpenVINO inference
inference_num_threads = os.environ.get("INFERENCE_NUM_THREADS")
Expand Down Expand Up @@ -73,6 +71,34 @@ def process_result(request, userdata):
references[i] = target

infer_queue.wait_all()
return predictions, references

def _validate_torch_compile(
    self, val_loader: torch.utils.data.DataLoader, predictions: np.ndarray, references: np.ndarray
):
    """
    Fill prediction/reference arrays by running the compressed model through
    ``torch.compile`` with the "openvino" backend.

    The compressed model is moved to CPU before it is compiled. For every
    ``(images, target)`` pair yielded by the loader, the argmax over dim 1 of
    the model output is written to ``predictions`` and the target to
    ``references`` at the index of that pair.

    :param val_loader: Loader yielding ``(images, target)`` pairs.
    :param predictions: Array filled in place with predicted class indices.
    :param references: Array filled in place with target class indices.
    :return: The same ``predictions`` and ``references`` arrays.
    """
    cpu_model = self.compressed_model.cpu()
    compiled_model = torch.compile(cpu_model, backend="openvino")
    for idx, batch in enumerate(val_loader):
        images, target = batch
        logits = compiled_model(images)
        top1 = torch.argmax(logits, dim=1)
        # NOTE(review): one array slot per loader item is written, which
        # presumably relies on the loader using batch_size=1 -- confirm
        # against the caller.
        predictions[idx] = top1.numpy()
        references[idx] = target.numpy()
    return predictions, references

def _validate(self):
val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)

dataset_size = len(val_loader)

# Initialize result tensors for async inference support.
predictions = np.zeros((dataset_size))
references = -1 * np.ones((dataset_size))

if self.validate_in_backend and self.backend in FX_BACKENDS:
predictions, references = self._validate_torch_compile(val_loader, predictions, references)
else:
predictions, references = self._validate_ov(val_loader, predictions, references, dataset_size)

acc_top1 = accuracy_score(predictions, references)

Expand Down
20 changes: 15 additions & 5 deletions tests/post_training/pipelines/image_classification_torchvision.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from torchvision import models

from nncf.torch import disable_patching
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import PT_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.image_classification_base import ImageClassificationBase
Expand Down Expand Up @@ -75,9 +76,12 @@ def prepare_model(self) -> None:
if self.batch_size > 1: # Dynamic batch_size shape export
self.input_size[0] = -1

if self.backend == BackendType.FX_TORCH:
if self.backend in FX_BACKENDS:
with torch.no_grad():
with disable_patching():
if self.backend is BackendType.CUDA_FX_TORCH:
model = model.cuda()
self.dummy_tensor = self.dummy_tensor.cuda()
self.model = self.model_params.export_fn(model, (self.dummy_tensor,))

elif self.backend in PT_BACKENDS:
Expand Down Expand Up @@ -121,20 +125,26 @@ def _dump_model_fp32(self) -> None:
)
ov.serialize(ov_model, self.fp32_model_dir / "model_fp32.xml")

if self.backend == BackendType.FX_TORCH:
exported_model = torch.export.export(self.model, (self.dummy_tensor,))
if self.backend in FX_BACKENDS:
exported_model = torch.export.export(self.model.cpu(), (self.dummy_tensor.cpu(),))
ov_model = ov.convert_model(exported_model, example_input=self.dummy_tensor, input=self.input_size)
ov.serialize(ov_model, self.fp32_model_dir / "fx_model_fp32.xml")

if self.backend is BackendType.CUDA_FX_TORCH:
self.model = self.model.cuda()
self.dummy_tensor = self.dummy_tensor.cuda()

if self.backend in [BackendType.FP32, BackendType.OV]:
ov.serialize(self.model, self.fp32_model_dir / "model_fp32.xml")

def prepare_preprocessor(self) -> None:
    """
    Store the transform returned by ``self.model_params.weights.transforms()``
    in ``self.transform``.
    """
    weights = self.model_params.weights
    self.transform = weights.transforms()

def get_transform_calibration_fn(self):
if self.backend in [BackendType.FX_TORCH] + PT_BACKENDS:
device = torch.device("cuda" if self.backend == BackendType.CUDA_TORCH else "cpu")
if self.backend in FX_BACKENDS + PT_BACKENDS:
device = torch.device(
"cuda" if self.backend in [BackendType.CUDA_TORCH, BackendType.CUDA_FX_TORCH] else "cpu"
)

def transform_fn(data_item):
images, _ = data_item
Expand Down
7 changes: 7 additions & 0 deletions tests/post_training/test_quantize_conformance.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ def fixture_run_benchmark_app(pytestconfig):
return pytestconfig.getoption("benchmark")


@pytest.fixture(scope="session", name="validate_in_backend")
def fixture_validate_in_backend(pytestconfig):
    """Session-scoped fixture returning the value of the ``validate_in_backend`` pytest option."""
    option_value = pytestconfig.getoption("validate_in_backend")
    return option_value


@pytest.fixture(scope="session", name="extra_columns")
def fixture_extra_columns(pytestconfig):
    """Session-scoped fixture returning the value of the ``extra_columns`` pytest option."""
    option_value = pytestconfig.getoption("extra_columns")
    return option_value
Expand Down Expand Up @@ -266,6 +271,7 @@ def test_ptq_quantization(
run_torch_cuda_backend: bool,
subset_size: Optional[int],
run_benchmark_app: bool,
validate_in_backend: bool,
capsys: pytest.CaptureFixture,
extra_columns: bool,
memory_monitor: bool,
Expand Down Expand Up @@ -293,6 +299,7 @@ def test_ptq_quantization(
"data_dir": data_dir,
"no_eval": no_eval,
"run_benchmark_app": run_benchmark_app,
"validate_in_backend": validate_in_backend,
"batch_size": batch_size,
"memory_monitor": memory_monitor,
}
Expand Down

0 comments on commit 2f47307

Please sign in to comment.