forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
QuantizerBase.h
84 lines (72 loc) · 2.61 KB
/
QuantizerBase.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#pragma once
#include <c10/core/ScalarType.h>
#include <c10/core/QScheme.h>
#include <c10/util/intrusive_ptr.h>
namespace at {
// Forward declarations only: this header names these types without needing
// their full definitions.
struct Quantizer;
struct QTensorImpl;
class Tensor;

// Reference-counted handle to a Quantizer.
using QuantizerPtr = c10::intrusive_ptr<Quantizer>;
// Const reference to such a handle, for signatures that should not copy the
// intrusive_ptr itself.
using ConstQuantizerPtr = const c10::intrusive_ptr<Quantizer>&;
/**
* Quantizer is the class for storing all the information
* that's necessary to perform quantize and dequantize
* operations.
*
* We might have different types of quantization schemes and this is
* the base class for all quantizers.
*
* QTensorImpl will hold a pointer to Quantizer so that we can support
* different quantization schemes on Tensor.
*
* For example, the most common quantization scheme, Affine Quantization,
* requires scale and zero_point as parameters. We store scale and zero_point
* inside the instance and use them to quantize a float Tensor or
* dequantize a quantized Tensor.
*
* When you add a new type of leaf Quantizer class, please also
* make sure to add a corresponding QScheme enum value, since
* they should have a one-to-one mapping.
*
* Note about intrusive_ptr:
* A quantized Tensor holds an intrusive_ptr to its Quantizer, and multiple
* Tensors can share the same Quantizer. A Quantizer should be immutable.
*/
struct TORCH_API Quantizer : public c10::intrusive_ptr_target {
  // Scalar type supplied at construction; const, so it cannot change
  // afterwards.
  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
  const ScalarType scalar_type_;

  // Records `scalar_type` in `scalar_type_`.
  explicit Quantizer(ScalarType scalar_type) : scalar_type_{scalar_type} {}

  // Only declared here; the definition is not part of this header.
  ~Quantizer() override;

  /**
   * Produce a new QuantizerPtr that refers to this object.
   * (Pattern copied from torch/csrc/jit/ir/scope.h.)
   */
  QuantizerPtr intrusive_from_this() {
    // A new pointer is being created from the raw `this`, so the refcount is
    // bumped first to account for the ownership that pointer takes.
    c10::raw::intrusive_ptr::incref(this);
    auto self = c10::intrusive_ptr<Quantizer>::reclaim(this);
    return self;
  }

  /**
   * QScheme of this quantizer. Every concrete Quantizer type is expected to
   * report its own, unique QScheme.
   */
  virtual QScheme qscheme() const = 0;

  // Accessor for the scalar type stored at construction.
  ScalarType scalar_type() const { return scalar_type_; }

  /**
   * Turn the float Tensor `t` into a quantized Tensor.
   */
  virtual Tensor quantize(const Tensor& t) = 0;

  /**
   * Turn the quantized Tensor `t` back into a float Tensor.
   */
  virtual Tensor dequantize(const Tensor& t) = 0;

  /**
   * out= variant of dequantize: takes the destination Tensor `out` alongside
   * the quantized input `t` and returns a Tensor reference.
   */
  virtual Tensor& dequantize_out(Tensor& out, const Tensor& t) = 0;

  /**
   * Equality check between this quantizer and `other`.
   */
  virtual bool equalTo(QuantizerPtr other) const = 0;
};
} // namespace at