Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forbid inefficient TensorDescriptor initialization #3393

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/batch_norm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void DeriveBNTensorDescriptor(TensorDescriptor& derivedBnDesc,
{

auto lengths = xDesc.GetLengths();
std::vector<int> newlens(lengths.size());
std::vector<size_t> newlens(lengths.size());
newlens[1] = lengths[1];
if(bn_mode == miopenBNSpatial)
{
Expand Down
2 changes: 1 addition & 1 deletion src/include/miopen/pooling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ struct MIOPEN_EXPORT PoolingDescriptor : miopenPoolingDescriptor
std::tuple<std::size_t, std::size_t, std::size_t, std::size_t>
GetForwardOutputDim(const TensorDescriptor& xDesc) const;

void GetForwardOutputDimNd(const TensorDescriptor& xDesc, int dims, int* tensorDimArr) const;
void GetForwardOutputDimNd(const TensorDescriptor& xDesc, int dims, size_t* tensorDimArr) const;

TensorDescriptor GetForwardOutputTensor(const TensorDescriptor& xDesc) const;

Expand Down
4 changes: 2 additions & 2 deletions src/include/miopen/rnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ struct MIOPEN_INTERNALS_EXPORT RNNDescriptor : miopenRNNDescriptor

size_t paramsOffsetCalculation(const TensorDescriptor& xDesc, int layer, int paramID) const;

std::vector<int>
std::vector<size_t>
pTensorLengthsCalculation(const TensorDescriptor& xDesc, int layer, int paramID) const;

static SeqTensorDescriptor makeSeqTensorDescriptor(miopenDataType_t t,
Expand Down Expand Up @@ -538,7 +538,7 @@ struct MIOPEN_INTERNALS_EXPORT RNNDescriptor : miopenRNNDescriptor
size_t reserveSpaceSize) const;

void RNNForwardMS(Handle& handle,
std::vector<int>& seq_array,
std::vector<size_t>& seq_array,
const TensorDescriptor& xDesc,
ConstData_t x,
const TensorDescriptor& hxDesc,
Expand Down
2 changes: 1 addition & 1 deletion src/include/miopen/rnn_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ struct RNNTensorPaddingConverter
{
static void ConvertTensorData(const Handle& handle,
const TensorDescriptor& padded_tensor_desc,
std::vector<int>& bsize_per_time,
std::vector<size_t>& bsize_per_time,
ConstData_t src,
Data_t dst,
bool is_src_padded);
Expand Down
8 changes: 0 additions & 8 deletions src/include/miopen/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,10 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor
// The delegation constructor should be placed above the target constructor in the
// code for better dependency tracking

TensorDescriptor(miopenDataType_t t, const std::initializer_list<int>& lens_in);
TensorDescriptor(miopenDataType_t t, const std::vector<int>& lens_in);
TensorDescriptor(miopenDataType_t t, const std::initializer_list<std::size_t>& lens_in);
TensorDescriptor(miopenDataType_t t, const std::vector<std::size_t>& lens_in);
TensorDescriptor(miopenDataType_t t, std::vector<std::size_t>&& lens_in);

TensorDescriptor(miopenDataType_t t,
miopenTensorLayout_t layout_in,
const std::vector<int>& lens_in);
TensorDescriptor(miopenDataType_t t,
miopenTensorLayout_t layout_in,
const std::initializer_list<std::size_t>& lens_in);
Expand All @@ -159,9 +154,6 @@ struct MIOPEN_INTERNALS_EXPORT TensorDescriptor : miopenTensorDescriptor
miopenTensorLayout_t layout_in,
std::vector<std::size_t>&& lens_in);

TensorDescriptor(miopenDataType_t t,
const std::vector<int>& lens_in,
const std::vector<int>& strides_in);
TensorDescriptor(miopenDataType_t t,
const std::initializer_list<std::size_t>& lens_in,
const std::initializer_list<std::size_t>& strides_in);
Expand Down
2 changes: 1 addition & 1 deletion src/ocl/ctcocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ void CTCLossDescriptor::CTCLoss(Handle& handle,
float time = 0.;
if(apply_softmax_layer)
{
std::vector<int> sfm_size(4, 1);
std::vector<size_t> sfm_size(4, 1);
sfm_size[0] = max_time_step * batch_size;
sfm_size[1] = class_sz;
auto sfm_desc = miopen::TensorDescriptor(probsDesc.GetType(), sfm_size);
Expand Down
50 changes: 25 additions & 25 deletions src/ocl/rnnocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ miopenStatus_t ReducAddBias(miopen::Handle& handle,
int lda = k, ldb = ws_desc.GetStrides()[1], ldc = n;

const miopen::TensorDescriptor red_matrix{
red_type, std::vector<int>{1, 1, k}, std::vector<int>{k, k, 1}};
red_type, std::vector<size_t>{1, 1, k}, std::vector<size_t>{k, k, 1}};

SetTensor(handle, red_matrix, red_workSpace, &alpha1);

Expand Down Expand Up @@ -254,7 +254,7 @@ miopenStatus_t ReducAddBias(miopen::Handle& handle,
} // namespace

void RNNDescriptor::RNNForwardMS(Handle& handle,
std::vector<int>& seq_array,
std::vector<size_t>& seq_array,
const TensorDescriptor& xDesc,
ConstData_t x,
const TensorDescriptor& hxDesc,
Expand All @@ -271,7 +271,7 @@ void RNNDescriptor::RNNForwardMS(Handle& handle,
miopenRNNFWDMode_t fwd_mode) const
{
#if MIOPEN_USE_GEMM && MIOPEN_BACKEND_HIP
std::vector<int> in_n;
std::vector<size_t> in_n;
int in_vec = xDesc.GetLengths()[1]; // input vector size
int out_vec = yDesc.GetLengths()[1]; // output vector size

Expand All @@ -289,7 +289,7 @@ void RNNDescriptor::RNNForwardMS(Handle& handle,
ms_controller.ChangeActiveStream(root_stream_id);

int total_batch_size = 0;
std::vector<int> bacc_per_time(seq_len + 1);
std::vector<size_t> bacc_per_time(seq_len + 1);

for(int i = 0; i < seq_len; i++)
{
Expand Down Expand Up @@ -583,13 +583,13 @@ void RNNDescriptor::RNNForwardMS(Handle& handle,

const auto bias_desc =
miopen::TensorDescriptor(wDesc.GetType(),
std::vector<int>{1, 1, WeiBuf.bias_vector_mul_gate()},
std::vector<int>{bias_stride, bias_stride, 1});
std::vector<size_t>{1, 1, WeiBuf.bias_vector_mul_gate()},
std::vector<size_t>{bias_stride, bias_stride, 1});

const auto hidden_interim_desc = miopen::TensorDescriptor(
wDesc.GetType(),
std::vector<int>{1, RBuff.batches, WeiBuf.bias_vector_mul_gate()},
std::vector<int>{
std::vector<size_t>{1, RBuff.batches, WeiBuf.bias_vector_mul_gate()},
std::vector<size_t>{
RBuff.batches * RBuff.gemm_write_stride(), RBuff.gemm_write_stride(), 1});

const auto RB_layer_out_off = RBuff.layer_offset(layer);
Expand Down Expand Up @@ -1064,7 +1064,7 @@ void RNNDescriptor::RNNForwardMS(Handle& handle,
}
else
{
std::vector<int> layer_stream_id(nLayers, 2);
std::vector<size_t> layer_stream_id(nLayers, 2);
layer_stream_id[0] = 1;

auto dispatch_next_chunk = [&layer_upd_cur_time,
Expand Down Expand Up @@ -1244,7 +1244,7 @@ void RNNDescriptor::RNNForwardInference(Handle& handle,
// RNNTensorPaddingConverter::CreatePackedDescriptor()
// for future developments: as long as we don't use strides from xDesc and yDesc,
// we ignore the conversion of these descriptors.
std::vector<int> in_n(seqLen);
std::vector<size_t> in_n(seqLen);

for(int i = 0; i < seqLen; i++)
{
Expand Down Expand Up @@ -1327,7 +1327,7 @@ void RNNDescriptor::RNNForwardInferencePacked(Handle& handle,
// reset kernel timer
profileRNNkernels(handle, 0, ctime);

std::vector<int> in_n;
std::vector<size_t> in_n;
int in_h = xDesc[0].GetLengths()[1]; // input vector size
int hy_d = hyDesc.GetLengths()[0]; // biNumLayers
int hy_n = hyDesc.GetLengths()[1]; // max batch size
Expand Down Expand Up @@ -1421,7 +1421,7 @@ void RNNDescriptor::RNNForwardInferencePacked(Handle& handle,
float alpha0, alpha1, beta_t;
float alpha = 1, beta = 0;

std::vector<int> sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), x_size(3, 1),
std::vector<size_t> sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), x_size(3, 1),
x_stride(3, 1), y_size(3, 1), y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1);
miopen::TensorDescriptor sp_desc, w_desc, x_desc, y_desc, hx_desc;

Expand Down Expand Up @@ -2635,7 +2635,7 @@ void RNNDescriptor::RNNForwardTraining(Handle& handle,
// RNNTensorPaddingConverter::CreatePackedDescriptor()
// for future developments: as long as we don't use strides from xDesc and yDesc,
// we ignore the conversion of these descriptors.
std::vector<int> in_n(seqLen);
std::vector<size_t> in_n(seqLen);

for(int i = 0; i < seqLen; i++)
{
Expand Down Expand Up @@ -2749,7 +2749,7 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(
}

int batch_n = 0;
std::vector<int> in_n;
std::vector<size_t> in_n;
for(int i = 0; i < seqLen; i++)
{
int batchval, batchvalout;
Expand Down Expand Up @@ -2842,7 +2842,7 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(
float alpha0, alpha1, beta_t;
float alpha = 1, beta = 0;

std::vector<int> sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), x_size(3, 1),
std::vector<size_t> sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1), x_size(3, 1),
x_stride(3, 1), y_size(3, 1), y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1);
miopen::TensorDescriptor sp_desc, w_desc, x_desc, y_desc, hx_desc;

Expand Down Expand Up @@ -2990,7 +2990,7 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(

if(use_dropout)
{
std::vector<int> drop_size(2), drop_in_str(2, 1), drop_out_str(2, 1);
std::vector<size_t> drop_size(2), drop_in_str(2, 1), drop_out_str(2, 1);
drop_size[0] = batch_n;
drop_size[1] = hy_h * bi;
drop_in_str[0] = hy_stride;
Expand Down Expand Up @@ -4139,7 +4139,7 @@ void RNNDescriptor::RNNBackwardData(Handle& handle,
(packedDYSize + packedDXSize));
auto shifted_workSpace_size = workSpaceSize - (packedDYSize + packedDXSize);

std::vector<int> in_n(seqLen);
std::vector<size_t> in_n(seqLen);

for(int i = 0; i < seqLen; i++)
{
Expand Down Expand Up @@ -4244,7 +4244,7 @@ void RNNDescriptor::RNNBackwardDataPackedTensors(

auto rnn_data_type = dhxDesc.GetType();

std::vector<int> in_n;
std::vector<size_t> in_n;
int in_h = dxDesc[0].GetLengths()[1];
int hy_d = dhxDesc.GetLengths()[0];
int hy_n = dhxDesc.GetLengths()[1];
Expand Down Expand Up @@ -4345,7 +4345,7 @@ void RNNDescriptor::RNNBackwardDataPackedTensors(
float alpha0, alpha1, beta_t;
float alpha = 1, beta = 0;

std::vector<int> sp_size(3, 1), sp_stride(3, 1), x_size(3, 1), x_stride(3, 1), y_size(3, 1),
std::vector<size_t> sp_size(3, 1), sp_stride(3, 1), x_size(3, 1), x_stride(3, 1), y_size(3, 1),
y_stride(3, 1), hx_size(3, 1), hx_stride(3, 1);
miopen::TensorDescriptor sp_desc, x_desc, y_desc, hx_desc;

Expand Down Expand Up @@ -4497,7 +4497,7 @@ void RNNDescriptor::RNNBackwardDataPackedTensors(

if(use_dropout)
{
std::vector<int> drop_size(2), drop_in_str(2, 1);
std::vector<size_t> drop_size(2), drop_in_str(2, 1);
drop_size[0] = batch_n;
drop_size[1] = hy_h * bi;
drop_in_str[0] = hy_stride;
Expand Down Expand Up @@ -5685,7 +5685,7 @@ void RNNDescriptor::RNNBackwardDataPackedTensors(
// dinput
if(inputMode == miopenRNNskip)
{
const std::vector<int> dx_size{1, batch_n, hy_h};
const std::vector<size_t> dx_size{1, batch_n, hy_h};
x_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, x_stride);
sp_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, sp_stride);

Expand Down Expand Up @@ -5828,7 +5828,7 @@ void RNNDescriptor::RNNBackwardWeights(Handle& handle,
(packedXSize + WA_workSpace_bug));
auto shifted_workSpace_size = workSpaceSize - (packedXSize + WA_workSpace_bug);

std::vector<int> in_n(seqLen);
std::vector<size_t> in_n(seqLen);

for(int i = 0; i < seqLen; i++)
{
Expand Down Expand Up @@ -5917,7 +5917,7 @@ void RNNDescriptor::RNNBackwardWeightsPackedTensors(
}

std::string network_config;
std::vector<int> in_n;
std::vector<size_t> in_n;
int in_h = xDesc[0].GetLengths()[1];
int hy_d = hxDesc.GetLengths()[0];
int hy_n = hxDesc.GetLengths()[1];
Expand Down Expand Up @@ -6012,7 +6012,7 @@ void RNNDescriptor::RNNBackwardWeightsPackedTensors(

float alpha0, alpha1, beta_t = 0;

std::vector<int> sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1);
std::vector<size_t> sp_size(3, 1), sp_stride(3, 1), w_size(3, 1), w_stride(3, 1);
miopen::TensorDescriptor sp_desc, w_desc;

sp_stride[0] = batch_n * hy_stride;
Expand Down Expand Up @@ -6233,7 +6233,7 @@ void RNNDescriptor::RNNBackwardWeightsPackedTensors(
else
{
// second dw bias equal to the first, so just copy reduction result
const std::vector<int> dw_bias_strides{wei_stride, wei_stride, 1};
const std::vector<size_t> dw_bias_strides{wei_stride, wei_stride, 1};
const miopen::TensorDescriptor dw_desc{
rnn_data_t, {1, 1, wei_stride}, dw_bias_strides};

Expand Down
12 changes: 6 additions & 6 deletions src/pooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,11 @@ PoolingDescriptor::GetForwardOutputDim(const TensorDescriptor& xDesc) const

void PoolingDescriptor::GetForwardOutputDimNd(const TensorDescriptor& xDesc,
int dims,
int* tensorDimArr) const
size_t* tensorDimArr) const
{
assert(xDesc.GetLengths().size() == dims && xDesc.GetLengths().size() <= 5 &&
xDesc.GetLengths().size() >= 4); // currently only support 2D/3D pooling
std::vector<int> out_dim;
std::vector<size_t> out_dim;
auto input_dim = xDesc.GetLengths();
auto strs = GetStrides();
auto padd = GetPads();
Expand All @@ -175,8 +175,8 @@ void PoolingDescriptor::GetForwardOutputDimNd(const TensorDescriptor& xDesc,
assert(std::all_of(padd.begin(), padd.end(), [](int s) { return s >= 0; }));

auto in_itr = input_dim.begin();
out_dim.push_back(int(*(in_itr++))); // n
out_dim.push_back(int(*(in_itr++))); // c
out_dim.push_back(*(in_itr++)); // n
out_dim.push_back(*(in_itr++)); // c

auto str_itr = strs.begin();
auto pad_itr = padd.begin();
Expand Down Expand Up @@ -215,12 +215,12 @@ void PoolingDescriptor::GetForwardOutputDimNd(const TensorDescriptor& xDesc,

TensorDescriptor PoolingDescriptor::GetForwardOutputTensor(const TensorDescriptor& xDesc) const
{
std::vector<int> out_dim(xDesc.GetNumDims());
std::vector<size_t> out_dim(xDesc.GetNumDims());
GetForwardOutputDimNd(xDesc, xDesc.GetNumDims(), out_dim.data());

const std::string default_layout = tensor_layout_get_default(xDesc.GetNumDims());
const std::string in_layout = xDesc.GetLayout(default_layout);
std::vector<int> out_strides;
std::vector<size_t> out_strides;
tensor_layout_to_strides(out_dim, default_layout, in_layout, out_strides);

return {xDesc.GetType(), out_dim, out_strides};
Expand Down
5 changes: 3 additions & 2 deletions src/pooling_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,9 @@ miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc,

MIOPEN_LOG_FUNCTION(poolDesc, tensorDesc, dims);
return miopen::try_([&] {
miopen::deref(poolDesc).GetForwardOutputDimNd(
miopen::deref(tensorDesc), dims, tensorDimArr);
std::vector<size_t> tmp(dims);
miopen::deref(poolDesc).GetForwardOutputDimNd(miopen::deref(tensorDesc), dims, tmp.data());
std::copy_n(tmp.data(), dims, tensorDimArr);
});
}

Expand Down
Loading
Loading