diff --git a/Applications/LLaMA/jni/custom_multi_head_attention_layer.cpp b/Applications/LLaMA/jni/custom_multi_head_attention_layer.cpp index 2a7bcbae28..aad10c1f5f 100644 --- a/Applications/LLaMA/jni/custom_multi_head_attention_layer.cpp +++ b/Applications/LLaMA/jni/custom_multi_head_attention_layer.cpp @@ -272,58 +272,58 @@ void MultiHeadAttentionLayer::finalize(InitLayerContext &context) { {batch_size, 1, query_height, num_heads * projected_query_dim_prop}, activation_type); weight_idx[AttentionParams::projected_query] = context.requestTensor( - projected_query_dim, "projected_query", Tensor::Initializer::NONE, true, + projected_query_dim, "projected_query", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); /** tensor for output of key fc */ TensorDim projected_key_dim( {batch_size, 1, key_height, num_heads * projected_key_dim_prop}, activation_type); - weight_idx[AttentionParams::projected_key] = context.requestTensor( - projected_key_dim, "projected_key", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + weight_idx[AttentionParams::projected_key] = + context.requestTensor(projected_key_dim, "projected_key", Initializer::NONE, + true, TensorLifespan::ITERATION_LIFESPAN); /** tensor for output of value fc */ TensorDim projected_value_dim( {batch_size, 1, value_height, num_heads * projected_value_dim_prop}, activation_type); weight_idx[AttentionParams::projected_value] = context.requestTensor( - projected_value_dim, "projected_value", Tensor::Initializer::NONE, true, + projected_value_dim, "projected_value", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); TensorDim cache_key_dim( {batch_size, 1, max_timestep, num_heads * projected_key_dim_prop}, activation_type); weight_idx[AttentionParams::cache_key] = - context.requestTensor(cache_key_dim, "cache_key", Tensor::Initializer::NONE, - true, TensorLifespan::MAX_LIFESPAN); + context.requestTensor(cache_key_dim, "cache_key", Initializer::NONE, true, + TensorLifespan::MAX_LIFESPAN); TensorDim cache_value_dim( {batch_size, 1, max_timestep, num_heads * projected_value_dim_prop}, activation_type); - weight_idx[AttentionParams::cache_value] = context.requestTensor( - cache_value_dim, "cache_value", Tensor::Initializer::NONE, true, - TensorLifespan::MAX_LIFESPAN); + weight_idx[AttentionParams::cache_value] = + context.requestTensor(cache_value_dim, "cache_value", Initializer::NONE, + true, TensorLifespan::MAX_LIFESPAN); if (provide_attention_mask) { /** Intended comment for bool type mask */ // TensorDim attention_mask_dim( // {batch_size, num_heads, query_height, key_height}); // weight_idx[AttentionParams::attention_mask] = context.requestTensor( - // attention_mask_dim, "attention_mask", Tensor::Initializer::NONE, false, + // attention_mask_dim, "attention_mask", Initializer::NONE, false, // TensorLifespan::FORWARD_FUNC_LIFESPAN); } /** tensor for attention weight */ TensorDim attention_weight_dim( {batch_size, num_heads, query_height, key_height}, activation_type); weight_idx[AttentionParams::attention_weight] = context.requestTensor( - attention_weight_dim, "attention_weight", Tensor::Initializer::NONE, true, + attention_weight_dim, "attention_weight", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); if (dropout_rate > epsilon) { /** tensor for dropout mask */ TensorDim dropout_mask_dim( {batch_size, num_heads, query_height, key_height}, activation_type); - weight_idx[AttentionParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", 
Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + weight_idx[AttentionParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } /** tensor for attention output */ @@ -331,7 +331,7 @@ void MultiHeadAttentionLayer::finalize(InitLayerContext &context) { {batch_size, 1, query_height, num_heads * projected_value_dim_prop}, activation_type); weight_idx[AttentionParams::attention_output] = context.requestTensor( - attention_output_dim, "attention_output", Tensor::Initializer::NONE, true, + attention_output_dim, "attention_output", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); TensorDim output_dim({batch_size, 1, query_height, output_shape}, diff --git a/Applications/LLaMA/jni/rms_norm.h b/Applications/LLaMA/jni/rms_norm.h index 1180db82e6..8f769527ab 100644 --- a/Applications/LLaMA/jni/rms_norm.h +++ b/Applications/LLaMA/jni/rms_norm.h @@ -38,8 +38,8 @@ class RMS_NORM_GAMMA_INIT final /** * @brief Construct a RMS_NORM_GAMMA_INIT object */ - RMS_NORM_GAMMA_INIT(nntrainer::Tensor::Initializer value = - nntrainer::Tensor::Initializer::ONES) { + RMS_NORM_GAMMA_INIT( + nntrainer::Initializer value = nntrainer::Initializer::ONES) { set(value); }; diff --git a/Applications/YOLOv2/jni/yolo_v2_loss.cpp b/Applications/YOLOv2/jni/yolo_v2_loss.cpp index 8421dd24ee..67b262d283 100644 --- a/Applications/YOLOv2/jni/yolo_v2_loss.cpp +++ b/Applications/YOLOv2/jni/yolo_v2_loss.cpp @@ -319,141 +319,136 @@ void YoloV2LossLayer::finalize(nntrainer::InitLayerContext &context) { nntrainer::TensorDim bbox_x_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_x_pred] = context.requestTensor( - bbox_x_pred_dim, "bbox_x_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_x_pred_dim, "bbox_x_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_y_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_y_pred] = context.requestTensor( - bbox_y_pred_dim, "bbox_y_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_y_pred_dim, "bbox_y_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_w_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_w_pred] = context.requestTensor( - bbox_w_pred_dim, "bbox_w_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_w_pred_dim, "bbox_w_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_h_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_h_pred] = context.requestTensor( - bbox_h_pred_dim, "bbox_h_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_h_pred_dim, "bbox_h_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim confidence_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV2LossParams::confidence_pred] = - context.requestTensor(confidence_pred_dim, "confidence_pred", - nntrainer::Tensor::Initializer::NONE, true, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV2LossParams::confidence_pred] = context.requestTensor( + confidence_pred_dim, "confidence_pred", nntrainer::Initializer::NONE, true, + 
nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim class_pred_dim(batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, class_number); wt_idx[YoloV2LossParams::class_pred] = context.requestTensor( - class_pred_dim, "class_pred", nntrainer::Tensor::Initializer::NONE, true, + class_pred_dim, "class_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_w_pred_anchor_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV2LossParams::bbox_w_pred_anchor] = - context.requestTensor(bbox_w_pred_anchor_dim, "bbox_w_pred_anchor", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV2LossParams::bbox_w_pred_anchor] = context.requestTensor( + bbox_w_pred_anchor_dim, "bbox_w_pred_anchor", nntrainer::Initializer::NONE, + false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_h_pred_anchor_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV2LossParams::bbox_h_pred_anchor] = - context.requestTensor(bbox_h_pred_anchor_dim, "bbox_h_pred_anchor", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV2LossParams::bbox_h_pred_anchor] = context.requestTensor( + bbox_h_pred_anchor_dim, "bbox_h_pred_anchor", nntrainer::Initializer::NONE, + false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_x_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_x_gt] = context.requestTensor( - bbox_x_gt_dim, "bbox_x_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_x_gt_dim, "bbox_x_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_y_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_y_gt] = context.requestTensor( - bbox_y_gt_dim, "bbox_y_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_y_gt_dim, "bbox_y_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_w_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_w_gt] = context.requestTensor( - bbox_w_gt_dim, "bbox_w_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_w_gt_dim, "bbox_w_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_h_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox_h_gt] = context.requestTensor( - bbox_h_gt_dim, "bbox_h_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_h_gt_dim, "bbox_h_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim confidence_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::confidence_gt] = context.requestTensor( - confidence_gt_dim, "confidence_gt", nntrainer::Tensor::Initializer::NONE, - false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + confidence_gt_dim, "confidence_gt", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim class_gt_dim(batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, class_number); wt_idx[YoloV2LossParams::class_gt] = 
context.requestTensor( - class_gt_dim, "class_gt", nntrainer::Tensor::Initializer::NONE, false, + class_gt_dim, "class_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_class_mask_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV2LossParams::bbox_class_mask] = - context.requestTensor(bbox_class_mask_dim, "bbox_class_mask", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV2LossParams::bbox_class_mask] = context.requestTensor( + bbox_class_mask_dim, "bbox_class_mask", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim iou_mask_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::iou_mask] = context.requestTensor( - iou_mask_dim, "iou_mask", nntrainer::Tensor::Initializer::NONE, false, + iou_mask_dim, "iou_mask", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox1_width_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox1_width] = context.requestTensor( - bbox1_width_dim, "bbox1_width", nntrainer::Tensor::Initializer::NONE, false, + bbox1_width_dim, "bbox1_width", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox1_height_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::bbox1_height] = context.requestTensor( - bbox1_height_dim, "bbox1_height", nntrainer::Tensor::Initializer::NONE, - false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + bbox1_height_dim, "bbox1_height", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim is_xy_min_max_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 4); wt_idx[YoloV2LossParams::is_xy_min_max] = context.requestTensor( - is_xy_min_max_dim, "is_xy_min_max", nntrainer::Tensor::Initializer::NONE, - false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + is_xy_min_max_dim, "is_xy_min_max", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim intersection_width_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV2LossParams::intersection_width] = - context.requestTensor(intersection_width_dim, "intersection_width", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV2LossParams::intersection_width] = context.requestTensor( + intersection_width_dim, "intersection_width", nntrainer::Initializer::NONE, + false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim intersection_height_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::intersection_height] = context.requestTensor(intersection_height_dim, "intersection_height", - nntrainer::Tensor::Initializer::NONE, false, + nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim unions_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV2LossParams::unions] = context.requestTensor( - unions_dim, "unions", nntrainer::Tensor::Initializer::NONE, false, + unions_dim, "unions", nntrainer::Initializer::NONE, false, 
nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); } diff --git a/Applications/YOLOv3/jni/yolo_v3_loss.cpp b/Applications/YOLOv3/jni/yolo_v3_loss.cpp index 0187e21f87..dc4300a0a7 100644 --- a/Applications/YOLOv3/jni/yolo_v3_loss.cpp +++ b/Applications/YOLOv3/jni/yolo_v3_loss.cpp @@ -335,141 +335,136 @@ void YoloV3LossLayer::finalize(nntrainer::InitLayerContext &context) { nntrainer::TensorDim bbox_x_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_x_pred] = context.requestTensor( - bbox_x_pred_dim, "bbox_x_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_x_pred_dim, "bbox_x_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_y_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_y_pred] = context.requestTensor( - bbox_y_pred_dim, "bbox_y_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_y_pred_dim, "bbox_y_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_w_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_w_pred] = context.requestTensor( - bbox_w_pred_dim, "bbox_w_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_w_pred_dim, "bbox_w_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_h_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_h_pred] = context.requestTensor( - bbox_h_pred_dim, "bbox_h_pred", nntrainer::Tensor::Initializer::NONE, true, + bbox_h_pred_dim, "bbox_h_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim confidence_pred_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV3LossParams::confidence_pred] = - context.requestTensor(confidence_pred_dim, "confidence_pred", - nntrainer::Tensor::Initializer::NONE, true, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV3LossParams::confidence_pred] = context.requestTensor( + confidence_pred_dim, "confidence_pred", nntrainer::Initializer::NONE, true, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim class_pred_dim(batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, class_number); wt_idx[YoloV3LossParams::class_pred] = context.requestTensor( - class_pred_dim, "class_pred", nntrainer::Tensor::Initializer::NONE, true, + class_pred_dim, "class_pred", nntrainer::Initializer::NONE, true, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_w_pred_anchor_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV3LossParams::bbox_w_pred_anchor] = - context.requestTensor(bbox_w_pred_anchor_dim, "bbox_w_pred_anchor", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV3LossParams::bbox_w_pred_anchor] = context.requestTensor( + bbox_w_pred_anchor_dim, "bbox_w_pred_anchor", nntrainer::Initializer::NONE, + false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_h_pred_anchor_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV3LossParams::bbox_h_pred_anchor] = - context.requestTensor(bbox_h_pred_anchor_dim, "bbox_h_pred_anchor", - 
nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV3LossParams::bbox_h_pred_anchor] = context.requestTensor( + bbox_h_pred_anchor_dim, "bbox_h_pred_anchor", nntrainer::Initializer::NONE, + false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_x_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_x_gt] = context.requestTensor( - bbox_x_gt_dim, "bbox_x_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_x_gt_dim, "bbox_x_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_y_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_y_gt] = context.requestTensor( - bbox_y_gt_dim, "bbox_y_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_y_gt_dim, "bbox_y_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_w_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_w_gt] = context.requestTensor( - bbox_w_gt_dim, "bbox_w_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_w_gt_dim, "bbox_w_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_h_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox_h_gt] = context.requestTensor( - bbox_h_gt_dim, "bbox_h_gt", nntrainer::Tensor::Initializer::NONE, false, + bbox_h_gt_dim, "bbox_h_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim confidence_gt_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::confidence_gt] = context.requestTensor( - confidence_gt_dim, "confidence_gt", nntrainer::Tensor::Initializer::NONE, - false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + confidence_gt_dim, "confidence_gt", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim class_gt_dim(batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, class_number); wt_idx[YoloV3LossParams::class_gt] = context.requestTensor( - class_gt_dim, "class_gt", nntrainer::Tensor::Initializer::NONE, false, + class_gt_dim, "class_gt", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox_class_mask_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV3LossParams::bbox_class_mask] = - context.requestTensor(bbox_class_mask_dim, "bbox_class_mask", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV3LossParams::bbox_class_mask] = context.requestTensor( + bbox_class_mask_dim, "bbox_class_mask", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim iou_mask_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::iou_mask] = context.requestTensor( - iou_mask_dim, "iou_mask", nntrainer::Tensor::Initializer::NONE, false, + iou_mask_dim, "iou_mask", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox1_width_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); 
wt_idx[YoloV3LossParams::bbox1_width] = context.requestTensor( - bbox1_width_dim, "bbox1_width", nntrainer::Tensor::Initializer::NONE, false, + bbox1_width_dim, "bbox1_width", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim bbox1_height_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::bbox1_height] = context.requestTensor( - bbox1_height_dim, "bbox1_height", nntrainer::Tensor::Initializer::NONE, - false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + bbox1_height_dim, "bbox1_height", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim is_xy_min_max_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 4); wt_idx[YoloV3LossParams::is_xy_min_max] = context.requestTensor( - is_xy_min_max_dim, "is_xy_min_max", nntrainer::Tensor::Initializer::NONE, - false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + is_xy_min_max_dim, "is_xy_min_max", nntrainer::Initializer::NONE, false, + nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim intersection_width_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); - wt_idx[YoloV3LossParams::intersection_width] = - context.requestTensor(intersection_width_dim, "intersection_width", - nntrainer::Tensor::Initializer::NONE, false, - nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); + wt_idx[YoloV3LossParams::intersection_width] = context.requestTensor( + intersection_width_dim, "intersection_width", nntrainer::Initializer::NONE, + false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim intersection_height_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::intersection_height] = context.requestTensor(intersection_height_dim, "intersection_height", - nntrainer::Tensor::Initializer::NONE, false, + nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); nntrainer::TensorDim unions_dim( batch_size, grid_height_number * grid_width_number, NUM_ANCHOR, 1); wt_idx[YoloV3LossParams::unions] = context.requestTensor( - unions_dim, "unions", nntrainer::Tensor::Initializer::NONE, false, + unions_dim, "unions", nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::FORWARD_DERIV_LIFESPAN); } diff --git a/api/ccapi/include/tensor_api.h b/api/ccapi/include/tensor_api.h index 087d3b1f25..b4fc20cf5e 100644 --- a/api/ccapi/include/tensor_api.h +++ b/api/ccapi/include/tensor_api.h @@ -48,9 +48,10 @@ class Tensor : public nntrainer::Var_Grad { * @param needg If the tensor needs gradient * @param name Name for this tensor */ - explicit Tensor(const TensorDim &dim, - const iTensor::Initializer init = iTensor::Initializer::ZEROS, - bool ng = false, std::string name = ""){}; + explicit Tensor( + const TensorDim &dim, + const nntrainer::Initializer init = nntrainer::Initializer::ZEROS, + bool ng = false, std::string name = ""){}; /** * @brief Swap for weight diff --git a/debian/nntrainer-dev.install b/debian/nntrainer-dev.install index 4fd55b3774..73cc5f924d 100644 --- a/debian/nntrainer-dev.install +++ b/debian/nntrainer-dev.install @@ -9,7 +9,6 @@ # tensor headers /usr/include/nntrainer/memory_data.h /usr/include/nntrainer/tensor.h -/usr/include/nntrainer/tensor_v2.h /usr/include/nntrainer/tensor_base.h /usr/include/nntrainer/float_tensor.h /usr/include/nntrainer/tensor_wrap_specs.h diff --git a/nntrainer/compiler/ini_interpreter.cpp 
b/nntrainer/compiler/ini_interpreter.cpp index 1d82ef693d..146e62ed1e 100644 --- a/nntrainer/compiler/ini_interpreter.cpp +++ b/nntrainer/compiler/ini_interpreter.cpp @@ -49,8 +49,7 @@ namespace nntrainer { IniGraphInterpreter::IniGraphInterpreter( const AppContext &app_context_, std::function pathResolver_) : - app_context(app_context_), - pathResolver(pathResolver_) {} + app_context(app_context_), pathResolver(pathResolver_) {} IniGraphInterpreter::~IniGraphInterpreter() {} @@ -235,8 +234,8 @@ referenced // /** TODO #361: this needs update in model file to be of dictionary format // */ // // if (preload) { -// // layer->weight_initializer = Tensor::Initializer::FILE_INITIALIZER; -// // layer->bias_initializer = Tensor::Initializer::FILE_INITIALIZER; +// // layer->weight_initializer = Initializer::FILE_INITIALIZER; +// // layer->bias_initializer = Initializer::FILE_INITIALIZER; // // layer->initializer_file = backbone.save_path; // // } // } diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp index 2d4cfdc769..54afe6353d 100644 --- a/nntrainer/graph/network_graph.cpp +++ b/nntrainer/graph/network_graph.cpp @@ -1557,7 +1557,7 @@ void NetworkGraph::requestOptimizerVariable( std::vector dims = cb(dim); w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables( dims, w->getName(), TensorLifespan::MAX_LIFESPAN, - w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS)); + w->isGradientClipByGlobalNorm(), Initializer::ZEROS)); } } } diff --git a/nntrainer/layers/acti_func.h b/nntrainer/layers/acti_func.h index 9e43219ee5..c6c3576414 100644 --- a/nntrainer/layers/acti_func.h +++ b/nntrainer/layers/acti_func.h @@ -16,6 +16,7 @@ #define __ACTI_FUNC_H__ #ifdef __cplusplus +#include #include namespace nntrainer { diff --git a/nntrainer/layers/attention_layer.cpp b/nntrainer/layers/attention_layer.cpp index 1309214bca..eab36a9af3 100644 --- a/nntrainer/layers/attention_layer.cpp +++ b/nntrainer/layers/attention_layer.cpp @@ -65,8 +65,8 @@ void AttentionLayer::finalize(InitLayerContext &context) { auto weights_dim = query_dim; weights_dim.width(value_dim.height()); wt_idx[AttentionParams::weights] = - context.requestTensor(weights_dim, "weights", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(weights_dim, "weights", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); context.setOutputDimensions({query_dim}); diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp index 1723ac677f..d74f4395cf 100644 --- a/nntrainer/layers/bn_layer.cpp +++ b/nntrainer/layers/bn_layer.cpp @@ -117,11 +117,11 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) { * more in-place calculation) can save memory during memory optimization. */ wt_idx[BNParams::deviation] = - context.requestTensor(in_dim, "deviation", Tensor::Initializer::NONE, false, + context.requestTensor(in_dim, "deviation", Initializer::NONE, false, TensorLifespan::ITERATION_LIFESPAN); /** caches the inverse standard deviation */ wt_idx[BNParams::invstd] = - context.requestTensor(dim, "invstd", Tensor::Initializer::NONE, false, + context.requestTensor(dim, "invstd", Initializer::NONE, false, TensorLifespan::ITERATION_LIFESPAN); /** * Temporary tensor to store the full sized tensors in order to allow batch @@ -130,20 +130,19 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) { * as the output of this layer need not be stored all the time. 
*/ wt_idx[BNParams::t_full] = - context.requestTensor(in_dim, "tensor_full", Tensor::Initializer::NONE, - false, TensorLifespan::CALC_DERIV_LIFESPAN); + context.requestTensor(in_dim, "tensor_full", Initializer::NONE, false, + TensorLifespan::CALC_DERIV_LIFESPAN); /** * caches variance + epsilon as well. */ - wt_idx[BNParams::cvar] = - context.requestTensor(dim, "cvar", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[BNParams::cvar] = context.requestTensor( + dim, "cvar", Initializer::NONE, false, TensorLifespan::ITERATION_LIFESPAN); /** * Temporary tensor to store the reduced tensors along the axes_to_reduce. */ wt_idx[BNParams::t_reduced] = - context.requestTensor(dim, "tensor_reduced", Tensor::Initializer::NONE, - false, TensorLifespan::FORWARD_DERIV_LIFESPAN); + context.requestTensor(dim, "tensor_reduced", Initializer::NONE, false, + TensorLifespan::FORWARD_DERIV_LIFESPAN); } void BatchNormalizationLayer::setProperty( diff --git a/nntrainer/layers/centroid_knn.cpp b/nntrainer/layers/centroid_knn.cpp index 611dca1d97..1ccfa15c26 100644 --- a/nntrainer/layers/centroid_knn.cpp +++ b/nntrainer/layers/centroid_knn.cpp @@ -62,11 +62,11 @@ void CentroidKNN::finalize(nntrainer::InitLayerContext &context) { auto samples_seen = nntrainer::TensorDim({num_class}); weight_idx[KNNParams::map] = context.requestWeight( - map_dim, nntrainer::Tensor::Initializer::ZEROS, - nntrainer::WeightRegularizer::NONE, 1.0f, 0.0f, "map", false); + map_dim, nntrainer::Initializer::ZEROS, nntrainer::WeightRegularizer::NONE, + 1.0f, 0.0f, "map", false); weight_idx[KNNParams::num_samples] = context.requestWeight( - samples_seen, nntrainer::Tensor::Initializer::ZEROS, + samples_seen, nntrainer::Initializer::ZEROS, nntrainer::WeightRegularizer::NONE, 1.0f, 0.0f, "num_samples", false); } diff --git a/nntrainer/layers/cl_layers/fc_layer_cl.cpp b/nntrainer/layers/cl_layers/fc_layer_cl.cpp index 890450bebe..0e3cb178f0 100644 --- a/nntrainer/layers/cl_layers/fc_layer_cl.cpp +++ b/nntrainer/layers/cl_layers/fc_layer_cl.cpp @@ -124,7 +124,8 @@ void FullyConnectedLayerCl::forwarding(RunLayerContext &context, unsigned int axis = context.getWeightObject(weight_idx[FCParams::weight]).getOutputAxis(); - weight.dequantize(weight_, axis); + // Dequantize is currently disabled + // weight.dequantize(weight_, axis); dotCl(input_, weight_, hidden_, context); } else { diff --git a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h index cd2fc9dea9..4b34729409 100644 --- a/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h +++ b/nntrainer/layers/cl_layers/rmsnorm_layer_cl.h @@ -24,7 +24,7 @@ namespace nntrainer { -namespace props{ +namespace props { /** * @brief RMS_NORM_GAMMA_INIT_GPU Initialization Enumeration Information @@ -36,15 +36,14 @@ class RMS_NORM_GAMMA_INIT_GPU final /** * @brief Construct a RMS_NORM_GAMMA_INIT object */ - RMS_NORM_GAMMA_INIT_GPU(::nntrainer::Tensor::Initializer value = - ::nntrainer::Tensor::Initializer::ONES) { + RMS_NORM_GAMMA_INIT_GPU( + ::nntrainer::Initializer value = ::nntrainer::Initializer::ONES) { set(value); }; using prop_tag = enum_class_prop_tag; static constexpr const char *key = "gamma_initializer"; }; -}; - +}; // namespace props /** * @class RMSNormLayer @@ -111,9 +110,7 @@ class RMSNormLayerCl : public LayerImpl { /** * @copydoc Layer::getType() */ - const std::string getType() const override { - return RMSNormLayerCl::type; - }; + const std::string getType() const override { return RMSNormLayerCl::type; }; static 
opencl::Kernel kernel_rmsnorm; static opencl::Kernel kernel_rmsnorm_fp16; @@ -127,10 +124,8 @@ class RMSNormLayerCl : public LayerImpl { * @param[in] RunLayerContext reference */ - - void rmsnormProcess(Tensor const &input, Tensor &result, Tensor const &gamma, const float epsilon, - RunLayerContext &context); - + void rmsnormProcess(Tensor const &input, Tensor &result, Tensor const &gamma, + const float epsilon, RunLayerContext &context); /** * @brief Process data and dimensions for FP16 rms norm operation @@ -141,15 +136,13 @@ class RMSNormLayerCl : public LayerImpl { * @param[in] RunLayerContext reference */ - - void rmsnormProcess_fp16(Tensor const &input, Tensor &result, Tensor const &gamma, const float epsilon, - RunLayerContext &context); + void rmsnormProcess_fp16(Tensor const &input, Tensor &result, + Tensor const &gamma, const float epsilon, + RunLayerContext &context); /** * @copydoc Layer::supportBackwarding() */ - bool supportBackwarding() const override { - return false; - } + bool supportBackwarding() const override { return false; } /** * @copydoc Layer::setProperty(const std::vector &values) @@ -167,4 +160,3 @@ class RMSNormLayerCl : public LayerImpl { #endif /* __cplusplus */ #endif /* __RMSNORM_LAYER_CL__ */ - diff --git a/nntrainer/layers/common_properties.cpp b/nntrainer/layers/common_properties.cpp index 737d47609f..755f4407c6 100644 --- a/nntrainer/layers/common_properties.cpp +++ b/nntrainer/layers/common_properties.cpp @@ -306,21 +306,17 @@ RecurrentActivation::RecurrentActivation(ActivationTypeInfo::Enum value) { set(value); }; -WeightInitializer::WeightInitializer(Tensor::Initializer value) { set(value); } +WeightInitializer::WeightInitializer(Initializer value) { set(value); } -BiasInitializer::BiasInitializer(Tensor::Initializer value) { set(value); } +BiasInitializer::BiasInitializer(Initializer value) { set(value); } -BNPARAMS_MU_INIT::BNPARAMS_MU_INIT(Tensor::Initializer value) { set(value); } +BNPARAMS_MU_INIT::BNPARAMS_MU_INIT(Initializer value) { set(value); } -BNPARAMS_VAR_INIT::BNPARAMS_VAR_INIT(Tensor::Initializer value) { set(value); } +BNPARAMS_VAR_INIT::BNPARAMS_VAR_INIT(Initializer value) { set(value); } -BNPARAMS_GAMMA_INIT::BNPARAMS_GAMMA_INIT(Tensor::Initializer value) { - set(value); -} +BNPARAMS_GAMMA_INIT::BNPARAMS_GAMMA_INIT(Initializer value) { set(value); } -BNPARAMS_BETA_INIT::BNPARAMS_BETA_INIT(Tensor::Initializer value) { - set(value); -} +BNPARAMS_BETA_INIT::BNPARAMS_BETA_INIT(Initializer value) { set(value); } BasicRegularizer::BasicRegularizer(nntrainer::WeightRegularizer value) { set(value); diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h index c5a514b637..2591ab454b 100644 --- a/nntrainer/layers/common_properties.h +++ b/nntrainer/layers/common_properties.h @@ -969,7 +969,7 @@ class RecurrentActivation final : public EnumProperty { * @brief Enumeration of tensor initialization type */ struct InitializerInfo { - using Enum = Tensor::Initializer; + using Enum = Initializer; static constexpr std::initializer_list EnumList = { Enum::ZEROS, Enum::ONES, Enum::LECUN_NORMAL, Enum::LECUN_UNIFORM, Enum::XAVIER_NORMAL, Enum::XAVIER_UNIFORM, @@ -990,8 +990,7 @@ class WeightInitializer final : public EnumProperty { /** * @brief Construct a WeightInitializer object */ - WeightInitializer( - Tensor::Initializer value = Tensor::Initializer::XAVIER_UNIFORM); + WeightInitializer(Initializer value = Initializer::XAVIER_UNIFORM); using prop_tag = enum_class_prop_tag; static constexpr const char *key = 
"weight_initializer"; }; @@ -1005,7 +1004,7 @@ class BiasInitializer final : public EnumProperty { /** * @brief Construct a BiasInitializer object */ - BiasInitializer(Tensor::Initializer value = Tensor::Initializer::ZEROS); + BiasInitializer(Initializer value = Initializer::ZEROS); using prop_tag = enum_class_prop_tag; static constexpr const char *key = "bias_initializer"; }; @@ -1019,7 +1018,7 @@ class BNPARAMS_MU_INIT final : public EnumProperty { /** * @brief Construct a BNPARAMS_MU_INIT object */ - BNPARAMS_MU_INIT(Tensor::Initializer value = Tensor::Initializer::ZEROS); + BNPARAMS_MU_INIT(Initializer value = Initializer::ZEROS); using prop_tag = enum_class_prop_tag; static constexpr const char *key = "moving_mean_initializer"; }; @@ -1033,7 +1032,7 @@ class BNPARAMS_VAR_INIT final : public EnumProperty { /** * @brief Construct a BNPARAMS_VAR_INIT object */ - BNPARAMS_VAR_INIT(Tensor::Initializer value = Tensor::Initializer::ONES); + BNPARAMS_VAR_INIT(Initializer value = Initializer::ONES); using prop_tag = enum_class_prop_tag; static constexpr const char *key = "moving_variance_initializer"; }; @@ -1047,7 +1046,7 @@ class BNPARAMS_GAMMA_INIT final : public EnumProperty { /** * @brief Construct a BNPARAMS_GAMMA_INIT object */ - BNPARAMS_GAMMA_INIT(Tensor::Initializer value = Tensor::Initializer::ONES); + BNPARAMS_GAMMA_INIT(Initializer value = Initializer::ONES); using prop_tag = enum_class_prop_tag; static constexpr const char *key = "gamma_initializer"; }; @@ -1061,7 +1060,7 @@ class BNPARAMS_BETA_INIT final : public EnumProperty { /** * @brief Construct a BNPARAMS_BETA_INIT object */ - BNPARAMS_BETA_INIT(Tensor::Initializer value = Tensor::Initializer::ZEROS); + BNPARAMS_BETA_INIT(Initializer value = Initializer::ZEROS); using prop_tag = enum_class_prop_tag; static constexpr const char *key = "beta_initializer"; }; diff --git a/nntrainer/layers/dropout.cpp b/nntrainer/layers/dropout.cpp index c00c31d10b..63307345b0 100644 --- a/nntrainer/layers/dropout.cpp +++ b/nntrainer/layers/dropout.cpp @@ -28,9 +28,8 @@ void DropOutLayer::finalize(InitLayerContext &context) { mask_idx.reserve(input_dims.size()); for (auto &t : input_dims) { - mask_idx.push_back( - context.requestTensor(t, "Mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN)); + mask_idx.push_back(context.requestTensor( + t, "Mask", Initializer::NONE, false, TensorLifespan::ITERATION_LIFESPAN)); } } diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp index 436a936439..67a41f50ed 100644 --- a/nntrainer/layers/fc_layer.cpp +++ b/nntrainer/layers/fc_layer.cpp @@ -136,20 +136,20 @@ void FullyConnectedLayer::finalize(InitLayerContext &context) { is_nchw ? 
0b0001 : 0b0100); lora_idx[LORAParams::loraA] = context.requestWeight( - loraA_dim, Tensor::Initializer::ZEROS, weight_regularizer, + loraA_dim, Initializer::ZEROS, weight_regularizer, weight_regularizer_constant, weight_decay, "loraA", true); lora_idx[LORAParams::loraB] = context.requestWeight( - loraB_dim, Tensor::Initializer::LECUN_NORMAL, weight_regularizer, + loraB_dim, Initializer::LECUN_NORMAL, weight_regularizer, weight_regularizer_constant, weight_decay, "loraB", true); - lora_idx[LORAParams::loraTmp] = context.requestTensor( - loraTmp_dim, "hidden_tmp_lora", Tensor::Initializer::NONE, true, - TensorLifespan::FORWARD_DERIV_LIFESPAN); + lora_idx[LORAParams::loraTmp] = + context.requestTensor(loraTmp_dim, "hidden_tmp_lora", Initializer::NONE, + true, TensorLifespan::FORWARD_DERIV_LIFESPAN); lora_idx[LORAParams::loraOut] = - context.requestTensor(bias_dim, "hidden_lora", Tensor::Initializer::NONE, - true, TensorLifespan::FORWARD_FUNC_LIFESPAN); + context.requestTensor(bias_dim, "hidden_lora", Initializer::NONE, true, + TensorLifespan::FORWARD_FUNC_LIFESPAN); } } @@ -181,7 +181,7 @@ void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) { unsigned int axis = context.getWeightObject(weight_idx[FCParams::weight]).getOutputAxis(); - weight.dequantize(weight_, axis); + // weight.dequantize(weight_, axis); input_.dot(weight_, hidden_, false, false); } else { input_.dot(weight, hidden_, false, false); diff --git a/nntrainer/layers/gru.cpp b/nntrainer/layers/gru.cpp index 1b90247b9a..f9ec829f72 100644 --- a/nntrainer/layers/gru.cpp +++ b/nntrainer/layers/gru.cpp @@ -64,9 +64,9 @@ GRULayer::GRULayer() : } void GRULayer::finalize(InitLayerContext &context) { - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props).get(); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props).get(); const WeightRegularizer weight_regularizer = std::get(*layer_impl_props).get(); @@ -148,27 +148,27 @@ void GRULayer::finalize(InitLayerContext &context) { // hidden_state_dim = [ batch, 1, max_timestep, unit ] TensorDim hidden_state_dim(batch_size, 1, max_timestep, unit); - wt_idx[GRUParams::hidden_state] = context.requestTensor( - hidden_state_dim, "hidden_state", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[GRUParams::hidden_state] = + context.requestTensor(hidden_state_dim, "hidden_state", Initializer::NONE, + true, TensorLifespan::ITERATION_LIFESPAN); // zrg_dim = [ batch, 1, max_timestep, NUM_GATE * unit ] TensorDim zrg_dim(batch_size, 1, max_timestep, NUM_GATE * unit); wt_idx[GRUParams::zrg] = - context.requestTensor(zrg_dim, "zrg", Tensor::Initializer::NONE, true, + context.requestTensor(zrg_dim, "zrg", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); // h_prev_dim = [ batch, 1, 1, unit ] TensorDim h_prev_dim = TensorDim({batch_size, 1, 1, unit}); wt_idx[GRUParams::h_prev] = - context.requestTensor(h_prev_dim, "h_prev", Tensor::Initializer::NONE, - false, TensorLifespan::FORWARD_FUNC_LIFESPAN); + context.requestTensor(h_prev_dim, "h_prev", Initializer::NONE, false, + TensorLifespan::FORWARD_FUNC_LIFESPAN); if (dropout_rate > epsilon) { TensorDim dropout_mask_dim(batch_size, 1, max_timestep, unit); - wt_idx[GRUParams::dropout_mask] = context.requestTensor( - output_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[GRUParams::dropout_mask] = + 
context.requestTensor(output_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } acti_func.setActiFunc(hidden_state_activation_type); diff --git a/nntrainer/layers/grucell.cpp b/nntrainer/layers/grucell.cpp index 57b840e482..e260bd898a 100644 --- a/nntrainer/layers/grucell.cpp +++ b/nntrainer/layers/grucell.cpp @@ -276,9 +276,9 @@ GRUCellLayer::GRUCellLayer() : } void GRUCellLayer::finalize(InitLayerContext &context) { - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props).get(); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props).get(); const WeightRegularizer weight_regularizer = std::get(*layer_impl_props).get(); @@ -368,15 +368,15 @@ void GRUCellLayer::finalize(InitLayerContext &context) { // zrg_dim = [ batch_size, 1, 1, NUM_GATE * unit ] TensorDim zrg_dim(batch_size, 1, 1, NUM_GATE * unit); wt_idx[GRUCellParams::zrg] = - context.requestTensor(zrg_dim, "zrg", Tensor::Initializer::NONE, true, + context.requestTensor(zrg_dim, "zrg", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); if (dropout_rate > epsilon) { // dropout_mask_dim = [ batch_size, 1, 1, unit ] TensorDim dropout_mask_dim(batch_size, 1, 1, unit); - wt_idx[GRUCellParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[GRUCellParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } acti_func.setActiFunc(hidden_state_activation_type); diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp index d71221c352..53951d4f69 100644 --- a/nntrainer/layers/layer_context.cpp +++ b/nntrainer/layers/layer_context.cpp @@ -244,7 +244,7 @@ const Tensor &RunLayerContext::getOutput(unsigned int idx) const { */ const Tensor RunLayerContext::getOutputGrad(unsigned int idx) const { if (!outputs[idx]->hasGradient()) { - return Tensor(outputs[idx]->getDim(), true, Tensor::Initializer::ZEROS); + return Tensor(outputs[idx]->getDim(), true, Initializer::ZEROS); } return const_cast(this)->getOutputGradUnsafe(idx); } diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h index b8b8ffccd8..993e98fd01 100644 --- a/nntrainer/layers/layer_context.h +++ b/nntrainer/layers/layer_context.h @@ -184,8 +184,7 @@ class InitLayerContext { * @todo Consider providing a guarantee that the returned indices will always * start from 0 and will always be incremental. 
*/ - unsigned int requestWeight(const TensorDim &dim, - const Tensor::Initializer init, + unsigned int requestWeight(const TensorDim &dim, const Initializer init, const WeightRegularizer reg, const float reg_const, const float decay, const std::string &name, bool trainable = true, unsigned int out_axis = 3) { @@ -231,7 +230,7 @@ class InitLayerContext { */ unsigned int requestTensor(const TensorDim &dim, const std::string &name, - const Tensor::Initializer init = Tensor::Initializer::NONE, + const Initializer init = Initializer::NONE, bool trainable = false, TensorLifespan lifespan = TensorLifespan::ITERATION_LIFESPAN, bool private_ = true) { @@ -441,7 +440,7 @@ class RunLayerContext { } unsigned int o_ax = getWeightObject(idx).getOutputAxis(); - t_w.dequantize(w, o_ax); + // t_w.dequantize(w, o_ax); return; } diff --git a/nntrainer/layers/layer_normalization_layer.cpp b/nntrainer/layers/layer_normalization_layer.cpp index 466ca93bb7..a115e82b62 100644 --- a/nntrainer/layers/layer_normalization_layer.cpp +++ b/nntrainer/layers/layer_normalization_layer.cpp @@ -98,25 +98,25 @@ void LayerNormalizationLayer::finalize(InitLayerContext &context) { /** caches the deviation -> input - avg(input) */ wt_idx[LNParams::deviation] = - context.requestTensor(input_dim, "deviation", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(input_dim, "deviation", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); /** caches variance + epsilon as well */ wt_idx[LNParams::variance] = - context.requestTensor(remain_dim, "variance", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(remain_dim, "variance", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); /** caches the inverse standard deviation */ wt_idx[LNParams::inv_std_dev] = - context.requestTensor(remain_dim, "inv_std_dev", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(remain_dim, "inv_std_dev", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); /** temporary tensor (origin size) */ - wt_idx[LNParams::temp_origin_size] = context.requestTensor( - input_dim, "temp_origin_size", Tensor::Initializer::NONE, false, - TensorLifespan::CALC_DERIV_LIFESPAN); + wt_idx[LNParams::temp_origin_size] = + context.requestTensor(input_dim, "temp_origin_size", Initializer::NONE, + false, TensorLifespan::CALC_DERIV_LIFESPAN); /** temporary tensor (normalized size) */ - wt_idx[LNParams::temp_normalized_size] = context.requestTensor( - remain_dim, "temp_normalized_size", Tensor::Initializer::NONE, false, - TensorLifespan::CALC_DERIV_LIFESPAN); + wt_idx[LNParams::temp_normalized_size] = + context.requestTensor(remain_dim, "temp_normalized_size", Initializer::NONE, + false, TensorLifespan::CALC_DERIV_LIFESPAN); } void LayerNormalizationLayer::setProperty( diff --git a/nntrainer/layers/lstm.cpp b/nntrainer/layers/lstm.cpp index d5f13a1fc5..a46b3d4c32 100644 --- a/nntrainer/layers/lstm.cpp +++ b/nntrainer/layers/lstm.cpp @@ -409,9 +409,9 @@ LSTMLayer::LSTMLayer() : } void LSTMLayer::finalize(InitLayerContext &context) { - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props).get(); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props).get(); const nntrainer::WeightRegularizer weight_regularizer = std::get(*layer_impl_props).get(); @@ -511,21 +511,21 @@ void 
LSTMLayer::finalize(InitLayerContext &context) { // hidden_state_dim : [ batch_size, 1, max_timestep, unit ] const TensorDim hidden_state_dim(batch_size, 1, max_timestep, unit, weight_tensor_type); - wt_idx[LSTMParams::hidden_state] = context.requestTensor( - hidden_state_dim, "hidden_state", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[LSTMParams::hidden_state] = + context.requestTensor(hidden_state_dim, "hidden_state", Initializer::NONE, + true, TensorLifespan::ITERATION_LIFESPAN); // cell_state_dim : [ batch_size, 1, max_timestep, unit ] const TensorDim cell_state_dim(batch_size, 1, max_timestep, unit, weight_tensor_type); - wt_idx[LSTMParams::cell_state] = context.requestTensor( - cell_state_dim, "cell_state", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[LSTMParams::cell_state] = + context.requestTensor(cell_state_dim, "cell_state", Initializer::NONE, true, + TensorLifespan::ITERATION_LIFESPAN); // ifgo_dim : [ batch_size, 1, max_timestep, NUM_GATE * unit ] const TensorDim ifgo_dim(batch_size, 1, max_timestep, NUM_GATE * unit, weight_tensor_type); wt_idx[LSTMParams::ifgo] = - context.requestTensor(ifgo_dim, "ifgo", Tensor::Initializer::NONE, true, + context.requestTensor(ifgo_dim, "ifgo", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); if (bidirectional) { @@ -579,30 +579,30 @@ void LSTMLayer::finalize(InitLayerContext &context) { const TensorDim reverse_hidden_state_dim(batch_size, 1, max_timestep, unit, weight_tensor_type); wt_idx[LSTMParams::reverse_hidden_state] = context.requestTensor( - reverse_hidden_state_dim, "reverse_hidden_state", - Tensor::Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); + reverse_hidden_state_dim, "reverse_hidden_state", Initializer::NONE, true, + TensorLifespan::ITERATION_LIFESPAN); // reverse_cell_state_dim : [ batch_size, 1, max_timestep, unit ] const TensorDim reverse_cell_state_dim(batch_size, 1, max_timestep, unit, weight_tensor_type); wt_idx[LSTMParams::reverse_cell_state] = context.requestTensor( - reverse_cell_state_dim, "reverse_cell_state", Tensor::Initializer::NONE, - true, TensorLifespan::ITERATION_LIFESPAN); + reverse_cell_state_dim, "reverse_cell_state", Initializer::NONE, true, + TensorLifespan::ITERATION_LIFESPAN); // reverse_ifgo_dim : [ batch_size, 1, max_timestep, NUM_GATE * unit ] const TensorDim reverse_ifgo_dim(batch_size, 1, max_timestep, NUM_GATE * unit, weight_tensor_type); - wt_idx[LSTMParams::reverse_ifgo] = context.requestTensor( - reverse_ifgo_dim, "reverse_ifgo", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[LSTMParams::reverse_ifgo] = + context.requestTensor(reverse_ifgo_dim, "reverse_ifgo", Initializer::NONE, + true, TensorLifespan::ITERATION_LIFESPAN); } if (dropout_rate > epsilon) { // dropout_mask_dim = [ batch, 1, time_iteration, unit ] const TensorDim dropout_mask_dim(batch_size, 1, max_timestep, unit, weight_tensor_type); - wt_idx[LSTMParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[LSTMParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } if (context.getActivationDataType() == TensorDim::DataType::FP32) { diff --git a/nntrainer/layers/lstmcell.cpp b/nntrainer/layers/lstmcell.cpp index 4a578e7d8a..a9cad5d260 100644 --- a/nntrainer/layers/lstmcell.cpp +++ 
b/nntrainer/layers/lstmcell.cpp @@ -34,9 +34,9 @@ LSTMCellLayer::LSTMCellLayer() : lstmcell_props(props::DropOutRate()) { } void LSTMCellLayer::finalize(InitLayerContext &context) { - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props).get(); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props).get(); const WeightRegularizer weight_regularizer = std::get(*layer_impl_props).get(); @@ -151,16 +151,16 @@ void LSTMCellLayer::finalize(InitLayerContext &context) { const TensorDim ifgo_dim(batch_size, 1, 1, NUM_GATE * unit, weight_tensor_type); wt_idx[LSTMCellParams::ifgo] = - context.requestTensor(ifgo_dim, "ifgo", Tensor::Initializer::NONE, true, + context.requestTensor(ifgo_dim, "ifgo", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); if (dropout_rate > epsilon) { // dropout_mask_dim = [ batch_size, 1, 1, unit ] const TensorDim dropout_mask_dim(batch_size, 1, 1, unit, weight_tensor_type); - wt_idx[LSTMCellParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[LSTMCellParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } if (context.getActivationDataType() == TensorDim::DataType::FP32) { diff --git a/nntrainer/layers/mol_attention_layer.cpp b/nntrainer/layers/mol_attention_layer.cpp index efacd24849..3d3fb77865 100644 --- a/nntrainer/layers/mol_attention_layer.cpp +++ b/nntrainer/layers/mol_attention_layer.cpp @@ -111,44 +111,44 @@ void MoLAttentionLayer::finalize(InitLayerContext &context) { TensorDim fc_out_dim = query_dim; fc_out_dim.width(fc_w_dim.width()); wt_idx[MoLAttentionParams::fc_out] = - context.requestTensor(fc_out_dim, "fc_out", Tensor::Initializer::NONE, - false, TensorLifespan::FORWARD_FUNC_LIFESPAN); + context.requestTensor(fc_out_dim, "fc_out", Initializer::NONE, false, + TensorLifespan::FORWARD_FUNC_LIFESPAN); wt_idx[MoLAttentionParams::fc_tanh] = - context.requestTensor(fc_out_dim, "fc_tanh", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(fc_out_dim, "fc_tanh", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); TensorDim fc_proj_out_dim = fc_out_dim; fc_proj_out_dim.width(fc_proj_w_dim.width()); - wt_idx[MoLAttentionParams::fc_proj_out] = context.requestTensor( - fc_proj_out_dim, "fc_proj_out", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[MoLAttentionParams::fc_proj_out] = + context.requestTensor(fc_proj_out_dim, "fc_proj_out", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); TensorDim scores_dim = TensorDim({value_dim.batch(), 1, 1, value_dim.height()}); wt_idx[MoLAttentionParams::scores] = - context.requestTensor(scores_dim, "scores", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(scores_dim, "scores", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); TensorDim prob_dim = value_dim; prob_dim.width(mol_k); wt_idx[MoLAttentionParams::prob] = - context.requestTensor(prob_dim, "prob", Tensor::Initializer::NONE, false, + context.requestTensor(prob_dim, "prob", Initializer::NONE, false, TensorLifespan::ITERATION_LIFESPAN); wt_idx[MoLAttentionParams::prob_left] = - context.requestTensor(prob_dim, "prob_left", Tensor::Initializer::NONE, - false, 
TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(prob_dim, "prob_left", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); wt_idx[MoLAttentionParams::prob_right] = - context.requestTensor(prob_dim, "prob_right", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(prob_dim, "prob_right", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); wt_idx[MoLAttentionParams::u_neg_div] = - context.requestTensor(prob_dim, "u_neg_div", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(prob_dim, "u_neg_div", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); wt_idx[MoLAttentionParams::u_pos_div] = - context.requestTensor(prob_dim, "u_pos_div", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(prob_dim, "u_pos_div", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); wt_idx[MoLAttentionParams::dstate] = - context.requestTensor(state_dim, "dstate", Tensor::Initializer::NONE, false, + context.requestTensor(state_dim, "dstate", Initializer::NONE, false, TensorLifespan::BACKWARD_FUNC_LIFESPAN); if (context.getNumRequestedOutputs() == 2) diff --git a/nntrainer/layers/multi_head_attention_layer.cpp b/nntrainer/layers/multi_head_attention_layer.cpp index 0d4b73b67f..bdb38fcf92 100644 --- a/nntrainer/layers/multi_head_attention_layer.cpp +++ b/nntrainer/layers/multi_head_attention_layer.cpp @@ -261,52 +261,52 @@ void MultiHeadAttentionLayer::finalize(InitLayerContext &context) { {batch_size, 1, query_height, num_heads * projected_query_dim_prop}, activation_type); weight_idx[AttentionParams::projected_query] = context.requestTensor( - projected_query_dim, "projected_query", Tensor::Initializer::NONE, true, + projected_query_dim, "projected_query", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); /** tensor for output of key fc */ TensorDim projected_key_dim( {batch_size, 1, key_height, num_heads * projected_key_dim_prop}, activation_type); - weight_idx[AttentionParams::projected_key] = context.requestTensor( - projected_key_dim, "projected_key", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + weight_idx[AttentionParams::projected_key] = + context.requestTensor(projected_key_dim, "projected_key", Initializer::NONE, + true, TensorLifespan::ITERATION_LIFESPAN); /** tensor for output of value fc */ TensorDim projected_value_dim( {batch_size, 1, value_height, num_heads * projected_value_dim_prop}, activation_type); weight_idx[AttentionParams::projected_value] = context.requestTensor( - projected_value_dim, "projected_value", Tensor::Initializer::NONE, true, + projected_value_dim, "projected_value", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); - weight_idx[AttentionParams::cache_key] = context.requestTensor( - projected_key_dim, "cache_key", Tensor::Initializer::NONE, true, - TensorLifespan::MAX_LIFESPAN); + weight_idx[AttentionParams::cache_key] = + context.requestTensor(projected_key_dim, "cache_key", Initializer::NONE, + true, TensorLifespan::MAX_LIFESPAN); - weight_idx[AttentionParams::cache_value] = context.requestTensor( - projected_value_dim, "cache_value", Tensor::Initializer::NONE, true, - TensorLifespan::MAX_LIFESPAN); + weight_idx[AttentionParams::cache_value] = + context.requestTensor(projected_value_dim, "cache_value", Initializer::NONE, + true, TensorLifespan::MAX_LIFESPAN); if (provide_attention_mask) { /** Intended comment for bool type mask */ 
// TensorDim attention_mask_dim( // {batch_size, num_heads, query_height, key_height}); // weight_idx[AttentionParams::attention_mask] = context.requestTensor( - // attention_mask_dim, "attention_mask", Tensor::Initializer::NONE, false, + // attention_mask_dim, "attention_mask", Initializer::NONE, false, // TensorLifespan::FORWARD_FUNC_LIFESPAN); } /** tensor for attention weight */ TensorDim attention_weight_dim( {batch_size, num_heads, query_height, key_height}, activation_type); weight_idx[AttentionParams::attention_weight] = context.requestTensor( - attention_weight_dim, "attention_weight", Tensor::Initializer::NONE, true, + attention_weight_dim, "attention_weight", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); if (dropout_rate > epsilon) { /** tensor for dropout mask */ TensorDim dropout_mask_dim( {batch_size, num_heads, query_height, key_height}, activation_type); - weight_idx[AttentionParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + weight_idx[AttentionParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } /** tensor for attention output */ @@ -314,7 +314,7 @@ void MultiHeadAttentionLayer::finalize(InitLayerContext &context) { {batch_size, 1, query_height, num_heads * projected_value_dim_prop}, activation_type); weight_idx[AttentionParams::attention_output] = context.requestTensor( - attention_output_dim, "attention_output", Tensor::Initializer::NONE, true, + attention_output_dim, "attention_output", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); TensorDim output_dim({batch_size, 1, query_height, output_shape}, @@ -570,9 +570,7 @@ void MultiHeadAttentionLayer::incremental_forwarding(RunLayerContext &context, Tensor &key = context.getInput(INOUT_INDEX::KEY); Tensor &value = context.getInput(INOUT_INDEX::VALUE); - Tensor empty_tensor; - - empty_tensor.setTensorType(value.getTensorType()); + Tensor empty_tensor("empty", value.getFormat(), value.getDataType()); Tensor &mask = provide_attention_mask ? 
context.getInput(INOUT_INDEX::MASK) : empty_tensor; diff --git a/nntrainer/layers/pooling2d_layer.cpp b/nntrainer/layers/pooling2d_layer.cpp index a68e42e8d0..52f5ee5066 100644 --- a/nntrainer/layers/pooling2d_layer.cpp +++ b/nntrainer/layers/pooling2d_layer.cpp @@ -112,13 +112,13 @@ void Pooling2DLayer::finalize(InitLayerContext &context) { */ if (pooling_type == props::PoolingTypeInfo::Enum::global_max) { pool_helper_idx = - context.requestTensor(in_dim, "helper_idx", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(in_dim, "helper_idx", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); pool_helper_size.resize(in_dim.batch() * in_dim.channel()); } else { pool_helper_idx = - context.requestTensor(out_dim, "helper_idx", Tensor::Initializer::NONE, - false, TensorLifespan::ITERATION_LIFESPAN); + context.requestTensor(out_dim, "helper_idx", Initializer::NONE, false, + TensorLifespan::ITERATION_LIFESPAN); } } diff --git a/nntrainer/layers/positional_encoding_layer.cpp b/nntrainer/layers/positional_encoding_layer.cpp index 6295bbad76..5f98b41e8e 100644 --- a/nntrainer/layers/positional_encoding_layer.cpp +++ b/nntrainer/layers/positional_encoding_layer.cpp @@ -47,7 +47,7 @@ void PositionalEncodingLayer::finalize(InitLayerContext &context) { {context.getFormat(), context.getWeightDataType()}); weight_idx[PositionalEncodingParams::positional_encoding] = context.requestTensor(pe_dim, "positional_encoding", - nntrainer::Tensor::Initializer::NONE, false, + nntrainer::Initializer::NONE, false, nntrainer::TensorLifespan::MAX_LIFESPAN); } diff --git a/nntrainer/layers/rnn.cpp b/nntrainer/layers/rnn.cpp index e5fb70a6ed..5e50a8484a 100644 --- a/nntrainer/layers/rnn.cpp +++ b/nntrainer/layers/rnn.cpp @@ -53,9 +53,9 @@ void RNNLayer::finalize(InitLayerContext &context) { std::get(*layer_impl_props); const float weight_regularizer_constant = std::get(*layer_impl_props); - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props); auto &weight_decay = std::get(*layer_impl_props); auto &bias_decay = std::get(*layer_impl_props); @@ -128,18 +128,18 @@ void RNNLayer::finalize(InitLayerContext &context) { // hidden_state_dim : [ batch_size, 1, max_timestep, unit ] const TensorDim hidden_state_dim(batch_size, 1, max_timestep, unit); - wt_idx[RNNParams::hidden_state] = context.requestTensor( - hidden_state_dim, "hidden_state", Tensor::Initializer::NONE, true, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[RNNParams::hidden_state] = + context.requestTensor(hidden_state_dim, "hidden_state", Initializer::NONE, + true, TensorLifespan::ITERATION_LIFESPAN); if (dropout_rate > epsilon) { // dropout_mask_dim = [ batch, 1, (return_sequences ? time_iteration : 1), // unit ] const TensorDim dropout_mask_dim(batch_size, 1, return_sequences ? 
max_timestep : 1, unit); - wt_idx[RNNParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[RNNParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } acti_func.setActiFunc(hidden_state_activation_type); diff --git a/nntrainer/layers/rnncell.cpp b/nntrainer/layers/rnncell.cpp index 9a2191f97d..eefbfa1b6f 100644 --- a/nntrainer/layers/rnncell.cpp +++ b/nntrainer/layers/rnncell.cpp @@ -54,9 +54,9 @@ void RNNCellLayer::finalize(InitLayerContext &context) { std::get(*layer_impl_props); const float weight_regularizer_constant = std::get(*layer_impl_props); - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props); auto &weight_decay = std::get(*layer_impl_props); auto &bias_decay = std::get(*layer_impl_props); @@ -138,9 +138,9 @@ void RNNCellLayer::finalize(InitLayerContext &context) { if (dropout_rate > epsilon) { // dropout_mask_dim = [ batch, 1, 1, unit ] const TensorDim dropout_mask_dim(batch_size, 1, 1, unit); - wt_idx[RNNCellParams::dropout_mask] = context.requestTensor( - dropout_mask_dim, "dropout_mask", Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN); + wt_idx[RNNCellParams::dropout_mask] = + context.requestTensor(dropout_mask_dim, "dropout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN); } acti_func.setActiFunc(hidden_state_activation_type); diff --git a/nntrainer/layers/time_dist.cpp b/nntrainer/layers/time_dist.cpp index 80451416df..fe2a2173b1 100644 --- a/nntrainer/layers/time_dist.cpp +++ b/nntrainer/layers/time_dist.cpp @@ -205,9 +205,8 @@ void TimeDistLayer::forwarding(RunLayerContext &context, bool training) { // TODO: This transposed Input Tensor could be resued for backwarding Tensor in = transposeTensor(input_); - Tensor out = - Tensor({ho_dim[2], 1, ho_dim[0], ho_dim[3]}, true, - Tensor::Initializer::NONE, context.getName() + ":inter_output"); + Tensor out = Tensor({ho_dim[2], 1, ho_dim[0], ho_dim[3]}, true, + Initializer::NONE, context.getName() + ":inter_output"); TensorDim i_dim = in_dim; i_dim.channel(1); @@ -223,8 +222,8 @@ void TimeDistLayer::forwarding(RunLayerContext &context, bool training) { h_g = transposeTensor(hidden_g); } - Var_Grad in_var(i_dim, Tensor::Initializer::NONE, false, false, "input"); - Var_Grad out_var(h_dim, Tensor::Initializer::NONE, + Var_Grad in_var(i_dim, Initializer::NONE, false, false, "input"); + Var_Grad out_var(h_dim, Initializer::NONE, dist_layer->requireLabel() && context.isLabelAvailable(SINGLE_INOUT_IDX), false, "output"); @@ -280,8 +279,8 @@ void TimeDistLayer::calcDerivative(RunLayerContext &context) { TensorDim r_dim = {ret_dim[2], 1, 1, ret_dim[3]}; TensorDim d_dim = {der_dim[2], 1, 1, der_dim[3]}; - Var_Grad in_var(r_dim, Tensor::Initializer::NONE, true, false, "input"); - Var_Grad out_var(d_dim, Tensor::Initializer::NONE, true, false, "output"); + Var_Grad in_var(r_dim, Initializer::NONE, true, false, "input"); + Var_Grad out_var(d_dim, Initializer::NONE, true, false, "output"); fillWeightsFromContext(context); fillTensorsFromContext(context); @@ -346,8 +345,8 @@ void TimeDistLayer::calcGradient(RunLayerContext &context) { Tensor d_iter = derivative_.getSharedDataTensor( d_dim, i * d_dim.batch() * 
d_dim.width(), true, derivative_.getName()); - Var_Grad in_var(i_dim, Tensor::Initializer::NONE, true, false, "input"); - Var_Grad out_var(d_dim, Tensor::Initializer::NONE, true, false, "output"); + Var_Grad in_var(i_dim, Initializer::NONE, true, false, "input"); + Var_Grad out_var(d_dim, Initializer::NONE, true, false, "output"); in_var.initializeVariable(in_iter); out_var.initializeGradient(d_iter); @@ -388,8 +387,8 @@ void TimeDistLayer::setBatch(RunLayerContext &context, unsigned int batch) { TensorDim i_dim = {in_dim[2], 1, 1, in_dim[3]}; TensorDim o_dim = {out_dim[2], 1, 1, out_dim[3]}; - Var_Grad in_var(i_dim, Tensor::Initializer::NONE, true, false, "input"); - Var_Grad out_var(o_dim, Tensor::Initializer::NONE, true, false, "output"); + Var_Grad in_var(i_dim, Initializer::NONE, true, false, "input"); + Var_Grad out_var(o_dim, Initializer::NONE, true, false, "output"); fillWeightsFromContext(context); fillTensorsFromContext(context); diff --git a/nntrainer/layers/zoneout_lstmcell.cpp b/nntrainer/layers/zoneout_lstmcell.cpp index 419a02e17f..20976f8b3c 100644 --- a/nntrainer/layers/zoneout_lstmcell.cpp +++ b/nntrainer/layers/zoneout_lstmcell.cpp @@ -58,9 +58,9 @@ bool ZoneoutLSTMCellLayer::CellStateZoneOutRate::isValid( } void ZoneoutLSTMCellLayer::finalize(InitLayerContext &context) { - const Tensor::Initializer weight_initializer = + const Initializer weight_initializer = std::get(*layer_impl_props).get(); - const Tensor::Initializer bias_initializer = + const Initializer bias_initializer = std::get(*layer_impl_props).get(); const WeightRegularizer weight_regularizer = std::get(*layer_impl_props).get(); @@ -187,7 +187,7 @@ void ZoneoutLSTMCellLayer::finalize(InitLayerContext &context) { * ] */ const TensorDim ifgo_dim(batch_size, 1, 1, NUM_GATE * unit); wt_idx[ZoneoutLSTMParams::ifgo] = - context.requestTensor(ifgo_dim, "ifgo", Tensor::Initializer::NONE, true, + context.requestTensor(ifgo_dim, "ifgo", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); // hidden_state_zoneout_mask_dim = [ max_timestep @@ -196,15 +196,14 @@ void ZoneoutLSTMCellLayer::finalize(InitLayerContext &context) { unit); if (test) { wt_idx[ZoneoutLSTMParams::hidden_state_zoneout_mask] = - context.requestWeight(hidden_state_zoneout_mask_dim, - Tensor::Initializer::NONE, WeightRegularizer::NONE, - 1.0f, 0.0f, "hidden_state_zoneout_mask", false); + context.requestWeight(hidden_state_zoneout_mask_dim, Initializer::NONE, + WeightRegularizer::NONE, 1.0f, 0.0f, + "hidden_state_zoneout_mask", false); } else { wt_idx[ZoneoutLSTMParams::hidden_state_zoneout_mask] = context.requestTensor(hidden_state_zoneout_mask_dim, - "hidden_state_zoneout_mask", - Tensor::Initializer::NONE, false, - TensorLifespan::ITERATION_LIFESPAN, false); + "hidden_state_zoneout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN, false); } // cell_state_zoneout_mask_dim = [ max_timestep * batch_size, 1, 1, unit ] @@ -212,19 +211,18 @@ void ZoneoutLSTMCellLayer::finalize(InitLayerContext &context) { unit); if (test) { wt_idx[ZoneoutLSTMParams::cell_state_zoneout_mask] = context.requestWeight( - cell_state_zoneout_mask_dim, Tensor::Initializer::NONE, - WeightRegularizer::NONE, 1.0f, 0.0f, "cell_state_zoneout_mask", false); + cell_state_zoneout_mask_dim, Initializer::NONE, WeightRegularizer::NONE, + 1.0f, 0.0f, "cell_state_zoneout_mask", false); } else { wt_idx[ZoneoutLSTMParams::cell_state_zoneout_mask] = context.requestTensor( - cell_state_zoneout_mask_dim, "cell_state_zoneout_mask", - Tensor::Initializer::NONE, false, 
TensorLifespan::ITERATION_LIFESPAN, - false); + cell_state_zoneout_mask_dim, "cell_state_zoneout_mask", Initializer::NONE, + false, TensorLifespan::ITERATION_LIFESPAN, false); } // lstm_cell_state_dim = [ batch_size, 1, 1, unit ] const TensorDim lstm_cell_state_dim(batch_size, 1, 1, unit); wt_idx[ZoneoutLSTMParams::lstm_cell_state] = context.requestTensor( - lstm_cell_state_dim, "lstm_cell_state", Tensor::Initializer::NONE, true, + lstm_cell_state_dim, "lstm_cell_state", Initializer::NONE, true, TensorLifespan::ITERATION_LIFESPAN); acti_func.setActiFunc(hidden_state_activation_type); @@ -441,7 +439,9 @@ void ZoneoutLSTMCellLayer::calcGradient(RunLayerContext &context) { Tensor hidden_state_zoneout_mask = hs_zoneout_mask.getBatchSlice(timestep, 1); hidden_state_zoneout_mask.reshape({batch_size, 1, 1, unit}); Tensor prev_hidden_state_zoneout_mask = hidden_state_zoneout_mask.apply( - (std::function) [epsilon = epsilon](float x) { return x < epsilon; }); + (std::function)[epsilon = epsilon](float x) { + return x < epsilon; + }); d_hidden_state.multiply(prev_hidden_state_zoneout_mask, d_prev_hidden_state_residual); @@ -456,7 +456,9 @@ void ZoneoutLSTMCellLayer::calcGradient(RunLayerContext &context) { Tensor cell_state_zoneout_mask = cs_zoneout_mask.getBatchSlice(timestep, 1); cell_state_zoneout_mask.reshape({batch_size, 1, 1, unit}); Tensor prev_cell_state_zoneout_mask = cell_state_zoneout_mask.apply( - (std::function) [epsilon = epsilon](float x) { return x < epsilon; }); + (std::function)[epsilon = epsilon](float x) { + return x < epsilon; + }); d_cell_state.multiply(prev_cell_state_zoneout_mask, d_prev_cell_state_residual); diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp index 915d1b9466..c0781de954 100644 --- a/nntrainer/tensor/float_tensor.cpp +++ b/nntrainer/tensor/float_tensor.cpp @@ -14,6 +14,7 @@ #include #include +#include #include namespace nntrainer { @@ -36,57 +37,6 @@ FloatTensor::FloatTensor(const TensorDim &d, const void *buf) : } } -FloatTensor::FloatTensor( - std::vector>>> const &d, - Tformat fm) { - if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { - throw std::out_of_range( - "[Tensor] trying to initialize FloatTensor from empty vector"); - } - - dim.setTensorDim(0, d.size()); - if (fm == Tformat::NCHW) { - dim.setTensorDim(1, d[0].size()); - dim.setTensorDim(2, d[0][0].size()); - dim.setTensorDim(3, d[0][0][0].size()); - } else { - dim.setTensorDim(2, d[0].size()); - dim.setTensorDim(3, d[0][0].size()); - dim.setTensorDim(1, d[0][0][0].size()); - } - - dim.setTensorType({fm, Tdatatype::FP32}); - - strides = dim.computeStrides(); - contiguous = true; - initializer = Initializer::NONE; - - MemoryData *mem_data = - new MemoryData((void *)(new float[dim.getDataLen()]())); - data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { - delete[] mem_data->getAddr(); - }); - - offset = 0; - - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. 
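
The zoneout_lstmcell.cpp hunks above only re-wrap the lambda that Tensor::apply uses to derive the previous-state mask; the computation is still an elementwise x < epsilon test. A small stand-alone sketch of that masking step over a plain buffer, with names that are illustrative only and not nntrainer API:

#include <algorithm>
#include <iostream>
#include <vector>

// Elementwise x < epsilon, mirroring the lambda the patch re-formats:
// entries the zoneout mask left near zero map to 1.0 in the complementary mask.
std::vector<float> prev_state_mask(const std::vector<float> &zoneout_mask,
                                   float epsilon = 1e-6f) {
  std::vector<float> out(zoneout_mask.size());
  std::transform(zoneout_mask.begin(), zoneout_mask.end(), out.begin(),
                 [epsilon](float x) { return x < epsilon ? 1.0f : 0.0f; });
  return out;
}

int main() {
  const std::vector<float> mask = {0.0f, 1.0f, 0.0f, 1.0f};
  for (float v : prev_state_mask(mask))
    std::cout << v << ' ';          // prints: 1 0 1 0
  std::cout << '\n';
  return 0;
}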
and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - if (fm == Tformat::NCHW) { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < channel(); ++j) - for (unsigned int k = 0; k < height(); ++k) - for (unsigned int l = 0; l < width(); ++l) - this->setValue(i, j, k, l, d[i][j][k][l]); - } else { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < height(); ++j) - for (unsigned int k = 0; k < width(); ++k) - for (unsigned int l = 0; l < channel(); ++l) - this->setValue(i, l, j, k, d[i][j][k][l]); - } -} - bool FloatTensor::operator==(const FloatTensor &rhs) const { const float *_data = (float *)getData(); const float *_rdata = (float *)rhs.getData(); @@ -282,9 +232,9 @@ void FloatTensor::initialize(Initializer init) { initialize(); } -TensorV2 &FloatTensor::apply(std::function f, - TensorV2 &output) const { - CREATE_V2_IF_EMPTY_DIMS(output, dim, nullptr); +Tensor &FloatTensor::apply(std::function f, + Tensor &output) const { + CREATE_IF_EMPTY_DIMS(output, dim, nullptr); if (contiguous && output.getContiguous()) { const float *data = (float *)getData(); @@ -317,9 +267,9 @@ TensorV2 &FloatTensor::apply(std::function f, return output; } -TensorV2 FloatTensor::multiply_strided(TensorV2 const &m, TensorV2 &output, - const float beta) const { - CREATE_V2_IF_EMPTY_DIMS(output, dim, nullptr); +Tensor FloatTensor::multiply_strided(Tensor const &m, Tensor &output, + const float beta) const { + CREATE_IF_EMPTY_DIMS(output, dim, nullptr); if (size() != m.size() || size() != output.size()) throw std::invalid_argument( @@ -386,28 +336,18 @@ int FloatTensor::multiply_i(float const &value) { return ML_ERROR_NONE; } -TensorV2 &FloatTensor::multiply(float const &value, TensorV2 &out) const { +Tensor &FloatTensor::multiply(float const &value, Tensor &out) const { auto f = std::bind(std::multiplies(), std::placeholders::_1, value); apply(f, out); return out; } -TensorV2 &FloatTensor::multiply(TensorV2 const &m, TensorV2 &output, - const float beta) const { - auto f = [&](const BroadcastInfoV2 &e, const float *buf, const float *m_buf, +Tensor &FloatTensor::multiply(Tensor const &m, Tensor &output, + const float beta) const { + auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, float *out_buf) { - if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1 && - std::fpclassify(beta) == FP_ZERO) { - std::transform(buf, buf + e.buffer_size, m_buf, out_buf, - std::multiplies()); - } else { - for (unsigned int i = 0; i < e.buffer_size; ++i) { - *out_buf = *buf * *m_buf + beta * *out_buf; - buf += strides[3]; - m_buf += e.strides[3]; - out_buf += output.getStrides()[3]; - } - } + ele_mul(e.buffer_size, buf, m_buf, out_buf, 1, beta, e.strides[3], + strides[3]); }; NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument) @@ -427,34 +367,24 @@ TensorV2 &FloatTensor::multiply(TensorV2 const &m, TensorV2 &output, return output; } -TensorV2 &FloatTensor::divide(float const &value, TensorV2 &output) const { +Tensor &FloatTensor::divide(float const &value, Tensor &output) const { auto f = std::bind(std::divides(), std::placeholders::_1, value); apply(f, output); return output; } -TensorV2 &FloatTensor::divide(TensorV2 const &m, TensorV2 &output) const { - auto f = [&](const BroadcastInfoV2 &e, const float *buf, const float *m_buf, +Tensor &FloatTensor::divide(Tensor const &m, Tensor &output) const { + auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, 
float *out_buf) { - if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1) { - std::transform(buf, buf + e.buffer_size, m_buf, out_buf, - std::divides()); - } else { - for (unsigned int i = 0; i < e.buffer_size; ++i) { - *out_buf = *buf / *m_buf; - buf += strides[3]; - m_buf += e.strides[3]; - out_buf += output.getStrides()[3]; - } - } + ele_div(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], strides[3]); }; apply_broadcast(m, f, output); return output; } -TensorV2 &FloatTensor::add_strided(TensorV2 const &input, TensorV2 &output, - const float beta) const { +Tensor &FloatTensor::add_strided(Tensor const &input, Tensor &output, + const float beta) const { NNTR_THROW_IF(getData() == nullptr, std::invalid_argument) << getName() << " is not allocated"; NNTR_THROW_IF(input.getData() == nullptr, std::invalid_argument) @@ -507,54 +437,68 @@ TensorV2 &FloatTensor::add_strided(TensorV2 const &input, TensorV2 &output, return output; } -TensorV2 &FloatTensor::add(float const &value, TensorV2 &output) const { +int FloatTensor::add_i(Tensor const &m, Tensor &output, float const alpha) { + auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, + float *out_buf) { + saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]); + }; + + try { + apply_broadcast(m, f, output); + } catch (std::exception &err) { + ml_loge("%s %s", typeid(err).name(), err.what()); + return ML_ERROR_INVALID_PARAMETER; + } + return ML_ERROR_NONE; +} + +int FloatTensor::add_i_partial(unsigned int len, unsigned int addr_idx, + Tensor &m, unsigned int incX, unsigned int incY, + const Tensor alphas, unsigned int alpha_idx) { + saxpy(len, alphas.getValue(alpha_idx), m.getData(), incX, + (float *)getAddress(addr_idx), incY); + + return ML_ERROR_NONE; +} + +Tensor &FloatTensor::add(float const &value, Tensor &output) const { auto f = std::bind(std::plus(), std::placeholders::_1, value); apply(f, output); return output; } -TensorV2 &FloatTensor::add(TensorV2 const &m, TensorV2 &output, - float const alpha) const { - auto f = [&](const BroadcastInfoV2 &e, const float *buf, const float *m_buf, +Tensor &FloatTensor::add(Tensor const &m, Tensor &output, + float const alpha) const { + auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, float *out_buf) { - if (e.strides[3] == 1 && strides[3] == 1 && strides[3] == 1 && - std::fpclassify(alpha) == FP_ZERO) { - std::transform(buf, buf + e.buffer_size, m_buf, out_buf, - std::plus()); - } else { - for (unsigned int i = 0; i < e.buffer_size; ++i) { - *out_buf = *buf + *m_buf * alpha; - buf += strides[3]; - m_buf += e.strides[3]; - out_buf += strides[3]; - } - } + ele_add(e.buffer_size, buf, m_buf, out_buf, alpha, 0, e.strides[3], + strides[3]); }; apply_broadcast(m, f, output); return output; } -TensorV2 &FloatTensor::subtract(float const &value, TensorV2 &output) const { +Tensor &FloatTensor::subtract(float const &value, Tensor &output) const { auto f = std::bind(std::minus(), std::placeholders::_1, value); apply(f, output); return output; } -void FloatTensor::sum_by_batch(TensorV2 &output) const { +void FloatTensor::sum_by_batch(Tensor &output) const { size_t feat_len = dim.getFeatureLen(); size_t batch = dim.batch(); const float *data = (float *)getData(); float *out_data = output.getData(); - TensorV2 ones(1, 1, 1, feat_len, this->getFormat()); + Tensor ones(1, 1, 1, feat_len, this->getFormat()); ones.setValue(1.0); sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len, ones.getData(), 1, 0.0, 
out_data, 1); } -TensorV2 &FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha, - float beta) const { +Tensor &FloatTensor::sum(unsigned int axis, Tensor &output, float alpha, + float beta) const { const float *data = (float *)getData(); NNTR_THROW_IF(!contiguous, std::invalid_argument) @@ -564,35 +508,35 @@ TensorV2 &FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha, throw std::out_of_range("Error: axis is invalid"); if (dim.getDim()[axis] == 1 and alpha == 1.0 and !beta) { - CREATE_V2_IF_EMPTY_DIMS(output, dim); + CREATE_IF_EMPTY_DIMS(output, dim); scopy(size(), (float *)getData(), 1, output.getData(), 1); return output; } switch (axis) { case 0: { - CREATE_V2_IF_EMPTY_DIMS(output, 1, dim.channel(), dim.height(), dim.width(), - getTensorType()); + CREATE_IF_EMPTY_DIMS(output, 1, dim.channel(), dim.height(), dim.width(), + getTensorType()); size_t feat_len = dim.getFeatureLen(); size_t batch = dim.batch(); - TensorV2 ones(1, 1, 1, batch, getTensorType()); + Tensor ones(1, 1, 1, batch, getTensorType()); ones.setValue(alpha); sgemv(CblasRowMajor, CblasTrans, batch, feat_len, 1, data, feat_len, ones.getData(), 1, beta, output.getData(), 1); } break; case 1: { - CREATE_V2_IF_EMPTY_DIMS(output, dim[0], 1, dim[2], dim[3], getTensorType()); + CREATE_IF_EMPTY_DIMS(output, dim[0], 1, dim[2], dim[3], getTensorType()); if (this->getFormat() == Tformat::NHWC) { unsigned int feat_len = output.getDim().getDataLen(); unsigned int t_axis = dim[1]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); sgemv(CblasRowMajor, CblasNoTrans, feat_len, t_axis, 1, data, t_axis, ones.getData(), 1, beta, output.getData(), 1); } else { unsigned int feat_len = dim[2] * dim[3]; unsigned int t_axis = dim[1]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); float *rdata = output.getData(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -603,11 +547,11 @@ TensorV2 &FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } } break; case 2: { - CREATE_V2_IF_EMPTY_DIMS(output, dim[0], dim[1], 1, dim[3], getTensorType()); + CREATE_IF_EMPTY_DIMS(output, dim[0], dim[1], 1, dim[3], getTensorType()); if (this->getFormat() == Tformat::NHWC) { unsigned int feat_len = dim[1] * dim[3]; unsigned int t_axis = dim[2]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); float *rdata = output.getData(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -618,7 +562,7 @@ TensorV2 &FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } else { unsigned int t_3 = dim[3]; unsigned int t_axis = dim[2]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); if (dim.getStorageOrder() == TStorageOrder::ROW_MAJOR) { @@ -641,12 +585,12 @@ TensorV2 &FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } } break; case 3: { - CREATE_V2_IF_EMPTY_DIMS(output, dim[0], dim[1], dim[2], 1, - this->getTensorType()); + CREATE_IF_EMPTY_DIMS(output, dim[0], dim[1], dim[2], 1, + this->getTensorType()); if (this->getFormat() == Tformat::NHWC) { unsigned int t_3 = dim[1]; unsigned int t_axis = dim[3]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); float *rdata = output.getData(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -660,7 +604,7 @@ TensorV2 
&FloatTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } else { unsigned int m = output.getDim().getDataLen(); unsigned int n = dim[3]; - TensorV2 ones(1, 1, 1, n, getTensorType()); + Tensor ones(1, 1, 1, n, getTensorType()); ones.setValue(alpha); if (dim.getStorageOrder() == TStorageOrder::ROW_MAJOR) { @@ -692,19 +636,19 @@ float FloatTensor::l2norm() const { return snrm2(size(), (float *)getData(), 1); } -TensorV2 &FloatTensor::pow(float exponent, TensorV2 &output) const { +Tensor &FloatTensor::pow(float exponent, Tensor &output) const { auto f = [exponent](float in) { return powf(in, exponent); }; apply(f, output); return output; } -TensorV2 &FloatTensor::erf(TensorV2 &output) const { +Tensor &FloatTensor::erf(Tensor &output) const { auto f = [](float in) { return std::erf(in); }; apply(f, output); return output; } -void FloatTensor::sin(TensorV2 &out, float alpha) { +void FloatTensor::sin(Tensor &out, float alpha) { if (!contiguous) { auto f = [alpha](float val) -> float { return std::sin(alpha * val); }; apply(f, out); @@ -713,7 +657,7 @@ void FloatTensor::sin(TensorV2 &out, float alpha) { } } -void FloatTensor::cos(TensorV2 &out, float alpha) { +void FloatTensor::cos(Tensor &out, float alpha) { if (!contiguous) { auto f = [alpha](float val) -> float { return std::cos(alpha * val); }; apply(f, out); @@ -722,8 +666,16 @@ void FloatTensor::cos(TensorV2 &out, float alpha) { } } -TensorV2 &FloatTensor::dot(TensorV2 const &input, TensorV2 &output, bool trans, - bool trans_in, float beta) const { +void FloatTensor::inv_sqrt(Tensor &out) { + if (!contiguous) { + apply([](float val) -> float { return 1 / std::sqrt(val); }, out); + } else { + inv_sqrt_inplace(out.size(), out.getData()); + } +} + +Tensor &FloatTensor::dot(Tensor const &input, Tensor &output, bool trans, + bool trans_in, float beta) const { // Comment out with intension to support the calculation wrt. batch and height // direction. 
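
The multiply/divide/add hunks above swap the hand-rolled broadcast loops for ele_mul, ele_div and ele_add calls that take the buffer size, an alpha/beta pair and the two innermost strides. The exact helper signature is inferred from these call sites, so treat the following as a sketch of the intended semantics (out = alpha * lhs * rhs + beta * out, walked element by element with per-operand strides), not the real blas_interface declaration:

#include <cstddef>
#include <iostream>
#include <vector>

// Sketch of an ele_mul-style kernel: strided, fused multiply with alpha
// scaling and beta accumulation, as the patched call sites appear to assume.
void ele_mul_sketch(std::size_t n, const float *lhs, const float *rhs,
                    float *out, float alpha = 1.0f, float beta = 0.0f,
                    std::size_t rhs_stride = 1, std::size_t lhs_stride = 1) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = alpha * lhs[i * lhs_stride] * rhs[i * rhs_stride] + beta * out[i];
  }
}

int main() {
  std::vector<float> a = {1, 2, 3, 4};
  std::vector<float> b = {10, 20, 30, 40};
  std::vector<float> out = {1, 1, 1, 1};
  // beta = 0.5 keeps half of the previous output, mirroring the beta that the
  // Tensor::multiply(m, output, beta) overload forwards to the helper.
  ele_mul_sketch(a.size(), a.data(), b.data(), out.data(), 1.0f, 0.5f);
  for (float v : out)
    std::cout << v << ' ';          // prints: 10.5 40.5 90.5 160.5
  std::cout << '\n';
  return 0;
}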
It supposes to have this->dim as [ BxCxH,W ] and input.dim is // [BxCxH,W] as well if (input.dim.rank() > 2) { @@ -782,12 +734,12 @@ TensorV2 &FloatTensor::dot(TensorV2 const &input, TensorV2 &output, bool trans, return output; } -void FloatTensor::copy(const TensorV2 &from) { +void FloatTensor::copy(const Tensor &from) { reshape(from.getDim()); copy(from.getData()); } -void FloatTensor::copyData(const TensorV2 &from) { +void FloatTensor::copyData(const Tensor &from) { NNTR_THROW_IF(!contiguous, std::invalid_argument) << getName() << " is not contiguous, cannot copy."; @@ -812,6 +764,18 @@ void FloatTensor::copyData(const TensorV2 &from) { } } +void FloatTensor::copy_with_stride(const Tensor &input, Tensor &output) { + for (unsigned int b = 0; b < output.batch(); ++b) { + for (unsigned int c = 0; c < output.channel(); ++c) { + for (unsigned int h = 0; h < output.height(); ++h) { + for (unsigned int w = 0; w < output.width(); ++w) { + output.setValue(b, c, h, w, input.getValue(b, c, h, w)); + } + } + } + } +} + std::vector FloatTensor::argmax() const { std::vector result; const float *data = (float *)getData(); @@ -844,8 +808,8 @@ float FloatTensor::minValue() const { return *std::min_element(data, data + size()); } -TensorV2 &FloatTensor::transpose(const std::string &direction, - TensorV2 &output) const { +Tensor &FloatTensor::transpose(const std::string &direction, + Tensor &output) const { unsigned int SL, SI, SJ, SK; output.reshape(dim.transpose(direction)); @@ -921,7 +885,7 @@ void FloatTensor::dropout_mask(float dropout) { } } -void FloatTensor::filter_mask(const TensorV2 &mask_len, bool reverse) { +void FloatTensor::filter_mask(const Tensor &mask_len, bool reverse) { float fill_mask_val = 0.0; float en_mask_val = 1.0 - fill_mask_val; @@ -942,7 +906,7 @@ void FloatTensor::filter_mask(const TensorV2 &mask_len, bool reverse) { } } -void FloatTensor::zoneout_mask(TensorV2 &opposite, float zoneout) { +void FloatTensor::zoneout_mask(Tensor &opposite, float zoneout) { opposite.setRandBernoulli(zoneout); float *data = (float *)getData(); @@ -957,7 +921,7 @@ void FloatTensor::zoneout_mask(TensorV2 &opposite, float zoneout) { } } -std::vector FloatTensor::split(std::vector sizes, int axis) { +std::vector FloatTensor::split(std::vector sizes, int axis) { size_t num_size = sizes.size(); if (axis == -1) { @@ -977,7 +941,7 @@ std::vector FloatTensor::split(std::vector sizes, int axis) { } bool is_format_nchw = (dim.getFormat() == Tformat::NCHW) ? 
true : false; - std::vector ret; + std::vector ret; auto iter_value = [this, is_format_nchw]( std::array &loc, @@ -1059,17 +1023,16 @@ std::vector FloatTensor::split(std::vector sizes, int axis) { return ret; } -TensorV2 FloatTensor::cat(const std::vector &tensors, int axis) { +Tensor FloatTensor::concat(const std::vector &tensors, int axis) { if (axis == -1) { axis = 3; } - TensorV2 ret; auto ref_dim = tensors.front().getDim(); bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW); ref_dim.setTensorDim(axis, 1); NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(), - [&ref_dim, axis](const TensorV2 &t) { + [&ref_dim, axis](const Tensor &t) { auto cur_dim = t.getDim(); cur_dim.setTensorDim(axis, 1); return ref_dim == cur_dim; @@ -1079,12 +1042,12 @@ TensorV2 FloatTensor::cat(const std::vector &tensors, int axis) { << ref_dim << " axis : " << axis; auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u, - [axis](unsigned cur, const TensorV2 &t) { + [axis](unsigned cur, const Tensor &t) { return cur += t.getDim().getTensorDim(axis); }); auto iter_value = [is_format_nchw](std::array &loc, - const std::array &start_loc, TensorV2 &t, + const std::array &start_loc, Tensor &t, const std::array &ref_dim_arr) -> float & { auto &value = is_format_nchw ? t.getValue(loc[0], loc[1], loc[2], loc[3]) @@ -1104,7 +1067,7 @@ TensorV2 FloatTensor::cat(const std::vector &tensors, int axis) { auto ret_dim = ref_dim; ret_dim.setTensorDim(axis, axis_dim); - ret = TensorV2(ret_dim); + Tensor ret = Tensor(ret_dim); std::array loc = {0, 0, 0, 0}; for (auto &t : tensors) { @@ -1143,7 +1106,6 @@ TensorV2 FloatTensor::cat(const std::vector &tensors, int axis) { } void FloatTensor::print(std::ostream &out) const { - printInstance(out, this); const float *data = (float *)getData(); unsigned int len = size(); out << "data addr: " << data << '\n'; @@ -1203,11 +1165,11 @@ void FloatTensor::copy(const void *buf) { } void FloatTensor::apply_broadcast_util( - TensorV2 const &m, - std::function v_func, - TensorV2 &output, const BroadcastInfoV2 &e, int cur_axis, size_t offset, + Tensor &output, const BroadcastInfo &e, int cur_axis, size_t offset, size_t m_offset) const { const float *buf = (float *)this->getData(); @@ -1235,12 +1197,12 @@ void FloatTensor::apply_broadcast_util( } void FloatTensor::apply_broadcast( - TensorV2 const &m, - std::function v_func, - TensorV2 &output) const { - CREATE_V2_IF_EMPTY_DIMS(output, dim); + Tensor &output) const { + CREATE_IF_EMPTY_DIMS(output, dim); NNTR_THROW_IF(getData() == nullptr, std::invalid_argument) << getName() << " is not allocated"; @@ -1253,7 +1215,7 @@ void FloatTensor::apply_broadcast( /// note that buffer_size, the last stride is only used in v_func but it /// might be changed if (dim == m.getDim()) { - BroadcastInfoV2 e; + BroadcastInfo e; e.buffer_size = size(); e.strides[3] = 1; e.tensor_type = getTensorType(); diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h index 6eae7d0d9c..dd976d91d9 100644 --- a/nntrainer/tensor/float_tensor.h +++ b/nntrainer/tensor/float_tensor.h @@ -14,7 +14,6 @@ #ifdef __cplusplus #include -#include #ifdef DEBUG #define EXCEPT_WHEN_DEBUG @@ -62,7 +61,60 @@ class FloatTensor : public TensorBase { */ FloatTensor( std::vector>>> const &d, - Tformat fm); + Tformat fm) { + if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { + throw std::out_of_range( + "[Tensor] trying to initialize FloatTensor from empty vector"); + } + + dim.setTensorDim(0, d.size()); + if (fm == 
Tformat::NCHW) { + dim.setTensorDim(1, d[0].size()); + dim.setTensorDim(2, d[0][0].size()); + dim.setTensorDim(3, d[0][0][0].size()); + } else { + dim.setTensorDim(2, d[0].size()); + dim.setTensorDim(3, d[0][0].size()); + dim.setTensorDim(1, d[0][0][0].size()); + } + + dim.setTensorType({fm, Tdatatype::FP32}); + + strides = dim.computeStrides(); + contiguous = true; + initializer = Initializer::NONE; + + MemoryData *mem_data = + new MemoryData((void *)(new float[dim.getDataLen()]())); + data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { + delete[] mem_data->getAddr(); + }); + + offset = 0; + + // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] + // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch, + // dim[1] == height, dim[2] == width, dim[3] == channel + if (fm == Tformat::NCHW) { + for (unsigned int i = 0; i < batch(); ++i) + for (unsigned int j = 0; j < channel(); ++j) + for (unsigned int k = 0; k < height(); ++k) + for (unsigned int l = 0; l < width(); ++l) + this->setValue(i, j, k, l, d[i][j][k][l]); + } else { + for (unsigned int i = 0; i < batch(); ++i) + for (unsigned int j = 0; j < height(); ++j) + for (unsigned int k = 0; k < width(); ++k) + for (unsigned int l = 0; l < channel(); ++l) + this->setValue(i, l, j, k, d[i][j][k][l]); + } + } + + /** + * @brief Construct a new FloatTensor object + * @param rhs TensorBase object to copy + */ + FloatTensor(TensorBase &rhs) : TensorBase(rhs) {} /** * @brief Basic Destructor @@ -84,22 +136,22 @@ class FloatTensor : public TensorBase { bool operator!=(const FloatTensor &rhs) const { return !(*this == rhs); } /** - * @copydoc TensorV2::allocate() + * @copydoc Tensor::allocate() */ void allocate() override; /** - * @copydoc TensorV2::deallocate() + * @copydoc Tensor::deallocate() */ void deallocate() override; /** - * @copydoc TensorV2::getData() + * @copydoc Tensor::getData() */ void *getData() const override; /** - * @copydoc TensorV2::getData(size_t idx) + * @copydoc Tensor::getData(size_t idx) */ void *getData(size_t idx) const override; @@ -148,24 +200,24 @@ class FloatTensor : public TensorBase { unsigned int w); /** - * @copydoc TensorV2::setValue(float value) + * @copydoc Tensor::setValue(float value) */ void setValue(float value) override; /** - * @copydoc TensorV2::setValue(b, c, h, w, value) + * @copydoc Tensor::setValue(b, c, h, w, value) */ void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, float value) override; /** - * @copydoc TensorV2::addValue(b, c, h, w, value, beta) + * @copydoc Tensor::addValue(b, c, h, w, value, beta) */ void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, float value, float beta) override; /** - * @copydoc TensorV2::setZero() + * @copydoc Tensor::setZero() */ void setZero() override; @@ -186,180 +238,203 @@ class FloatTensor : public TensorBase { }; /** - * @copydoc TensorV2::setRandNormal() + * @copydoc Tensor::setRandNormal() */ - void setRandNormal(float mean = 0.0f, float stddev = 0.05f); + void setRandNormal(float mean = 0.0f, float stddev = 0.05f) override; /** - * @copydoc TensorV2::setRandUniform() + * @copydoc Tensor::setRandUniform() */ - void setRandUniform(float min = -0.05f, float max = 0.05f); + void setRandUniform(float min = -0.05f, float max = 0.05f) override; /** - * @copydoc TensorV2::setRandBernoulli() + * @copydoc Tensor::setRandBernoulli() */ - void setRandBernoulli(float probability = 0.5f); + void setRandBernoulli(float probability = 0.5f) override; /** - * @copydoc 
TensorV2::initialize() + * @copydoc Tensor::initialize() */ void initialize() override; /** - * @copydoc TensorV2::initialize(Initializer init) + * @copydoc Tensor::initialize(Initializer init) */ void initialize(Initializer init) override; /** - * @copydoc TensorV2::apply(std::function f, TensorV2 &output) + * @copydoc Tensor::apply(std::function f, Tensor &output) */ - TensorV2 &apply(std::function f, - TensorV2 &output) const override; + Tensor &apply(std::function f, Tensor &output) const override; /** - * @copydoc TensorV2::multiply_strided(TensorV2 const &m, TensorV2 &output, + * @copydoc Tensor::multiply_strided(Tensor const &m, Tensor &output, * const float beta) */ - TensorV2 multiply_strided(TensorV2 const &m, TensorV2 &output, - const float beta) const override; + Tensor multiply_strided(Tensor const &m, Tensor &output, + const float beta) const override; /** - * @copydoc TensorV2::multiply_i(float const &value) + * @copydoc Tensor::multiply_i(float const &value) */ int multiply_i(float const &value) override; /** - * @copydoc TensorV2::multiply(float const &value, TensorV2 &out) + * @copydoc Tensor::multiply(float const &value, Tensor &out) */ - TensorV2 &multiply(float const &value, TensorV2 &out) const override; + Tensor &multiply(float const &value, Tensor &out) const override; /** - * @copydoc TensorV2::multiply(TensorV2 const &m, TensorV2 &output, const + * @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const * float beta = 0.0) */ - TensorV2 &multiply(TensorV2 const &m, TensorV2 &output, - const float beta = 0.0) const override; + Tensor &multiply(Tensor const &m, Tensor &output, + const float beta = 0.0) const override; /** - * @copydoc TensorV2::divide(float const &value, TensorV2 &output) + * @copydoc Tensor::divide(float const &value, Tensor &output) */ - TensorV2 ÷(float const &value, TensorV2 &output) const override; + Tensor ÷(float const &value, Tensor &output) const override; /** - * @copydoc TensorV2::divide(TensorV2 const &m, TensorV2 &output) + * @copydoc Tensor::divide(Tensor const &m, Tensor &output) */ - TensorV2 ÷(TensorV2 const &m, TensorV2 &output) const override; + Tensor ÷(Tensor const &m, Tensor &output) const override; /** - * @copydoc TensorV2::add_strided(TensorV2 const &input, TensorV2 &output, + * @copydoc Tensor::add_strided(Tensor const &input, Tensor &output, * const float beta) */ - TensorV2 &add_strided(TensorV2 const &input, TensorV2 &output, - const float beta) const override; + Tensor &add_strided(Tensor const &input, Tensor &output, + const float beta) const override; + + /** + * @copydoc Tensor::add_i(Tensor const &m, float const alpha) + */ + int add_i(Tensor const &m, Tensor &output, float const alpha) override; /** - * @copydoc TensorV2::add(float const &value, TensorV2 &output) + * @copydoc Tensor::add_i_partial() */ - TensorV2 &add(float const &value, TensorV2 &output) const override; + int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m, + unsigned int incX, unsigned int incY, const Tensor alphas, + unsigned int alpha_idx) override; /** - * @copydoc TensorV2::add(TensorV2 const &m, TensorV2 &output, float const + * @copydoc Tensor::add(float const &value, Tensor &output) + */ + Tensor &add(float const &value, Tensor &output) const override; + + /** + * @copydoc Tensor::add(Tensor const &m, Tensor &output, float const * alpha) */ - TensorV2 &add(TensorV2 const &m, TensorV2 &output, - float const alpha) const override; + Tensor &add(Tensor const &m, Tensor &output, + float const alpha) const override; 
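
float_tensor.h above gains add_i and add_i_partial overrides whose .cpp bodies earlier in this patch reduce to a saxpy call. As a reminder of the contract those declarations promise, here is the textbook saxpy with explicit increments; the wrapper name is hypothetical and only the y := alpha * x + y semantics are taken from the patch:

#include <iostream>
#include <vector>

// Classic BLAS-style saxpy: y[i*incY] += alpha * x[i*incX].
// FloatTensor::add_i / add_i_partial in this patch delegate to a call with
// exactly this contract (via apply_broadcast for the broadcasting case).
void saxpy_sketch(unsigned int n, float alpha, const float *x,
                  unsigned int incX, float *y, unsigned int incY) {
  for (unsigned int i = 0; i < n; ++i)
    y[i * incY] += alpha * x[i * incX];
}

int main() {
  std::vector<float> x = {1, 2, 3, 4};
  std::vector<float> y = {10, 10, 10, 10};
  saxpy_sketch(4, 0.5f, x.data(), 1, y.data(), 1);
  for (float v : y)
    std::cout << v << ' ';          // prints: 10.5 11 11.5 12
  std::cout << '\n';
  return 0;
}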
/** - * @copydoc TensorV2::subtract(float const &value, TensorV2 &output) + * @copydoc Tensor::subtract(float const &value, Tensor &output) */ - TensorV2 &subtract(float const &value, TensorV2 &output) const override; + Tensor &subtract(float const &value, Tensor &output) const override; /** - * @copydoc TensorBase::sum_by_batch(TensorV2 &output) + * @copydoc TensorBase::sum_by_batch(Tensor &output) */ - void sum_by_batch(TensorV2 &output) const override; + void sum_by_batch(Tensor &output) const override; /** - * @copydoc TensorV2::sum(unsigned int axis, TensorV2 &output, float alpha, + * @copydoc Tensor::sum(unsigned int axis, Tensor &output, float alpha, * float beta) const */ - TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha, - float beta) const override; + Tensor &sum(unsigned int axis, Tensor &output, float alpha, + float beta) const override; /** - * @copydoc TensorV2::l2norm + * @copydoc Tensor::l2norm */ float l2norm() const override; /** - * @copydoc TensorV2::pow(float exponent, TensorV2 &output) + * @copydoc Tensor::pow(float exponent, Tensor &output) + */ + Tensor &pow(float exponent, Tensor &output) const override; + + /** + * @copydoc Tensor::erf(Tensor &output) */ - TensorV2 &pow(float exponent, TensorV2 &output) const override; + Tensor &erf(Tensor &output) const override; /** - * @copydoc TensorV2::erf(TensorV2 &output) + * @copydoc Tensor::sin(Tensor &out, float alpha) */ - TensorV2 &erf(TensorV2 &output) const override; + void sin(Tensor &out, float alpha) override; /** - * @copydoc TensorV2::sin(TensorV2 &out, float alpha) + * @copydoc Tensor::cos(Tensor &out, float alpha) */ - void sin(TensorV2 &out, float alpha) override; + void cos(Tensor &out, float alpha) override; /** - * @copydoc TensorV2::cos(TensorV2 &out, float alpha) + * @copydoc TensorBase::inv_sqrt(Tensor &out) */ - void cos(TensorV2 &out, float alpha) override; + void inv_sqrt(Tensor &out) override; /** - * @copydoc TensorV2::dot(TensorV2 const &input, TensorV2 &output, bool + * @copydoc Tensor::dot(Tensor const &input, Tensor &output, bool * trans, bool trans_in, float beta) */ - TensorV2 &dot(TensorV2 const &input, TensorV2 &output, bool trans, - bool trans_in, float beta) const override; + Tensor &dot(Tensor const &input, Tensor &output, bool trans, bool trans_in, + float beta) const override; /** - * @copydoc TensorV2::dropout_mask(float dropout) + * @copydoc Tensor::dropout_mask(float dropout) */ void dropout_mask(float dropout) override; /** - * @copydoc TensorV2::filter_mask(const TensorV2 &mask_len, bool reverse) + * @copydoc Tensor::filter_mask(const Tensor &mask_len, bool reverse) */ - void filter_mask(const TensorV2 &mask_len, bool reverse) override; + void filter_mask(const Tensor &mask_len, bool reverse) override; /** - * @copydoc TensorV2::zoneout_mask(TensorV2 &opposite, float zoneout) + * @copydoc Tensor::zoneout_mask(Tensor &opposite, float zoneout) */ - void zoneout_mask(TensorV2 &opposite, float zoneout) override; + void zoneout_mask(Tensor &opposite, float zoneout) override; /** - * @copydoc TensorV2::split(std::vector sizes, int axis) + * @copydoc Tensor::split(std::vector sizes, int axis) */ - std::vector split(std::vector sizes, int axis) override; + std::vector split(std::vector sizes, int axis) override; /** - * @copydoc TensorV2::cat(const std::vector &tensors, int axis) + * @copydoc Tensor::cat(const std::vector &tensors, int axis) */ - static TensorV2 cat(const std::vector &tensors, int axis); + Tensor concat(const std::vector &tensors, int axis) override; /** - 
* @copydoc TensorV2::copy(const TensorV2 &from) + * @copydoc Tensor::copy(const Tensor &from) */ - void copy(const TensorV2 &from); + void copy(const Tensor &from) override; /** - * @copydoc TensorV2::copyData(const TensorV2 &from) + * @copydoc Tensor::copyData(const Tensor &from) */ - void copyData(const TensorV2 &from); + void copyData(const Tensor &from) override; /** - * @copydoc TensorV2::argmax() + * @brief Copy the Tensor + * @param[in] input Tensor to be copied + * @param[out] output output Tensor + */ + void copy_with_stride(const Tensor &input, Tensor &output) override; + + /** + * @copydoc Tensor::argmax() */ std::vector argmax() const override; /** - * @copydoc TensorV2::max_abs() + * @copydoc Tensor::max_abs() */ float max_abs() const override; /** @@ -373,13 +448,13 @@ class FloatTensor : public TensorBase { float minValue() const override; /** - * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out) + * @copydoc Tensor::transpose(const std::string &direction, Tensor &out) */ - TensorV2 &transpose(const std::string &direction, - TensorV2 &output) const override; + Tensor &transpose(const std::string &direction, + Tensor &output) const override; /** - * @copydoc TensorV2::print(std::ostream &out) + * @copydoc Tensor::print(std::ostream &out) */ void print(std::ostream &out) const override; @@ -403,13 +478,14 @@ class FloatTensor : public TensorBase { * @retval #ML_ERROR_NONE Successful * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ - void apply_broadcast_util( - TensorV2 const &m, - std::function - v_func, - TensorV2 &output, const BroadcastInfoV2 &e, int cur_axis = -1, - size_t offset = 0, size_t m_offset = 0) const; + void + apply_broadcast_util(Tensor const &m, + std::function + v_func, + Tensor &output, const BroadcastInfo &e, + int cur_axis = -1, size_t offset = 0, + size_t m_offset = 0) const; /** * @brief Applies the given operator to the tensor with the passed argument @@ -419,12 +495,11 @@ class FloatTensor : public TensorBase { * @retval #ML_ERROR_NONE Successful * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ - void - apply_broadcast(TensorV2 const &m, - std::function - v_func, - TensorV2 &output) const; + void apply_broadcast(Tensor const &m, + std::function + v_func, + Tensor &output) const; }; } // namespace nntrainer diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp index cff0691895..f34955f38f 100644 --- a/nntrainer/tensor/half_tensor.cpp +++ b/nntrainer/tensor/half_tensor.cpp @@ -14,6 +14,7 @@ #include #include +#include #include namespace nntrainer { @@ -36,58 +37,6 @@ HalfTensor::HalfTensor(const TensorDim &d, const void *buf) : } } -HalfTensor::HalfTensor( - std::vector>>> const &d, - Tformat fm) { - - if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { - throw std::out_of_range( - "[Tensor] trying to initialize HalfTensor from empty vector"); - } - - dim.setTensorDim(0, d.size()); - if (fm == Tformat::NCHW) { - dim.setTensorDim(1, d[0].size()); - dim.setTensorDim(2, d[0][0].size()); - dim.setTensorDim(3, d[0][0][0].size()); - } else { - dim.setTensorDim(2, d[0].size()); - dim.setTensorDim(3, d[0][0].size()); - dim.setTensorDim(1, d[0][0][0].size()); - } - - dim.setTensorType({fm, Tdatatype::FP16}); - - strides = dim.computeStrides(); - contiguous = true; - initializer = Initializer::NONE; - - MemoryData *mem_data = - new MemoryData((void *)(new _FP16[dim.getDataLen()]())); - data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { - delete[] 
mem_data->getAddr<_FP16>(); - }); - - offset = 0; - - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - if (fm == Tformat::NCHW) { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < channel(); ++j) - for (unsigned int k = 0; k < height(); ++k) - for (unsigned int l = 0; l < width(); ++l) - this->setValue(i, j, k, l, d[i][j][k][l]); - } else { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < height(); ++j) - for (unsigned int k = 0; k < width(); ++k) - for (unsigned int l = 0; l < channel(); ++l) - this->setValue(i, l, j, k, d[i][j][k][l]); - } -} - bool HalfTensor::operator==(const HalfTensor &rhs) const { const _FP16 *_data = (_FP16 *)getData(); const _FP16 *_rdata = (_FP16 *)rhs.getData(); @@ -282,9 +231,8 @@ void HalfTensor::initialize(Initializer init) { initialize(); } -TensorV2 &HalfTensor::apply(std::function<_FP16(_FP16)> f, - TensorV2 &output) const { - CREATE_V2_IF_EMPTY_DIMS(output, dim, nullptr); +Tensor &HalfTensor::apply(std::function<_FP16(_FP16)> f, Tensor &output) const { + CREATE_IF_EMPTY_DIMS(output, dim, nullptr); if (contiguous && output.getContiguous()) { const _FP16 *data = (_FP16 *)getData(); @@ -317,9 +265,9 @@ TensorV2 &HalfTensor::apply(std::function<_FP16(_FP16)> f, return output; } -TensorV2 HalfTensor::multiply_strided(TensorV2 const &m, TensorV2 &output, - const float beta) const { - CREATE_V2_IF_EMPTY_DIMS(output, dim, nullptr); +Tensor HalfTensor::multiply_strided(Tensor const &m, Tensor &output, + const float beta) const { + CREATE_IF_EMPTY_DIMS(output, dim, nullptr); if (size() != m.size() || size() != output.size()) throw std::invalid_argument( @@ -385,28 +333,19 @@ int HalfTensor::multiply_i(float const &value) { return ML_ERROR_NONE; } -TensorV2 &HalfTensor::multiply(float const &value, TensorV2 &out) const { +Tensor &HalfTensor::multiply(float const &value, Tensor &out) const { auto f = std::bind(std::multiplies<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value)); apply(f, out); return out; } -TensorV2 &HalfTensor::multiply(TensorV2 const &m, TensorV2 &output, - const float beta) const { - auto f = [&](const BroadcastInfoV2 &e, const _FP16 *buf, const _FP16 *m_buf, +Tensor &HalfTensor::multiply(Tensor const &m, Tensor &output, + const float beta) const { + auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, _FP16 *out_buf) { - if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1 && - std::fpclassify(beta) == FP_ZERO) { - ele_mul(e.buffer_size, buf, m_buf, out_buf); - } else { - for (unsigned int i = 0; i < e.buffer_size; ++i) { - *out_buf = *buf * *m_buf + static_cast<_FP16>(beta) * *out_buf; - buf += strides[3]; - m_buf += e.strides[3]; - out_buf += output.getStrides()[3]; - } - } + ele_mul(e.buffer_size, buf, m_buf, out_buf, 1, beta, e.strides[3], + strides[3]); }; NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument) @@ -422,8 +361,8 @@ TensorV2 &HalfTensor::multiply(TensorV2 const &m, TensorV2 &output, return output; } -TensorV2 &HalfTensor::add_strided(TensorV2 const &input, TensorV2 &output, - const float beta) const { +Tensor &HalfTensor::add_strided(Tensor const &input, Tensor &output, + const float beta) const { if (size() != input.size() || size() != output.size()) throw std::invalid_argument( "Strided multiplication does not support broadcasting"); @@ -480,54 
+419,71 @@ TensorV2 &HalfTensor::add_strided(TensorV2 const &input, TensorV2 &output, return output; } -TensorV2 &HalfTensor::add(float const &value, TensorV2 &output) const { +int HalfTensor::add_i(Tensor const &m, Tensor &output, float const alpha) { + auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, + _FP16 *out_buf) { + saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]); + /// @todo: saxpy is not valid for _FP16 + }; + + try { + apply_broadcast(m, f, output); + } catch (std::exception &err) { + ml_loge("%s %s", typeid(err).name(), err.what()); + return ML_ERROR_INVALID_PARAMETER; + } + return ML_ERROR_NONE; +} + +int HalfTensor::add_i_partial(unsigned int len, unsigned int addr_idx, + Tensor &m, unsigned int incX, unsigned int incY, + const Tensor alphas, unsigned int alpha_idx) { + saxpy(len, alphas.getValue<_FP16>(alpha_idx), m.getData<_FP16>(), incX, + (_FP16 *)getAddress(addr_idx), incY); + + return ML_ERROR_NONE; +} + +Tensor &HalfTensor::add(float const &value, Tensor &output) const { auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value)); apply(f, output); return output; } -TensorV2 &HalfTensor::add(TensorV2 const &m, TensorV2 &output, - float const alpha) const { - auto f = [&](const BroadcastInfoV2 &e, const _FP16 *buf, const _FP16 *m_buf, +Tensor &HalfTensor::add(Tensor const &m, Tensor &output, + float const alpha) const { + auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, _FP16 *out_buf) { - if (e.strides[3] == 1 && strides[3] == 1 && strides[3] == 1 && alpha == 1) { - ele_add(e.buffer_size, buf, m_buf, out_buf); - } else { - for (unsigned int i = 0; i < e.buffer_size; ++i) { - *out_buf = *buf + *m_buf * static_cast<_FP16>(alpha); - buf += strides[3]; - m_buf += e.strides[3]; - out_buf += strides[3]; - } - } + ele_add(e.buffer_size, buf, m_buf, out_buf, alpha, 0, e.strides[3], + strides[3]); }; apply_broadcast(m, f, output); return output; } -TensorV2 &HalfTensor::subtract(float const &value, TensorV2 &output) const { +Tensor &HalfTensor::subtract(float const &value, Tensor &output) const { auto f = std::bind(std::minus<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value)); apply(f, output); return output; } -void HalfTensor::sum_by_batch(TensorV2 &output) const { +void HalfTensor::sum_by_batch(Tensor &output) const { size_t feat_len = dim.getFeatureLen(); size_t batch = dim.batch(); const _FP16 *data = (_FP16 *)getData(); _FP16 *out_data = output.getData<_FP16>(); - TensorV2 ones(1, 1, 1, feat_len, this->getTensorType()); + Tensor ones(1, 1, 1, feat_len, this->getTensorType()); ones.setValue((_FP16)1.0); sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len, ones.getData<_FP16>(), 1, 0.0, out_data, 1); } -TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha, - float beta) const { +Tensor &HalfTensor::sum(unsigned int axis, Tensor &output, float alpha, + float beta) const { const _FP16 *data = (_FP16 *)getData(); @@ -538,35 +494,35 @@ TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha, throw std::out_of_range("Error: axis is invalid"); if (dim.getDim()[axis] == 1 and alpha == 1.0 and !beta) { - CREATE_V2_IF_EMPTY_DIMS(output, dim); + CREATE_IF_EMPTY_DIMS(output, dim); scopy(size(), (_FP16 *)getData(), 1, output.getData<_FP16>(), 1); return output; } switch (axis) { case 0: { - CREATE_V2_IF_EMPTY_DIMS(output, 1, dim.channel(), dim.height(), dim.width(), - this->getTensorType()); + 
CREATE_IF_EMPTY_DIMS(output, 1, dim.channel(), dim.height(), dim.width(), + this->getTensorType()); size_t feat_len = dim.getFeatureLen(); size_t batch = dim.batch(); - TensorV2 ones(1, 1, 1, batch, this->getTensorType()); + Tensor ones(1, 1, 1, batch, this->getTensorType()); ones.setValue(alpha); sgemv(CblasRowMajor, CblasTrans, batch, feat_len, 1, data, feat_len, ones.getData<_FP16>(), 1, beta, output.getData<_FP16>(), 1); } break; case 1: { - CREATE_V2_IF_EMPTY_DIMS(output, dim[0], 1, dim[2], dim[3], getTensorType()); + CREATE_IF_EMPTY_DIMS(output, dim[0], 1, dim[2], dim[3], getTensorType()); if (this->getFormat() == Tformat::NHWC) { unsigned int feat_len = output.getDim().getDataLen(); unsigned int t_axis = dim[1]; - TensorV2 ones(1, 1, 1, t_axis, this->getTensorType()); + Tensor ones(1, 1, 1, t_axis, this->getTensorType()); ones.setValue(alpha); sgemv(CblasRowMajor, CblasNoTrans, feat_len, t_axis, 1, data, t_axis, ones.getData<_FP16>(), 1, beta, output.getData<_FP16>(), 1); } else { unsigned int feat_len = dim[2] * dim[3]; unsigned int t_axis = dim[1]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); _FP16 *rdata = output.getData<_FP16>(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -577,12 +533,12 @@ TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } } break; case 2: { - CREATE_V2_IF_EMPTY_DIMS(output, dim[0], dim[1], 1, dim[3], getTensorType()); + CREATE_IF_EMPTY_DIMS(output, dim[0], dim[1], 1, dim[3], getTensorType()); if (this->getFormat() == Tformat::NHWC) { unsigned int feat_len = dim[1] * dim[3]; unsigned int t_axis = dim[2]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); _FP16 *rdata = output.getData<_FP16>(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -593,7 +549,7 @@ TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } else { unsigned int t_3 = dim[3]; unsigned int t_axis = dim[2]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); _FP16 *rdata = output.getData<_FP16>(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -607,11 +563,11 @@ TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } } break; case 3: { - CREATE_V2_IF_EMPTY_DIMS(output, dim[0], dim[1], dim[2], 1, getTensorType()); + CREATE_IF_EMPTY_DIMS(output, dim[0], dim[1], dim[2], 1, getTensorType()); if (this->getFormat() == Tformat::NHWC) { unsigned int t_3 = dim[1]; unsigned int t_axis = dim[3]; - TensorV2 ones(1, 1, 1, t_axis, getTensorType()); + Tensor ones(1, 1, 1, t_axis, getTensorType()); ones.setValue(alpha); _FP16 *rdata = output.getData<_FP16>(); for (unsigned int k = 0; k < dim[0]; ++k) { @@ -625,7 +581,7 @@ TensorV2 &HalfTensor::sum(unsigned int axis, TensorV2 &output, float alpha, } else { unsigned int m = output.getDim().getDataLen(); unsigned int n = dim[3]; - TensorV2 ones(1, 1, 1, n, getTensorType()); + Tensor ones(1, 1, 1, n, getTensorType()); ones.setValue(alpha); sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n, ones.getData<_FP16>(), 1, beta, output.getData<_FP16>(), 1); @@ -642,7 +598,7 @@ float HalfTensor::l2norm() const { return snrm2(size(), (_FP16 *)getData(), 1); } -TensorV2 &HalfTensor::pow(float exponent, TensorV2 &output) const { +Tensor &HalfTensor::pow(float exponent, Tensor &output) const { auto f = [exponent](float in) { return static_cast<_FP16>(powf(in, exponent)); }; @@ -650,7 
+606,7 @@ TensorV2 &HalfTensor::pow(float exponent, TensorV2 &output) const {
   return output;
 }

-TensorV2 &HalfTensor::erf(TensorV2 &output) const {
+Tensor &HalfTensor::erf(Tensor &output) const {
   auto f = [](_FP16 in) {
     return static_cast<_FP16>(std::erf(static_cast<float>(in)));
   };
@@ -658,8 +614,20 @@ TensorV2 &HalfTensor::erf(TensorV2 &output) const {
   return output;
 }

-TensorV2 &HalfTensor::dot(TensorV2 const &input, TensorV2 &output, bool trans,
-                          bool trans_in, float beta) const {
+void HalfTensor::inv_sqrt(Tensor &out) {
+  if (!contiguous) {
+    apply(
+      [](_FP16 val) -> _FP16 {
+        return static_cast<_FP16>(1 / std::sqrt(static_cast<float>(val)));
+      },
+      out);
+  } else {
+    inv_sqrt_inplace(out.size(), out.getData<_FP16>());
+  }
+}
+
+Tensor &HalfTensor::dot(Tensor const &input, Tensor &output, bool trans,
+                        bool trans_in, float beta) const {
   // Comment out with intension to support the calculation wrt. batch and height
   // direction. It supposes to have this->dim as [ BxCxH,W ] and input.dim is
   // [BxCxH,W] as well if (input.dim.rank() > 2) {
@@ -729,7 +697,7 @@ void HalfTensor::dropout_mask(float dropout) {
   }
 }

-void HalfTensor::filter_mask(const TensorV2 &mask_len, bool reverse) {
+void HalfTensor::filter_mask(const Tensor &mask_len, bool reverse) {
   float fill_mask_val = 0.0;
   float en_mask_val = 1.0 - fill_mask_val;
@@ -750,7 +718,7 @@ void HalfTensor::filter_mask(const TensorV2 &mask_len, bool reverse) {
   }
 }

-void HalfTensor::zoneout_mask(TensorV2 &opposite, float zoneout) {
+void HalfTensor::zoneout_mask(Tensor &opposite, float zoneout) {
   _FP16 zoneout_fp16 = (_FP16)zoneout;
   opposite.setRandBernoulli(zoneout_fp16);
@@ -766,7 +734,7 @@ void HalfTensor::zoneout_mask(TensorV2 &opposite, float zoneout) {
   }
 }

-std::vector<TensorV2> HalfTensor::split(std::vector<size_t> sizes, int axis) {
+std::vector<Tensor> HalfTensor::split(std::vector<size_t> sizes, int axis) {
   size_t num_size = sizes.size();

   if (axis == -1) {
@@ -786,7 +754,7 @@ std::vector<TensorV2> HalfTensor::split(std::vector<size_t> sizes, int axis) {
   }

   bool is_format_nchw = (dim.getFormat() == Tformat::NCHW) ? true : false;

-  std::vector<TensorV2> ret;
+  std::vector<Tensor> ret;

   auto iter_value = [this, is_format_nchw](
                       std::array &loc,
@@ -868,16 +836,15 @@ std::vector<TensorV2> HalfTensor::split(std::vector<size_t> sizes, int axis) {
   return ret;
 }

-TensorV2 HalfTensor::cat(const std::vector<TensorV2> &tensors, int axis) {
+Tensor HalfTensor::concat(const std::vector<Tensor> &tensors, int axis) {
   if (axis == -1) {
     axis = 3;
   }
-  TensorV2 ret;
   auto ref_dim = tensors.front().getDim();
   bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW);
   ref_dim.setTensorDim(axis, 1);
   NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(),
-                             [&ref_dim, axis](const TensorV2 &t) {
+                             [&ref_dim, axis](const Tensor &t) {
                                auto cur_dim = t.getDim();
                                cur_dim.setTensorDim(axis, 1);
                                return ref_dim == cur_dim;
@@ -887,12 +854,12 @@ TensorV2 HalfTensor::cat(const std::vector<TensorV2> &tensors, int axis) {
    << ref_dim << " axis : " << axis;

   auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u,
-                                  [axis](unsigned cur, const TensorV2 &t) {
+                                  [axis](unsigned cur, const Tensor &t) {
                                     return cur += t.getDim().getTensorDim(axis);
                                   });

   auto iter_value = [is_format_nchw](std::array &loc,
-                                     const std::array &start_loc, TensorV2 &t,
+                                     const std::array &start_loc, Tensor &t,
                                      const std::array &ref_dim_arr) -> _FP16 & {
     auto &value = is_format_nchw ?
t.getValue<_FP16>(loc[0], loc[1], loc[2], loc[3]) @@ -912,7 +879,7 @@ TensorV2 HalfTensor::cat(const std::vector &tensors, int axis) { auto ret_dim = ref_dim; ret_dim.setTensorDim(axis, axis_dim); - ret = TensorV2(ret_dim); + Tensor output = Tensor(ret_dim); std::array loc = {0, 0, 0, 0}; for (auto &t : tensors) { @@ -931,7 +898,7 @@ TensorV2 HalfTensor::cat(const std::vector &tensors, int axis) { } for (size_t i = 0u, sz = t.size(); i < sz; ++i) { - iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<_FP16>(i); + iter_value(loc, start_loc, output, tensor_dim_arr) = t.getValue<_FP16>(i); } if (is_format_nchw) { @@ -946,11 +913,10 @@ TensorV2 HalfTensor::cat(const std::vector &tensors, int axis) { } } } - return ret; + return output; } void HalfTensor::print(std::ostream &out) const { - printInstance(out, this); const _FP16 *data = (_FP16 *)getData(); unsigned int len = size(); out << "data addr: " << data << '\n'; @@ -999,39 +965,29 @@ void HalfTensor::print(std::ostream &out) const { out.copyfmt(init); } -TensorV2 &HalfTensor::divide(float const &value, TensorV2 &output) const { +Tensor &HalfTensor::divide(float const &value, Tensor &output) const { auto f = std::bind(std::divides<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value)); apply(f, output); return output; } -TensorV2 &HalfTensor::divide(TensorV2 const &m, TensorV2 &output) const { - auto f = [&](const BroadcastInfoV2 &e, const _FP16 *buf, const _FP16 *m_buf, +Tensor &HalfTensor::divide(Tensor const &m, Tensor &output) const { + auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, _FP16 *out_buf) { - if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1) { - std::transform(buf, buf + e.buffer_size, m_buf, out_buf, - std::divides<_FP16>()); - } else { - for (unsigned int i = 0; i < e.buffer_size; ++i) { - *out_buf = *buf / *m_buf; - buf += strides[3]; - m_buf += e.strides[3]; - out_buf += output.getStrides()[3]; - } - } + ele_div(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], strides[3]); }; apply_broadcast(m, f, output); return output; } -void HalfTensor::copy(const TensorV2 &from) { +void HalfTensor::copy(const Tensor &from) { reshape(from.getDim()); copy(from.getData<_FP16>()); } -void HalfTensor::copyData(const TensorV2 &from) { +void HalfTensor::copyData(const Tensor &from) { if (!contiguous) { throw std::runtime_error("Cannot copy non-contiguous tensor"); } @@ -1052,6 +1008,18 @@ void HalfTensor::copyData(const TensorV2 &from) { } } +void HalfTensor::copy_with_stride(const Tensor &input, Tensor &output) { + for (unsigned int b = 0; b < output.batch(); ++b) { + for (unsigned int c = 0; c < output.channel(); ++c) { + for (unsigned int h = 0; h < output.height(); ++h) { + for (unsigned int w = 0; w < output.width(); ++w) { + output.setValue(b, c, h, w, input.getValue<_FP16>(b, c, h, w)); + } + } + } + } +} + std::vector HalfTensor::argmax() const { std::vector result; const _FP16 *data = (_FP16 *)getData(); @@ -1085,8 +1053,8 @@ float HalfTensor::minValue() const { return (float)*std::min_element(data, data + size()); } -TensorV2 &HalfTensor::transpose(const std::string &direction, - TensorV2 &output) const { +Tensor &HalfTensor::transpose(const std::string &direction, + Tensor &output) const { unsigned int SL, SI, SJ, SK; output.reshape(dim.transpose(direction)); @@ -1110,7 +1078,14 @@ TensorV2 &HalfTensor::transpose(const std::string &direction, } } else { if (is_format_nchw) { - transposeloop(l, i, k, j, SL, SI, SK, SJ); + for (unsigned int b = 0; b < batch(); 
++b) { + for (unsigned int c = 0; c < channel(); ++c) { + transpose_matrix( + height(), width(), (_FP16 *)getData() + getIndex(b, c, 0, 0), + width(), (_FP16 *)output.getData() + output.getIndex(b, c, 0, 0), + output.width()); + } + } } else { transposeloop_nhwc(l, k, j, i, SL, SK, SJ, SI); } @@ -1163,12 +1138,12 @@ void HalfTensor::copy(const void *buf) { } void HalfTensor::apply_broadcast( - TensorV2 const &m, - std::function v_func, - TensorV2 &output) const { - CREATE_V2_IF_EMPTY_DIMS(output, dim, nullptr); + Tensor &output) const { + CREATE_IF_EMPTY_DIMS(output, dim, nullptr); NNTR_THROW_IF(getData() == nullptr, std::invalid_argument) << getName() << " is not allocated"; @@ -1181,7 +1156,7 @@ void HalfTensor::apply_broadcast( /// note that buffer_size, the last stride is only used in v_func but it /// might be changed if (dim == m.getDim()) { - BroadcastInfoV2 e; + BroadcastInfo e; e.buffer_size = size(); e.strides[3] = 1; v_func(e, (_FP16 *)getData(), m.getData<_FP16>(), output.getData<_FP16>()); @@ -1192,11 +1167,11 @@ void HalfTensor::apply_broadcast( } void HalfTensor::apply_broadcast_util( - TensorV2 const &m, - std::function v_func, - TensorV2 &output, const BroadcastInfoV2 &e, int cur_axis, size_t offset, + Tensor &output, const BroadcastInfo &e, int cur_axis, size_t offset, size_t m_offset) const { const _FP16 *buf = (_FP16 *)this->getData(); diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h index 57451e3517..e0dfd77748 100644 --- a/nntrainer/tensor/half_tensor.h +++ b/nntrainer/tensor/half_tensor.h @@ -14,7 +14,6 @@ #ifdef __cplusplus #include -#include #ifdef DEBUG #define EXCEPT_WHEN_DEBUG @@ -61,7 +60,61 @@ class HalfTensor : public TensorBase { * @param fm format for the Tensor */ HalfTensor(std::vector>>> const &d, - Tformat fm); + Tformat fm) { + if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { + throw std::out_of_range( + "[Tensor] trying to initialize HalfTensor from empty vector"); + } + + dim.setTensorDim(0, d.size()); + if (fm == Tformat::NCHW) { + dim.setTensorDim(1, d[0].size()); + dim.setTensorDim(2, d[0][0].size()); + dim.setTensorDim(3, d[0][0][0].size()); + } else { + dim.setTensorDim(2, d[0].size()); + dim.setTensorDim(3, d[0][0].size()); + dim.setTensorDim(1, d[0][0][0].size()); + } + + dim.setTensorType({fm, Tdatatype::FP16}); + + strides = dim.computeStrides(); + contiguous = true; + initializer = Initializer::NONE; + + MemoryData *mem_data = + new MemoryData((void *)(new _FP16[dim.getDataLen()]())); + data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { + delete[] mem_data->getAddr<_FP16>(); + }); + + offset = 0; + + // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] + // == height, dim[3] == width. 
and if fm == Tformat::NHWC, dim[0] == batch, + // dim[1] == height, dim[2] == width, dim[3] == channel + if (fm == Tformat::NCHW) { + for (unsigned int i = 0; i < batch(); ++i) + for (unsigned int j = 0; j < channel(); ++j) + for (unsigned int k = 0; k < height(); ++k) + for (unsigned int l = 0; l < width(); ++l) + this->setValue(i, j, k, l, d[i][j][k][l]); + } else { + for (unsigned int i = 0; i < batch(); ++i) + for (unsigned int j = 0; j < height(); ++j) + for (unsigned int k = 0; k < width(); ++k) + for (unsigned int l = 0; l < channel(); ++l) + this->setValue(i, l, j, k, d[i][j][k][l]); + } + } + + /** + * @brief Construct a new FloatTensor object + * + * @param rhs TensorBase object to copy + */ + HalfTensor(TensorBase &rhs) : TensorBase(rhs) {} /** * @brief Basic Destructor @@ -83,22 +136,22 @@ class HalfTensor : public TensorBase { bool operator!=(const HalfTensor &rhs) const { return !(*this == rhs); } /** - * @copydoc TensorV2::allocate() + * @copydoc Tensor::allocate() */ void allocate() override; /** - * @copydoc TensorV2::deallocate() + * @copydoc Tensor::deallocate() */ void deallocate() override; /** - * @copydoc TensorV2::getData() + * @copydoc Tensor::getData() */ void *getData() const override; /** - * @copydoc TensorV2::getData(size_t idx) + * @copydoc Tensor::getData(size_t idx) */ void *getData(size_t idx) const override; @@ -147,24 +200,24 @@ class HalfTensor : public TensorBase { unsigned int w); /** - * @copydoc TensorV2::setValue(float value) + * @copydoc Tensor::setValue(float value) */ void setValue(float value) override; /** - * @copydoc TensorV2::setValue(b, c, h, w, value) + * @copydoc Tensor::setValue(b, c, h, w, value) */ void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, float value) override; /** - * @copydoc TensorV2::addValue(b, c, h, w, value, beta) + * @copydoc Tensor::addValue(b, c, h, w, value, beta) */ void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, float value, float beta) override; /** - * @copydoc TensorV2::setZero() + * @copydoc Tensor::setZero() */ void setZero() override; @@ -185,170 +238,193 @@ class HalfTensor : public TensorBase { }; /** - * @copydoc TensorV2::setRandNormal() + * @copydoc Tensor::setRandNormal() */ - void setRandNormal(float mean = 0.0f, float stddev = 0.05f); + void setRandNormal(float mean = 0.0f, float stddev = 0.05f) override; /** - * @copydoc TensorV2::setRandUniform() + * @copydoc Tensor::setRandUniform() */ - void setRandUniform(float min = -0.05f, float max = 0.05f); + void setRandUniform(float min = -0.05f, float max = 0.05f) override; /** - * @copydoc TensorV2::setRandBernoulli() + * @copydoc Tensor::setRandBernoulli() */ - void setRandBernoulli(float probability = 0.5f); + void setRandBernoulli(float probability = 0.5f) override; /** - * @copydoc TensorV2::initialize() + * @copydoc Tensor::initialize() */ void initialize() override; /** - * @copydoc TensorV2::initialize(Initializer init) + * @copydoc Tensor::initialize(Initializer init) */ void initialize(Initializer init) override; /** - * @copydoc TensorV2::apply(std::function f, TensorV2 &output) + * @copydoc Tensor::apply(std::function f, Tensor &output) */ - TensorV2 &apply(std::function<_FP16(_FP16)> f, - TensorV2 &output) const override; + Tensor &apply(std::function<_FP16(_FP16)> f, Tensor &output) const override; /** - * @copydoc TensorV2::multiply_strided(TensorV2 const &m, TensorV2 &output, + * @copydoc Tensor::multiply_strided(Tensor const &m, Tensor &output, * const float beta) */ - TensorV2 
multiply_strided(TensorV2 const &m, TensorV2 &output,
-                            const float beta) const override;
+  Tensor multiply_strided(Tensor const &m, Tensor &output,
+                          const float beta) const override;

   /**
-   * @copydoc TensorV2::multiply_i(float const &value)
+   * @copydoc Tensor::multiply_i(float const &value)
    */
   int multiply_i(float const &value) override;

   /**
-   * @copydoc TensorV2::multiply(float const &value, TensorV2 &out)
+   * @copydoc Tensor::multiply(float const &value, Tensor &out)
    */
-  TensorV2 &multiply(float const &value, TensorV2 &out) const override;
+  Tensor &multiply(float const &value, Tensor &out) const override;

   /**
-   * @copydoc TensorV2::multiply(TensorV2 const &m, TensorV2 &output, const
+   * @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const
    * float beta = 0.0)
    */
-  TensorV2 &multiply(TensorV2 const &m, TensorV2 &output,
-                     const float beta = 0.0) const override;
+  Tensor &multiply(Tensor const &m, Tensor &output,
+                   const float beta = 0.0) const override;

   /**
-   * @copydoc TensorV2::divide(float const &value, TensorV2 &output)
+   * @copydoc Tensor::divide(float const &value, Tensor &output)
    */
-  TensorV2 &divide(float const &value, TensorV2 &output) const override;
+  Tensor &divide(float const &value, Tensor &output) const override;

   /**
-   * @copydoc TensorV2::divide(TensorV2 const &m, TensorV2 &output)
+   * @copydoc Tensor::divide(Tensor const &m, Tensor &output)
    */
-  TensorV2 &divide(TensorV2 const &m, TensorV2 &output) const override;
+  Tensor &divide(Tensor const &m, Tensor &output) const override;

   /**
-   * @copydoc TensorV2::add_strided(TensorV2 const &input, TensorV2 &output,
+   * @copydoc Tensor::add_strided(Tensor const &input, Tensor &output,
    * const float beta)
    */
-  TensorV2 &add_strided(TensorV2 const &input, TensorV2 &output,
-                        const float beta) const override;
+  Tensor &add_strided(Tensor const &input, Tensor &output,
+                      const float beta) const override;
+
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  int add_i(Tensor const &m, Tensor &output, float const alpha) override;

   /**
-   * @copydoc TensorV2::add(float const &value, TensorV2 &output)
+   * @copydoc Tensor::add_i_partial()
    */
-  TensorV2 &add(float const &value, TensorV2 &output) const override;
+  int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
+                    unsigned int incX, unsigned int incY, const Tensor alphas,
+                    unsigned int alpha_idx) override;

   /**
-   * @copydoc TensorV2::add(TensorV2 const &m, TensorV2 &output, float const
+   * @copydoc Tensor::add(float const &value, Tensor &output)
+   */
+  Tensor &add(float const &value, Tensor &output) const override;
+
+  /**
+   * @copydoc Tensor::add(Tensor const &m, Tensor &output, float const
    * alpha)
    */
-  TensorV2 &add(TensorV2 const &m, TensorV2 &output,
-                float const alpha) const override;
+  Tensor &add(Tensor const &m, Tensor &output,
+              float const alpha) const override;

   /**
-   * @copydoc TensorV2::subtract(float const &value, TensorV2 &output)
+   * @copydoc Tensor::subtract(float const &value, Tensor &output)
    */
-  TensorV2 &subtract(float const &value, TensorV2 &output) const override;
+  Tensor &subtract(float const &value, Tensor &output) const override;

   /**
-   * @copydoc TensorBase::sum_by_batch(TensorV2 &output)
+   * @copydoc TensorBase::sum_by_batch(Tensor &output)
    */
-  void sum_by_batch(TensorV2 &output) const override;
+  void sum_by_batch(Tensor &output) const override;

   /**
-   * @copydoc TensorV2::sum(unsigned int axis, TensorV2 &output, float alpha,
+   * @copydoc Tensor::sum(unsigned int axis, Tensor &output, float alpha,
    * float beta) const
    */
-  TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha,
-                float beta) const override;
+  Tensor &sum(unsigned int axis, Tensor &output, float alpha,
+              float beta) const override;

   /**
-   * @copydoc TensorV2::l2norm
+   * @copydoc Tensor::l2norm
    */
   float l2norm() const override;

   /**
-   * @copydoc TensorV2::pow(float exponent, TensorV2 &output)
+   * @copydoc Tensor::pow(float exponent, Tensor &output)
+   */
+  Tensor &pow(float exponent, Tensor &output) const override;
+
+  /**
+   * @copydoc Tensor::erf(Tensor &output)
    */
-  TensorV2 &pow(float exponent, TensorV2 &output) const override;
+  Tensor &erf(Tensor &output) const override;

   /**
-   * @copydoc TensorV2::erf(TensorV2 &output)
+   * @copydoc TensorBase::inv_sqrt(Tensor &out)
    */
-  TensorV2 &erf(TensorV2 &output) const override;
+  void inv_sqrt(Tensor &out) override;

   /**
-   * @copydoc TensorV2::dot(TensorV2 const &input, TensorV2 &output, bool
+   * @copydoc Tensor::dot(Tensor const &input, Tensor &output, bool
    * trans, bool trans_in, float beta)
    */
-  TensorV2 &dot(TensorV2 const &input, TensorV2 &output, bool trans,
-                bool trans_in, float beta) const override;
+  Tensor &dot(Tensor const &input, Tensor &output, bool trans, bool trans_in,
+              float beta) const override;

   /**
-   * @copydoc TensorV2::dropout_mask(float dropout)
+   * @copydoc Tensor::dropout_mask(float dropout)
    */
   void dropout_mask(float dropout) override;

   /**
-   * @copydoc TensorV2::filter_mask(const TensorV2 &mask_len, bool reverse)
+   * @copydoc Tensor::filter_mask(const Tensor &mask_len, bool reverse)
    */
-  void filter_mask(const TensorV2 &mask_len, bool reverse) override;
+  void filter_mask(const Tensor &mask_len, bool reverse) override;

   /**
-   * @copydoc TensorV2::zoneout_mask(TensorV2 &opposite, float zoneout)
+   * @copydoc Tensor::zoneout_mask(Tensor &opposite, float zoneout)
    */
-  void zoneout_mask(TensorV2 &opposite, float zoneout) override;
+  void zoneout_mask(Tensor &opposite, float zoneout) override;

   /**
-   * @copydoc TensorV2::split(std::vector<size_t> sizes, int axis)
+   * @copydoc Tensor::split(std::vector<size_t> sizes, int axis)
    */
-  std::vector<TensorV2> split(std::vector<size_t> sizes, int axis) override;
+  std::vector<Tensor> split(std::vector<size_t> sizes, int axis) override;

   /**
-   * @copydoc TensorV2::cat(const std::vector<TensorV2> &tensors, int axis)
+   * @copydoc Tensor::cat(const std::vector<Tensor> &tensors, int axis)
    */
-  static TensorV2 cat(const std::vector<TensorV2> &tensors, int axis);
+  Tensor concat(const std::vector<Tensor> &tensors, int axis) override;

   /**
-   * @copydoc TensorV2::copy(const TensorV2 &from)
+   * @copydoc Tensor::copy(const Tensor &from)
    */
-  void copy(const TensorV2 &from);
+  void copy(const Tensor &from) override;

   /**
-   * @copydoc TensorV2::copyData(const TensorV2 &from)
+   * @copydoc Tensor::copyData(const Tensor &from)
    */
-  void copyData(const TensorV2 &from);
+  void copyData(const Tensor &from) override;

   /**
-   * @copydoc TensorV2::argmax()
+   * @brief Copy the Tensor
+   * @param[in] input Tensor to be copied
+   * @param[out] output output Tensor
+   */
+  void copy_with_stride(const Tensor &input, Tensor &output) override;
+
+  /**
+   * @copydoc Tensor::argmax()
    */
   std::vector<unsigned int> argmax() const override;

   /**
-   * @copydoc TensorV2::max_abs()
+   * @copydoc Tensor::max_abs()
    */
   float max_abs() const override;

@@ -363,13 +439,13 @@ class HalfTensor : public TensorBase {
   float minValue() const override;

   /**
-   * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out)
+   * @copydoc Tensor::transpose(const std::string &direction, Tensor &out)
    */
-  TensorV2 &transpose(const std::string &direction,
-                      TensorV2 &output) const override;
+ Tensor &transpose(const std::string &direction, + Tensor &output) const override; /** - * @copydoc TensorV2::print(std::ostream &out) + * @copydoc Tensor::print(std::ostream &out) */ void print(std::ostream &out) const override; @@ -393,13 +469,14 @@ class HalfTensor : public TensorBase { * @retval #ML_ERROR_NONE Successful * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ - void apply_broadcast_util( - TensorV2 const &m, - std::function - v_func, - TensorV2 &output, const BroadcastInfoV2 &e, int cur_axis = -1, - size_t offset = 0, size_t m_offset = 0) const; + void + apply_broadcast_util(Tensor const &m, + std::function + v_func, + Tensor &output, const BroadcastInfo &e, + int cur_axis = -1, size_t offset = 0, + size_t m_offset = 0) const; /** * @brief Applies the given operator to the tensor with the passed argument @@ -409,12 +486,11 @@ class HalfTensor : public TensorBase { * @retval #ML_ERROR_NONE Successful * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ - void - apply_broadcast(TensorV2 const &m, - std::function - v_func, - TensorV2 &output) const; + void apply_broadcast(Tensor const &m, + std::function + v_func, + Tensor &output) const; }; } // namespace nntrainer diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index 9a0d235ba9..8b47235791 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -430,7 +430,7 @@ std::vector Manager::requestWeights( */ grad = tensor_pool.requestOrExtend(shared_name + Var_Grad::grad_suffix, dim_g, grad_exec_order, grad_ls, - Tensor::Initializer::ZEROS); + Initializer::ZEROS); } } else { /** case requesting fresh weights */ @@ -446,8 +446,8 @@ std::vector Manager::requestWeights( if (Weight::isGradientClipByGlobalNorm(clip_by_global_norm)) is_wgrad = false; grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim_g, - grad_exec_order, grad_ls, - Tensor::Initializer::ZEROS, is_wgrad); + grad_exec_order, grad_ls, Initializer::ZEROS, + is_wgrad); } } @@ -515,17 +515,16 @@ std::vector Manager::requestTensors( if (need_grad && tspan > TensorLifespan::FORWARD_FUNC_LIFESPAN) { grad = tensor_pool.requestOrExtend(shared_name + Var_Grad::grad_suffix, dim, grad_exec_order, tspan, - Tensor::Initializer::ZEROS); + Initializer::ZEROS); } } else { var = tensor_pool.request(name, dim, var_exec_order, tspan, t_init); if (need_grad && tspan > TensorLifespan::FORWARD_FUNC_LIFESPAN) { - grad = - tensor_pool.request(name + Var_Grad::grad_suffix, /// name - dim, grad_exec_order, tspan, - Tensor::Initializer::ZEROS /// tensor initializer - ); + grad = tensor_pool.request(name + Var_Grad::grad_suffix, /// name + dim, grad_exec_order, tspan, + Initializer::ZEROS /// tensor initializer + ); } } @@ -668,8 +667,7 @@ bool Manager::isSecondLastAccess(const std::string &name, */ std::vector Manager::requestWeightOptimizerVariables( const std::vector &dims, const std::string &name, - const TensorLifespan &lifespan, bool is_grad_clip, - Tensor::Initializer initializer) { + const TensorLifespan &lifespan, bool is_grad_clip, Initializer initializer) { std::vector ret; ret.reserve(dims.size()); diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index 8ae5aa890a..9ccde77113 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -225,7 +225,7 @@ class Manager { std::vector requestWeightOptimizerVariables( const std::vector &dims, const std::string &name, const TensorLifespan &lifespan, bool is_grad_clip, - Tensor::Initializer initializer = Tensor::Initializer::NONE); + 
Initializer initializer = Initializer::NONE); /** * @brief Create tensors with the given spec diff --git a/nntrainer/tensor/meson.build b/nntrainer/tensor/meson.build index 202b730060..d1d824b61d 100644 --- a/nntrainer/tensor/meson.build +++ b/nntrainer/tensor/meson.build @@ -6,7 +6,6 @@ tensor_sources = [ 'lazy_tensor.cpp', 'manager.cpp', 'tensor.cpp', - 'tensor_v2.cpp', 'tensor_base.cpp', 'float_tensor.cpp', 'tensor_dim.cpp', @@ -25,7 +24,6 @@ tensor_sources = [ tensor_headers = [ 'memory_data.h', 'tensor.h', - 'tensor_v2.h', 'tensor_base.h', 'float_tensor.h', 'weight.h', diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp index 0997a5ee37..a1d3525602 100644 --- a/nntrainer/tensor/tensor.cpp +++ b/nntrainer/tensor/tensor.cpp @@ -1,571 +1,186 @@ +// SPDX-License-Identifier: Apache-2.0 /** - * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * * @file tensor.cpp - * @date 04 December 2019 - * @brief This is Tensor class for calculation + * @date 01 December 2023 + * @brief This is a Tensor class * @see https://github.com/nnstreamer/nntrainer * @author Jijoong Moon + * @author Donghyeon Jeong * @bug No known bugs except for NYI items - * */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include #include #include -#include - -#define transposeloop(cl, ci, cj, ck, sl, si, sj, sk) \ - do { \ - unsigned int i, j, k, l; \ - int inidx = 0, outidx = 0; \ - for (cl = 0; cl < sl; cl++) \ - for (ci = 0; ci < si; ci++) \ - for (cj = 0; cj < sj; cj++) \ - for (ck = 0; ck < sk; ck++) { \ - outidx = si * sj * sk * cl + sj * sk * ci + sk * cj + ck; \ - inidx = l * SI * SJ * SK + i * SJ * SK + j * SK + k; \ - outptr[outidx] = inptr[inidx]; \ - } \ - } while (0); - -#define transposeloop_nhwc(cl, ci, cj, ck, sl, si, sj, sk) \ - do { \ - unsigned int i, j, k, l; \ - int inidx = 0, outidx = 0; \ - for (cl = 0; cl < sl; cl++) \ - for (ci = 0; ci < si; ci++) \ - for (cj = 0; cj < sj; cj++) \ - for (ck = 0; ck < sk; ck++) { \ - outidx = si * sj * sk * cl + sj * sk * ci + sk * cj + ck; \ - inidx = l * SJ * SK * SI + j * SK * SI + k * SI + i; \ - outptr[outidx] = inptr[inidx]; \ - } \ - } while (0); -namespace nntrainer { +#ifdef ENABLE_FP16 +#include +#endif -/** - * @struct External Loop Info for broadcasted info - * @brief External Loop Info for broadcasted iteration. Please refer to - * DISABLED_private_external_loop_n in unittest_nntrainer_tensor. - * @note This should better be implemented in iterator fashion before used - * extensively. - */ -struct Tensor::BroadcastInfo { - - /** - * @brief Construct a new External Loop Info object - * - */ - BroadcastInfo() : - buffer_size(0), - buffer_axis(-1), - strides{0, 0, 0, 0}, - tensor_type(nntrainer::TensorDim::TensorType()) {} - - unsigned int buffer_size; /**< virtual size of the buffer */ - int buffer_axis; /**< the smallest axis that should be looped. 
- -1 means no loop needed*/ - std::array - strides; /**< modified strides for the loop */ - nntrainer::TensorDim::TensorType tensor_type; -}; +namespace nntrainer { -Tensor::Tensor(const TensorDim &d, bool alloc_now, Tensor::Initializer init, - std::string name_) : - Tensor(name_, d.getFormat()) { - if (d.getDataLen() != 0) { - dim = d; - strides = d.computeStrides(); - initializer = init; - if (alloc_now) - allocate(); - } -} +Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) { + itensor = nullptr; -Tensor::Tensor(const TensorDim &d, const void *buf) : Tensor(d, true) { - if (d.getDataLen() != 0) { - if (buf != nullptr) - copy(buf); + if (d_type == Tdatatype::FP32) { + itensor = std::shared_ptr(new FloatTensor(name_, fm), + std::default_delete()); + } else if (d_type == Tdatatype::FP16) { +#ifdef ENABLE_FP16 + itensor = std::shared_ptr(new HalfTensor(name_, fm), + std::default_delete()); +#else + throw std::invalid_argument("Error: enable-fp16 is not enabled"); +#endif + } else { + throw std::invalid_argument( + "Error: Tensor cannot be constructed because the given d_type is not " + "compatible with itensor. The supported d_types are: FP32, FP16 " + "(if built with ENABLE_FP16)."); } } -/** - * @class SrcSharedTensor - * @brief Source of the shared tensor - */ -class SrcSharedTensor { -public: - /** - * @brief Constructor for the class - */ - SrcSharedTensor() : src(nullptr), off(0) {} - - SrcSharedTensor(const Tensor *tensor, size_t offset) : - src(tensor), off(offset) {} - - /** - * @brief Get the allocated src tensor - */ - const Tensor *tensor() const { - if (!src) - throw std::runtime_error("Accessing empty src tensor"); - - return src; - } - - /** - * @brief Get the offset from the source tensor - */ - size_t offset() const { return off; } - -private: - const Tensor *src; /**< Tensor of the source */ - size_t off; /**< offset from the source data ptr */ -}; - -void Tensor::allocate() { - if (empty() || data) - /// already allocated - return; +Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init, + std::string name) { + itensor = nullptr; - if (src_tensor) { - /// allocate data based on the source tensor - data = src_tensor->tensor()->data; - offset = src_tensor->tensor()->offset + src_tensor->offset(); - /** as this memory is shared, do NOT initialize */ - } else { - /// allocate new memory for the tensor data - - MemoryData *mem_data; - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - mem_data = new MemoryData((void *)(new float[dim.getDataLen()]{})); - data = std::shared_ptr(mem_data, [](auto *mem_data) { - delete[] mem_data->template getAddr(); - delete mem_data; - }); - - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { + if (d.getDataType() == Tdatatype::FP32) { + itensor = + std::shared_ptr(new FloatTensor(d, alloc_now, init, name), + std::default_delete()); + } else if (d.getDataType() == Tdatatype::FP16) { #ifdef ENABLE_FP16 - mem_data = new MemoryData((void *)(new _FP16[dim.getDataLen()]{})); - data = std::shared_ptr(mem_data, [](auto *mem_data) { - delete[] mem_data->template getAddr<_FP16>(); - delete mem_data; - }); + itensor = + std::shared_ptr(new HalfTensor(d, alloc_now, init, name), + std::default_delete()); #else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); + throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif - } else if (getDataType() == ml::train::TensorDim::DataType::QINT8) { - mem_data = new MemoryData((void *)(new uint8_t[dim.getDataLen()]{})); - data = 
std::shared_ptr(mem_data, [](auto *mem_data) { - delete[] mem_data->template getAddr(); - delete mem_data; - }); - } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) { - mem_data = - new MemoryData((void *)(new uint8_t[(dim.getDataLen() + 1) / 2]{})); - data = std::shared_ptr(mem_data, [](auto *mem_data) { - delete[] mem_data->template getAddr(); - delete mem_data; - }); - } - offset = 0; - initialize(); + } else { + throw std::invalid_argument( + "Error: Tensor cannot be constructed because the given d_type is not " + "compatible with itensor. The supported d_types are: FP32, FP16 " + "(if built with ENABLE_FP16)."); } } -bool Tensor::operator==(const Tensor &rhs) const { - if (this->dim != rhs.dim) - return false; - - size_t len = size(); - - if (len != rhs.size()) - return false; - - if (contiguous != rhs.contiguous) - return false; - - if (strides != rhs.strides) - return false; - - if (getScaleFactors() != rhs.getScaleFactors()) - return false; - - if (getZeroPoints() != rhs.getZeroPoints()) - return false; +Tensor::Tensor(const TensorDim &d, const void *buf) { + itensor = nullptr; - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *_data = getData(); - const float *_rdata = rhs.getData(); - for (size_t i = 0; i < len; ++i) { - /** not checking sign change is intentional to avoid float calculation - * errors around 0 */ - if (std::isnan(_data[i]) || std::isnan(_rdata[i]) || - std::fabs(_data[i] - _rdata[i]) > epsilon) - return false; - } - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { + if (d.getDataType() == Tdatatype::FP32) { + itensor = std::shared_ptr(new FloatTensor(d, buf), + std::default_delete()); + } else if (d.getDataType() == Tdatatype::FP16) { #ifdef ENABLE_FP16 - const _FP16 *_data = getData<_FP16>(); - const _FP16 *_rdata = rhs.getData<_FP16>(); - for (size_t i = 0; i < len; ++i) { - // @todo: need to check if float casting valid - if ((std::isnan((float)_data[i]) && !std::isnan((float)_rdata[i])) || - (!std::isnan((float)_data[i]) && std::isnan((float)_rdata[i])) || - std::fabs((float)(_data[i] - _rdata[i])) > epsilon) - return false; - } + itensor = std::shared_ptr(new HalfTensor(d, buf), + std::default_delete()); #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif - } else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT8) { - const uint8_t *_data = getData(); - const uint8_t *_rdata = rhs.getData(); - for (size_t i = 0; i < len; ++i) { - if (_data[i] != _rdata[i]) - return false; - } - } else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT4) { - const uint8_t *_data = getData(); - const uint8_t *_rdata = rhs.getData(); - uint8_t data, rdata; - for (size_t i = 0; i < len; ++i) { - /** not checking sign change is intentional to avoid float calculation - * errors around 0 */ - data = decode_qint(_data[i / 2], (i % 2 == 0)); - rdata = decode_qint(_rdata[i / 2], (i % 2 == 0)); - - if (data != rdata) - return false; - } + } else { + throw std::invalid_argument( + "Error: Tensor cannot be constructed because the given d_type is not " + "compatible with itensor. 
The supported d_types are: FP32, FP16 " + "(if built with ENABLE_FP16)."); } - - return true; } -void Tensor::setRandNormal(float mean, float std) { - if (this->getDataType() == ml::train::TensorDim::DataType::FP32) { - setDist>( - std::normal_distribution(mean, std)); - } else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) { +Tensor::Tensor(const Tensor &rhs) { + if (rhs.getDataType() == Tdatatype::FP32) { + itensor = std::shared_ptr(new FloatTensor(*rhs.itensor), + std::default_delete()); + } else if (rhs.getDataType() == Tdatatype::FP16) { #ifdef ENABLE_FP16 - setDist<_FP16, std::normal_distribution>( - std::normal_distribution(mean, std)); + itensor = std::shared_ptr(new HalfTensor(*rhs.itensor), + std::default_delete()); #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif - } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT8) { - throw std::invalid_argument("Error: RandNormal is invalid for QINT8"); - } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT4) { - throw std::invalid_argument("Error: RandNormal is invalid for QINT4"); } } -void Tensor::setRandUniform(float min, float max) { - if (this->getDataType() == ml::train::TensorDim::DataType::FP32) { - setDist>( - std::uniform_real_distribution(min, max)); - } else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) { +Tensor &Tensor::operator=(const Tensor &rhs) { + if (rhs.getDataType() == Tdatatype::FP32) { + itensor = std::shared_ptr(new FloatTensor(*rhs.itensor), + std::default_delete()); + } else if (rhs.getDataType() == Tdatatype::FP16) { #ifdef ENABLE_FP16 - setDist<_FP16, std::uniform_real_distribution>( - std::uniform_real_distribution(min, max)); + itensor = std::shared_ptr(new HalfTensor(*rhs.itensor), + std::default_delete()); #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif - } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT8) { - throw std::invalid_argument("Error: RandUniform is invalid for QINT8"); - } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT4) { - throw std::invalid_argument("Error: RandUniform is invalid for QINT4"); } + return *this; } -void Tensor::setRandBernoulli(float probability) { - if (this->getDataType() == ml::train::TensorDim::DataType::FP32) { - setDist( - std::bernoulli_distribution(probability)); - } else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) { +bool Tensor::operator==(const Tensor &rhs) const { + /// compares tensor information + if (*itensor == *rhs.itensor) { + /// compares tensor data + if (getDataType() == Tdatatype::FP32) { + return *std::dynamic_pointer_cast(itensor) == + *std::dynamic_pointer_cast(rhs.itensor); + } else if (getDataType() == Tdatatype::FP16) { #ifdef ENABLE_FP16 - setDist<_FP16, std::bernoulli_distribution>( - std::bernoulli_distribution(probability)); + return *std::dynamic_pointer_cast(itensor) == + *std::dynamic_pointer_cast(rhs.itensor); #else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); + throw std::invalid_argument( + "Error: HalfTensor cannot be created or used when FP16 is not enabled. 
" + "Please check if the tensor data type is set properly."); #endif - } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT8) { - throw std::invalid_argument("Error: setRandBernoulli is invalid for QINT8"); - } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT4) { - throw std::invalid_argument("Error: setRandBernoulli is invalid for QINT4"); + } } + return false; } -void Tensor::initialize() { - if (empty() || !isAllocated()) - return; +void Tensor::allocate() { itensor->allocate(); } - unsigned int fan_in, fan_out; - - /// @fixme: when unit is equal to one, this does not work, we need to rely on - /// effective dimension then actual numbers here. For now, some heuristics - /// added to infer what would be fan_in/fan_out - if (dim.batch() * dim.channel() * dim.height() == 1) { - fan_out = fan_in = dim.width(); - } else if (dim.batch() * dim.channel() == 1) { /// fc layer - 2-D tensor - fan_in = dim.height(); - fan_out = dim.width(); - } else { /// conv2d filters - 4d tensor, @todo extend this to > 4 - auto field_size = dim.height() * dim.width(); - - // this also handles below cases. - // 1. fan_in = fan_out = 1 as well. - // 2. batch == 1, channel == 1 and height == 1, theoretical rank of 1 - fan_in = dim.channel() * field_size; - fan_out = dim.batch() * field_size; - } +void Tensor::deallocate() { itensor->deallocate(); } - switch (initializer) { - case Tensor::Initializer::ZEROS: - setZero(); - break; - case Tensor::Initializer::ONES: - setValue(1.0f); - break; - case Tensor::Initializer::LECUN_NORMAL: - setRandNormal(0.0f, sqrtFloat(1.0f / fan_in)); - break; - case Tensor::Initializer::XAVIER_NORMAL: - setRandNormal(0.0f, sqrtFloat(2.0f / (fan_in + fan_out))); - break; - case Tensor::Initializer::HE_NORMAL: - setRandNormal(0.0f, sqrtFloat(2.0f / (fan_in))); - break; - case Tensor::Initializer::LECUN_UNIFORM: - setRandUniform(-1.0f * sqrtFloat(1.0f / fan_in), sqrtFloat(1.0f / fan_in)); - break; - case Tensor::Initializer::XAVIER_UNIFORM: - setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in + fan_out)), - sqrtFloat(6.0 / (fan_in + fan_out))); - break; - case Tensor::Initializer::HE_UNIFORM: - setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in)), - sqrtFloat(6.0 / (fan_in))); - break; - default: - break; - } +bool Tensor::isAllocated() { return itensor->isAllocated(); } - putData(); +void Tensor::setValue(float value) { itensor->setValue(value); } + +void Tensor::setValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w, float value) { + itensor->setValue(b, c, h, w, value); } -int Tensor::multiply_i_strided(Tensor const &m, const float beta) { - try { - this->multiply_strided(m, *this, beta); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } +void Tensor::addValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w, float value, float beta) noexcept { + itensor->addValue(b, c, h, w, value, beta); +} - return ML_ERROR_NONE; +void Tensor::setZero() { itensor->setZero(); } + +void Tensor::setRandNormal(float mean, float stddev) { + itensor->setRandNormal(mean, stddev); } -Tensor Tensor::multiply_strided(Tensor const &m, const float beta) const { - Tensor t; - return this->multiply_strided(m, t, beta); +void Tensor::setRandUniform(float min, float max) { + itensor->setRandUniform(min, max); } -Tensor &Tensor::multiply_strided(Tensor const &m, Tensor &output, - const float beta) const { - /** TODO: throw than create new dimenions */ - 
CREATE_IF_EMPTY_DIMS(output, dim, nullptr); +void Tensor::setRandBernoulli(float probability) { + itensor->setRandBernoulli(probability); +} - if (size() != m.size() || size() != output.size()) - throw std::invalid_argument( - "Strided multiplication does not support broadcasting"); - - if (getDataType() == Tdatatype::FP32) { - NNTR_THROW_IF(getData() == nullptr, std::invalid_argument) - << getName() << " is not allocated"; - NNTR_THROW_IF(m.getData() == nullptr, std::invalid_argument) - << m.getName() << " is not allocated"; - NNTR_THROW_IF(output.getData() == nullptr, std::invalid_argument) - << output.getName() << " is not allocated"; - } else if (getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - NNTR_THROW_IF(getData<_FP16>() == nullptr, std::invalid_argument) - << getName() << " is not allocated"; - NNTR_THROW_IF(m.getData<_FP16>() == nullptr, std::invalid_argument) - << m.getName() << " is not allocated"; - NNTR_THROW_IF(output.getData<_FP16>() == nullptr, std::invalid_argument) - << output.getName() << " is not allocated"; -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } +void Tensor::initialize() { itensor->initialize(); } - // Format NCHW Case - if (this->getFormat() == Tformat::NCHW) { - if (getDataType() == Tdatatype::FP32) { - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - output.addValue(b, c, h, w, - getValue(b, c, h, w) * - m.getValue(b, c, h, w), - beta); - } - } - } - } - } else { - /** @todo optimize this with combining these loops where stride is 1 - */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - float *out_data = output.getAddress(b, c, h, 0); - const float *m_data = m.getAddress(b, c, h, 0); - const float *in_data = getAddress(b, c, h, 0); - std::transform(in_data, in_data + width(), m_data, out_data, - std::multiplies()); - } - } - } - } - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - output.addValue(b, c, h, w, - getValue<_FP16>(b, c, h, w) * - m.getValue<_FP16>(b, c, h, w), - beta); - } - } - } - } - } else { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - _FP16 *out_data = output.getAddress<_FP16>(b, c, h, 0); - const _FP16 *m_data = m.getAddress<_FP16>(b, c, h, 0); - const _FP16 *in_data = getAddress<_FP16>(b, c, h, 0); - std::transform(in_data, in_data + width(), m_data, out_data, - std::multiplies<_FP16>()); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } else { // Format NHWC Case - if (getDataType() == Tdatatype::FP32) { - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - for (unsigned int c = 0; c < channel(); ++c) { - 
output.addValue(b, c, h, w, - getValue(b, c, h, w) * - m.getValue(b, c, h, w), - beta); - } - } - } - } - } else { - /** @todo optimize this with combining these loops where - * stride is 1 */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - float *out_data = output.getAddress(b, 0, h, w); - const float *m_data = m.getAddress(b, 0, h, w); - const float *in_data = getAddress(b, 0, h, w); - std::transform(in_data, in_data + channel(), m_data, out_data, - std::multiplies()); - } - } - } - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - for (unsigned int c = 0; c < channel(); ++c) { - output.addValue(b, c, h, w, - getValue<_FP16>(b, c, h, w) * - m.getValue<_FP16>(b, c, h, w), - beta); - } - } - } - } - } else { - /** @todo optimize this with combining these loops where - * stride is 1 */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - _FP16 *out_data = output.getAddress<_FP16>(b, 0, h, w); - const _FP16 *m_data = m.getAddress<_FP16>(b, 0, h, w); - const _FP16 *in_data = getAddress<_FP16>(b, 0, h, w); - std::transform(in_data, in_data + channel(), m_data, out_data, - std::multiplies<_FP16>()); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } +void Tensor::initialize(Initializer init) { itensor->initialize(init); } - return output; +Tensor Tensor::apply(std::function f) const { return f(*this); } + +Tensor &Tensor::apply(std::function f, + Tensor &output) const { + return f(*this, output); } -int Tensor::add_i_strided(Tensor const &m, const float beta) { +int Tensor::multiply_i_strided(Tensor const &m, const float beta) { try { - this->add_strided(m, *this, beta); + this->multiply_strided(m, *this, beta); } catch (std::exception &err) { ml_loge("%s %s", typeid(err).name(), err.what()); return ML_ERROR_INVALID_PARAMETER; @@ -574,214 +189,31 @@ int Tensor::add_i_strided(Tensor const &m, const float beta) { return ML_ERROR_NONE; } -Tensor Tensor::add_strided(Tensor const &m, const float beta) const { - Tensor t; - return this->add_strided(m, t, beta); +Tensor Tensor::multiply_strided(Tensor const &m, const float beta) const { + Tensor t("", getFormat(), getDataType()); + return this->multiply_strided(m, t, beta); } -Tensor &Tensor::add_strided(Tensor const &m, Tensor &output, - const float beta) const { - /** TODO: throw than create new dimenions */ - CREATE_IF_EMPTY_DIMS(output, dim, nullptr); - - if (size() != m.size() || size() != output.size()) - throw std::invalid_argument( - "Strided addition does not support broadcasting"); - - if (getDataType() == Tdatatype::FP32) { - NNTR_THROW_IF(getData() == nullptr, std::invalid_argument) - << getName() << " is not allocated"; - NNTR_THROW_IF(m.getData() == nullptr, std::invalid_argument) - << m.getName() << " is not allocated"; - NNTR_THROW_IF(output.getData() == nullptr, std::invalid_argument) - << output.getName() << " is not allocated"; - } else if (getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - NNTR_THROW_IF(getData<_FP16>() == nullptr, std::invalid_argument) - << getName() << " is not allocated"; - 
NNTR_THROW_IF(m.getData<_FP16>() == nullptr, std::invalid_argument) - << m.getName() << " is not allocated"; - NNTR_THROW_IF(output.getData<_FP16>() == nullptr, std::invalid_argument) - << output.getName() << " is not allocated"; -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - // Format NCHW Case - if (this->getFormat() == Tformat::NCHW) { - if (getDataType() == Tdatatype::FP32) { - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - output.setValue(b, c, h, w, - getValue(b, c, h, w) + - m.getValue(b, c, h, w) * beta); - } - } - } - } - } else { - /** @todo optimize this with combining these loops where stride is 1 */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - float *out_data = output.getAddress(b, c, h, 0); - const float *m_data = m.getAddress(b, c, h, 0); - const float *in_data = getAddress(b, c, h, 0); - std::transform(in_data, in_data + width(), m_data, out_data, - std::plus()); - } - } - } - } - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - output.setValue(b, c, h, w, - getValue<_FP16>(b, c, h, w) + - m.getValue<_FP16>(b, c, h, w) * beta); - } - } - } - } - } else { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - _FP16 *out_data = output.getAddress<_FP16>(b, c, h, 0); - const _FP16 *m_data = m.getAddress<_FP16>(b, c, h, 0); - const _FP16 *in_data = getAddress<_FP16>(b, c, h, 0); - std::transform(in_data, in_data + width(), m_data, out_data, - std::plus<_FP16>()); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } else { // Format NHWC Case - if (getDataType() == Tdatatype::FP32) { - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - for (unsigned int c = 0; c < channel(); ++c) { - output.setValue(b, c, h, w, - getValue(b, c, h, w) + - m.getValue(b, c, h, w) * beta); - } - } - } - } - } else { - /** @todo optimize this with combining these loops where - * stride is 1 */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - float *out_data = output.getAddress(b, 0, h, w); - const float *m_data = m.getAddress(b, 0, h, w); - const float *in_data = getAddress(b, 0, h, w); - std::transform(in_data, in_data + channel(), m_data, out_data, - std::plus()); - } - } - } - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (strides[3] != 1 || m.strides[3] != 1 || output.strides[3] != 1 || - beta != 0.0) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - 
for (unsigned int c = 0; c < channel(); ++c) { - output.setValue(b, c, h, w, - getValue<_FP16>(b, c, h, w) + - m.getValue<_FP16>(b, c, h, w) * beta); - } - } - } - } - } else { - /** @todo optimize this with combining these loops where - * stride is 1 */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - _FP16 *out_data = output.getAddress<_FP16>(b, 0, h, w); - const _FP16 *m_data = m.getAddress<_FP16>(b, 0, h, w); - const _FP16 *in_data = getAddress<_FP16>(b, 0, h, w); - std::transform(in_data, in_data + channel(), m_data, out_data, - std::plus<_FP16>()); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } +Tensor &Tensor::multiply_strided(Tensor const &m, Tensor &output, + const float beta) const { + itensor->multiply_strided(m, output, beta); return output; } int Tensor::multiply_i(float const &value) { - NNTR_THROW_IF(!contiguous, std::invalid_argument) + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) << getName() << " is not contiguous, cannot multiply"; - /// @note this is not depending on multiply_i as there is an optimized - /// version for multiply_i - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - float *data = getData(); - unsigned int len = size(); - - sscal(len, value, data, 1); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - _FP16 *data = getData<_FP16>(); - unsigned int len = size(); - sscal(len, value, data, 1); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return ML_ERROR_NONE; + return itensor->multiply_i(value); } Tensor Tensor::multiply(float const &value) const { - Tensor t; + Tensor t("", getFormat(), getDataType()); return multiply(value, t); } Tensor &Tensor::multiply(float const &value, Tensor &out) const { - /// @todo add unittest - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = std::bind(std::multiplies(), std::placeholders::_1, value); - apply(f, out); - return out; - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = std::bind(std::multiplies<_FP16>(), std::placeholders::_1, - static_cast<_FP16>(value)); - apply<_FP16>(f, out); - return out; -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } + itensor->multiply(value, out); return out; } @@ -798,48 +230,26 @@ int Tensor::multiply_i(Tensor const &m, const float beta) { Tensor Tensor::multiply(Tensor const &m, const float beta) const { Tensor t("", this->getFormat()); - return this->multiply(m, t, beta); + return multiply(m, t, beta); } Tensor &Tensor::multiply(Tensor const &m, Tensor &output, const float beta) const { - /** - * @note this does not work correctly with differently strided inputs. - * Use multiply_strided alternatively - */ NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument) << "Tensor Format of " << getName() << ":" << ((bool)(this->getFormat()) ? "NHWC" : "NCHW") << " is not match. (" << ((bool)(m.getFormat()) ? 
"NHWC" : "NCHW") << ")"; - NNTR_THROW_IF(!contiguous || !m.contiguous || !output.contiguous, + NNTR_THROW_IF(!getContiguous() || !m.getContiguous() || + !output.getContiguous(), std::invalid_argument) << getName() << " is not contiguous, cannot multiply"; - NNTR_THROW_IF(!contiguous || !m.contiguous || !output.contiguous, + NNTR_THROW_IF(!getContiguous() || !m.getContiguous() || + !output.getContiguous(), std::invalid_argument) << getName() << " is not contiguous, cannot multiply"; - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, - float *out_buf) { - ele_mul(e.buffer_size, buf, m_buf, out_buf, 1, beta, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, output); - - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, - _FP16 *out_buf) { - ele_mul(e.buffer_size, buf, m_buf, out_buf, 1, beta, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, output); - -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } + itensor->multiply(m, output, beta); return output; } @@ -852,33 +262,19 @@ int Tensor::divide_i(float const &value) { } Tensor Tensor::divide(float const &value) const { - Tensor t; - return divide(value, t); + Tensor output("", getFormat(), getDataType()); + return divide(value, output); } -Tensor &Tensor::divide(float const &value, Tensor &out) const { - /// @todo add unittest, _FP16 ZeroDivisionError +Tensor &Tensor::divide(float const &value, Tensor &output) const { + /// @todo add unittest, ZeroDivisionError if (value == 0.0f) { std::stringstream ss; ss << "[Tensor] divide by value failed, value: " << value; throw std::invalid_argument(ss.str().c_str()); } - - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = std::bind(std::divides(), std::placeholders::_1, value); - apply(f, out); - return out; - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = std::bind(std::divides<_FP16>(), std::placeholders::_1, - static_cast<_FP16>(value)); - apply<_FP16>(f, out); - return out; -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return out; + itensor->divide(value, output); + return output; } int Tensor::divide_i(Tensor const &m) { @@ -893,34 +289,45 @@ int Tensor::divide_i(Tensor const &m) { } Tensor Tensor::divide(Tensor const &m) const { - Tensor t; - return this->divide(m, t); + Tensor output("", getFormat(), getDataType()); + return this->divide(m, output); } Tensor &Tensor::divide(Tensor const &m, Tensor &output) const { - - NNTR_THROW_IF(!contiguous || !m.contiguous || !output.contiguous, + NNTR_THROW_IF(!getContiguous() || !m.getContiguous() || + !output.getContiguous(), std::invalid_argument) << getName() << " is not contiguous, cannot divide"; - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, - float *out_buf) { - ele_div(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, output); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, - _FP16 *out_buf) { - ele_div(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, 
output); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif + itensor->divide(m, output); + return output; +} + +int Tensor::add_i_strided(Tensor const &input, const float beta) { + try { + this->add_strided(input, *this, beta); + } catch (std::exception &err) { + ml_loge("%s %s", typeid(err).name(), err.what()); + return ML_ERROR_INVALID_PARAMETER; } + + return ML_ERROR_NONE; +} + +Tensor Tensor::add_strided(Tensor const &input, const float beta) const { + Tensor output("", getFormat(), getDataType()); + return this->add_strided(input, output, beta); +} + +Tensor &Tensor::add_strided(Tensor const &input, Tensor &output, + const float beta) const { + CREATE_IF_EMPTY_DIMS(output, getDim(), nullptr); + + if (size() != input.size() || size() != output.size()) + throw std::invalid_argument( + "Strided addition does not support broadcasting"); + + itensor->add_strided(input, output, beta); + return output; } @@ -930,123 +337,37 @@ int Tensor::add_i(float const &value) { } Tensor Tensor::add(float const &value) const { - Tensor t; + Tensor t("", getFormat(), getDataType()); return add(value, t); } -Tensor &Tensor::add(float const &value, Tensor &out) const { - /// @todo add unittest - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = std::bind(std::plus(), std::placeholders::_1, value); - apply(f, out); - return out; - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1, - static_cast<_FP16>(value)); - apply<_FP16>(f, out); - return out; -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return out; +Tensor &Tensor::add(float const &value, Tensor &output) const { + itensor->add(value, output); + return output; } int Tensor::add_i(Tensor const &m, float const alpha) { - /// @todo: add axis rather doing add over the last two dimensions always - /// operator i has optimized version - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, - float *out_buf) { - saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]); - }; - - /// @todo: enable this after add_strided supports broadcast - // NNTR_THROW_IF(!contiguous || !m.contiguous, std::invalid_argument) - // << getName() << " is not contiguous, cannot add"; - - try { - apply_broadcast(m, f, *this); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, - _FP16 *out_buf) { - saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]); - /// @todo: saxpy is not valid for _FP16 - }; - - /// @todo: enable this after add_strided supports broadcast - // NNTR_THROW_IF(!contiguous || !m.contiguous, std::invalid_argument) - // << getName() << " is not contiguous, cannot add"; - - try { - apply_broadcast(m, f, *this); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - -#else - ml_loge("%s", "Error: enable-fp16 is not enabled"); - return ML_ERROR_INVALID_PARAMETER; -#endif - } - return ML_ERROR_NONE; + return itensor->add_i(m, *this, alpha); } int Tensor::add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m, unsigned int incX, 
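Unlike add(), the strided variants require the operands to hold the same number of elements (no broadcasting), and add_i_strided() converts any failure into an error code rather than letting the exception escape. A hedged sketch of that contract:

// hedged illustration, not part of the patch
nntrainer::Tensor x(1, 1, 2, 2), y(1, 1, 2, 2);
x.setValue(1.0f);
y.setValue(2.0f);
nntrainer::Tensor z = x.add_strided(y, 1.0f); // z[i] = x[i] + 1.0 * y[i] = 3
int status = x.add_i_strided(y, 0.5f);        // x[i] += 0.5 * y[i] -> 2
// status is ML_ERROR_NONE on success, ML_ERROR_INVALID_PARAMETER on mismatch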
unsigned int incY, const Tensor alphas, unsigned int alpha_idx) { - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - saxpy(len, alphas.getValue(alpha_idx), m.getData(), incX, - getAddress(addr_idx), incY); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - saxpy(len, alphas.getValue<_FP16>(alpha_idx), m.getData<_FP16>(), incX, - getAddress<_FP16>(addr_idx), incY); -#else - ml_loge("%s", "Error: enable-fp16 is not enabled"); - return ML_ERROR_INVALID_PARAMETER; -#endif - } - return ML_ERROR_NONE; + return itensor->add_i_partial(len, addr_idx, m, incX, incY, alphas, + alpha_idx); } Tensor Tensor::add(Tensor const &m, float const alpha) const { - Tensor t; + Tensor t("", getFormat(), getDataType()); return this->add(m, t, alpha); } Tensor &Tensor::add(Tensor const &m, Tensor &output, float const alpha) const { - NNTR_THROW_IF(!contiguous || !m.contiguous || !output.contiguous, + NNTR_THROW_IF(!itensor->getContiguous() || !m.getContiguous() || + !output.getContiguous(), std::invalid_argument) << getName() << " is not contiguous, cannot add"; - - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, - float *out_buf) { - ele_add(e.buffer_size, buf, m_buf, out_buf, alpha, 0, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, output); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, - _FP16 *out_buf) { - ele_add(e.buffer_size, buf, m_buf, out_buf, alpha, 0, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, output); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } + itensor->add(m, output, alpha); return output; } @@ -1056,27 +377,13 @@ int Tensor::subtract_i(float const &value) { } Tensor Tensor::subtract(float const &value) const { - Tensor t; - return subtract(value, t); + Tensor output("", getFormat(), getDataType()); + return subtract(value, output); } -Tensor &Tensor::subtract(float const &value, Tensor &out) const { - /// @todo add unittest - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = std::bind(std::minus(), std::placeholders::_1, value); - apply(f, out); - return out; - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = std::bind(std::minus<_FP16>(), std::placeholders::_1, - static_cast<_FP16>(value)); - apply<_FP16>(f, out); - return out; -#else - ml_loge("%s", "Error: enable-fp16 is not enabled"); -#endif - } - return out; // shouldn't reach +Tensor &Tensor::subtract(float const &value, Tensor &output) const { + itensor->subtract(value, output); + return output; } int Tensor::subtract_i(Tensor const &m) { return add_i(m, -1); } @@ -1086,2160 +393,85 @@ Tensor Tensor::subtract(Tensor const &m) const { return this->subtract(m, t); } -Tensor &Tensor::subtract(Tensor const &m, Tensor &out) const { - NNTR_THROW_IF(!contiguous || !m.contiguous || !out.contiguous, - std::invalid_argument) - << getName() << " is not contiguous, cannot add"; +Tensor &Tensor::subtract(Tensor const &m, Tensor &output) const { + return add(m, output, -1); +} - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf, - float *out_buf) { - ele_sub(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], - strides[3]); - }; - 
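add() and add_i() follow the axpy convention: the second operand is scaled by alpha before it is accumulated, so add(m, out, alpha) computes out = this + alpha * m and add_i(m, alpha) updates the tensor in place. A hedged sketch:

// hedged illustration, not part of the patch
nntrainer::Tensor p(1, 1, 1, 4), q(1, 1, 1, 4);
p.setValue(1.0f);
q.setValue(10.0f);
nntrainer::Tensor r = p.add(q, 0.1f); // r[i] = 1 + 0.1 * 10 = 2
p.add_i(q, -0.1f);                    // p[i] = 1 - 0.1 * 10 = 0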
apply_broadcast(m, f, out); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf, - _FP16 *out_buf) { - ele_sub(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], - strides[3]); - }; - apply_broadcast(m, f, out); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return out; +/** + * This is to sum the Tensor data according to the dim.batch(). + * Therefore the result has M(dim.batch(), 1, 1, 1) dimension. + */ +Tensor Tensor::sum_by_batch() const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot sum"; + + Tensor output(batch(), 1, 1, 1, this->getFormat(), getDataType()); + itensor->sum_by_batch(output); + return output; } -int Tensor::pow_i(float exponent) { - pow(exponent, *this); - return ML_ERROR_NONE; -} - -Tensor Tensor::pow(float exponent) const { - Tensor t; - return pow(exponent, t); -} - -Tensor &Tensor::pow(float exponent, Tensor &out) const { - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [exponent](float in) { return powf(in, exponent); }; - apply(f, out); - return out; - } - if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [exponent](_FP16 in) { - return static_cast<_FP16>(powf(in, exponent)); - }; - apply<_FP16>(f, out); - return out; -#else - ml_loge("%s", "Error: enable-fp16 is not enabled"); -#endif - } - return out; -} - -Tensor Tensor::getBatchSlice(size_t offset, unsigned int size) const { - TensorDim dim_ = dim; - dim_.batch(size); - - return getSharedDataTensor(dim_, offset * this->dim.getFeatureLen()); -} - -void Tensor::createSharedDataTensor(const Tensor &src, Tensor &dest, - size_t offset) { - /** - * - If src already has data allocaed, then directly make dest tensor based on - * the src tensor. - * - If src.data does not exist (meaning tensor does not memory allocated), - * and src.src_tensor does not exist (meaning the src tensor does not depened - * on another tensor), then create a SrcSharedTensor around the src. - * - If src.src_tensor exists, then use the src.src_tensor to create the - * required SrcSharedTensor to avoid recursive dependency. - * - * @note src.data and src.src_tensor CAN co-exist. src.src_tensor is stored - * if the batch size of src is updated and needs reallocation. 
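getBatchSlice() and getSharedDataTensor() return views rather than copies: the returned tensor shares the parent's buffer (or the parent's source tensor, to avoid recursive dependencies), so writes through the view are visible in the parent. A hedged sketch of the batch-slice behaviour:

// hedged illustration, not part of the patch
nntrainer::Tensor full(4, 1, 2, 2);                 // four batches
full.setValue(0.0f);
nntrainer::Tensor slice = full.getBatchSlice(1, 2); // view of batches 1 and 2
slice.setValue(7.0f);                               // also updates 'full'
// full.getValue(0, 0, 0, 0) == 0.0f, full.getValue(1, 0, 0, 0) == 7.0f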
- */ - dest.data = nullptr; - if (src.data) { - dest.src_tensor = std::make_shared(&src, offset); - dest.allocate(); - } else if (!src.src_tensor) - dest.src_tensor = std::make_shared(&src, offset); - else - dest.src_tensor = std::make_shared( - src.src_tensor->tensor(), offset + src.src_tensor->offset()); -} - -Tensor Tensor::getSharedDataTensor(const TensorDim dim_, size_t offset, - bool reset_stride, - const std::string &name_) const { - Tensor ret = *this; - if (dim_.getFormat() != ret.dim.getFormat()) - throw std::invalid_argument("Tensor format does not match"); - - ret.dim = dim_; - if (!name_.empty()) - ret.name = name_; - - if (dim_.getDataLen() + offset > dim.getDataLen()) - throw std::invalid_argument( - "Creating shared tensor of size bigger than tensor memory."); - - if (reset_stride) - ret.strides = ret.dim.computeStrides(); - - TensorDim new_match_dim = dim_; - new_match_dim.batch(dim.batch()); - if (new_match_dim != dim && !reset_stride) - ret.contiguous = false; - - /** - * In this case, its the caller's responsibility to ensure that allocate() is - * called for the output tensor before operating on the output tensor. - */ - createSharedDataTensor(*this, ret, offset); - - return ret; -} - -std::vector Tensor::split(unsigned num_size, int axis) { - NNTR_THROW_IF(num_size == 0, std::invalid_argument) - << "num size cannot be zero"; - - if (axis == -1) { - axis = 3; - } - - NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument) - << "cannot split axis of axis: " << axis; - - NNTR_THROW_IF(dim.getTensorDim(axis) % num_size != 0, std::invalid_argument) - << "axis is not divisible by num_size, axis: " << axis - << " num size: " << num_size; - - std::vector sizes; - sizes.resize(num_size); - - unsigned int sz = dim.getTensorDim(axis) / num_size; - std::fill(sizes.begin(), sizes.end(), sz); - - return split(sizes, axis); -} - -std::vector Tensor::split(std::vector sizes, int axis) { - size_t num_size = sizes.size(); - - NNTR_THROW_IF(num_size == 0, std::invalid_argument) - << "num size cannot be zero"; - - if (axis == -1) { - axis = 3; - } - - NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument) - << "cannot split axis of axis: " << axis; - - NNTR_THROW_IF( - std::any_of(sizes.begin(), sizes.end(), [](size_t sz) { return !sz; }), - std::invalid_argument) - << "among given sizes at least one of size is 0"; - - size_t total_size = std::accumulate(sizes.begin(), sizes.end(), 0); - NNTR_THROW_IF(dim.getTensorDim(axis) != total_size, std::invalid_argument) - << "given sum of sizes did not match with origin tensor dim, tensor dim: " - << dim.getTensorDim(axis) << " total size: " << total_size; - - std::vector ret_dims; - ret_dims.reserve(num_size); - for (unsigned int i = 0; i < num_size; ++i) { - ret_dims[i] = dim; - ret_dims[i].setTensorDim(axis, sizes[i]); - } - - bool is_format_nchw = (dim.getFormat() == Tformat::NCHW) ? true : false; - std::vector ret; - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - auto iter_value = [this, is_format_nchw]( - std::array &loc, - const std::array &end_loc, - const std::array &reset_dim_arr) -> float & { - auto &value = (is_format_nchw) ? 
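split() is the counterpart of cat(): the count form requires the chosen axis to be divisible by the number of pieces, while the size-list form only requires the sizes to add up to the axis length, and axis == -1 is shorthand for the last axis. A hedged sketch (the std::vector<size_t> element type for the size list is an assumption about the header):

// hedged illustration, not part of the patch
nntrainer::Tensor big(1, 1, 2, 6);
std::vector<nntrainer::Tensor> halves = big.split(2, 3);    // two 1x1x2x3 pieces
std::vector<size_t> sizes = {1, 2, 3};
std::vector<nntrainer::Tensor> parts = big.split(sizes, 3); // widths 1, 2 and 3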
getValue(loc[0], loc[1], loc[2], loc[3]) - : getValue(loc[0], loc[3], loc[1], loc[2]); - for (int i = 3; i >= 0; --i) { - loc[i]++; - if (loc[i] == end_loc[i]) { - loc[i] -= reset_dim_arr[i]; - continue; - } - break; - } - return value; - }; - - ret.reserve(num_size); - - unsigned int accumulated_size = 0; - for (unsigned int i = 0; i < num_size; ++i) { - std::array loc = {0, 0, 0, 0}; - - if (is_format_nchw) { - loc[axis] += accumulated_size; - } else { - if (axis == 0) { - loc[0] += accumulated_size; - } else if (axis == 1) { - loc[3] += accumulated_size; - } else if (axis == 2 || axis == 3) { - loc[axis - 1] += accumulated_size; - } - } - - ret.emplace_back(ret_dims[i]); - auto &ret_t = ret.back(); - - std::array end_loc; - - if (is_format_nchw) { - end_loc = {ret_dims[i].batch(), ret_dims[i].channel(), - ret_dims[i].height(), ret_dims[i].width()}; - } else { - end_loc = {ret_dims[i].batch(), ret_dims[i].height(), - ret_dims[i].width(), ret_dims[i].channel()}; - } - - accumulated_size += sizes[i]; - - if (is_format_nchw) { - end_loc[axis] = accumulated_size; - } else { - if (axis == 0) { - end_loc[0] = accumulated_size; - } else if (axis == 1) { - end_loc[3] = accumulated_size; - } else if (axis == 2 || axis == 3) { - end_loc[axis - 1] = accumulated_size; - } - } - - std::array reset_dim_arr; - if (is_format_nchw) { - reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].channel(), - ret_dims[i].height(), ret_dims[i].width()}; - } else { - reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].height(), - ret_dims[i].width(), ret_dims[i].channel()}; - } - - ret_t.apply_i( - [&iter_value, &loc, &end_loc, &reset_dim_arr](float _) { - return iter_value(loc, end_loc, reset_dim_arr); - }); - } - } - if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto iter_value = [this, is_format_nchw]( - std::array &loc, - const std::array &end_loc, - const std::array &reset_dim_arr) -> _FP16 & { - auto &value = (is_format_nchw) - ? 
getValue<_FP16>(loc[0], loc[1], loc[2], loc[3]) - : getValue<_FP16>(loc[0], loc[3], loc[1], loc[2]); - for (int i = 3; i >= 0; --i) { - loc[i]++; - if (loc[i] == end_loc[i]) { - loc[i] -= reset_dim_arr[i]; - continue; - } - break; - } - return value; - }; - - ret.reserve(num_size); - - unsigned int accumulated_size = 0; - for (unsigned int i = 0; i < num_size; ++i) { - std::array loc = {0, 0, 0, 0}; - - if (is_format_nchw) { - loc[axis] += accumulated_size; - } else { - if (axis == 0) { - loc[0] += accumulated_size; - } else if (axis == 1) { - loc[3] += accumulated_size; - } else if (axis == 2 || axis == 3) { - loc[axis - 1] += accumulated_size; - } - } - - ret.emplace_back(ret_dims[i]); - auto &ret_t = ret.back(); - - std::array end_loc; - - if (is_format_nchw) { - end_loc = {ret_dims[i].batch(), ret_dims[i].channel(), - ret_dims[i].height(), ret_dims[i].width()}; - } else { - end_loc = {ret_dims[i].batch(), ret_dims[i].height(), - ret_dims[i].width(), ret_dims[i].channel()}; - } - - accumulated_size += sizes[i]; - - if (is_format_nchw) { - end_loc[axis] = accumulated_size; - } else { - if (axis == 0) { - end_loc[0] = accumulated_size; - } else if (axis == 1) { - end_loc[3] = accumulated_size; - } else if (axis == 2 || axis == 3) { - end_loc[axis - 1] = accumulated_size; - } - } - - std::array reset_dim_arr; - if (is_format_nchw) { - reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].channel(), - ret_dims[i].height(), ret_dims[i].width()}; - } else { - reset_dim_arr = {ret_dims[i].batch(), ret_dims[i].height(), - ret_dims[i].width(), ret_dims[i].channel()}; - } - - ret_t.apply_i<_FP16>( - [&iter_value, &loc, &end_loc, &reset_dim_arr](_FP16 _) { - return iter_value(loc, end_loc, reset_dim_arr); - }); - } - -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - return ret; -} - -Tensor Tensor::cat(const std::vector &tensors, int axis) { - - if (axis == -1) { - axis = 3; - } - - NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument) - << "cannot split axis of axis: " << axis; - - NNTR_THROW_IF(tensors.empty(), std::invalid_argument) - << "given tensor vector is empty"; - - Tensor ret; - auto ref_dim = tensors.front().getDim(); - bool is_format_nchw = (ref_dim.getFormat() == Tformat::NCHW); - ref_dim.setTensorDim(axis, 1); - NNTR_THROW_IF(!std::all_of(tensors.begin(), tensors.end(), - [&ref_dim, axis](const Tensor &t) { - auto cur_dim = t.getDim(); - cur_dim.setTensorDim(axis, 1); - return ref_dim == cur_dim; - }), - std::invalid_argument) - << " all tensor must have the same dimension except for the axis, ref_dim: " - << ref_dim << " axis : " << axis; - - auto axis_dim = std::accumulate(tensors.begin(), tensors.end(), 0u, - [axis](unsigned cur, const Tensor &t) { - return cur += t.getDim().getTensorDim(axis); - }); - if (ref_dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto iter_value = - [is_format_nchw](std::array &loc, - const std::array &start_loc, Tensor &t, - const std::array &ref_dim_arr) -> float & { - auto &value = is_format_nchw - ? 
t.getValue(loc[0], loc[1], loc[2], loc[3]) - : t.getValue(loc[0], loc[3], loc[1], loc[2]); - - for (int i = 3; i >= 0; --i) { - loc[i]++; - if (loc[i] - start_loc[i] == ref_dim_arr[i]) { - loc[i] = start_loc[i]; - continue; - } - break; - } - return value; - }; - - auto ret_dim = ref_dim; - ret_dim.setTensorDim(axis, axis_dim); - - ret = Tensor(ret_dim); - - std::array loc = {0, 0, 0, 0}; - for (auto &t : tensors) { - std::array start_loc = loc; - std::array tensor_dim_arr; - if (is_format_nchw) { - tensor_dim_arr[0] = t.getDim().getTensorDim(0); - tensor_dim_arr[1] = t.getDim().getTensorDim(1); - tensor_dim_arr[2] = t.getDim().getTensorDim(2); - tensor_dim_arr[3] = t.getDim().getTensorDim(3); - } else { - tensor_dim_arr[0] = t.getDim().getTensorDim(0); - tensor_dim_arr[1] = t.getDim().getTensorDim(2); - tensor_dim_arr[2] = t.getDim().getTensorDim(3); - tensor_dim_arr[3] = t.getDim().getTensorDim(1); - } - - for (size_t i = 0u, sz = t.size(); i < sz; ++i) { - iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue(i); - } - - if (is_format_nchw) { - loc[axis] += t.getDim().getTensorDim(axis); - } else { - if (axis == 0) { - loc[0] += t.getDim().getTensorDim(axis); - } else if (axis == 1) { - loc[3] += t.getDim().getTensorDim(axis); - } else if (axis == 2 || axis == 3) { - loc[axis - 1] += t.getDim().getTensorDim(axis); - } - } - } - - // return ret; - } else if (ref_dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto iter_value = - [is_format_nchw](std::array &loc, - const std::array &start_loc, Tensor &t, - const std::array &ref_dim_arr) -> _FP16 & { - auto &value = is_format_nchw - ? t.getValue<_FP16>(loc[0], loc[1], loc[2], loc[3]) - : t.getValue<_FP16>(loc[0], loc[3], loc[1], loc[2]); - - for (int i = 3; i >= 0; --i) { - loc[i]++; - if (loc[i] - start_loc[i] == ref_dim_arr[i]) { - loc[i] = start_loc[i]; - continue; - } - break; - } - return value; - }; - - auto ret_dim = ref_dim; - ret_dim.setTensorDim(axis, axis_dim); - - ret = Tensor(ret_dim); - - std::array loc = {0, 0, 0, 0}; - for (auto &t : tensors) { - std::array start_loc = loc; - std::array tensor_dim_arr; - if (is_format_nchw) { - tensor_dim_arr[0] = t.getDim().getTensorDim(0); - tensor_dim_arr[1] = t.getDim().getTensorDim(1); - tensor_dim_arr[2] = t.getDim().getTensorDim(2); - tensor_dim_arr[3] = t.getDim().getTensorDim(3); - } else { - tensor_dim_arr[0] = t.getDim().getTensorDim(0); - tensor_dim_arr[1] = t.getDim().getTensorDim(2); - tensor_dim_arr[2] = t.getDim().getTensorDim(3); - tensor_dim_arr[3] = t.getDim().getTensorDim(1); - } - - for (size_t i = 0u, sz = t.size(); i < sz; ++i) { - iter_value(loc, start_loc, ret, tensor_dim_arr) = t.getValue<_FP16>(i); - } - - if (is_format_nchw) { - loc[axis] += t.getDim().getTensorDim(axis); - } else { - if (axis == 0) { - loc[0] += t.getDim().getTensorDim(axis); - } else if (axis == 1) { - loc[3] += t.getDim().getTensorDim(axis); - } else if (axis == 2 || axis == 3) { - loc[axis - 1] += t.getDim().getTensorDim(axis); - } - } - } - -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return ret; -} - -void Tensor::makeSharedDataTensor(const Tensor &src, size_t offset) { - if (strides != src.strides) - throw std::invalid_argument( - "Creating shared tensor of different stride than source tensor."); - - if (getDim().getDataLen() + offset > src.getDim().getDataLen()) - throw std::invalid_argument( - "Creating shared tensor of different size or stride than source tensor."); - - /** - * In this case, its the 
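cat() concatenates a list of tensors along one axis; every other axis must match, which the std::all_of check above enforces by zeroing the concatenation axis before comparing dimensions. A hedged sketch (cat is assumed to be callable as a static member):

// hedged illustration, not part of the patch
nntrainer::Tensor a(1, 1, 2, 3), b(1, 1, 2, 5);
a.setValue(1.0f);
b.setValue(2.0f);
nntrainer::Tensor joined = nntrainer::Tensor::cat({a, b}, 3); // shape 1x1x2x8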
caller's responsibility to ensure that allocate() is - * called for the output tensor before operating on the output tensor. - */ - createSharedDataTensor(src, *this, offset); -} - -void Tensor::apply_broadcast( - Tensor const &m, - std::function - v_func, - Tensor &output) const { - CREATE_IF_EMPTY_DIMS(output, dim); - - NNTR_THROW_IF(getData() == nullptr, std::invalid_argument) - << getName() << " is not allocated"; - NNTR_THROW_IF(m.getData() == nullptr, std::invalid_argument) - << m.getName() << " is not allocated"; - NNTR_THROW_IF(output.getData() == nullptr, std::invalid_argument) - << output.getName() << " is not allocated"; - - /// shortcut to cover when dimension matches - /// note that buffer_size, the last stride is only used in v_func but it - /// might be changed - if (dim == m.dim) { - BroadcastInfo e; - e.buffer_size = size(); - e.strides[3] = 1; - e.tensor_type = getTensorType(); - v_func(e, getData(), m.getData(), output.getData()); - return; - } - - return apply_broadcast_util(m, v_func, output, this->computeBroadcastInfo(m)); -} - -#ifdef ENABLE_FP16 -void Tensor::apply_broadcast( - Tensor const &m, - std::function - v_func, - Tensor &output) const { - CREATE_IF_EMPTY_DIMS(output, dim, nullptr); - - NNTR_THROW_IF(getData<_FP16>() == nullptr, std::invalid_argument) - << getName() << " is not allocated"; - NNTR_THROW_IF(m.getData<_FP16>() == nullptr, std::invalid_argument) - << m.getName() << " is not allocated"; - NNTR_THROW_IF(output.getData<_FP16>() == nullptr, std::invalid_argument) - << output.getName() << " is not allocated"; - - /// shortcut to cover when dimension matches - /// note that buffer_size, the last stride is only used in v_func but it - /// might be changed - if (dim == m.dim) { - BroadcastInfo e; - e.buffer_size = size(); - e.strides[3] = 1; - v_func(e, getData<_FP16>(), m.getData<_FP16>(), output.getData<_FP16>()); - return; - } - - return apply_broadcast_util(m, v_func, output, this->computeBroadcastInfo(m)); -} - -void Tensor::apply_broadcast_util( - Tensor const &m, - std::function - v_func, - Tensor &output, const BroadcastInfo &e, int cur_axis, size_t offset, - size_t m_offset) const { - - const _FP16 *buf = this->getData<_FP16>(); - const _FP16 *m_buf = m.getData<_FP16>(); - _FP16 *out_buf = output.getData<_FP16>(); - - if (e.buffer_axis == cur_axis) { - v_func(e, buf + offset, m_buf + m_offset, out_buf + offset); - return; - } - - cur_axis++; - for (unsigned int i = 0; i < dim.getTensorDim(cur_axis); ++i) { - size_t next_offset = offset + i * strides[cur_axis]; - size_t next_m_offset = m_offset + i * e.strides[cur_axis]; - apply_broadcast_util(m, v_func, output, e, cur_axis, next_offset, - next_m_offset); - } -} - -#endif - -void Tensor::apply_broadcast_util( - Tensor const &m, - std::function - v_func, - Tensor &output, const BroadcastInfo &e, int cur_axis, size_t offset, - size_t m_offset) const { - - const float *buf = this->getData(); - const float *m_buf = m.getData(); - float *out_buf = output.getData(); - - if (e.buffer_axis == cur_axis) { - v_func(e, buf + offset, m_buf + m_offset, out_buf + offset); - return; - } - - cur_axis++; - uint continuity[4] = {0, 1, 2, 3}; - if (getFormat() == Tformat::NHWC) { - continuity[1] = 2; - continuity[2] = 3; - continuity[3] = 1; - } - for (unsigned int i = 0; i < dim.getTensorDim(continuity[cur_axis]); ++i) { - size_t next_offset = offset + i * strides[cur_axis]; - size_t next_m_offset = m_offset + i * e.strides[cur_axis]; - apply_broadcast_util(m, v_func, output, e, cur_axis, next_offset, - 
next_m_offset); - } -} - -/** - * This is to sum the Tensor data according to the dim.batch(). - * Therefore the result has M(dim.batch(), 1, 1, 1) dimension. - */ -Tensor Tensor::sum_by_batch() const { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot sum"; - - Tensor ret(dim.batch(), 1, 1, 1, this->getFormat(), getDataType()); - size_t feat_len = dim.getFeatureLen(); - size_t batch = dim.batch(); - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - float *rdata = ret.getData(); - - Tensor ones(1, 1, 1, feat_len, this->getFormat()); - ones.setValue(1.0); - sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len, - ones.getData(), 1, 0.0, rdata, 1); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - _FP16 *rdata = ret.getData<_FP16>(); - - Tensor ones(1, 1, 1, feat_len, this->getTensorType()); - ones.setValue((_FP16)1.0); - sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len, - ones.getData<_FP16>(), 1, 0.0, rdata, 1); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - return ret; -} - -/** - * @brief Calculate sum according to the axis. - */ -Tensor Tensor::sum(unsigned int axis, float alpha) const { - Tensor ret("", this->getFormat(), this->getDataType()); - return sum(axis, ret, alpha, 0); -} - -Tensor &Tensor::sum(unsigned int axis, Tensor &ret, float alpha, - float beta) const { - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot sum"; - - if (axis >= 4) - throw std::out_of_range("Error: axis is invalid"); - - if (dim.getDim()[axis] == 1 and alpha == 1.0 and !beta) { - CREATE_IF_EMPTY_DIMS(ret, dim); - ret.copy(this->getData()); - return ret; - } - - switch (axis) { - case 0: { - CREATE_IF_EMPTY_DIMS(ret, 1, dim.channel(), dim.height(), dim.width(), - this->getTensorType()); - size_t feat_len = dim.getFeatureLen(); - size_t batch = dim.batch(); - Tensor ones(1, 1, 1, batch, this->getFormat()); - ones.setValue(alpha); - sgemv(CblasRowMajor, CblasTrans, batch, feat_len, 1, data, feat_len, - ones.getData(), 1, beta, ret.getData(), 1); - } break; - case 1: { - CREATE_IF_EMPTY_DIMS(ret, dim[0], 1, dim[2], dim[3], getTensorType()); - if (this->getFormat() == Tformat::NHWC) { - unsigned int m = ret.dim.getDataLen(); - unsigned int n = dim[1]; - Tensor ones(1, 1, 1, n, this->getTensorType()); - ones.setValue(alpha); - sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n, - ones.getData(), 1, beta, ret.getData(), 1); - } else { - unsigned int feat_len = dim[2] * dim[3]; - unsigned int t_axis = dim[1]; - Tensor ones(1, 1, 1, t_axis, getTensorType()); - ones.setValue(alpha); - float *rdata = ret.getData(); - for (unsigned int k = 0; k < dim[0]; ++k) { - sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1, - &data[k * dim.getFeatureLen()], feat_len, ones.getData(), - 1, beta, &rdata[k * feat_len], 1); - } - } - } break; - case 2: { - CREATE_IF_EMPTY_DIMS(ret, dim[0], dim[1], 1, dim[3], getTensorType()); - - if (this->getFormat() == Tformat::NHWC) { - unsigned int feat_len = dim[1] * dim[3]; - unsigned int t_axis = dim[2]; - Tensor ones(1, 1, 1, t_axis, this->getTensorType()); - ones.setValue(alpha); - float *rdata = ret.getData(); - for (unsigned int k = 0; k < dim[0]; ++k) { - sgemv(CblasRowMajor, CblasTrans, 
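The reductions in sum() and sum_by_batch() all rely on the same trick: summing along an axis is a matrix-vector product with a vector filled with alpha, so a single sgemv call performs the reduction and the scaling at once, while beta accumulates into the existing output. A plain, BLAS-free illustration of the identity:

// hedged illustration, not part of the patch: row sums of a 2x3 matrix
float A[2][3] = {{1, 2, 3}, {4, 5, 6}};
float ones[3] = {1, 1, 1};
float row_sum[2] = {0, 0};
for (int i = 0; i < 2; ++i)
  for (int j = 0; j < 3; ++j)
    row_sum[i] += A[i][j] * ones[j]; // row_sum == {6, 15}, i.e. A * 1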
t_axis, feat_len, 1, - &data[k * dim.getFeatureLen()], feat_len, ones.getData(), - 1, beta, &rdata[k * feat_len], 1); - } - } else { - unsigned int t_3 = dim[3]; - unsigned int t_axis = dim[2]; - Tensor ones(1, 1, 1, t_axis, this->getTensorType()); - ones.setValue(alpha); - - if (dim.getStorageOrder() == TStorageOrder::ROW_MAJOR) { - float *rdata = ret.getData(); - for (unsigned int k = 0; k < dim[0]; ++k) { - for (unsigned int c = 0; c < dim[1]; ++c) { - unsigned int idx = k * dim.getFeatureLen() + c * dim[3] * dim[2]; - unsigned int ridx = k * ret.dim.getFeatureLen() + c * dim[3]; - - sgemv(CblasRowMajor, CblasTrans, t_axis, t_3, 1, &data[idx], t_3, - ones.getData(), 1, beta, &rdata[ridx], 1); - } - } - } else { - sgemv(CblasColMajor, CblasTrans, t_axis, ret.dim.getDataLen(), 1, - data, t_axis, ones.getData(), 1, beta, - ret.getData(), 1); - } - } - } break; - case 3: { - CREATE_IF_EMPTY_DIMS(ret, dim[0], dim[1], dim[2], 1, - this->getTensorType()); - if (this->getFormat() == Tformat::NHWC) { - unsigned int t_3 = dim[1]; - unsigned int t_axis = dim[3]; - Tensor ones(1, 1, 1, t_axis, this->getTensorType()); - ones.setValue(alpha); - float *rdata = ret.getData(); - for (unsigned int k = 0; k < dim[0]; ++k) { - for (unsigned int c = 0; c < dim[2]; ++c) { - unsigned int idx = k * dim.getFeatureLen() + c * dim[3] * dim[1]; - unsigned int ridx = k * ret.dim.getFeatureLen() + c * dim[1]; - sgemv(CblasRowMajor, CblasTrans, t_axis, t_3, 1, &data[idx], t_3, - ones.getData(), 1, beta, &rdata[ridx], 1); - } - } - } else { - unsigned int m = ret.dim.getDataLen(); - unsigned int n = dim[3]; - Tensor ones(1, 1, 1, n); - ones.setValue(alpha); - - if (dim.getStorageOrder() == TStorageOrder::ROW_MAJOR) { - sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n, - ones.getData(), 1, beta, ret.getData(), 1); - } else { - float *rdata = ret.getData(); - - for (unsigned int k = 0; k < dim[0]; ++k) { - for (unsigned int c = 0; c < dim[1]; ++c) { - unsigned int idx = k * dim.getFeatureLen() + c * dim[3] * dim[2]; - unsigned int ridx = k * dim[1] * dim[2] + c * dim[2]; - - sgemv(CblasColMajor, CblasNoTrans, dim[2], n, 1, &data[idx], - dim[2], ones.getData(), 1, beta, &rdata[ridx], 1); - } - } - } - } - } break; - default: - throw std::out_of_range("Error: Dimension cannot exceed 3"); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot sum"; - - if (axis >= 4) - throw std::out_of_range("Error: axis is invalid"); - - if (dim.getDim()[axis] == 1 and alpha == 1.0 and !beta) { - CREATE_IF_EMPTY_DIMS(ret, dim); - ret.copy(this->getData<_FP16>()); - return ret; - } - - switch (axis) { - case 0: { - CREATE_IF_EMPTY_DIMS(ret, 1, dim.channel(), dim.height(), dim.width(), - this->getTensorType()); - size_t feat_len = dim.getFeatureLen(); - size_t batch = dim.batch(); - Tensor ones(1, 1, 1, batch, this->getTensorType()); - ones.setValue(alpha); - sgemv(CblasRowMajor, CblasTrans, batch, feat_len, 1, data, feat_len, - ones.getData<_FP16>(), 1, beta, ret.getData<_FP16>(), 1); - } break; - case 1: { - CREATE_IF_EMPTY_DIMS(ret, dim[0], 1, dim[2], dim[3], getTensorType()); - if (this->getFormat() == Tformat::NHWC) { - unsigned int m = ret.dim.getDataLen(); - unsigned int n = dim[1]; - Tensor ones(1, 1, 1, n, this->getTensorType()); - ones.setValue(alpha); - sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n, - ones.getData<_FP16>(), 1, beta, 
ret.getData<_FP16>(), 1); - } else { - unsigned int feat_len = dim[2] * dim[3]; - unsigned int t_axis = dim[1]; - Tensor ones(1, 1, 1, t_axis, getTensorType()); - ones.setValue(alpha); - _FP16 *rdata = ret.getData<_FP16>(); - for (unsigned int k = 0; k < dim[0]; ++k) { - sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1, - &data[k * dim.getFeatureLen()], feat_len, ones.getData<_FP16>(), - 1, beta, &rdata[k * feat_len], 1); - } - } - } break; - case 2: { - CREATE_IF_EMPTY_DIMS(ret, dim[0], dim[1], 1, dim[3], getTensorType()); - - if (this->getFormat() == Tformat::NHWC) { - unsigned int feat_len = dim[1] * dim[3]; - unsigned int t_axis = dim[2]; - Tensor ones(1, 1, 1, t_axis, getTensorType()); - ones.setValue(alpha); - _FP16 *rdata = ret.getData<_FP16>(); - for (unsigned int k = 0; k < dim[0]; ++k) { - sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1, - &data[k * dim.getFeatureLen()], feat_len, ones.getData<_FP16>(), - 1, beta, &rdata[k * feat_len], 1); - } - } else { - unsigned int t_3 = dim[3]; - unsigned int t_axis = dim[2]; - Tensor ones(1, 1, 1, t_axis, getTensorType()); - ones.setValue(alpha); - _FP16 *rdata = ret.getData<_FP16>(); - for (unsigned int k = 0; k < dim[0]; ++k) { - for (unsigned int c = 0; c < dim[1]; ++c) { - unsigned int idx = k * dim.getFeatureLen() + c * dim[3] * dim[2]; - unsigned int ridx = k * ret.dim.getFeatureLen() + c * dim[3]; - sgemv(CblasRowMajor, CblasTrans, t_axis, t_3, 1, &data[idx], t_3, - ones.getData<_FP16>(), 1, beta, &rdata[ridx], 1); - } - } - } - } break; - case 3: { - CREATE_IF_EMPTY_DIMS(ret, dim[0], dim[1], dim[2], 1, getTensorType()); - if (this->getFormat() == Tformat::NHWC) { - unsigned int t_3 = dim[1]; - unsigned int t_axis = dim[3]; - Tensor ones(1, 1, 1, t_axis, getTensorType()); - ones.setValue(alpha); - _FP16 *rdata = ret.getData<_FP16>(); - for (unsigned int k = 0; k < dim[0]; ++k) { - for (unsigned int c = 0; c < dim[2]; ++c) { - unsigned int idx = k * dim.getFeatureLen() + c * dim[3] * dim[1]; - unsigned int ridx = k * ret.dim.getFeatureLen() + c * dim[1]; - sgemv(CblasRowMajor, CblasTrans, t_axis, t_3, 1, &data[idx], t_3, - ones.getData<_FP16>(), 1, beta, &rdata[ridx], 1); - } - } - } else { - unsigned int m = ret.dim.getDataLen(); - unsigned int n = dim[3]; - Tensor ones(1, 1, 1, n, getTensorType()); - ones.setValue(alpha); - sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n, - ones.getData<_FP16>(), 1, beta, ret.getData<_FP16>(), 1); - } - } break; - default: - throw std::out_of_range("Error: Dimension cannot exceed 3"); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return ret; -} - -Tensor Tensor::sum(const std::vector &axes, float alpha) const { - Tensor ret("", this->getFormat()); - return sum(axes, ret, alpha); -} - -void Tensor::mergeAxis(unsigned int axis1, unsigned int axis2) { - std::vector continuous_order = {0, 3, 1, 2}; - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot merge axis"; - - if (axis2 != axis1 + 1) - if (!checkContinuous(axis1, axis2)) - throw std::invalid_argument("axis2 must be axis1 + 1 for merging."); - - dim.setTensorDim(axis2, dim.getTensorDim(axis1) * dim.getTensorDim(axis2)); - dim.setTensorDim(axis1, 1); -} - -Tensor &Tensor::sum(const std::vector &axes, Tensor &output, - float alpha) const { - if (axes.empty()) - throw std::invalid_argument("empty axes given"); - - if (axes.size() == 1) { - this->sum(axes[0], output, alpha); - } else { - /** club axes together */ - Tensor new_reshaped = *this; - 
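For a multi-axis sum the code first merges adjacent ("continuous") axes so that several reductions collapse into one, then reduces the remaining axes one after another; the result is the same as summing each axis in turn. A hedged sketch (the std::vector<unsigned int> element type of the axis list is an assumption about the header):

// hedged illustration, not part of the patch
nntrainer::Tensor t(2, 3, 4, 5);
t.setValue(1.0f);
std::vector<unsigned int> axes = {1, 2};
nntrainer::Tensor s = t.sum(axes, 1.0f); // shape 2x1x1x5, every entry == 3 * 4 = 12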
std::vector continuous_order = {0, 3, 1, 2}; - std::vector new_axes = {axes[0]}; - - for (unsigned int i = 1; i < axes.size(); ++i) { - if (checkContinuous(axes[i - 1], axes[i])) { - new_reshaped.mergeAxis(axes[i - 1], axes[i]); - new_axes.back() = axes[i]; - } else { - new_axes.push_back(axes[i]); - } - } - - Tensor ret = new_reshaped.sum(new_axes[0]); - for (unsigned int i = 1; i < new_axes.size() - 1; ++i) - ret = ret.sum(axes[i]); - ret.sum(new_axes.back(), output, alpha); - } - - return output; -} - -Tensor &Tensor::dotBatched(Tensor const &m, Tensor &result, bool trans, - bool trans_m, float beta) const { - if (!result.isAllocated()) - throw std::invalid_argument( - "Output tensor must be preallocated for dotBatched operation"); - for (unsigned int b = 0; b < batch(); b++) { - /** @todo try using transpose to speedup the operation */ - const Tensor this_b = this->getBatchSlice(b, 1); - Tensor m_b = m.getBatchSlice(b, 1); - Tensor result_b = result.getBatchSlice(b, 1); - - this_b.dot(m_b, result_b, trans, trans_m, beta); - } - - return result; -} - -Tensor Tensor::dot(Tensor const &m, bool trans, bool trans_m) const { - Tensor output("", this->getFormat(), this->getDataType()); - dot(m, output, trans, trans_m); - - return output; -} -/** - * @brief compute the derivative of this in the current tensor - * @todo will have to see if beta effects this computation - */ -Tensor &Tensor::dot_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv, - bool trans, bool trans_m, float beta) { - bool deriv_trans_m = true; - bool deriv_trans = false; - /** @todo handle all cases of trans and trans_m */ - if (!trans && trans_m) { - deriv_trans_m = false; - } - - return output_deriv.dot(m, *this, deriv_trans, deriv_trans_m, beta); -} - -/** - * @brief compute the derivative wrt m in the m tensor - * @note The caller tensor must be the same tensor as the one which called the - * dot() product. - */ -Tensor &Tensor::dot_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv, - bool trans, bool trans_m, float beta) const { - bool deriv_trans_m = false; - bool deriv_trans = true; - /** @todo handle all cases of trans and trans_m */ - - if (!trans && trans_m) { - output_deriv.dot(*this, m_deriv, deriv_trans, deriv_trans_m, beta); - return m_deriv; - } else { - return dot(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta); - } -} - -Tensor &Tensor::dot_batched_deriv_wrt_1(Tensor const &m, - Tensor const &output_deriv, bool trans, - bool trans_m, float beta) { - bool deriv_trans_m = true; - bool deriv_trans = false; - /** @todo handle all cases of trans and trans_m */ - if (!trans && trans_m) { - deriv_trans_m = false; - } - - return output_deriv.dotBatched(m, *this, deriv_trans, deriv_trans_m, beta); -} - -Tensor &Tensor::dot_batched_deriv_wrt_2(Tensor &m_deriv, - Tensor const &output_deriv, bool trans, - bool trans_m, float beta) const { - bool deriv_trans_m = false; - bool deriv_trans = true; - /** @todo handle all cases of trans and trans_m */ - - if (!trans && trans_m) { - output_deriv.dotBatched(*this, m_deriv, deriv_trans, deriv_trans_m, beta); - return m_deriv; - } else { - return dotBatched(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta); - } -} - -/** - * @note: This dot product flattens the fist 3 axis for the purpose of - * computation. So, while performing, these matrices are behaving as 2-D - * matrices. The dimensions are restored while returning back the tensor - * in case of trans is false. 
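The dot_deriv_wrt_* helpers above implement the standard matrix-multiplication backprop identities: for C = A.dot(B) with no transposes, dL/dA = dL/dC . B^T and dL/dB = A^T . dL/dC, which is why they re-invoke dot()/dotBatched() with flipped transpose flags. A small worked example, hedged:

// hedged illustration, not part of the patch
//   A = [1 2], B = [3 4]^T          =>  C = A.B = [11]
//   with upstream gradient dL/dC = [1]:
//   dL/dA = dL/dC . B^T = [3 4]
//   dL/dB = A^T . dL/dC = [1 2]^T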
- */ -Tensor &Tensor::dot(Tensor const &m, Tensor &result, bool trans, bool trans_m, - float beta) const { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous. Cannot dot product."; - - // Comment out with intension to support the calculation wrt. batch and height - // direction. It supposes to have this->dim as [ BxCxH,W ] and m.dim is - // [BxCxH,W] as well if (m.dim.rank() > 2) { - // throw exception::not_supported("Error: support only for rank of dot " - // "matrix <= 2"); - // } - - // Comment out with intension to support the calculation wrt. batch and height - // direction of this tensor. It is OK as long as m is 2D - // - if (trans && dim.rank() > 2) { - ml_logw("Warning: support only for rank of dot matrix <= 2 with trans"); - } - unsigned int dim1, dim2, mdim1, mdim2; - if (getFormat() == Tformat::NHWC) { - dim1 = batch() * height() * width(); - dim2 = channel(); - mdim1 = m.batch() * m.height() * m.width(); - mdim2 = m.channel(); - } else { - dim1 = batch() * channel() * height(); - dim2 = width(); - mdim1 = m.batch() * m.channel() * m.height(); - mdim2 = m.width(); - } - - unsigned int M, N, K, lda, ldb, ldc; - - if (!trans && !trans_m) { - if (dim2 != mdim1) - throw std::runtime_error( - "Error: incompatible dimensions for dot product"); - K = mdim1; /** == dim2 */ - N = mdim2; - M = dim1; - if (getFormat() == Tformat::NHWC) { - CREATE_IF_EMPTY_DIMS(result, batch(), N, height(), width(), - getTensorType()); // NHWC Result Tensor - } else { - CREATE_IF_EMPTY_DIMS(result, batch(), channel(), height(), N, - getTensorType()); - } - - // We are not set zero the result because of performance reason. - // However, result is not initialized properly. There might include - // garbage like nan. When we have to use this value as in C = alpha*A*B + - // beta*C, then have to check garbage data of C is not effect or not. - - } else if (!trans && trans_m) { - if (dim2 != mdim2) - throw std::runtime_error( - "Error: incompatible dimensions for dot product"); - K = mdim2; /** == dim2 */ - N = mdim1; - M = dim1; - if (getFormat() == Tformat::NHWC) { - CREATE_IF_EMPTY_DIMS(result, batch(), N, height(), width(), - getTensorType()); - } else { - CREATE_IF_EMPTY_DIMS(result, batch(), channel(), height(), N, - getTensorType()); - } - } else if (trans && !trans_m) { - if (dim1 != mdim1) - throw std::runtime_error( - "Error: incompatible dimensions for dot product"); - K = mdim1; /** == dim1 */ - N = mdim2; - M = dim2; - if (getFormat() == Tformat::NHWC) { - CREATE_IF_EMPTY_DIMS(result, 1, N, M, 1, getTensorType()); - } else { - CREATE_IF_EMPTY_DIMS(result, 1, 1, M, N, getTensorType()); - } - } else { - if (dim1 != mdim2) - throw std::runtime_error( - "Error: incompatible dimensions for dot product"); - K = mdim2; /** == dim1 */ - N = mdim1; - M = dim2; - if (getFormat() == Tformat::NHWC) { - CREATE_IF_EMPTY_DIMS(result, 1, N, M, 1, getTensorType()); - } else { - CREATE_IF_EMPTY_DIMS(result, 1, 1, M, N, getTensorType()); - } - } - lda = dim2; - ldb = mdim2; - ldc = (getFormat() == Tformat::NHWC) ? result.channel() : result.width(); - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - const float *mdata = m.getData(); - float *rdata = result.getData(); - const float alpha = 1.0f; - enum CBLAS_TRANSPOSE transA = trans ? CblasTrans : CblasNoTrans; - enum CBLAS_TRANSPOSE transB = trans_m ? 
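As the note above says, dot() treats the tensor as a 2-D matrix by flattening the leading axes: in NCHW an input of shape (B, C, H, W) acts as a (B*C*H) x W matrix, and the contraction dimension K is checked against the other operand accordingly. A hedged sketch with shapes chosen only for illustration:

// hedged illustration, not part of the patch
nntrainer::Tensor lhs(2, 1, 3, 4); // behaves as a 6 x 4 matrix
nntrainer::Tensor rhs(1, 1, 4, 5); // behaves as a 4 x 5 matrix
lhs.setValue(1.0f);
rhs.setValue(1.0f);
nntrainer::Tensor out = lhs.dot(rhs, false, false); // 2x1x3x5, every entry == 4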
CblasTrans : CblasNoTrans; - - /// shortcut handling in case of vector - /// for vector, (1 * K) == (K * 1) in current memory layout... - /// and plaese note that N, K, M is a fixed place holder after considering - /// transpose. - /// For example, there is no case like (1 * K) X (1 * K) while - /// (1 * K) X (1 * M) can be a case - /// case1: (1 * K) X (K * 1) - if (M == 1 && N == 1) { - *rdata = sdot(K, data, 1, mdata, 1) + beta * (*rdata); - } - /// case2: (M * K) X (K * 1) - else if (N == 1) { - sgemv(CblasRowMajor, transA, dim1, dim2, alpha, data, lda, mdata, 1, beta, - rdata, 1); - } - /// case3: (1 * K) X (K * N) = 1 * N = R - /// = R^T = (K * N) ^T * (1 * K) ^T = (N * K) * (K * 1) = (N * K) * (1 * K) - /// Effectively a translation of sgemv - else if (M == 1) { - transB = transB == CblasTrans ? CblasNoTrans : CblasTrans; - sgemv(CblasRowMajor, transB, mdim1, mdim2, alpha, mdata, ldb, data, 1, - beta, rdata, 1); - } - /// case others: use gemm - else { - sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, data, lda, mdata, - ldb, beta, rdata, ldc); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - const _FP16 *mdata = m.getData<_FP16>(); - _FP16 *rdata = result.getData<_FP16>(); - const float alpha = 1.0f; - enum CBLAS_TRANSPOSE transA = trans ? CblasTrans : CblasNoTrans; - enum CBLAS_TRANSPOSE transB = trans_m ? CblasTrans : CblasNoTrans; - - /// shortcut handling in case of vector - /// for vector, (1 * K) == (K * 1) in current memory layout... - /// and plaese note that N, K, M is a fixed place holder after considering - /// transpose. - /// For example, there is no case like (1 * K) X (1 * K) while - /// (1 * K) X (1 * M) can be a case - /// case1: (1 * K) X (K * 1) - if (M == 1 && N == 1) { - *rdata = sdot(K, data, 1, mdata, 1) + static_cast<_FP16>(beta) * (*rdata); - } - /// case2: (M * K) X (K * 1) - else if (N == 1) { - sgemv(CblasRowMajor, transA, dim1, dim2, alpha, data, lda, mdata, 1, beta, - rdata, 1); - } - /// case3: (1 * K) X (K * N) = 1 * N = R - /// = R^T = (K * N) ^T * (1 * K) ^T = (N * K) * (K * 1) = (N * K) * (1 * K) - /// Effectively a translation of sgemv - else if (M == 1) { - transB = transB == CblasTrans ? CblasNoTrans : CblasTrans; - sgemv(CblasRowMajor, transB, mdim1, mdim2, alpha, mdata, ldb, data, 1, - beta, rdata, 1); - } - /// case others: use sgemm - else { - sgemm(CblasRowMajor, transA, transB, M, N, K, alpha, data, lda, mdata, - ldb, beta, rdata, ldc); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - return result; -} - -Tensor &Tensor::transpose(const std::string &direction, Tensor &out) const { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous. 
Cannot transpose."; - - if (out.getData() == getData()) { - Tensor tmp = clone(); - return tmp.transpose(direction, out); - } - - unsigned int SL, SI, SJ, SK; - - out.reshape(dim.transpose(direction)); - int indexI = direction[0] - '0'; - int indexJ = direction[2] - '0'; - - SL = dim.batch(), SI = dim.channel(), SJ = dim.height(), SK = dim.width(); - - bool is_format_nchw = (getFormat() == Tformat::NCHW); - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *inptr = getData(); - float *outptr = out.getData(); - switch (indexI) { - case 0: - if (indexJ == 1) { - if (is_format_nchw) { - transposeloop(l, i, j, k, SL, SI, SJ, SK); - } else { - transposeloop_nhwc(l, j, k, i, SL, SJ, SK, SI); - } - } else { - if (is_format_nchw) { - transposeloop(l, i, k, j, SL, SI, SK, SJ); - } else { - transposeloop_nhwc(l, k, j, i, SL, SK, SJ, SI); - } - } - break; - case 1: - if (indexJ == 0) { - if (is_format_nchw) { - transposeloop(l, j, i, k, SL, SJ, SI, SK); - } else { - transposeloop_nhwc(l, i, k, j, SL, SI, SK, SJ); - } - } else { - if (is_format_nchw) { - transposeloop(l, j, k, i, SL, SJ, SK, SI); - } else { - transposeloop_nhwc(l, k, i, j, SL, SK, SI, SJ); - } - } - break; - case 2: - if (indexJ == 0) { - if (is_format_nchw) { - transposeloop(l, k, i, j, SL, SK, SI, SJ); - } else { - transposeloop_nhwc(l, i, j, k, SL, SI, SJ, SK); - } - } else { - if (is_format_nchw) { - transposeloop(l, k, j, i, SL, SK, SJ, SI); - } else { - transposeloop_nhwc(l, j, i, k, SL, SJ, SI, SK); - } - } - break; - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *inptr = getData<_FP16>(); - _FP16 *outptr = out.getData<_FP16>(); - switch (indexI) { - case 0: - if (indexJ == 1) { - if (is_format_nchw) { - transposeloop(l, i, j, k, SL, SI, SJ, SK); - } else { - transposeloop_nhwc(l, j, k, i, SL, SJ, SK, SI); - } - } else { - if (is_format_nchw) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - transpose_matrix(height(), width(), - getData<_FP16>() + getIndex(b, c, 0, 0), width(), - out.getData<_FP16>() + out.getIndex(b, c, 0, 0), - out.width()); - } - } - } else { - transposeloop_nhwc(l, k, j, i, SL, SK, SJ, SI); - } - } - break; - case 1: - if (indexJ == 0) { - if (is_format_nchw) { - transposeloop(l, j, i, k, SL, SJ, SI, SK); - } else { - transposeloop_nhwc(l, i, k, j, SL, SI, SK, SJ); - } - } else { - if (is_format_nchw) { - transposeloop(l, j, k, i, SL, SJ, SK, SI); - } else { - transposeloop_nhwc(l, k, i, j, SL, SK, SI, SJ); - } - } - break; - case 2: - if (indexJ == 0) { - if (is_format_nchw) { - transposeloop(l, k, i, j, SL, SK, SI, SJ); - } else { - transposeloop_nhwc(l, i, j, k, SL, SI, SJ, SK); - } - } else { - if (is_format_nchw) { - transposeloop(l, k, j, i, SL, SK, SJ, SI); - } else { - transposeloop_nhwc(l, j, i, k, SL, SJ, SI, SK); - } - } - break; - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - return out; -} - -Tensor Tensor::transpose(const std::string &direction) const { - Tensor result(dim); - transpose(direction, result); - return result; -} - -Tensor Tensor::dropout_mask(float dropout) const { - Tensor result(dim); - result.dropout_mask(dropout); - return result; -} - -void Tensor::dropout_mask(float dropout) { - setRandUniform(0.0, 1.0); - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - float scale = 1.0 / (1 - dropout); - float *data_ = getData(); - for (unsigned int i = 0; i < size(); ++i) { - if (data_[i] 
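transpose() takes a direction string over the non-batch axes, where 0, 1 and 2 stand for channel, height and width; for example "0:2:1" keeps the channel axis, swaps height with width, and never moves the batch axis. A hedged sketch:

// hedged illustration, not part of the patch
nntrainer::Tensor img(1, 3, 4, 5);
img.setValue(1.0f);
nntrainer::Tensor swapped = img.transpose("0:2:1"); // shape becomes 1x3x5x4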
>= dropout) - data_[i] = scale; - else - data_[i] = 0.0; - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - _FP16 scale = static_cast<_FP16>(1.0 / (1 - dropout)); - _FP16 *data_ = getData<_FP16>(); - for (unsigned int i = 0; i < size(); ++i) { - if (data_[i] >= dropout) - data_[i] = scale; - else - data_[i] = 0; - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } -} - -void Tensor::filter_mask(const Tensor &mask_len, bool reverse) { - float fill_mask_val = 0.0; - float en_mask_val = 1.0 - fill_mask_val; - - if (reverse) { - fill_mask_val = 1.0; - en_mask_val = 1.0 - fill_mask_val; - } - - setValue(fill_mask_val); - if (mask_len.batch() != batch()) - throw std::invalid_argument("Number of filter masks mismatched"); - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - for (unsigned int b = 0; b < batch(); b++) { - float *addr = getAddress(b, 0, 0, 0); - const uint *mask_len_val = mask_len.getAddress(b, 0, 0, 0); - std::fill(addr, addr + (*mask_len_val), en_mask_val); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - for (unsigned int b = 0; b < batch(); b++) { - _FP16 *addr = getAddress<_FP16>(b, 0, 0, 0); - const uint *mask_len_val = mask_len.getAddress(b, 0, 0, 0); - std::fill(addr, addr + (*mask_len_val), (_FP16)en_mask_val); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } -} - -Tensor Tensor::zoneout_mask(float zoneout) { - Tensor ret(getDim()); - zoneout_mask(ret, zoneout); - return ret; -} - -void Tensor::zoneout_mask(Tensor &opposite, float zoneout) { - if (dim != opposite.dim) { - throw std::invalid_argument( - "[Tensor::zoneout_mask] opposite dimension does not match"); - } - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - opposite.setRandBernoulli(zoneout); - - float *data = getData(); - float *opposite_data = opposite.getData(); - - for (unsigned int i = 0; i < size(); ++i) { - if (opposite_data[i] > epsilon) { - data[i] = 0.0f; - } else { - data[i] = 1.0f; - } - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - _FP16 zoneout_fp16 = (_FP16)zoneout; - opposite.setRandBernoulli(zoneout_fp16); - - _FP16 *data = getData<_FP16>(); - _FP16 *opposite_data = opposite.getData<_FP16>(); - - for (unsigned int i = 0; i < size(); ++i) { - if (opposite_data[i] > epsilon) { - data[i] = (_FP16)0.0; - } else { - data[i] = (_FP16)1.0; - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } -} - -Tensor Tensor::apply(std::function f) const { return f(*this); } - -Tensor &Tensor::apply(std::function f, - Tensor &output) const { - return f(*this, output); -} - -void Tensor::print(std::ostream &out) const { - printInstance(out, this); - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - unsigned int len = size(); - out << "data addr: " << data << '\n'; - out << dim; - - if (len > 100) { - out << '[' << data[0] << ' ' << data[1] << ' ' << data[2] << " ... 
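dropout_mask() follows the inverted-dropout convention: the tensor is filled with uniform noise, positions that survive (value >= p) are set to 1 / (1 - p) so the expected activation stays unchanged, and the rest are zeroed; zoneout_mask() builds a pair of complementary 0/1 masks the same way. A hedged sketch:

// hedged illustration, not part of the patch
nntrainer::Tensor mask(1, 1, 1, 8);
mask.dropout_mask(0.25f); // roughly 75% of entries become 1.0f / 0.75f, the rest 0.0f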
" - << data[len - 3] << ' ' << data[len - 2] << ' ' << data[len - 1] - << ']' << std::endl; - return; - } - - std::ios init(NULL); - init.copyfmt(out); - float max_ = 0.0; - float min_ = 10000000; - if (getFormat() == Tformat::NCHW) { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int l = 0; l < channel(); l++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - out << std::setw(10) << std::setprecision(10) - << this->getValue(k, l, i, j) << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - } else { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - for (unsigned int l = 0; l < channel(); l++) { - out << std::setw(10) << std::setprecision(10) - << this->getValue(k, l, i, j) << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - } - out.copyfmt(init); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - unsigned int len = size(); - out << "data addr: " << data << '\n'; - out << dim; - - if (len > 100) { - out << '[' << (float)data[0] << ' ' << (float)data[1] << ' ' - << (float)data[2] << " ... " << (float)data[len - 3] << ' ' - << (float)data[len - 2] << ' ' << (float)data[len - 1] << ']' - << std::endl; - return; - } - - std::ios init(NULL); - init.copyfmt(out); - float max_ = 0.0; - float min_ = 10000000; - if (getFormat() == Tformat::NCHW) { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int l = 0; l < channel(); l++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - out << std::setw(10) << std::setprecision(10) - << (float)this->getValue<_FP16>(k, l, i, j) << " "; - if (std::isinf((float)this->getValue<_FP16>(k, l, i, j))) - out << "INF or NAN " << k << ":" << l << ":" << i << ":" << j - << std::endl; - if ((float)this->getValue<_FP16>(k, l, i, j) < min_) - min_ = (float)this->getValue<_FP16>(k, l, i, j); - if ((float)this->getValue<_FP16>(k, l, i, j) > max_) - max_ = (float)this->getValue<_FP16>(k, l, i, j); - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - } else { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - for (unsigned int l = 0; l < channel(); l++) { - out << std::setw(10) << std::setprecision(10) - << (float)this->getValue<_FP16>(k, l, i, j) << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - } - out.copyfmt(init); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else if (getDataType() == ml::train::TensorDim::DataType::QINT8) { - const uint8_t *data = getData(); - unsigned int len = size(); - out << "data addr: " << reinterpret_cast(data) << '\n'; - out << dim; - - if (len > 100) { - out << '[' << (int)data[0] << ' ' << (int)data[1] << ' ' << (int)data[2] - << " ... 
" << (int)data[len - 3] << ' ' << (int)data[len - 2] << ' ' - << (int)data[len - 1] << ']' << std::endl; - return; - } - - std::ios init(NULL); - init.copyfmt(out); - if (getFormat() == Tformat::NCHW) { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int l = 0; l < channel(); l++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - out << std::setw(10) << (int)this->getValue(k, l, i, j) - << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - } else { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - for (unsigned int l = 0; l < channel(); l++) { - out << std::setw(10) << (int)this->getValue(k, l, i, j) - << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - out.copyfmt(init); - } - } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) { - const uint8_t *data = getData(); - unsigned int len = (size() + 1) / 2; - out << "data addr: " << (float *)data << '\n'; - out << dim; - - if (len > 100) { - out << '[' << (int)decode_qint(data[0], true) << ' ' - << (int)decode_qint(data[0], false) << ' ' - << (int)decode_qint(data[1], true) << " ... " - << (int)decode_qint(data[len - 2], false) << ' ' - << (int)decode_qint(data[len - 1], true) << ' ' - << (int)decode_qint(data[len - 1], false) << ']' << std::endl; - return; - } - - std::ios init(NULL); - init.copyfmt(out); - if (getFormat() == Tformat::NCHW) { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int l = 0; l < channel(); l++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - out << std::setw(3) << (int)this->getValueQint4(k, l, i, j) - << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - } else { - for (unsigned int k = 0; k < batch(); k++) { - for (unsigned int i = 0; i < height(); i++) { - for (unsigned int j = 0; j < width(); j++) { - for (unsigned int l = 0; l < channel(); l++) { - out << std::setw(3) << (int)this->getValueQint4(k, l, i, j) - << " "; - } - out << std::endl; - } - out << std::endl; - } - out << "-------" << std::endl; - } - out.copyfmt(init); - } - } -} - -void Tensor::print_(std::ostream &out, uint opt) const { - printInstance(out, this); - - unsigned int len = size(); - - std::ios init(NULL); - init.copyfmt(out); - if (opt == 0) { - if (getFormat() == Tformat::NCHW) { - out << "{"; - for (unsigned int k = 0; k < batch(); k++) { - out << "{"; - for (unsigned int i = 0; i < channel(); i++) { - out << "{"; - for (unsigned int j = 0; j < height(); j++) { - out << "{"; - for (unsigned int l = 0; l < width(); l++) { - if (l < width() - 1) - out << std::setw(10) << std::setprecision(10) - << this->getValue(k, l, i, j) << ", "; - else - out << std::setw(10) << std::setprecision(10) - << this->getValue(k, l, i, j); - } - if (j < height() - 1) - out << "},"; - else - out << "}"; - out << std::endl; - } - if (i < channel() - 1) - out << "},"; - else - out << "}"; - out << std::endl; - } - if (k < batch() - 1) - out << "},"; - else - out << "}"; - out << std::endl; - } - out << "}"; - } else { - out << "{"; - for (unsigned int k = 0; k < batch(); k++) { - out << "{"; - for (unsigned int i = 0; i < height(); i++) { - out << "{"; - for (unsigned int j = 0; j < width(); j++) { - out << "{"; - for (unsigned int l = 0; l < channel(); l++) { - if (l < channel() - 1) - out << 
std::setw(10) << std::setprecision(10) - << this->getValue(k, l, i, j) << ", "; - else - out << std::setw(10) << std::setprecision(10) - << this->getValue(k, l, i, j); - } - if (j < width() - 1) - out << "},"; - else - out << "}"; - out << std::endl; - } - if (i < height() - 1) - out << "},"; - else - out << "}"; - out << std::endl; - } - if (k < batch() - 1) - out << "},"; - else - out << "}"; - out << std::endl; - } - out << "}"; - } - } else { - for (uint i = 0; i < len; ++i) { - out << getData()[i] << ", "; - } - } - out.copyfmt(init); -} - -std::ostream &operator<<(std::ostream &out, Tensor const &m) { - m.print(out); - return out; -} - -void Tensor::copy(const void *buf) { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << "Tensor is not contiguous, cannot copy."; - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - if (buf == getData()) { - return; - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (buf == getData<_FP16>()) { - return; - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else if (getDataType() == ml::train::TensorDim::DataType::QINT8) { - if (buf == getData()) { - return; - } - } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) { - if (buf == getData()) { - return; - } - } - - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - scopy(size(), (float *)buf, 1, getData(), 1); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - scopy(size(), (_FP16 *)buf, 1, getData<_FP16>(), 1); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else if (getDataType() == ml::train::TensorDim::DataType::QINT8) { - for (unsigned int i = 0; i < size(); ++i) { - getData()[i] = ((uint8_t *)buf)[i]; - } - } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) { - for (unsigned int i = 0; i < (size() + 1) / 2; ++i) { - getData()[i] = ((uint8_t *)buf)[i]; - } - } -} - -void Tensor::copy_with_stride(const Tensor &from) { - - if (dim == from.getDim()) { - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - setValue(b, c, h, w, from.getValue(b, c, h, w)); - } - } - } - } - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - setValue(b, c, h, w, from.getValue<_FP16>(b, c, h, w)); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } else { - Tensor t = Tensor(from.getDim(), true); - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - for (unsigned int b = 0; b < t.batch(); ++b) { - for (unsigned int c = 0; c < t.channel(); ++c) { - for (unsigned int h = 0; h < t.height(); ++h) { - for (unsigned int w = 0; w < t.width(); ++w) { - t.setValue(b, c, h, w, from.getValue(b, c, h, w)); - } - } - } - } - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); 
++w) { - setValue(b, c, h, w, from.getValue<_FP16>(b, c, h, w)); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - swap(t, *this); - } -} - -void Tensor::copy(const Tensor &from) { - // todo: enable copy to non-contiguous tensor - if (!contiguous) { - throw std::runtime_error("Cannot copy non-contiguous tensor"); - } - - if (from.size() != 0 && size() == from.size() && - getDataType() == from.getDataType()) { - reshape(from.getDim()); - if (from.getDataType() == ml::train::TensorDim::DataType::FP32) { - copy(from.getData()); - } else if (from.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - copy(from.getData<_FP16>()); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - } else { - if (from.getDataType() == ml::train::TensorDim::DataType::FP32) { - Tensor t = Tensor(from.getDim(), from.getData()); - swap(t, *this); - } else if (from.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - Tensor t = Tensor(from.getDim(), from.getData<_FP16>()); - swap(t, *this); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } -} - -void Tensor::copyData(const Tensor &from) { - // todo: enable copy to non-contiguous tensor - if (!contiguous) { - throw std::runtime_error("Cannot copy non-contiguous tensor"); - } - - if (size() != from.size()) - throw std::invalid_argument("Size of tensor to copy must match"); - - if (getDataType() == from.getDataType()) { - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - copy(from.getData()); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - copy(from.getData<_FP16>()); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else { - copy(from.getData()); - } - } else { - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - if (from.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - scopy(size(), from.getData<_FP16>(), 1, getData(), 1); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else if (from.getDataType() == ml::train::TensorDim::DataType::QINT8) { - scopy_int8_to_float32(from.size(), from.getData(), 1, - getData(), 1); - } else if (from.getDataType() == ml::train::TensorDim::DataType::QINT4) { - scopy_int4_to_float32((from.size() + 1) / 2, from.getData(), 1, - getData(), 1); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (from.getDataType() == ml::train::TensorDim::DataType::FP32) { - scopy(size(), from.getData(), 1, getData<_FP16>(), 1); - } else if (from.getDataType() == ml::train::TensorDim::DataType::QINT8) { - scopy_int8_to_float16(from.size(), from.getData(), 1, - getData<_FP16>(), 1); - } else if (from.getDataType() == ml::train::TensorDim::DataType::QINT4) { - scopy_int4_to_float16((from.size() + 1) / 2, from.getData(), 1, - getData<_FP16>(), 1); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } -} - -Tensor Tensor::clone() const { - Tensor t; - t.copy(*this); - t.name = name; - return t; -} - -void Tensor::reshape(const TensorDim &d) { - - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot reshape."; - - NNTR_THROW_IF(d.getDataLen() != dim.getDataLen(), std::invalid_argument) - << "[Tensor]: reshape cannot change the buffer size, trying reshaping " - "\nfrom " - << getDim() 
<< " to " << d; - - // dim = d; - dim.batch(d.batch()); - dim.channel(d.channel()); - dim.height(d.height()); - dim.width(d.width()); - - strides = d.computeStrides(); -} - -void Tensor::fill(const Tensor &from, bool alloc) { - if (alloc && this->empty()) { - this->copy(from); - return; - } - - if (!from.contiguous || !contiguous) { - /// @todo enable this if needed - throw nntrainer::exception::not_supported( - "[Tensor::fill] non-contiguous tensors are not supported"); - } - - if (dim != from.getDim()) { - throw std::invalid_argument("[Tensor::fill] dimension must be the same"); - } - - if (strides != from.getStrides()) { - /// @todo length does not represent buffer size, there should be way to - /// get the buffer size - throw std::invalid_argument("[Tensor::fill] buffer size must be the same"); - } - - if (this->getDataType() == ml::train::TensorDim::DataType::FP32) { - this->copy(from.getData()); - } else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - this->copy(from.getData<_FP16>()); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } +Tensor Tensor::sum(unsigned int axis, float alpha) const { + Tensor output("", this->getFormat(), this->getDataType()); + return sum(axis, output, alpha, 0); } -void Tensor::save(std::ostream &file) { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot save."; - - std::streamsize sz = static_cast(bytes()); - NNTR_THROW_IF(sz < 0, std::invalid_argument) - << "save size: " << bytes() - << " is too big. It cannot be represented by std::streamsize"; - - if (this->getDataType() == ml::train::TensorDim::DataType::FP32) { - checkedWrite(file, (char *)getData(), sz, - "[Tensor::save] operation failed"); - } else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - checkedWrite(file, (char *)getData<_FP16>(), - static_cast(size() * sizeof(_FP16)), - "[Tensor::save] operation failed"); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } +Tensor &Tensor::sum(unsigned int axis, Tensor &output, float alpha, + float beta) const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot sum"; - putData(); + itensor->sum(axis, output, alpha, beta); + return output; } -void Tensor::read(std::ifstream &file, Tdatatype s_type) { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot read."; - - std::streamsize sz = static_cast(bytes()); - - NNTR_THROW_IF(sz < 0, std::invalid_argument) - << "read size: " << bytes() - << " is too big. 
It cannot be represented by std::streamsize";
+Tensor Tensor::sum(const std::vector<unsigned int> &axes, float alpha) const {
+  Tensor output("", this->getFormat());
+  return sum(axes, output, alpha);
+}
-  if (getDataType() == Tdatatype::QINT4 || getDataType() == Tdatatype::QINT8) {
-    uint8_t axis, zp;
-    unsigned int len = 0;
+Tensor &Tensor::sum(const std::vector<unsigned int> &axes, Tensor &output,
+                    float alpha) const {
+  if (axes.empty())
+    throw std::invalid_argument("empty axes given");
-    file.read((char *)&axis, sizeof(uint8_t));
+  if (axes.size() == 1) {
+    this->sum(axes[0], output, alpha);
+  } else {
-    if (axis == 0)
-      len = batch();
-    else if (axis == 1) {
-      len = channel();
-    } else if (axis == 2) {
-      len = height();
-    } else if (axis == 3) {
-      len = width();
-    }
+    /** club axes together */
+    Tensor new_reshaped = Tensor(getDim());
+    new_reshaped.copy(*this);
+    std::vector<unsigned int> continuous_order = {0, 3, 1, 2};
+    std::vector<unsigned int> new_axes = {axes[0]};
-    // read scale factors
-    for (unsigned int i = 0; i < len; ++i) {
-      if (s_type == Tdatatype::FP32) {
-        float scale;
-        file.read((char *)&scale, sizeof(float));
-        scale_factors_fp32.push_back(scale);
-      } else if (s_type == Tdatatype::FP16) {
-#ifdef ENABLE_FP16
-        _FP16 scale;
-        file.read((char *)&scale, sizeof(_FP16));
-        scale_factors_fp16.push_back(scale);
-#else
-        throw std::invalid_argument("Error: enable-fp16 is not enabled");
-#endif
+    for (unsigned int i = 1; i < axes.size(); ++i) {
+      if (checkContinuous(axes[i - 1], axes[i])) {
+        new_reshaped.mergeAxis(axes[i - 1], axes[i]);
+        new_axes.back() = axes[i];
+      } else {
+        new_axes.push_back(axes[i]);
       }
     }
-    // read zero points and parse if needed
-    if (getDataType() == Tdatatype::QINT4) {
-      for (unsigned int i = 0; i < (len + 1) / 2; ++i) {
-        file.read((char *)&zp, sizeof(uint8_t));
-        zero_points.push_back(decode_qint(zp, true));
-        zero_points.push_back(decode_qint(zp, false));
-      }
-    } else if (getDataType() == Tdatatype::QINT8) {
-      for (unsigned int i = 0; i < len; ++i) {
-        file.read((char *)&zp, sizeof(uint8_t));
-        zero_points.push_back(zp);
-      }
-    }
+    Tensor ret = new_reshaped.sum(new_axes[0]);
+    for (unsigned int i = 1; i < new_axes.size() - 1; ++i)
+      ret = ret.sum(axes[i]);
+    ret.sum(new_axes.back(), output, alpha);
   }
-
-  checkedRead(file, (char *)getData(), sz, "[Tensor::read] operation failed");
-  putData();
+  return output;
 }
-/**
- * @brief Calculate average value according to the axis.
- */
 Tensor Tensor::average(unsigned int axis) const {
-  Tensor t("", this->getFormat(), this->getDataType());
-  return average(axis, t);
+  Tensor output("", this->getFormat(), this->getDataType());
+  return average(axis, output);
 }
-/**
- * @brief Calculate average value according to the axis.
- */ Tensor &Tensor::average(unsigned int axis, Tensor &output) const { if (axis >= TensorDim::MAXDIM) throw std::out_of_range( "negative axis or axis more then MAXDIM is invalid"); - unsigned int axis_size = dim.getDim()[axis]; + unsigned int axis_size = getDim()[axis]; if (axis_size == 1) output.copy(*this); else @@ -3249,8 +481,8 @@ Tensor &Tensor::average(unsigned int axis, Tensor &output) const { } Tensor Tensor::average(const std::vector &axes) const { - Tensor t("", this->getFormat(), this->getDataType()); - return average(axes, t); + Tensor output("", this->getFormat(), this->getDataType()); + return average(axes, output); } Tensor &Tensor::average(const std::vector &axes, @@ -3264,564 +496,581 @@ Tensor &Tensor::average(const std::vector &axes, if (idx >= TensorDim::MAXDIM) { throw std::out_of_range("axis more then MAXDIM is invalid"); } - ret_shape.setTensorDim(idx, dim.getTensorDim(idx)); + ret_shape.setTensorDim(idx, getDim().getTensorDim(idx)); } return this->sum(axes, output, 1.0 / (float)ret_shape.getDataLen()); } -/** - * @brief Calculate average value according to the axis. - */ Tensor Tensor::average() const { - Tensor result = *this; + Tensor output = *this; unsigned int axis = 0; if (this->getFormat() == Tformat::NHWC) { - result.reshape({1, dim.getDataLen(), 1, 1, this->getTensorType()}); + output.reshape({1, getDim().getDataLen(), 1, 1, this->getTensorType()}); axis = 1; } else { - result.reshape({1, 1, 1, dim.getDataLen(), this->getTensorType()}); + output.reshape({1, 1, 1, getDim().getDataLen(), this->getTensorType()}); axis = 3; } - return result.average(axis); + return output.average(axis); } -/** - * @brief Calculate average value according to the axis. - */ Tensor &Tensor::average(Tensor &output) const { Tensor result = *this; - result.reshape({1, 1, 1, dim.getDataLen()}); + result.reshape({1, 1, 1, getDim().getDataLen()}); return result.average(3, output); } -void Tensor::setValue(float val) { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot set value."; +int Tensor::pow_i(float exponent) { + pow(exponent, *this); + return ML_ERROR_NONE; +} + +Tensor Tensor::pow(float exponent) const { + Tensor output("", getFormat(), getDataType()); + return pow(exponent, output); +} + +Tensor &Tensor::pow(float exponent, Tensor &output) const { + itensor->pow(exponent, output); + return output; +} + +int Tensor::erf_i() { + erf(*this); + return ML_ERROR_NONE; +} + +Tensor Tensor::erf() const { + Tensor output("", getFormat(), getDataType()); + return erf(output); +} + +Tensor &Tensor::erf(Tensor &output) const { + itensor->erf(output); + return output; +} + +void Tensor::sin(Tensor &out, float alpha) { + if (size() != out.size()) + throw std::invalid_argument("Error: Size of out of Tensor::sin must match"); + + itensor->sin(out, alpha); +} + +void Tensor::cos(Tensor &out, float alpha) { + if (size() != out.size()) + throw std::invalid_argument("Error: Size of out of Tensor::cos must match"); + + itensor->cos(out, alpha); +} + +void Tensor::inv_sqrt_i() { itensor->inv_sqrt(*this); } + +LazyTensor Tensor::chain() const { return LazyTensor(*this); } + +float Tensor::l2norm() const { return itensor->l2norm(); } + +void Tensor::normalization_i() { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot do normalization."; + + const float min = minValue(); + const float max = maxValue(); + + if (max == min) { + Tensor tmp = *this; + this->subtract_i(tmp); + } else { + 
this->subtract_i(min); + this->divide_i(max - min); + } +} + +void Tensor::standardization_i() { + Tensor mean_by_batch = this->sum_by_batch(); + mean_by_batch.divide_i(getDim().getFeatureLen()); + + this->subtract_i(mean_by_batch); + Tensor std_dev_by_batch(batch(), 1, 1, 1, getFormat(), getDataType()); + std_dev_by_batch.setZero(); + + /// @todo remove conditional statement if (getDataType() == ml::train::TensorDim::DataType::FP32) { - float *data = getData(); - std::fill(data, data + size(), val); + float *std_dev = std_dev_by_batch.getData(); + + for (unsigned int k = 0; k < batch(); ++k) { + Tensor sub_this = this->getBatchSlice(k, 1); + std_dev[k] = sub_this.l2norm(); + } } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { #ifdef ENABLE_FP16 - _FP16 *data = getData<_FP16>(); - std::fill(data, data + size(), static_cast<_FP16>(val)); + _FP16 *std_dev = std_dev_by_batch.getData<_FP16>(); + + for (unsigned int k = 0; k < batch(); ++k) { + Tensor sub_this = this->getBatchSlice(k, 1); + std_dev[k] = static_cast<_FP16>(sub_this.l2norm()); + } #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif - } else if (getDataType() == ml::train::TensorDim::DataType::QINT8) { - uint8_t *data = getData(); - std::fill(data, data + size(), val); - } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) { - uint8_t *data = getData(); - uint8_t mixed = encode_qint(val, val); - std::fill(data, data + (size() + 1) / 2, mixed); } + + std_dev_by_batch.divide_i(getDim().getFeatureLen()); + this->divide_i(std_dev_by_batch); +} + +Tensor Tensor::dot(Tensor const &input, bool trans, bool trans_in) const { + Tensor output("", this->getFormat(), this->getDataType()); + dot(input, output, trans, trans_in); + + return output; +} + +/** + * @note: This dot product flattens the fist 3 axis for the purpose of + * computation. So, while performing, these matrices are behaving as 2-D + * matrices. The dimensions are restored while returning back the tensor + * in case of trans is false. + */ +Tensor &Tensor::dot(Tensor const &input, Tensor &output, bool trans, + bool trans_in, float beta) const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous. Cannot dot product."; + + itensor->dot(input, output, trans, trans_in, beta); + return output; +} + +Tensor &Tensor::dot_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv, + bool trans, bool trans_m, float beta) { + bool deriv_trans_m = true; + bool deriv_trans = false; + /** @todo handle all cases of trans and trans_m */ + if (!trans && trans_m) { + deriv_trans_m = false; + } + + return output_deriv.dot(m, *this, deriv_trans, deriv_trans_m, beta); +} + +/** + * @brief compute the derivative wrt m in the m tensor + * @note The caller tensor must be the same tensor as the one which called the + * dot() product. 
+ */ +Tensor &Tensor::dot_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv, + bool trans, bool trans_m, float beta) const { + bool deriv_trans_m = false; + bool deriv_trans = true; + /** @todo handle all cases of trans and trans_m */ + + if (!trans && trans_m) { + output_deriv.dot(*this, m_deriv, deriv_trans, deriv_trans_m, beta); + return m_deriv; + } else { + return dot(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta); + } +} + +Tensor &Tensor::dotBatched(Tensor const &m, Tensor &result, bool trans, + bool trans_m, float beta) const { + if (!result.isAllocated()) + throw std::invalid_argument( + "Output tensor must be preallocated for dotBatched operation"); + for (unsigned int b = 0; b < batch(); b++) { + /** @todo try using transpose to speedup the operation */ + const Tensor this_b = this->getBatchSlice(b, 1); + Tensor m_b = m.getBatchSlice(b, 1); + Tensor result_b = result.getBatchSlice(b, 1); + + this_b.dot(m_b, result_b, trans, trans_m, beta); + } + + return result; +} + +Tensor &Tensor::dot_batched_deriv_wrt_1(Tensor const &m, + Tensor const &output_deriv, bool trans, + bool trans_m, float beta) { + bool deriv_trans_m = true; + bool deriv_trans = false; + /** @todo handle all cases of trans and trans_m */ + if (!trans && trans_m) { + deriv_trans_m = false; + } + + return output_deriv.dotBatched(m, *this, deriv_trans, deriv_trans_m, beta); +} + +Tensor &Tensor::dot_batched_deriv_wrt_2(Tensor &m_deriv, + Tensor const &output_deriv, bool trans, + bool trans_m, float beta) const { + bool deriv_trans_m = false; + bool deriv_trans = true; + /** @todo handle all cases of trans and trans_m */ + + if (!trans && trans_m) { + output_deriv.dotBatched(*this, m_deriv, deriv_trans, deriv_trans_m, beta); + return m_deriv; + } else { + return dotBatched(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta); + } +} + +Tensor Tensor::dropout_mask(float dropout) const { + Tensor output(getDim()); + output.dropout_mask(dropout); + return output; +} + +void Tensor::dropout_mask(float dropout) { + /// @todo add unittest + NNTR_THROW_IF(dropout < 0 || dropout > 1, std::invalid_argument) + << "[Tensor::dropout_mask] Dropout rate should be between 0 and 1"; + + // if the rate is zero, no change is needed + if (std::fpclassify(dropout) == FP_ZERO) + return; + + setRandUniform(0.0, 1.0); + itensor->dropout_mask(dropout); +} + +void Tensor::filter_mask(const Tensor &mask_len, bool reverse) { + /// @todo add unittest + itensor->filter_mask(mask_len, reverse); } -void Tensor::setZero() { - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - if (contiguous) - sscal(size(), 0, getData(), 1); - else - apply_i([](float val) -> float { return 0; }); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (contiguous) - sscal(size(), 0, getData<_FP16>(), 1); - else - apply_i<_FP16>([](_FP16 val) -> _FP16 { return 0; }); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT8) { - apply_i([](uint8_t val) -> uint8_t { return 0; }); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT4) { - setValue(0); - } +Tensor Tensor::zoneout_mask(float zoneout) { + Tensor output(getDim()); + zoneout_mask(output, zoneout); + return output; } -std::vector Tensor::argmax() const { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot get argmax."; - std::vector result; +void 
Tensor::zoneout_mask(Tensor &opposite, float zoneout) { + NNTR_THROW_IF(getDim() != opposite.getDim(), std::invalid_argument) + << "[Tensor::zoneout_mask] opposite dimension does not match"; - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - size_t batch_size = batch(); - size_t feature_len = dim.getFeatureLen(); + NNTR_THROW_IF(zoneout < 0 || zoneout > 1, std::invalid_argument) + << "[Tensor::zoneout_mask] Zoneout rate should be between 0 and 1"; - result.resize(batch_size); + // if the rate is zero, no change is needed + if (std::fpclassify(zoneout) == FP_ZERO) + return; - for (unsigned int b = 0; b < batch_size; b++) { - auto max_iter = - std::max_element(data + b * feature_len, data + (b + 1) * feature_len); - result[b] = std::distance(data, max_iter) - (b * feature_len); - } - } - if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - size_t batch_size = batch(); - size_t feature_len = dim.getFeatureLen(); + itensor->zoneout_mask(opposite, zoneout); +} - result.resize(batch_size); +std::vector Tensor::split(unsigned num_size, int axis) { + NNTR_THROW_IF(num_size == 0, std::invalid_argument) + << "num size cannot be zero"; - for (unsigned int b = 0; b < batch_size; b++) { - auto max_iter = - std::max_element(data + b * feature_len, data + (b + 1) * feature_len); - result[b] = std::distance(data, max_iter) - (b * feature_len); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif + if (axis == -1) { + axis = 3; } - return result; -} + NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument) + << "cannot split axis of axis: " << axis; -int Tensor::erf_i() { - erf(*this); - return ML_ERROR_NONE; -} + NNTR_THROW_IF(getDim().getTensorDim(axis) % num_size != 0, + std::invalid_argument) + << "axis is not divisible by num_size, axis: " << axis + << " num size: " << num_size; -Tensor Tensor::erf() const { - Tensor t; - return erf(t); -} + std::vector sizes; + sizes.resize(num_size); -Tensor &Tensor::erf(Tensor &out) const { - if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) { - auto f = [](float in) { return std::erf(in); }; - apply(f, out); - } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - auto f = [](_FP16 in) { - return static_cast<_FP16>(std::erf(static_cast(in))); - }; - apply<_FP16>(f, out); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return out; -} + unsigned int sz = getDim().getTensorDim(axis) / num_size; + std::fill(sizes.begin(), sizes.end(), sz); -void Tensor::sin(Tensor &out, float alpha) { - if (size() != out.size()) - throw std::invalid_argument("Error: Size of out of Tensor::sin must match"); - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - if (!contiguous) { - auto f = [alpha](float val) -> float { return std::sin(alpha * val); }; - apply(f, out); - } else { - sine(size(), getData(), out.getData(), alpha); - } - } else - throw std::invalid_argument("Error: Tensor::sin supports fp32 case only."); + return split(sizes, axis); } -void Tensor::cos(Tensor &out, float alpha) { - if (size() != out.size()) - throw std::invalid_argument("Error: Size of out of Tensor::sin must match"); - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - if (!contiguous) { - auto f = [alpha](float val) -> float { return std::cos(alpha * val); }; - apply(f, out); - } else { - cosine(size(), getData(), out.getData(), alpha); 
- } - } else - throw std::invalid_argument("Error: Tensor::cos supports fp32 case only."); -} +std::vector Tensor::split(std::vector sizes, int axis) { + NNTR_THROW_IF(sizes.size() == 0, std::invalid_argument) + << "num size cannot be zero"; -void Tensor::inv_sqrt_i() { - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - if (!contiguous) { - apply_i([](float val) -> float { return 1 / std::sqrt(val); }); - } else { - inv_sqrt_inplace(this->size(), getData()); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - if (!contiguous) { - apply_i<_FP16>([](_FP16 val) -> _FP16 { - return static_cast<_FP16>(1 / std::sqrt(static_cast(val))); - }); - } else { - inv_sqrt_inplace(this->size(), getData<_FP16>()); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else - throw std::invalid_argument( - "Error: Tensor::inv_sqrt_i only supports fp32, fp16"); + NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument) + << "cannot split axis of axis: " << axis; + + NNTR_THROW_IF( + std::any_of(sizes.begin(), sizes.end(), [](size_t sz) { return !sz; }), + std::invalid_argument) + << "among given sizes at least one of size is 0"; + + return itensor->split(sizes, axis); } -float Tensor::l2norm() const { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot get l2norm."; - float ret = 0; - unsigned int len = size(); - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - ret = snrm2(len, data, 1); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - ret = snrm2(len, data, 1); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - return ret; +Tensor Tensor::concat(const std::vector &tensors, int axis) { + NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument) + << "cannot split axis of axis: " << axis; + + NNTR_THROW_IF(tensors.empty(), std::invalid_argument) + << "given tensor vector is empty"; + + return itensor->concat(tensors, axis); } -float Tensor::max_abs() const { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot get max_abs."; +Tensor Tensor::cat(const std::vector &tensors, int axis) { + Tensor input = tensors[0]; + return input.concat(tensors, axis); +} - unsigned int len = size(); - float ret = 0; - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); +void Tensor::print(std::ostream &out) const { + printInstance(out, this); + itensor->print(out); +} - unsigned int idx = isamax(len, data, 1); - ret = *(data + idx); +void Tensor::putData() const { itensor->putData(); } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); +void Tensor::setData(const std::shared_ptr buf, size_t off, + bool init) { + itensor->setMemoryData(buf, off); - unsigned int idx = isamax(len, data, 1); - ret = *(data + idx); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif + if (buf && init) { + initialize(); } - return ret; } -Tensor &Tensor::normalization(Tensor &output) const { - if (output.empty()) - output = Tensor(dim); +const std::shared_ptr Tensor::getMemoryData() const { + return itensor->getMemoryData(); +} - output.copy(*this); - output.normalization_i(); +size_t Tensor::getOffset() const { return itensor->getOffset(); } 
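/*
 * Illustrative usage sketch (editor's note, not part of this patch): how the
 * delegating sum/split/cat entry points above are expected to compose once the
 * refactored Tensor lands. The shapes, the NCHW layout, the default alpha of
 * the multi-axis sum, and the use of setValue() for initialization are
 * assumptions made only for this example.
 */
#include <tensor.h>
#include <vector>

void reduction_and_split_example() {
  nntrainer::Tensor t(2, 3, 4, 5); // batch=2, channel=3, height=4, width=5
  t.setValue(1.0f);                // fill every element with 1

  // Multi-axis reduction: adjacent axes are merged via checkContinuous()/
  // mergeAxis() before each per-axis sum is delegated to the itensor backend.
  nntrainer::Tensor reduced = t.sum({2, 3}); // shape 2x3x1x1, each value 20

  // Split the channel axis into three equal slices, then concatenate them
  // back along the same axis; both calls forward to itensor->split()/concat().
  std::vector<nntrainer::Tensor> parts = t.split(3, 1);
  nntrainer::Tensor joined = nntrainer::Tensor::cat(parts, 1);
}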
- return output; +void Tensor::copy(const Tensor &from) { + /// @todo enable copy to non-contiguous tensor + if (!itensor->getContiguous()) { + throw std::runtime_error("Cannot copy non-contiguous tensor"); + } + + if (from.size() != 0 && size() == from.size() && + getDataType() == from.getDataType()) { + // if tensor size and data type match, copy data + itensor->copy(from); + } else { + Tensor t = Tensor(from.getDim(), from.getData()); + swap(t, *this); + } } -void Tensor::normalization_i() { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " is not contiguous, cannot do normalization."; +void Tensor::copyData(const Tensor &from) { itensor->copyData(from); } - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *data = getData(); - - auto bounds = std::minmax_element(data, data + size()); - const float min = *bounds.first; - const float max = *bounds.second; - - if (max == min) { - Tensor tmp = *this; - this->subtract_i(tmp); - } else { - this->subtract_i(min); - this->divide_i(max - min); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - const _FP16 *data = getData<_FP16>(); - - auto bounds = std::minmax_element(data, data + size()); - const _FP16 min = *bounds.first; - const _FP16 max = *bounds.second; - - if (max == min) { - Tensor tmp = *this; - this->subtract_i(tmp); - } else { - this->subtract_i(min); - this->divide_i(max - min); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif +void Tensor::copy_with_stride(const Tensor &from) { + if (itensor->getDim() == from.getDim()) { + // If the tensor dim matches, copy the data. This also applies to + // uncontigous tensor. + itensor->copy_with_stride(from, *this); + } else { + // replace with a new tensor that has the same data as the given tensor + Tensor t = Tensor(from.getDim(), true); + itensor->copy_with_stride(from, t); + swap(t, *this); } } -LazyTensor Tensor::chain() const { return LazyTensor(*this); } +Tensor Tensor::getBatchSlice(size_t offset, unsigned int size) const { + TensorDim dim_ = getDim(); + dim_.batch(size); -Tensor &Tensor::standardization(Tensor &output) const { - if (output.empty()) - output = Tensor(dim); + return getSharedDataTensor(dim_, offset * this->getDim().getFeatureLen(), + true, ""); +} +Tensor Tensor::clone() const { + Tensor output(getName(), getFormat(), getDataType()); output.copy(*this); - output.standardization_i(); - return output; } -void Tensor::standardization_i() { - Tensor mean_by_batch = this->sum_by_batch(); - mean_by_batch.divide_i(dim.getFeatureLen()); +void Tensor::save(std::ostream &file) { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot save."; - this->subtract_i(mean_by_batch); - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - Tensor std_dev_by_batch(dim.batch(), 1, 1, 1, dim.getFormat(), - dim.getDataType()); - std_dev_by_batch.setZero(); - float *std_dev = std_dev_by_batch.getData(); + std::streamsize sz = static_cast(bytes()); + NNTR_THROW_IF(sz < 0, std::invalid_argument) + << "save size: " << bytes() + << " is too big. 
It cannot be represented by std::streamsize"; - for (unsigned int k = 0; k < dim.batch(); ++k) { - Tensor sub_this = this->getBatchSlice(k, 1); - std_dev[k] = sub_this.l2norm(); - } + checkedWrite(file, getData(), sz, "[Tensor::save] operation failed"); + putData(); +} - std_dev_by_batch.divide_i(dim.getFeatureLen()); - this->divide_i(std_dev_by_batch); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - Tensor std_dev_by_batch(dim.batch(), 1, 1, 1, dim.getFormat(), - dim.getDataType()); - std_dev_by_batch.setZero(); - _FP16 *std_dev = std_dev_by_batch.getData<_FP16>(); +void Tensor::read(std::ifstream &file) { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot read."; - for (unsigned int k = 0; k < dim.batch(); ++k) { - Tensor sub_this = this->getBatchSlice(k, 1); - std_dev[k] = static_cast<_FP16>(sub_this.l2norm()); - } + std::streamsize sz = static_cast(bytes()); - std_dev_by_batch.divide_i(dim.getFeatureLen()); - this->divide_i(std_dev_by_batch); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } + NNTR_THROW_IF(sz < 0, std::invalid_argument) + << "read size: " << bytes() + << " is too big. It cannot be represented by std::streamsize"; + + checkedRead(file, getData(), sz, "[Tensor::read] operation failed"); + putData(); } -Tensor::BroadcastInfo Tensor::computeBroadcastInfo(const Tensor &m) const { - if (m.size() > this->size()) - throw exception::not_supported("broadcasting *this is not supported"); +std::vector Tensor::argmax() const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot get argmax."; + return itensor->argmax(); +} - const TensorDim m_dim = m.getDim(); +float Tensor::max_abs() const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot get max_abs."; + return itensor->max_abs(); +} - BroadcastInfo e; - e.tensor_type = getTensorType(); +float Tensor::maxValue() const { return itensor->maxValue(); } - uint continuity[4] = {0, 1, 2, 3}; - if (getFormat() == Tformat::NHWC) { - continuity[1] = 2; - continuity[2] = 3; - continuity[3] = 1; - } +float Tensor::minValue() const { return itensor->minValue(); } - /// checking if given Tensor's can be broadcasted - for (unsigned int i = 0; i < TensorDim::MAXDIM; ++i) { - if (dim.getTensorDim(continuity[i]) == m_dim.getTensorDim(continuity[i])) { - e.strides[i] = m.strides[i]; - continue; - } +Tensor Tensor::transpose(const std::string &direction) const { + Tensor output(getDim()); + transpose(direction, output); + return output; +} - /// If given dimension is 1, it could be reused, the stride remaining 0 - /// Need to check if dim[i] == 1 && m_dim[i] == 1 first though - /// If so, strides should not change - if (m_dim.getTensorDim(continuity[i]) == 1) { - continue; - } +Tensor &Tensor::transpose(const std::string &direction, Tensor &output) const { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous. 
Cannot transpose."; - std::stringstream ss; - ss << "[computeBroadcastInfo] broadcasting only allowed for " - "dimension value of 1 \n" - << "this: " << dim << "target: " << m_dim; - throw std::invalid_argument(ss.str().c_str()); + if (output.getData() == getData()) { + Tensor result = clone(); + return result.transpose(direction, output); } - /// calculate inner loop size - e.buffer_size = 1; - e.buffer_axis = -1; - e.strides[3] = m.strides[3]; - - /// initiate buffer info with matching dimension strategy - for (int axis = 3; axis >= 0; --axis) { - if (dim.getTensorDim(continuity[axis]) != - m_dim.getTensorDim(continuity[axis])) { - e.buffer_axis = axis; - break; - } + itensor->transpose(direction, output); - e.buffer_size *= dim.getTensorDim(continuity[axis]); - } + return output; +} - /// check strategy that uses consecutive ones - if (m_dim.getTensorDim(continuity[3]) == 1) { - unsigned int inner_loop_size = 1; - int axis; - for (axis = 3; axis >= 0; --axis) { - if (m_dim.getTensorDim(continuity[axis]) != 1) { - break; - } +void Tensor::reshape(const TensorDim &d) { itensor->reshape(d); } - inner_loop_size *= dim.getTensorDim(continuity[axis]); - } +void Tensor::fill(const Tensor &from, bool allocate) { + if (allocate && this->empty()) { + this->copy(from); + return; + } - /// if consecutive-one strategy has bigger chunk size, replace the - /// information - if (inner_loop_size > e.buffer_size) { - e.buffer_axis = axis; - e.buffer_size = inner_loop_size; - e.strides[3] = 0; - } + if (!from.getContiguous() || !getContiguous()) { + /// @todo enable this if needed + throw nntrainer::exception::not_supported( + "[Tensor::fill] non-contiguous tensors are not supported"); } - return e; -} - -Tensor Tensor::rotate_180(Tensor in) { - Tensor output(in.getDim()); - if (in.getDataType() == ml::train::TensorDim::DataType::FP32) { - output.setZero(); - for (unsigned int i = 0; i < in.batch(); ++i) { - for (unsigned int j = 0; j < in.channel(); ++j) { - for (unsigned int k = 0; k < in.height(); ++k) { - for (unsigned int l = 0; l < in.width(); ++l) { - output.setValue(i, j, k, l, - in.getValue(i, j, (in.height() - k - 1), - (in.width() - l - 1))); - } - } - } - } - } else if (in.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - output.setZero(); - for (unsigned int i = 0; i < in.batch(); ++i) { - for (unsigned int j = 0; j < in.channel(); ++j) { - for (unsigned int k = 0; k < in.height(); ++k) { - for (unsigned int l = 0; l < in.width(); ++l) { - output.setValue(i, j, k, l, - in.getValue<_FP16>(i, j, (in.height() - k - 1), - (in.width() - l - 1))); - } - } - } - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif + if (getDim() != from.getDim()) { + throw std::invalid_argument("[Tensor::fill] dimension must be the same"); } - return output; + + if (getStrides() != from.getStrides()) { + /// @todo length does not represent buffer size, there should be way to + /// get the buffer size + throw std::invalid_argument("[Tensor::fill] buffer size must be the same"); + } + + copyData(from); } -uint8_t Tensor::encode_qint(uint8_t high, uint8_t low) const { - return (high << 4) | (low & 0x0f); +TensorDim Tensor::getDim() const { return itensor->getDim(); } + +TensorDim::TensorType Tensor::getTensorType() const { + return itensor->getTensorType(); }; -uint8_t Tensor::decode_qint(uint8_t val, bool isHigh) const { - if (isHigh) { - val = val >> 4; - } else { - val = val << 4; - val = val >> 4; - } +Initializer Tensor::getInitializer() const { return 
itensor->getInitializer(); }
+
+TensorDim::Format Tensor::getFormat() const { return itensor->getFormat(); }
+
+Tdatatype Tensor::getDataType() const { return itensor->getDataType(); }
-  return val;
+void Tensor::updateBatch(unsigned int batch) { itensor->updateBatch(batch); }
+
+const bool Tensor::getContiguous() const noexcept {
+  return itensor->getContiguous();
 }
-std::vector Tensor::getScaleFactors() const {
-  return scale_factors_fp32;
+const std::array<size_t, TensorDim::MAXDIM>
+Tensor::getStrides() const noexcept {
+  return itensor->getStrides();
 }
-void Tensor::setZeroPoints(std::vector zp) {
-  if (zp.empty()) {
-    throw std::invalid_argument("Error: invalid parameter");
+bool Tensor::checkContinuous(unsigned int np1, unsigned int np2) const {
+  if (np1 > 3 || np2 > 3) {
+    throw std::invalid_argument(
+      "Error: Input value must be within the range of 0 to 3.");
+  }
+
+  if (getFormat() == Tformat::NCHW) {
+    if (np1 + 1 == np2)
+      return true;
+  } else {
+    std::vector<unsigned int> continuous_order_nhwc = {0, 3, 1, 2};
+    if (continuous_order_nhwc[np2] == continuous_order_nhwc[np1] + 1)
+      return true;
 }
-  zero_points = zp;
+  return false;
 }
-std::vector Tensor::getZeroPoints() const { return zero_points; }
+void Tensor::setName(const std::string &name_) { itensor->setName(name_); }
-void Tensor::dequantize(Tensor &output, unsigned int axis) const {
-  if (getDataType() == Tdatatype::FP32 || getDataType() == Tdatatype::FP16) {
-    throw std::invalid_argument("Error: Tensor cannot be dequantized");
-  }
+const std::string &Tensor::getName() const { return itensor->getName(); }
-  if (output.getDataType() == Tdatatype::QINT8 ||
-      output.getDataType() == Tdatatype::QINT4) {
-    throw std::invalid_argument("Error: Target datatype is quantized type");
-  }
+size_t Tensor::getIndex(unsigned int b, unsigned int c, unsigned int h,
+                        unsigned int w) const noexcept {
+  return itensor->getIndex(b, c, h, w);
+}
-  if (getFormat() != output.getFormat())
-    throw std::invalid_argument("Error: TensorType do not match");
+size_t Tensor::size() const { return itensor->size(); }
-  if (batch() != output.batch() || channel() != output.channel() ||
-      width() != output.width() || height() != output.height())
-    throw std::invalid_argument("Error: TensorDim do not match");
+bool Tensor::empty() const { return itensor->empty(); }
-  if (output.getDataType() == Tdatatype::FP32 && scale_factors_fp32.empty()) {
-    throw std::invalid_argument("Error: No scale factors");
-  }
+size_t Tensor::bytes() const { return itensor->bytes(); }
-#ifdef ENABLE_FP16
-  if (output.getDataType() == Tdatatype::FP16 && scale_factors_fp16.empty()) {
-    throw std::invalid_argument("Error: No scale factors");
-  }
-#endif
+size_t Tensor::batch() const { return itensor->batch(); }
-  if (axis == 0 && zero_points.size() != batch()) {
-    throw std::invalid_argument("Error: output axis do not match ");
-  }
+size_t Tensor::channel() const { return itensor->channel(); }
-  if (axis == 1 && zero_points.size() != channel()) {
-    throw std::invalid_argument("Error: output axis do not match ");
-  }
+size_t Tensor::height() const { return itensor->height(); }
-  if (axis == 2 && zero_points.size() != height()) {
-    throw std::invalid_argument("Error: output axis do not match ");
-  }
-  if (axis == 3 && zero_points.size() != width()) {
-    throw std::invalid_argument("Error: output axis do not match ");
-  }
-  size_t b = (axis == 0) ? zero_points.size() : 1;
-  size_t c = (axis == 1) ? zero_points.size() : 1;
-  size_t h = (axis == 2) ? zero_points.size() : 1;
-  size_t w = (axis == 3) ?
zero_points.size() : 1; +size_t Tensor::width() const { return itensor->width(); } - output.copyData(*this); +void Tensor::mergeAxis(unsigned int axis1, unsigned int axis2) { + NNTR_THROW_IF(!getContiguous(), std::invalid_argument) + << getName() << " is not contiguous, cannot merge axis"; - if (output.getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - std::vector<_FP16> zero_points_16(zero_points.begin(), zero_points.end()); - Tensor zero_points_fp16_tensor( - {{b, c, h, w}, {getFormat(), Tdatatype::FP16}}, zero_points_16.data()); + if (axis2 != axis1 + 1) + if (!checkContinuous(axis1, axis2)) + throw std::invalid_argument("axis2 must be axis1 + 1 for merging."); - Tensor scale_factors_fp16_tensor( - {{b, c, h, w}, {getFormat(), Tdatatype::FP16}}, - scale_factors_fp16.data()); + itensor->mergeAxis(axis1, axis2); +} - output.subtract_i(zero_points_fp16_tensor); - output.multiply_i(scale_factors_fp16_tensor); +void Tensor::createSharedDataTensor(const Tensor &src, Tensor &dest, + size_t offset) const { + itensor->createSharedDataTensor(src.itensor.get(), dest.itensor.get(), + offset); +} -#else - throw std::invalid_argument("enble-fp16 is not set"); -#endif - } else if (output.getDataType() == Tdatatype::FP32) { - std::vector zero_points_32(zero_points.begin(), zero_points.end()); - Tensor zero_points_fp32_tensor( - {{b, c, h, w}, {getFormat(), Tdatatype::FP32}}, zero_points_32.data()); - Tensor scale_factors_fp32_tensor( - {{b, c, h, w}, {getFormat(), Tdatatype::FP32}}, - scale_factors_fp32.data()); - - output.subtract_i(zero_points_fp32_tensor); - output.multiply_i(scale_factors_fp32_tensor); - } +Tensor Tensor::getSharedDataTensor(const TensorDim dim_, size_t offset, + bool reset_stride, + const std::string &name_) const { + Tensor ret = *this; + itensor->getSharedDataTensor(dim_, offset, reset_stride, name_, + ret.itensor.get()); + return ret; +} - return; +void Tensor::setTensorVar(TensorDim d, void *buf, size_t offset) { + itensor->setTensorVar(d, buf, offset); } -// namespace nntrainer +std::ostream &operator<<(std::ostream &out, Tensor const &input) { + input.print(out); + return out; +} -} /* namespace nntrainer */ +} // namespace nntrainer diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h index 4904e9cfba..b5df3ab9bd 100644 --- a/nntrainer/tensor/tensor.h +++ b/nntrainer/tensor/tensor.h @@ -1,52 +1,18 @@ +// SPDX-License-Identifier: Apache-2.0 /** - * Copyright (C) 2019 Samsung Electronics Co., Ltd. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * * @file tensor.h - * @date 04 December 2019 - * @brief This is Tensor class for calculation + * @date 01 December 2023 + * @brief This is a Tensor class * @see https://github.com/nnstreamer/nntrainer * @author Jijoong Moon + * @author Donghyeon Jeong * @bug No known bugs except for NYI items - * - * @todo deprecate new tensor allocation for out of place operations. 
*/ #ifndef __TENSOR_H__ #define __TENSOR_H__ #ifdef __cplusplus -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef DEBUG -#define EXCEPT_WHEN_DEBUG -#else -#define EXCEPT_WHEN_DEBUG noexcept -#endif - #define MAKE_SHARED_TENSOR(...) std::make_shared(__VA_ARGS__) #define CREATE_IF_EMPTY_DIMS(tensor, ...) \ @@ -55,51 +21,32 @@ tensor = Tensor(__VA_ARGS__); \ } while (0); -namespace nntrainer { +#include + +#include +#include +#include +#include + +#ifdef ENABLE_FP16 +#include +#endif -using TensorDim = ml::train::TensorDim; -using Tformat = ml::train::TensorDim::Format; -using Tdatatype = ml::train::TensorDim::DataType; -using TStorageOrder = ml::train::TensorDim::StorageOrder; +namespace nntrainer { class LazyTensor; -class SrcSharedTensor; /** - * @class Tensor Class for Calculation - * @brief Tensor Class for Calculation + * @class Tensor Class + * @brief Tensor Class */ class Tensor { public: - /** - * @brief Enumeration of Weight Initialization Type - * @todo support intialization from file - */ - enum class Initializer { - ZEROS, /** Zero initialization */ - ONES, /** One initialization */ - LECUN_NORMAL, /** LeCun normal initialization */ - LECUN_UNIFORM, /** uniform initialization */ - XAVIER_NORMAL, /** Xavier normal initialization */ - XAVIER_UNIFORM, /** Xavier uniform initialization */ - HE_NORMAL, /** He normal initialization */ - HE_UNIFORM, /** He uniform initialization */ - NONE /** No initialization */ - }; - /** * @brief Basic Constructor of Tensor */ Tensor(std::string name_ = "", Tformat fm = Tformat::NCHW, - Tdatatype d_type = Tdatatype::FP32) : - dim(TensorDim(fm, d_type)), - strides(dim.computeStrides()), - contiguous(true), - initializer(Initializer::NONE), - name(name_), - data(nullptr), - offset(0), - src_tensor() {} + Tdatatype d_type = Tdatatype::FP32); /** * @brief Constructor of Tensor with dimension, possibly lazily @@ -125,6 +72,8 @@ class Tensor { * @param[in] d1 Channel * @param[in] d2 Height * @param[in] d3 Width + * @param[in] fm Tensor Format + * @param[in] d_type Tensor Data Type */ Tensor(size_t d0, size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW, Tdatatype d_type = Tdatatype::FP32) : @@ -135,6 +84,8 @@ class Tensor { * @param[in] d1 Channel * @param[in] d2 Height * @param[in] d3 Width + * @param[in] fm Tensor Format + * @param[in] d_type Tensor Data Type */ Tensor(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW, Tdatatype d_type = Tdatatype::FP32) : @@ -144,6 +95,8 @@ class Tensor { * @brief Constructor of Tensor with batch size one and d1 size one * @param[in] d2 Height (NCHW) or Width (NHWC) * @param[in] d3 Width (NCHW) or Channel (NHWC) + * @param[in] fm Tensor Format + * @param[in] d_type Tensor Data Type */ Tensor(size_t d2, size_t d3, Tformat fm = Tformat::NCHW, Tdatatype d_type = Tdatatype::FP32) : @@ -152,6 +105,8 @@ class Tensor { /** * @brief Constructor of Tensor with just Width or Channel * @param[in] d3 Width (NCHW) or Channel (NHWC) + * @param[in] fm Tensor Format + * @param[in] d_type Tensor Data Type */ explicit Tensor(size_t d3, Tformat fm = Tformat::NCHW, Tdatatype d_type = Tdatatype::FP32) : @@ -163,6 +118,7 @@ class Tensor { * @param[in] d1 Channel (NCHW) or Height (NHWC) * @param[in] d2 Height (NCHW) or Width (NHWC) * @param[in] d3 Width (NCHW) or Channel (NHWC) + * @param[in] t_type Tensor Type */ Tensor(size_t d0, size_t d1, size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) : @@ -173,6 +129,7 @@ class 
Tensor { * @param[in] d1 Channel * @param[in] d2 Height * @param[in] d3 Width + * @param[in] t_type Tensor Type */ Tensor(size_t d1, size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) : @@ -182,6 +139,7 @@ class Tensor { * @brief Constructor of Tensor with batch size one and d1 size one * @param[in] d2 Height (NCHW) or Width (NHWC) * @param[in] d3 Width (NCHW) or Channel (NHWC) + * @param[in] t_type Tensor Type */ Tensor(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) : Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, @@ -190,6 +148,7 @@ class Tensor { /** * @brief Constructor of Tensor with just Width or Channel * @param[in] d3 Width (NCHW) or Channel (NHWC) + * @param[in] t_type Tensor Type */ explicit Tensor(size_t d3, ml::train::TensorDim::TensorType t_type) : Tensor(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1, @@ -198,62 +157,19 @@ class Tensor { /** * @brief Constructor of Tensor * @param[in] d data for the Tensor. It needs to set format properly. + * @param[in] t_type Tensor Type */ - Tensor(std::vector>>> const &d, ml::train::TensorDim::TensorType t_type) { - if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { - throw std::out_of_range( - "[Tensor] trying to initialize Tensor from empty vector"); - } - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - dim.setTensorDim(0, d.size()); - if (t_type.format == Tformat::NCHW) { - dim.setTensorDim(1, d[0].size()); - dim.setTensorDim(2, d[0][0].size()); - dim.setTensorDim(3, d[0][0][0].size()); - } else { - dim.setTensorDim(2, d[0].size()); - dim.setTensorDim(3, d[0][0].size()); - dim.setTensorDim(1, d[0][0][0].size()); - } - - setTensorType(t_type); - - strides = dim.computeStrides(); - - MemoryData *mem_data = - new MemoryData((void *)(new float[dim.getDataLen()]())); - data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { - delete[] mem_data->getAddr(); - }); - offset = 0; - contiguous = true; - initializer = Initializer::NONE; - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - if (t_type.format == Tformat::NCHW) { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < channel(); ++j) - for (unsigned int k = 0; k < height(); ++k) - for (unsigned int l = 0; l < width(); ++l) - this->setValue(i, j, k, l, d[i][j][k][l]); - } else { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < height(); ++j) - for (unsigned int k = 0; k < width(); ++k) - for (unsigned int l = 0; l < channel(); ++l) - this->setValue(i, l, j, k, d[i][j][k][l]); - } - }; + itensor = std::shared_ptr(new FloatTensor(d, t_type.format), + std::default_delete()); + } /** * @brief Constructor of Tensor * @note This constructor copies vector again. needs refactoring * @param[in] d data for the Tensor. It needs to set format properly. + * @param[in] t_type Tensor Type */ Tensor(std::vector>> const &d, ml::train::TensorDim::TensorType t_type) : @@ -263,6 +179,7 @@ class Tensor { * @brief Constructor of Tensor * @note This constructor copies vector again. 
needs refactoring * @param[in] d data for the Tensor with batch size one + * @param[in] t_type Tensor Type */ Tensor(std::vector> const &d, ml::train::TensorDim::TensorType t_type) : @@ -273,63 +190,19 @@ class Tensor { * @brief Constructor of Tensor * @note This constructor copies vector again. needs refactoring * @param[in] d data for the Tensor with batch size one + * @param[in] t_type Tensor Type */ Tensor(std::vector>>> const &d, ml::train::TensorDim::TensorType t_type) { - - if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { - throw std::out_of_range( - "[Tensor] trying to initialize Tensor from empty vector"); - } - - dim.setTensorDim(0, d.size()); - if (t_type.format == Tformat::NCHW) { - dim.setTensorDim(1, d[0].size()); - dim.setTensorDim(2, d[0][0].size()); - dim.setTensorDim(3, d[0][0][0].size()); - } else { - dim.setTensorDim(2, d[0].size()); - dim.setTensorDim(3, d[0][0].size()); - dim.setTensorDim(1, d[0][0][0].size()); - } - - setTensorType(t_type); - - strides = dim.computeStrides(); - - MemoryData *mem_data = - new MemoryData((void *)(new _FP16[dim.getDataLen()]())); - data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { - delete[] mem_data->getAddr<_FP16>(); - }); - offset = 0; - contiguous = true; - initializer = Initializer::NONE; - - setDataType(Tdatatype::FP16); - - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - if (t_type.format == Tformat::NCHW) { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < channel(); ++j) - for (unsigned int k = 0; k < height(); ++k) - for (unsigned int l = 0; l < width(); ++l) - this->setValue(i, j, k, l, d[i][j][k][l]); - } else { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < height(); ++j) - for (unsigned int k = 0; k < width(); ++k) - for (unsigned int l = 0; l < channel(); ++l) - this->setValue(i, l, j, k, d[i][j][k][l]); - } - }; + itensor = std::shared_ptr(new HalfTensor(d, t_type.format), + std::default_delete()); + } /** * @brief Constructor of Tensor * @note This constructor copies vector again. needs refactoring * @param[in] d data for the Tensor. It needs to set format properly. + * @param[in] t_type Tensor Type */ Tensor(std::vector>> const &d, ml::train::TensorDim::TensorType t_type) : @@ -339,101 +212,23 @@ class Tensor { * @brief Constructor of Tensor * @note This constructor copies vector again. needs refactoring * @param[in] d data for the Tensor with batch size one + * @param[in] t_type Tensor Type */ Tensor(std::vector> const &d, ml::train::TensorDim::TensorType t_type) : Tensor(std::vector::type>{d}, t_type){}; - #endif /** - * @brief Constructor of Tensor - * @param[in] d data for the Tensor. It needs to set format properly. - * @param[in] t_type Tensor type. - */ - Tensor(std::vector>>> const &d, - ml::train::TensorDim::TensorType t_type) { - if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { - throw std::out_of_range( - "[Tensor] trying to initialize Tensor from empty vector"); - } - - if (t_type.data_type != Tdatatype::QINT8 && - t_type.data_type != Tdatatype::QINT4) { - throw std::out_of_range( - "[Tensor] TensorType do not match with input data type"); - } - - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. 
and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - dim.setTensorDim(0, d.size()); - if (t_type.format == Tformat::NCHW) { - dim.setTensorDim(1, d[0].size()); - dim.setTensorDim(2, d[0][0].size()); - dim.setTensorDim(3, d[0][0][0].size()); - } else { - dim.setTensorDim(2, d[0].size()); - dim.setTensorDim(3, d[0][0].size()); - dim.setTensorDim(1, d[0][0][0].size()); - } - - setTensorType(t_type); - - strides = dim.computeStrides(); - - MemoryData *mem_data = - (t_type.data_type == Tdatatype::QINT8) - ? new MemoryData((void *)(new uint8_t[dim.getDataLen()]())) - : new MemoryData((void *)(new uint8_t[(dim.getDataLen() + 1) / 2]())); - data = std::shared_ptr(mem_data, [](MemoryData *mem_data) { - delete[] mem_data->getAddr(); - }); - offset = 0; - contiguous = true; - initializer = Initializer::NONE; - - // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2] - // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch, - // dim[1] == height, dim[2] == width, dim[3] == channel - if (t_type.format == Tformat::NCHW) { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < channel(); ++j) - for (unsigned int k = 0; k < height(); ++k) - for (unsigned int l = 0; l < width(); ++l) - this->setValue(i, j, k, l, d[i][j][k][l]); - } else { - for (unsigned int i = 0; i < batch(); ++i) - for (unsigned int j = 0; j < height(); ++j) - for (unsigned int k = 0; k < width(); ++k) - for (unsigned int l = 0; l < channel(); ++l) - this->setValue(i, l, j, k, d[i][j][k][l]); - } - }; - - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor. It needs to set format properly. - */ - Tensor(std::vector>> const &d, - ml::train::TensorDim::TensorType t_type) : - Tensor(std::vector::type>{d}, t_type){}; - - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor with batch size one + * @brief Basic Destructor */ - Tensor(std::vector> const &d, - ml::train::TensorDim::TensorType t_type) : - Tensor(std::vector::type>{d}, t_type){}; + ~Tensor() = default; /** * @brief Copy constructor of Tensor. * @param[in] Tensor & */ - Tensor(const Tensor &rhs) = default; + Tensor(const Tensor &rhs); /** * @brief Move constructor of Tensor. @@ -445,7 +240,7 @@ class Tensor { * @brief Copy assignment operator. * @param[in] rhs Tensor to be copied. */ - Tensor &operator=(const Tensor &rhs) = default; + Tensor &operator=(const Tensor &rhs); /** * @brief Move assignment operator. 
@@ -453,16 +248,28 @@ class Tensor { */ Tensor &operator=(Tensor &&rhs) noexcept = default; + /** + * @brief Comparison operator overload + * @param[in] rhs Tensor to be compared with + */ + bool operator==(const Tensor &rhs) const; + + /** + * @brief Comparison operator overload + * @param[in] rhs Tensor to be compared with + */ + bool operator!=(const Tensor &rhs) const { return !(*this == rhs); } + /** * @brief Construct a new Tensor object from a buffer * This will not copy buffer to a new tensor but directly uses it * - * @param buf buffer - * @param bytes buffer size in bytes - * @param d tensor dim - * @param offset offset to be used from current - * @return Tensor object - * @throws std::invalid_argument if buf is null + * @param[in] buf buffer + * @param[in] bytes buffer size in bytes + * @param[in] d tensor dim + * @param[in] offset offset to be used from current + * @return Tensor object + * @throws std::invalid_argument if buf is null */ template static Tensor Map(T *buf, unsigned int bytes, const TensorDim &d, @@ -477,117 +284,92 @@ class Tensor { "Creating shared tensor of size bigger than tensor memory."); } - Tensor tmp; - tmp.dim = d; - tmp.strides = d.computeStrides(); - /// Tensor does not own the memory - tmp.data = std::shared_ptr(new MemoryData((void *)buf), - std::default_delete()); - tmp.offset = offset; - - return tmp; + Tensor output("", d.getFormat(), d.getDataType()); + output.setTensorVar(d, buf, offset); + return output; }; - friend void swap(Tensor &lhs, Tensor &rhs) noexcept { - std::swap(lhs.dim, rhs.dim); - std::swap(lhs.strides, rhs.strides); - std::swap(lhs.contiguous, rhs.contiguous); - std::swap(lhs.initializer, rhs.initializer); - std::swap(lhs.data, rhs.data); - std::swap(lhs.name, rhs.name); - } - /** - * @brief Comparison operator overload - * @param[in] rhs Tensor to be compared with + * @brief Allocate memory for this tensor */ - bool operator==(const Tensor &rhs) const; + void allocate(); /** - * @brief Comparison operator overload - * @param[in] rhs Tensor to be compared with + * @brief Deallocate memory for this tensor + * @note This will not necessary free the memory as tensors share memory */ - bool operator!=(const Tensor &rhs) const { return !(*this == rhs); } + void deallocate(); /** - * @brief Allocate memory for this tensor + * @brief Check if the tensor has memory allocated/assigned/associated */ - void allocate(); + bool isAllocated(); /** - * @brief Deallocate memory for this tensor - * @note This will not necessary free the memory as tensors share memory + * @brief return Data pointer of Tensor + * @retval template T pointer */ - void deallocate() { - data = nullptr; - offset = 0; + template T *getData() const { + return (T *)itensor->getData(); } /** - * @brief Check if the tensor has memory allocated/assigned/associated + * @brief return Data pointer of Tensor + * @retval template T pointer */ - bool isAllocated() const { return data != nullptr; } + template T *getData(size_t idx) const { + return (T *)itensor->getData(idx); + } /** - * @brief return value at specific location - * @param[in] batch batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location + * @brief i data index + * @retval template T pointer (address of ith data) */ - template - const T &getValue(unsigned int batch, unsigned int c, unsigned int h, - unsigned int w) const noexcept { - return getValue(getIndex(batch, c, h, w)); + template T *getAddress(unsigned int i) { + return (T *)itensor->getAddress(i); } - 
template - T &getValue(unsigned int batch, unsigned int c, unsigned int h, - unsigned int w) noexcept { - return getValue(getIndex(batch, c, h, w)); + /** + * @brief i data index + * @retval template T pointer (address of ith data) + */ + template const T *getAddress(unsigned int i) const { + return (T *)itensor->getAddress(i); } /** - * @brief return value at specific location - * @param[in] idx location + * @brief get address of n-d data */ template - const T &getValue(unsigned int idx) const noexcept { - if (getDataType() == Tdatatype::QINT4) { - return getData()[idx / 2]; - } - return getData()[idx]; + T *getAddress(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) { + return getAddress(getIndex(b, c, h, w)); } /** - * @brief return value at specific location - * @param[in] idx location + * @brief get address of n-d data */ - template T &getValue(unsigned int idx) noexcept { - if (getDataType() == Tdatatype::QINT4) { - return getData()[idx / 2]; - } - return getData()[idx]; + template + const T *getAddress(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) const { + return getAddress(getIndex(b, c, h, w)); } /** * @brief return value at specific location * @param[in] idx location - * @retval qint4 value in location */ - uint8_t getValueQint4(unsigned int idx) const noexcept { - uint8_t value = getData()[idx / 2]; - return decode_qint(value, (idx % 2 == 0)); + template + const T &getValue(unsigned int idx) const noexcept { + return getData()[idx]; } /** * @brief return value at specific location * @param[in] idx location - * @retval qint4 value in location */ - uint8_t getValueQint4(unsigned int idx) noexcept { - uint8_t value = getData()[idx / 2]; - return decode_qint(value, (idx % 2 == 0)); + template T &getValue(unsigned int idx) noexcept { + return getData()[idx]; } /** @@ -596,13 +378,11 @@ class Tensor { * @param[in] c channel location * @param[in] h height location * @param[in] w width location - * @retval qint4 value in location */ - uint8_t getValueQint4(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const noexcept { - size_t idx = getIndex(b, c, h, w); - uint8_t value = getData()[idx / 2]; - return decode_qint(value, (idx % 2 == 0)); + template + const T &getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) const noexcept { + return getValue(getIndex(b, c, h, w)); } /** @@ -611,184 +391,242 @@ class Tensor { * @param[in] c channel location * @param[in] h height location * @param[in] w width location - * @retval qint4 value in location */ - uint8_t getValueQint4(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) noexcept { - size_t idx = getIndex(b, c, h, w); - uint8_t value = getData()[idx / 2]; - return decode_qint(value, (idx % 2 == 0)); + template + T &getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) noexcept { + return getValue(getIndex(b, c, h, w)); } /** - * @brief Get the Value thinking that it is padded - * for example, for the tensor (virtually padded) below, - * getValue(0, 0, 2, 2, 1, 1, .0f) will return 5 - * padding available for height and width axis for now - * 0 0 0 0 0 - * 0 1 2 3 0 - * 0 4 5 6 0 - * 0 7 8 9 0 - * 0 0 0 0 0 - * @param b batch index - * @param c channel index - * @param h height index - * @param w width index - * @param ph padding height - * @param pw padding width - * @return float value + * @brief Fill the Tensor elements with value + * @param[in] value value to be stored */ - template - const T getValuePaddedVirtual(unsigned int 
b, unsigned int c, unsigned int h, - unsigned int w, unsigned int ph, - unsigned int pw, - T pad_value = 0) const EXCEPT_WHEN_DEBUG { -#if DEBUG - unsigned int padded_h = 2 * ph + h; - unsigned int padded_w = 2 * pw + w; - if (h > padded_h && w > padded_w) { - throw std::out_of_range( - "[Tensor::getValuePadded] trying to access out of range"); - } -#endif - - if (ph <= h && h < ph + height() && pw <= w && w < pw + width()) { - return getValue(b, c, h - ph, w - pw); - } - - return pad_value; - } + void setValue(float value); /** - * @brief Multiply value element by element immediately - * @param[in] value multiplier - * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right - * @retval #ML_ERROR_NONE Successful + * @brief Set the element value + * @param[in] b batch location + * @param[in] c channel location + * @param[in] h height location + * @param[in] w width location + * @param[in] value value to be stored */ - int multiply_i(float const &value); + void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, + float value); /** - * @brief Multiply value element by element - * @param[in] value multiplier - * @retval Calculated Tensor + * @brief Set the element value + * @param[in] offset offset from start location + * @param[in] value value to be stored + * + * @todo This is a temporary workout. Remove this */ - Tensor multiply(float const &value) const; + void setValueInt(unsigned int offset, int value) noexcept { + int *data_int = (int *)getData(); + data_int[offset] = value; + } /** - * @brief multiply value element by element - * @param[in] value multiplier - * @param[out] out out tensor to store the result - * @retval Calculated Tensor + * @brief add the element value to the location + * @param[in] b batch location + * @param[in] c channel location + * @param[in] h height location + * @param[in] w width location + * @param[in] value value to be stored + * @param[in] beta scalar to multiply output with and add */ - Tensor &multiply(float const &value, Tensor &out) const; + void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, + float value, float beta) noexcept; /** - * @brief Multiply Tensor Elementwise - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval #ML_ERROR_NONE successful + * @brief Fill the Tensor elements with zero */ - int multiply_i(Tensor const &m, const float beta = 0.0); + void setZero(); /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor + * @brief Set the tensor with random normal distribution + * @param[in] mean mean of the distribution + * @param[in] std standard deviation of the distribution */ - Tensor multiply(Tensor const &m, const float beta = 0.0) const; + void setRandNormal(float mean = 0.0f, float stddev = 0.05f); /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[out] output Tensor to store the result - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor + * @brief Set the tensor with random uniform distribution + * @param[in] min minimum value for the distribution + * @param[in] max maximum value for the distribution */ - Tensor &multiply(Tensor const &m, Tensor &output, - const float beta = 0.0) const; + void setRandUniform(float min = -0.05f, float max = 0.05f); /** - * @brief Multiply Tensor 
Elementwise - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval #ML_ERROR_NONE successful - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to multiply_i + * @brief Set the tensor with random bernoulli distribution + * @param[in] probability probability value for the distribution */ - int multiply_i_strided(Tensor const &m, const float beta = 0.0); + void setRandBernoulli(float probability = 0.5f); /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to multiply + * @brief Initialize the memory of the given tensor */ - Tensor multiply_strided(Tensor const &m, const float beta = 0.0) const; + void initialize(); /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[out] output Tensor to store the result - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to multiply + * @brief Initialize the memory of the given tensor + * @param init Initiailizer to use for the initialization */ - Tensor &multiply_strided(Tensor const &m, Tensor &output, - const float beta = 0.0) const; + void initialize(Initializer init); /** - * @brief Add Tensor Elementwise - * @param[in] m Tensor to be added - * @param[in] beta scalar to add output with and add - * @retval #ML_ERROR_NONE successful - * - * @note support different strided inputs and output + * @brief Apply instantly to the element + * @param[in] *function function pointer applied + * @return int ML_ERROR_NONE if successful + */ + template int apply_i(std::function f) { + Tensor result = *this; + apply(f, result); + + return ML_ERROR_NONE; + }; + + /** + * @brief Apply function element by element + * @param[in] *function function pointer applied + * @retval Tensor + */ + template Tensor apply(std::function f) const { + Tensor result; + apply(f, result); + + return result; + }; + + /** + * @brief Apply function element by element + * @param[in] *function function pointer applied + * @param[out] output output tensor + * @retval Tensor + */ + template + Tensor &apply(std::function f, Tensor &output) const { + CREATE_IF_EMPTY_DIMS(output, {itensor->getFormat(), itensor->getDataType()}, + nullptr); + + if (itensor->getFormat() != output.itensor->getFormat() || + itensor->getDataType() != itensor->getDataType()) { + /// @todo add unittest + throw std::invalid_argument( + "[Tensor::apply] output dimension does not match"); + } + + itensor->apply(f, output); + + return output; + } + + /** + * @brief Apply function to Tensor + * @param[in] *function function pointer applied + * @retval Tensor + */ + Tensor apply(std::function f) const; + + /** + * @brief Apply function to Tensor + * @param[in] *function function pointer applied + * @param[out] output output tensor + * @retval Tensor + */ + Tensor &apply(std::function f, + Tensor &output) const; + + /** + * @brief Multiply Tensor Elementwise + * @param[in] m Tensor to be multiplied + * @param[in] beta scalar to multiply output with and add + * @retval #ML_ERROR_NONE successful + * + * @note support 
different strided inputs and output * @note does not support broadcasting * - * @todo merge this to add_i + * @todo merge this to multiply_i */ - int add_i_strided(Tensor const &m, const float beta = 0.0); + int multiply_i_strided(Tensor const &m, const float beta = 0.0); /** - * @brief Add Tensor Element by Element - * @param[in] m Tensor to be added - * @param[in] beta Value to be scale the added tensor + * @brief Multiply Tensor Element by Element ( Not the MxM ) + * @param[in] m Tensor to be multiplied + * @param[in] beta scalar to multiply output with and add * @retval Calculated Tensor * * @note support different strided inputs and output * @note does not support broadcasting * - * @todo merge this to add + * @todo merge this to multiply */ - Tensor add_strided(Tensor const &m, const float beta = 0.0) const; + Tensor multiply_strided(Tensor const &m, const float beta = 0.0) const; /** - * @brief Add Tensor Element by Element - * @param[in] m Tensor to be added + * @brief Multiply Tensor Element by Element ( Not the MxM ) + * @param[in] m Tensor to be multiplied * @param[out] output Tensor to store the result - * @param[in] beta Value to be scale the added tensor + * @param[in] beta scalar to multiply output with and add * @retval Calculated Tensor * * @note support different strided inputs and output * @note does not support broadcasting * - * @todo merge this to add + * @todo merge this to multiply */ - Tensor &add_strided(Tensor const &m, Tensor &output, - const float beta = 0.0) const; + Tensor &multiply_strided(Tensor const &m, Tensor &output, + const float beta = 0.0) const; + + /** + * @brief Multiply value element by element immediately + * @param[in] value multiplier + * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right + * @retval #ML_ERROR_NONE Successful + */ + int multiply_i(float const &value); + + /** + * @brief Multiply value element by element + * @param[in] value multiplier + * @retval Calculated Tensor + */ + Tensor multiply(float const &value) const; + + /** + * @brief multiply value element by element + * @param[in] value multiplier + * @param[out] out out tensor to store the result + * @retval Calculated Tensor + */ + Tensor &multiply(float const &value, Tensor &out) const; + + /** + * @brief Multiply Tensor Elementwise + * @param[in] m Tensor to be multiplied + * @param[in] beta scalar to multiply output with and add + * @retval #ML_ERROR_NONE successful + */ + int multiply_i(Tensor const &m, const float beta = 0.0); + + /** + * @brief Multiply Tensor Element by Element ( Not the MxM ) + * @param[in] m Tensor to be multiplied + * @param[in] beta scalar to multiply output with and add + * @retval Calculated Tensor + */ + Tensor multiply(Tensor const &m, const float beta = 0.0) const; + + /** + * @brief Multiply Tensor Element by Element ( Not the MxM ) + * @param[in] m Tensor to be multiplied + * @param[out] output Tensor to store the result + * @param[in] beta scalar to multiply output with and add + * @retval Calculated Tensor + */ + Tensor &multiply(Tensor const &m, Tensor &output, + const float beta = 0.0) const; /** * @brief Divide value element by element immediately @@ -808,10 +646,10 @@ class Tensor { /** * @brief Divide value element by element * @param[in] value Divisor - * @param[out] out out parameter to store the result + * @param[out] output Tensor to store the result * @retval Calculated Tensor */ - Tensor ÷(float const &value, Tensor &out) const; + Tensor ÷(float const &value, Tensor &output) const; /** * @brief divide Tensor 
Elementwise @@ -836,10 +674,51 @@ class Tensor { Tensor ÷(Tensor const &m, Tensor &output) const; /** - * @brief Add Tensor Element immediately to target tensor without mem copy + * @brief Add Tensor Elementwise + * @param[in] input Tensor to be added + * @param[in] beta scalar to add output with and add + * @retval #ML_ERROR_NONE successful + * + * @note support different strided inputs and output + * @note does not support broadcasting + * + * @todo merge this to add_i + */ + int add_i_strided(Tensor const &input, const float beta = 0.0); + + /** + * @brief Add Tensor Element by Element + * @param[in] input Tensor to be added + * @param[in] beta Value to be scale the input tensor + * @retval Calculated Tensor + * + * @note support different strided inputs and output + * @note does not support broadcasting + * + * @todo merge this to add + */ + Tensor add_strided(Tensor const &input, const float beta = 0.0) const; + + /** + * @brief Add Tensor Element by Element + * @param[in] input Tensor to be added + * @param[out] output Tensor to store the result + * @param[in] beta Value to be scale the input tensor + * @retval Calculated Tensor + * + * @note support different strided inputs and output + * @note does not support broadcasting + * + * @todo merge this to add + */ + Tensor &add_strided(Tensor const &input, Tensor &output, + const float beta = 0.0) const; + + /** + * @brief Add Tensor Element immediately to target tensor without mem copy * @param[in] value value to be added - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @retval #ML_ERROR_NONE Successful + * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ int add_i(float const &value); @@ -851,35 +730,35 @@ class Tensor { Tensor add(float const &value) const; /** - * @brief Add Tensor Element by Element - * @param[in] value value to be added - * @param[out] out Tensor to save output without allocating new memory - * @retval Calculated Tensor + * @brief Add Tensor Element by Element + * @param[in] value value to be added + * @param[out] output Tensor to save output without allocating new memory + * @retval Calculated Tensor */ - Tensor &add(float const &value, Tensor &out) const; + Tensor &add(float const &value, Tensor &output) const; /** - * @brief Add Tensor Element by Element without mem copy + * @brief Add Tensor Element by Element without mem copy * @param[in] m Tensor to be added - * @param[out] alpha Values to be scaled - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @param[in] alpha Values to be scaled + * @retval #ML_ERROR_NONE Successful + * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ int add_i(Tensor const &m, float const alpha = 1.F); -/** - * @brief Do add_i for specific section - * - * @param len Length of the specific section - * @param addr_idx Starting index of the psecific section - * @param m Input Tensor to be added - * @param incX Incremental index of X - * @param incY Incremental index of Y - * @param alphas Vector of multiple alpha values - * @param alpha_idx Index of alpha in alpha vector - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter - */ + /** + * @brief Do add_i for specific section + * + * @param len Length of the specific section + * @param addr_idx Starting index of the psecific section + * @param m Input Tensor to be added + * @param incX Incremental index of X + * @param incY Incremental index of Y + * @param alphas Vector of multiple 
alpha values + * @param alpha_idx Index of alpha in alpha vector + * @retval #ML_ERROR_NONE Successful + * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + */ int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m, unsigned int incX, unsigned int incY, const Tensor alphas, unsigned int alpha_idx); @@ -887,23 +766,24 @@ class Tensor { /** * @brief Add Tensor Element by Element * @param[in] m Tensor to be added + * @param[in] alpha Values to be scaled * @retval Calculated Tensor */ Tensor add(Tensor const &m, float const alpha = 1) const; /** - * @brief Add Tensor Element by Element - * @param[in] m Tensor to be added - * @param[out] m Tensor to be out - * @retval Calculated Tensor + * @brief Add Tensor Element by Element + * @param[in] m Tensor to be added + * @param[out] output Tensor to be out + * @param[in] alpha Values to be scaled + * @retval Calculated Tensor */ - Tensor &add(Tensor const &m, Tensor &out, float const alpha = 1) const; + Tensor &add(Tensor const &m, Tensor &output, float const alpha = 1) const; /** * @brief memcpyless version of subtract - * @param[in] value value to subtract - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @retval #ML_ERROR_NONE Successful + * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ int subtract_i(float const &value); @@ -915,18 +795,18 @@ class Tensor { Tensor subtract(float const &value) const; /** - * @brief Subtract Tensor Element by Element - * @param[in] value value to be added - * @param[out] out Tensor to save output without allocating new memory - * @retval Calculated Tensor + * @brief Subtract Tensor Element by Element + * @param[in] value value to be added + * @param[out] output Tensor to save output without allocating new memory + * @retval Calculated Tensor */ - Tensor &subtract(float const &value, Tensor &out) const; + Tensor &subtract(float const &value, Tensor &output) const; /** * @brief memcpyless version of subtract * @param[in] m Tensor to be subtracted - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @retval #ML_ERROR_NONE Successful + * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter */ int subtract_i(Tensor const &m); @@ -938,308 +818,165 @@ class Tensor { Tensor subtract(Tensor const &m) const; /** - * @brief Subtract Tensor Element by Element - * @param[in] m Tensor to be added - * @param[out] m Tensor to be out - * @retval Calculated Tensor + * @brief Subtract Tensor Element by Element + * @param[in] m Tensor to be added + * @param[out] output Tensor to be out + * @retval Calculated Tensor */ - Tensor &subtract(Tensor const &m, Tensor &out) const; + Tensor &subtract(Tensor const &m, Tensor &output) const; /** - * @brief Tensor power elementwise - * - * @param exponent exponent - * @return int ML_ERROR_NONE if successful + * @brief sum all the Tensor elements according to the batch + * @retval Calculated Tensor(batch, 1, 1, 1) */ - int pow_i(float exponent); + Tensor sum_by_batch() const; /** - * @brief Tensor power Element by Element - * @param[in] exponent exponent - * @retval Calculated Tensor + * @brief sum all the Tensor elements according to the axis + * 0 : batch direction + * 1 : channel direction + * 2 : height direction + * 3 : width direction + * @param[in] axis Axis to calculate sum along + * @param[in] alpha Scale the sum by this value + * @retval Calculated Tensor */ - Tensor pow(float exponent) const; + Tensor sum(unsigned int axis, float alpha = 1.0) const; /** - * 
@brief Tensor power Element by Element - * @param[in] exponent exponent - * @param[out] out out to store the result - * @retval Calculated Tensor + * @brief sum all the Tensor elements according to the axis + * 0 : batch direction + * 1 : channel direction + * 2 : height direction + * 3 : width direction + * @param[in] axis Axis to calculate sum along + * @param[out] output output tensor + * @param[in] alpha Scale the sum by this value + * @retval Calculated Tensor */ - Tensor &pow(float exponent, Tensor &out) const; + Tensor &sum(unsigned int axis, Tensor &output, float alpha = 1.0, + float beta = 0.0) const; /** - * @brief gaussian error function - * @return int ML_ERROR_NONE if successful + * @brief sum all the Tensor by multiple axes + * + * @param axes axes to sum along + * @param alpha Scale the sum by this value + * @return Tensor */ - int erf_i(); + Tensor sum(const std::vector &axes, float alpha = 1.0) const; /** - * @brief gaussian error function - * @retval Calculated Tensor + * @brief sum all the Tensor by multiple axes + * + * @param axes axes to sum along + * @param[out] output output tensor + * @param alpha Scale the sum by this value + * @return Tensor */ - Tensor erf() const; + Tensor &sum(const std::vector &axes, Tensor &output, + float alpha = 1.0) const; /** - * @brief gaussian error function - * @param[out] out out to store the result - * @retval Calculated Tensor + * @brief Averaging the Tensor elements according to the axis + * 0 : batch direction + * 1 : channel direction + * 2 : height direction + * 3 : width direction + * @retval Calculated Tensor */ - Tensor &erf(Tensor &out) const; + Tensor average(unsigned int axis) const; /** - * @brief sin transform function - * @param[out] out out to store the result + * @brief Averaging the Tensor elements according to the axis + * @retval Calculated Tensor */ - void sin(Tensor &out, float alpha = 1.0); + Tensor &average(unsigned int axis, Tensor &output) const; /** - * @brief cos transform function - * @param[out] out out to store the result + * @brief Average all the Tensor by multiple axes + * @param[in] axes axes to sum along + * @retval Calculated Tensor */ - void cos(Tensor &out, float alpha = 1.0); + Tensor average(const std::vector &axes) const; /** - * @brief inverse squared root function - * + * @brief Average all the Tensor by multiple axes + * @param[in] axes axes to sum along + * @param[out] output output tensor + * @retval Calculated Tensor */ - void inv_sqrt_i(); + Tensor &average(const std::vector &axes, Tensor &output) const; /** - * @brief getter of size of data - * @retval size of data + * @brief Average the Tensor elements by all axis + * @retval Calculated Tensor */ - unsigned int sizeofData() { return dim.getDataTypeSize(); } - - /** - * @brief Dot Product of Tensor ( equal MxM ) - * @details This applies dot of the last dimension of this and second-last - * dimension of passed tensor m. - * @param[in] m Tensor - * @param[in] trans Transpose - * @param[in] trans_m Transpose m - * @retval Calculated Tensor - */ - Tensor dot(Tensor const &m, bool trans = false, bool trans_m = false) const; + Tensor average() const; /** - * @brief Dot Product of Tensor ( equal MxM ) - * @details This applies dot of the last dimension of this and second-last - * dimension of passed tensor m. 
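The relocated reductions keep the axis convention spelled out in their comments (0 = batch, 1 = channel, 2 = height, 3 = width). A small sketch of how they compose, assuming the usual dimension constructor; the shapes in the comments follow the documented contracts:

#include <tensor.h>

void reduction_sketch() {
  nntrainer::Tensor t(2, 1, 3, 4);
  t.setValue(1.0f);

  nntrainer::Tensor per_batch = t.sum_by_batch();  // (2, 1, 1, 1); each entry is 3 * 4 = 12
  nntrainer::Tensor width_sum = t.sum(3);          // sum along width -> (2, 1, 3, 1)
  nntrainer::Tensor hw_mean   = t.average({2, 3}); // mean over height and width -> (2, 1, 1, 1)
}
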
- * @param[in] m Tensor - * @param[in] output output Tensor - * @param[in] trans Transpose - * @param[in] trans_m Transpose m - * @param[in] beta beta + * @brief Averaging the Tensor elements by all axis * @retval Calculated Tensor */ - Tensor &dot(Tensor const &m, Tensor &output, bool trans = false, - bool trans_m = false, float beta = 0.0f) const; - - /** - * @brief compute the derivative of this in the current tensor - * @param m same as given to the dot() - * @param output_deriv the derivative of the output - * @param[in] trans same as given to the dot() - * @param[in] trans_m same as given to the dot() - * @param[in] beta same as given to the dot() - * @note This will compute the derivative in-place and will overwrite existing - * data in the tensor - */ - Tensor &dot_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv, - bool trans = false, bool trans_m = false, - float beta = 0.0f); - - /** - * @brief compute the derivative wrt m in the m tensor - * @param m_deriv tensor where derivative wrt m will be stored - * @param output_deriv the derivative of the output - * @param[in] trans same as given to the dot() - * @param[in] trans_m same as given to the dot() - * @param[in] beta same as given to the dot() - * @note The caller tensor must be the same tensor as the one which called the - * dot() product. - */ - Tensor &dot_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv, - bool trans = false, bool trans_m = false, - float beta = 0.0f) const; - - /** - * @copydoc Tensor::dot(Tensor const &m, Tensor &output, bool trans, - bool trans_m, float beta) const - * @details performs dot operation over a batch of inputs - */ - Tensor &dotBatched(Tensor const &m, Tensor &result, bool trans = false, - bool trans_m = false, float beta = 0.0f) const; - - /** - * @copydoc Tensor::dot_deriv_wrt_1(Tensor const &m, Tensor const - &output_deriv, bool trans, bool trans_m, float beta) - */ - Tensor &dot_batched_deriv_wrt_1(Tensor const &m, Tensor const &output_deriv, - bool trans = false, bool trans_m = false, - float beta = 0.0f); - - /** - * @brief Tensor::dot_deriv_wrt_2(Tensor const &m_deriv, Tensor const - &output_deriv, bool trans, bool trans_m, float beta) const - */ - Tensor &dot_batched_deriv_wrt_2(Tensor &m_deriv, Tensor const &output_deriv, - bool trans = false, bool trans_m = false, - float beta = 0.0f) const; - - /** - * @brief Transpose Tensor - * - * @param direction to transpose ex) 0:2:1 - * @return Tensor - */ - Tensor transpose(const std::string &direction) const; - - /** - * @brief Transpose Tensor - * @param direction to transpose ex) 0:2:1 - * @param[out] Tensor to save to, dimension is always reshaped. - * @retval Tensor& reference to the out - */ - Tensor &transpose(const std::string &direction, Tensor &out) const; - - /** - * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) - * @param dropout drop out rate - * @retval Tensor& reference of drop out mask - */ - Tensor dropout_mask(float dropout) const; - - /** - * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) inplace - * @param dropout drop out rate - */ - void dropout_mask(float dropout); - - /** - * @brief Calculate filter mask - * @param mask_len length of each mask along the last axis - * @param invert invert the mask - */ - void filter_mask(const Tensor &mask_len, bool reverse = false); - - /** - * @brief Calculate 2 Zone Out Mask - * @details Calculate zone out mask according to the bernoulli distribution. 
- * Zone out mask with rate @a zoneout for inplace and the other zone out mask - * with rate @a (1-zoneout). - * @param zoneout zone out rate - * @retval Tensor zone out mask for opposite tensor - */ - Tensor zoneout_mask(float zoneout); - - /** - * @brief Calculate 2 Zone Out Mask - * @details Calculate zone out mask according to the bernoulli distribution. - * Zone out mask with rate @a zoneout for inplace and the other zone out mask - * with rate @a (1-zoneout). - * @param opposite opposite zone out mask - * @param zoneout zone out rate - */ - void zoneout_mask(Tensor &opposite, float zoneout); - - /** - * @brief sum all the Tensor elements according to the batch - * @retval Calculated Tensor(batch, 1, 1, 1) - */ - Tensor sum_by_batch() const; + Tensor &average(Tensor &output) const; /** - * @brief sum all the Tensor elements according to the axis - * 0 : batch direction - * 1 : channel direction - * 2 : height direction - * 3 : width direction - * @param[in] axis Axis to calculate sum along - * @param[in] alpha Scale the sum by this value - * @retval Calculated Tensor + * @brief Tensor power element without mem copy + * @param[in] exponent exponent + * @retval #ML_ERROR_NONE Successful */ - Tensor sum(unsigned int axis, float alpha = 1.0) const; + int pow_i(float exponent); /** - * @brief sum all the Tensor elements according to the axis - * 0 : batch direction - * 1 : channel direction - * 2 : height direction - * 3 : width direction - * @param[in] axis Axis to calculate sum along - * @param[out] output output tensor - * @param[in] alpha Scale the sum by this value + * @brief Tensor power element by element + * @param[in] exponent exponent * @retval Calculated Tensor */ - Tensor &sum(unsigned int axis, Tensor &output, float alpha = 1.0, - float beta = 0.0) const; + Tensor pow(float exponent) const; /** - * @brief sum all the Tensor by multiple axes - * - * @param axes axes to sum along - * @param alpha Scale the sum by this value - * @return Tensor + * @brief Tensor power element by element + * @param[in] exponent exponent + * @param[out] output out to store the result + * @retval Calculated Tensor */ - Tensor sum(const std::vector &axes, float alpha = 1.0) const; + Tensor &pow(float exponent, Tensor &output) const; /** - * @brief sum all the Tensor by multiple axes - * - * @param axes axes to sum along - * @param[out] output output tensor - * @param alpha Scale the sum by this value - * @return Tensor + * @brief Gauss error function + * @retval #ML_ERROR_NONE Successful */ - Tensor &sum(const std::vector &axes, Tensor &output, - float alpha = 1.0) const; + int erf_i(); /** - * @brief Averaging the Tensor elements according to the axis - * 0 : batch direction - * 1 : channel direction - * 2 : height direction - * 3 : width direction + * @brief Gauss error function * @retval Calculated Tensor */ - Tensor average(unsigned int axis) const; - /** - * @brief Averaging the Tensor elements according to the axis - * - * @retval Calculated Tensor - */ - Tensor &average(unsigned int axis, Tensor &output) const; + Tensor erf() const; /** - * @brief average all the Tensor by multiple axes - * - * @param axes axes to sum along - * @return Tensor + * @brief Gauss error function + * @param[out] output out to store the result + * @retval Calculated Tensor */ - Tensor average(const std::vector &axes) const; + Tensor &erf(Tensor &output) const; /** - * @brief average all the Tensor by multiple axes - * - * @param axes axes to sum along - * @param output output tensor - * @return Tensor + * @brief sin 
transform function + * @param[out] out out to store the result */ - Tensor &average(const std::vector &axes, Tensor &output) const; + void sin(Tensor &out, float alpha = 1.0); /** - * @brief Averaging the Tensor elements by all axis - * @retval Calculated Tensor + * @brief cos transform function + * @param[out] out out to store the result */ - Tensor average() const; + void cos(Tensor &out, float alpha = 1.0); /** - * @brief Averaging the Tensor elements by all axis - * @retval Calculated Tensor + * @brief inverse squared root function */ - Tensor &average(Tensor &output) const; + void inv_sqrt_i(); /** * @brief Anchor a starting point to defer following evaluation @@ -1247,12 +984,6 @@ class Tensor { */ LazyTensor chain() const; - /** - * @brief Softmax the Tensor elements - * @retval Calculated Tensor - */ - Tensor softmax() const; - /** * @brief l2norm the Tensor elements * @retval Calculated l2norm @@ -1284,371 +1015,126 @@ class Tensor { void standardization_i(); /** - * @brief i data index - * @retval address of ith data - */ - template T *getAddress(unsigned int i) { - size_t index = getIndex(batch(), channel(), height(), width()); - if (i > index) { - return nullptr; - } - if (getDataType() == Tdatatype::QINT4) - return &getData()[i / 2]; - return &getData()[i]; - } - - /** - * @brief i data index - * @retval address of ith data - */ - template const T *getAddress(unsigned int i) const { - size_t index = getIndex(batch(), channel(), height(), width()); - if (i > index) { - return nullptr; - } - - if (getDataType() == Tdatatype::QINT4) - return &getData()[i / 2]; - return &getData()[i]; - } - - /** - * @brief get address of n-d data - */ - template - T *getAddress(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) { - return getAddress(getIndex(b, c, h, w)); - } - - /** - * @brief get address of n-d data - */ - template - const T *getAddress(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const { - return getAddress(getIndex(b, c, h, w)); - } - - /** - * @brief Apply instantly to the element - * - * @param f function to apply - * @return int ML_ERROR_NONE if successful - */ - template int apply_i(std::function f) { - Tensor result = *this; - apply(f, result); - - return ML_ERROR_NONE; - }; - - /** - * @brief Apply function element by element - * @param[in] *function function pointer applied - * @param[out] output output tensor - * @retval Tensor - */ - template - Tensor &apply(std::function f, Tensor &output) const { - CREATE_IF_EMPTY_DIMS(output, dim, nullptr); - - if (dim != output.dim) { - /// @todo add unittest - throw std::invalid_argument( - "[Tensor::apply] output dimension does not match"); - } - - if (contiguous && output.contiguous) { - const T *data = (getData()); - T *rdata = (output.getData()); - - std::transform(data, data + size(), rdata, f); - } else if (strides[3] == 1 && output.strides[3] == 1) { - /** @todo optimize this with combining these loops where stride is 1 */ - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - T *out_data = output.getAddress(b, c, h, 0); - const T *in_data = getAddress(b, c, h, 0); - std::transform(in_data, in_data + width(), out_data, f); - } - } - } - } else { - for (unsigned int b = 0; b < batch(); ++b) { - for (unsigned int c = 0; c < channel(); ++c) { - for (unsigned int h = 0; h < height(); ++h) { - for (unsigned int w = 0; w < width(); ++w) { - output.setValue(b, c, h, w, f(getValue(b, c, h, w))); - } - } - 
} - } - } - - return output; - }; - - /** - * @brief Apply function element by element - * @param[in] *function function pointer applied - * @retval Tensor - */ - template Tensor apply(std::function f) const { - Tensor result; - apply(f, result); - - return result; - }; - - /** - * @brief Apply function to Tensor - * @param[in] *function function pointer applied - * @retval Tensor - */ - Tensor apply(std::function f) const; - - /** - * @brief Apply function to Tensor - * @param[in] *function function pointer applied - * @param[out] output output tensor - * @retval Tensor - */ - Tensor &apply(std::function f, - Tensor &output) const; - - /** - * @brief Print element - * @param[in] out out stream - * @retval Tensor - */ - void print(std::ostream &out) const; - - /** - * @brief Print element - * @param[in] out out stream - * @param[in] opt print formatting option. opt=0 would pretty print the data, - * else it would print the raw data. - * @retval Tensor - */ - void print_(std::ostream &out, uint opt = 0) const; - - /** - * @brief Get size of current tensor - * @retval unsigned int size of the current tensor - */ - size_t size() const { return dim.getDataLen(); } - - /** - * @brief Get if the tensor is empty - * @retval true if the tensor is empty - */ - bool empty() const { return size() == 0; } - - /** - * @brief Get size of the data in bytes - * @retval size_t Size in bytes - */ - size_t bytes() const { - if (getDataType() == Tdatatype::QINT4) { - return (size() * dim.getDataTypeSize() + 1) / 2; - } - return size() * dim.getDataTypeSize(); - } - - /** - * @brief Set the element value - * @param[in] batch batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location - * @param[in] value value to be stored - */ - void setValue(unsigned int batch, unsigned int c, unsigned int h, - unsigned int w, float value) noexcept { - if (getDataType() == Tdatatype::FP32) { - getData()[getIndex(batch, c, h, w)] = value; - } else if (getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - getData<_FP16>()[getIndex(batch, c, h, w)] = static_cast<_FP16>(value); -#else - ml_loge("%s", "Error: enable-fp16 is not enabled"); -#endif - } else if (getDataType() == Tdatatype::QINT8) { - getData()[getIndex(batch, c, h, w)] = value; - } else if (getDataType() == Tdatatype::QINT4) { - int idx = getIndex(batch, c, h, w); - - if (idx % 2 == 0) { - getData()[idx / 2] = - encode_qint(value, getData()[idx / 2]); - } else { - getData()[idx / 2] = - encode_qint(getData()[idx / 2] >> 4, value); - } - } - } - - /** - * @brief add the element value to the location - * @param[in] batch batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location - * @param[in] value value to be stored - * @param[in] beta scalar to multiply output with and add - */ - void addValue(unsigned int batch, unsigned int c, unsigned int h, - unsigned int w, float value, float beta) noexcept { - auto const &idx = getIndex(batch, c, h, w); - if (dim.getDataType() == Tdatatype::FP32) { - getData()[idx] *= beta; - getData()[idx] += value; - } else if (dim.getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - getData<_FP16>()[idx] *= static_cast<_FP16>(beta); - getData<_FP16>()[idx] += static_cast<_FP16>(value); -#else - ml_loge("%s", "Error: enable-fp16 is not enabled"); -#endif - } else if (getDataType() == Tdatatype::QINT8) { - getData()[idx] *= beta; - getData()[idx] += value; - } - } - - /** - * @brief Set the element value - * @param[in] offset 
offset from start location - * @param[in] value value to be stored - * - * @todo This is a temporary workout. Remove this once multiple datatypes - * are supported. - */ - void setValueInt(unsigned int offset, int value) noexcept { - int *data_int = (int *)getData(); - data_int[offset] = value; - } - - /** - * @brief Fill the Tensor elements with value - * @param[in] value value to be stored - */ - void setValue(float value); - - /** - * @brief Fill the Tensor elements with zero + * @brief Dot Product of Tensor ( equal MxM ) + * @details This applies dot of the last dimension of this and second-last + * dimension of passed input tensor. + * @param[in] input Tensor + * @param[in] trans Transpose + * @param[in] trans_in Transpose input + * @retval Calculated Tensor */ - void setZero(); + Tensor dot(Tensor const &input, bool trans = false, + bool trans_in = false) const; /** - * @brief Set the Dist object - * - * @tparam T distrubution engine - * @param dist distribution engine + * @brief Dot Product of Tensor ( equal MxM ) + * @details This applies dot of the last dimension of this and + * second-last dimension of passed input tensor. + * @param[in] input Tensor + * @param[in] output output Tensor + * @param[in] trans Transpose + * @param[in] trans_in Transpose input + * @param[in] beta beta + * @retval Calculated Tensor */ - template void setDist(Engine dist) { - NNTR_THROW_IF(!contiguous, std::invalid_argument) - << getName() << " Tensor is not contiguous, cannot set distribution"; - - T *data_ = getData(); - unsigned int len = size(); - for (unsigned int i = 0; i < len; ++i) { - data_[i] = (T)dist(rng); - } - }; + Tensor &dot(Tensor const &input, Tensor &output, bool trans = false, + bool trans_in = false, float beta = 0.0f) const; /** - * @brief Set the tensor with random normal distribution - * @param[in] mean mean of the distribution - * @param[in] std standard deviation of the distribution + * @brief compute the derivative of this in the current tensor + * @param input same as given to the dot() + * @param output_deriv the derivative of the output + * @param[in] trans same as given to the dot() + * @param[in] trans_in same as given to the dot() + * @param[in] beta same as given to the dot() + * @note This will compute the derivative in-place and will overwrite + existing + * data in the tensor */ - void setRandNormal(float mean = 0.0f, float std = 0.05f); + Tensor &dot_deriv_wrt_1(Tensor const &input, Tensor const &output_deriv, + bool trans = false, bool trans_in = false, + float beta = 0.0f); /** - * @brief Set the tensor with random uniform distribution - * @param[in] min minimum value for the distribution - * @param[in] max maximum value for the distribution + * @brief compute the derivative wrt m in the input tensor + * @param input_deriv tensor where derivative wrt m will be stored + * @param output_deriv the derivative of the output + * @param[in] trans same as given to the dot() + * @param[in] trans_in same as given to the dot() + * @param[in] beta same as given to the dot() + * @note The caller tensor must be the same tensor as the one which called + the dot() product. 
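The dot()/dot_deriv_wrt_*() trio documented here is meant to be used in pairs: the forward call fixes which operand was which, and the derivative calls reproduce that pairing. A hedged sketch under that reading; the tensor names are hypothetical and the dimension constructor is assumed:

#include <tensor.h>

void dot_and_gradients_sketch() {
  nntrainer::Tensor input(1, 1, 16, 32), weight(1, 1, 32, 8), output(1, 1, 16, 8);
  input.setRandNormal();
  weight.setRandNormal();

  input.dot(weight, output);                 // forward: last dim of input against
                                             // second-to-last dim of weight

  nntrainer::Tensor out_grad(1, 1, 16, 8), in_grad(1, 1, 16, 32), w_grad(1, 1, 32, 8);
  out_grad.setValue(1.0f);

  in_grad.dot_deriv_wrt_1(weight, out_grad); // derivative w.r.t. the first operand, in-place
  input.dot_deriv_wrt_2(w_grad, out_grad);   // caller must be the tensor that called dot()
}
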
*/ - void setRandUniform(float min = -0.05f, float max = 0.05f); + Tensor &dot_deriv_wrt_2(Tensor &input_deriv, Tensor const &output_deriv, + bool trans = false, bool trans_in = false, + float beta = 0.0f) const; /** - * @brief Set the tensor with random bernoulli distribution - * @param[in] probability probability value for the distribution + * @copydoc Tensor::dot(Tensor const &input, Tensor &output, bool trans, + bool trans_in, float beta) const + * @details performs dot operation over a batch of inputs */ - void setRandBernoulli(float probability = 0.5f); + Tensor &dotBatched(Tensor const &input, Tensor &result, bool trans = false, + bool trans_in = false, float beta = 0.0f) const; /** - * @brief Initialize the memory of the given tensor + * @copydoc Tensor::dot_deriv_wrt_1(Tensor const &input, Tensor const + &output_deriv, bool trans, bool trans_in, float beta) */ - void initialize(); + Tensor &dot_batched_deriv_wrt_1(Tensor const &input, + Tensor const &output_deriv, + bool trans = false, bool trans_in = false, + float beta = 0.0f); /** - * @brief Initialize the memory of the given tensor - * @param init Initiailizer to use for the initialization + * @brief Tensor::dot_deriv_wrt_2(Tensor const &input_deriv, Tensor const + &output_deriv, bool trans, bool trans_in, float beta) const */ - void initialize(Initializer init) { - initializer = init; - initialize(); - } + Tensor &dot_batched_deriv_wrt_2(Tensor &input_deriv, + Tensor const &output_deriv, + bool trans = false, bool trans_in = false, + float beta = 0.0f) const; /** - * @brief set the memory format - * @param fm format of Tensor + * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) + * @param dropout drop out rate + * @retval Tensor& reference of drop out mask */ - void convertFormat(TensorDim::Format fm) { - if (getFormat() != fm) { - transpose("2:1:0"); - } - - dim.setFormat(fm); - } + Tensor dropout_mask(float dropout) const; /** - * @brief Copy the Tensor - * @param[in] from Tensor to be copied - * - * @note copy can reshape the tensor to match the shape + * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) inplace + * @param dropout drop out rate */ - void copy(const Tensor &from); + void dropout_mask(float dropout); /** - * @brief Copy the Tensor - * @param[in] from Tensor to be copied + * @brief Calculate filter mask + * @param mask_len length of each mask along the last axis + * @param invert invert the mask */ - void copyData(const Tensor &from); + void filter_mask(const Tensor &mask_len, bool reverse = false); /** - * @brief Copy the Tensor - * @param[in] from Tensor to be copied + * @brief Calculate 2 Zone Out Mask + * @details Calculate zone out mask according to the bernoulli distribution. + * Zone out mask with rate @a zoneout for inplace and the other zone out mask + * with rate @a (1-zoneout). + * @param zoneout zone out rate + * @retval Tensor zone out mask for opposite tensor */ - void copy_with_stride(const Tensor &from); + Tensor zoneout_mask(float zoneout); /** - * @brief Get slice of the tensor, sliced by batch - * @param[in] offset offset in batch to start the slice - * @param[in] size size of the slice - * @retval slice of this tensor - * @note This function provides a slice of this tensor, and does not create a - * copy + * @brief Calculate 2 Zone Out Mask + * @details Calculate zone out mask according to the bernoulli distribution. + * Zone out mask with rate @a zoneout for inplace and the other zone out mask + * with rate @a (1-zoneout). 
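zoneout_mask() is described above as producing two complementary Bernoulli masks: the calling tensor keeps the mask drawn with rate zoneout, and the returned tensor carries the mask drawn with rate (1 - zoneout). A short sketch of that contract, with hypothetical names and an assumed dimension constructor:

#include <tensor.h>

void zoneout_sketch() {
  nntrainer::Tensor mask(1, 1, 1, 8);              // becomes the in-place zoneout mask
  nntrainer::Tensor opposite = mask.zoneout_mask(0.1f);

  // mask now holds the Bernoulli mask drawn with rate 0.1 and opposite holds the
  // complementary mask drawn with rate (1 - 0.1), as documented above.
}
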
+ * @param opposite opposite zone out mask + * @param zoneout zone out rate */ - Tensor getBatchSlice(size_t offset, unsigned int size) const; + void zoneout_mask(Tensor &opposite, float zoneout); - /** - * @brief Get new tensor which shares memory with current tensor but different - * shape - * - * @param dim new dimension to be set for this tensor - * @param offset offset to be used from the start of the data in elements - * @note The new tensor will share the same data as the current tensor but - * can have different size. - * @note New size added with offset must be less than the size of the original - * tensor. - */ - Tensor getSharedDataTensor(const TensorDim dim, size_t offset, - bool reset_stride = true, - const std::string &name_ = "") const; /** * @brief split tensor along axis. * @@ -1664,221 +1150,151 @@ class Tensor { * @param sizes sizes * @param axis axis * @return Tensor splitted tensor - * @note if the given array sizes is just a 1 unsigned int value, assumes that - * it divide tensor by given size evenly - */ - std::vector split(std::vector sizes, int axis = 0); - - /** - * @brief concatenate tensors along axis - * - * @param tensors tensors to be concatenated to the first tensor - * @param axis axis - * @return Tensor concatenated tensor - */ - static Tensor cat(const std::vector &tensors, int axis = 0); - - /** - * @brief make this tensor share memory with given tensor - * - * @param src Source tensor whose memory is to be shared - * @param offset offset to be used from the start of the data in bytes - * @note This tensor will share the same data as the current tensor but - * can have different size. - * @note This tensor's size added with offset must be less than the size of - * the source tensor. - * @note The stride of the source tensor and this tensor must be same. - */ - void makeSharedDataTensor(const Tensor &src, size_t offset = 0); - - /** - * @brief Convient wrapper for inplace copy of @a this. 
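split() and cat() above act as inverses along a given axis when the split sizes add back up to the original extent. A minimal sketch, assuming the usual dimension constructor; the sizes are only illustrative:

#include <tensor.h>
#include <vector>

void split_concat_sketch() {
  nntrainer::Tensor t(1, 1, 4, 6);
  t.setRandUniform();

  std::vector<nntrainer::Tensor> parts = t.split({2, 4}, 3);    // widths 2 and 4 along axis 3
  nntrainer::Tensor joined = nntrainer::Tensor::cat(parts, 3);  // back to (1, 1, 4, 6)
}
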
- * @retval Copied version of this - */ - Tensor clone() const; - - /** - * @brief Save the Tensor into file - * @param[in] file output file stream - */ - void save(std::ostream &file); - - /** - * @brief Read the Tensor from file - * @param[in] file input file stream - * @param[in] s_type scale factor data type + * @note if the given array sizes is just a 1 unsigned int value, assumes that + * it divide tensor by given size evenly */ - void read(std::ifstream &file, Tdatatype s_type = Tdatatype::FP32); + std::vector split(std::vector sizes, int axis = 0); /** - * @brief return argument index which value is max by batch - * @retval unsigned int argument index + * @brief concatenate tensors along axis + * + * @param tensors tensors to be concatenated to the first tensor + * @param axis axis + * @return Tensor concatenated tensor */ - std::vector argmax() const; + Tensor concat(const std::vector &tensors, int axis = 0); /** - * @brief return max of the absolute values of the tensor - * @retval maximum absolute value + * @brief concatenate tensors along axis + * + * @param tensors tensors to be concatenated to the first tensor + * @param axis axis + * @return Tensor concatenated tensor */ - float max_abs() const; + static Tensor cat(const std::vector &tensors, int axis = 0); /** - * @brief return a copy of the Tensor Dim - * @retval TensorDim + * @brief Print element + * @param[in] out out stream */ - TensorDim getDim() const { return TensorDim(dim); } + void print(std::ostream &out) const; /** - * @brief return Tensor Dim for a given axis - * @retval dimension + * @brief put data of Tensor + * @note It is only effective when memory_swap is used */ - size_t getTensorDim(unsigned int axis); + void putData() const; /** - * @brief return Tensor Type + * @brief Set the memory buffer for the tensor + * + * @param buf the memory buffer + * @param init intialize the buffer */ - TensorDim::TensorType getTensorType() const { return dim.getTensorType(); }; + void setData(const std::shared_ptr buf, size_t off = 0, + bool init = false); /** - * @brief return Tensor batch size - * @retval batch size + * @brief return Data pointer of Tensor + * @retval template T pointer (float pointer as default) */ - size_t batch() const { return dim.batch(); } + const std::shared_ptr getMemoryData() const; /** - * @brief return Tensor batch size - * @retval batch size + * @brief return offset */ - size_t channel() const { return dim.channel(); } + size_t getOffset() const; /** - * @brief return Tensor height size - * @retval height size + * @brief Copy the Tensor + * @param[in] from Tensor to be copied + * + * @note copy can reshape the tensor to match the shape + * @note support copying data from multiple data type */ - size_t height() const { return dim.height(); } + void copy(const Tensor &from); /** - * @brief return Tensor batch size - * @retval width size + * @brief Copy the Tensor + * @param[in] from Tensor to be copied + * @note support copying data from multiple data type */ - size_t width() const { return dim.width(); } + void copyData(const Tensor &from); /** - * @brief return Tensor Data Type Size - * @retval data type size + * @brief Copy the Tensor + * @param[in] from Tensor to be copied + * @note only support copying data from tensor with the same data type */ - uint getDataTypeSize() const { return dim.getDataTypeSize(); } + void copy_with_stride(const Tensor &from); /** - * @brief update batch size for this tensor - * @param batch size - * @note The batchsize of src_tensor need not be related with this 
- * tensor's batch size - * - * @note The memory for this tensor will re-allocated/re-assigned if the - * updated batch size is different than the current batch size. - * - * @note If this tensor is/was the src_tensor for some other, then - * reduction in batch size can make the dependent tensors allocate fail due to - * memory smaller. Caller must handle this in their own end. - * - * @note If this tensor is re-allocated, then the memory might not be - * immediately freed as the tensor already depending on this tensor also - * share the same memory. So, the peak memory consumption in worst case can - * reach the total memory requirements of a model with old batchsize and the - * new batch size. It is recommended to first deallocate all the tensors, - * updateBatch and then allocate again to avoid such issues. + * @brief Get slice of the tensor, sliced by batch + * @param[in] offset offset in batch to start the slice + * @param[in] size size of the slice + * @retval slice of this tensor + * @note This function provides a slice of this tensor, and does not create a + * copy */ - void updateBatch(unsigned int batch) { - if (dim.batch() == batch) { - return; - } - - if (isAllocated()) - throw std::invalid_argument( - "Cannot update batch for an allocated tensor"); - dim.batch(batch); - } + Tensor getBatchSlice(size_t offset, unsigned int size) const; /** - * @brief return Data pointer of Tensor - * @retval template T pointer (float pointer as default) + * @brief Convient wrapper for inplace copy of @a this. + * @retval Copied version of this */ - template T *getData() { - if (!data) - return nullptr; - - data->validate(); - return data->getAddr() + offset; - } + Tensor clone() const; /** - * @brief return Data pointer of Tensor - * @retval template T pointer (float pointer as default) + * @brief Save the Tensor into file + * @param[in] file output file stream */ - template const T *getData() const { - if (!data) - return nullptr; - - data->validate(); - return data->getAddr() + offset; - } + void save(std::ostream &file); /** - * @brief return Data pointer of Tensor - * @retval template T pointer (float pointer as default) + * @brief Read the Tensor from file + * @param[in] file input file stream */ - template T *getData(size_t idx) const { - if (!data) - return nullptr; - - size_t index = idx; - - data->validate(); - return data->getAddr() + offset + index; - } + void read(std::ifstream &file); /** - * @brief setter data type - * @param[in] Data Type + * @brief return argument index which value is max by batch + * @retval unsigned int argument indices */ - void setDataType(Tdatatype d_type) { dim.setDataType(d_type); } + std::vector argmax() const; /** - * @brief setter tensor type - * @param[in] tensor Type + * @brief return max of the absolute values of the tensor + * @retval maximum absolute value */ - void setTensorType(ml::train::TensorDim::TensorType t_type) { - dim.setTensorType(t_type); - } + float max_abs() const; /** - * @brief put data of Tensor - * - * @note It is only effective when memory_swap is used + * @brief return maximum value + * @retval Maximum value of the tensor data */ - void putData() const { - if (!data) - return; - - data->invalidate(); - } + float maxValue() const; /** - * @brief return Data pointer of Tensor - * @retval template T pointer (float pointer as default) + * @brief return minimum value + * @retval Minimum value of the tensor data */ - const std::shared_ptr getMemoryData() const { return data; } + float minValue() const; /** - * @brief return offset + 
* @brief Transpose Tensor + * @param direction to transpose ex) 0:2:1 + * @return Tensor */ - size_t getOffset() const { return offset; } + Tensor transpose(const std::string &direction) const; /** - * @brief i data index - * @retval address of ith data + * @brief Transpose Tensor + * @param direction to transpose ex) 0:2:1 + * @param[out] Tensor to save to, dimension is always reshaped. + * @retval Tensor& reference to the out */ + Tensor &transpose(const std::string &direction, Tensor &out) const; + /** * @brief set Tensor Dim * @param[in] d TensorDim @@ -1898,263 +1314,144 @@ class Tensor { void fill(const Tensor &from, bool allocate = false); /** - * @brief return current stride of tensor. - * @retval int[MAXDIM] strides - */ - const std::array getStrides() const noexcept { - return strides; - } - /** - * @brief Get linear index given the n-d index - */ - inline size_t getIndex(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const noexcept { - if (getFormat() == Tformat::NCHW) { - if (dim.getStorageOrder() == TStorageOrder::ROW_MAJOR) { - return (b * strides[0] + c * strides[1] + h * strides[2] + - w * strides[3]); - } else { - return b * dim[1] * dim[2] * dim[3] + c * dim[2] * dim[3] + h + - w * dim[2]; - } - - } else { - return (b * strides[0] + h * strides[1] + w * strides[2] + - c * strides[3]); - } - } - - /** - * @brief Check if two given axes are contiguous - */ - bool checkContinuous(unsigned int n, unsigned int np1) const { - std::vector continuous_order_nhwc = {0, 3, 1, 2}; - bool continuous = false; - if (getFormat() == Tformat::NHWC) { - if (continuous_order_nhwc[np1] == continuous_order_nhwc[n] + 1) - continuous = true; - } else { - if (n + 1 == np1) - continuous = true; - } - return continuous; - } - - /** - * @brief Get name of the tensor - * - * @return name of the tensor - */ - void setName(const std::string &name_) { name = name_; } - - /** - * @brief Get name of the tensor - * - * @return name of the tensor + * @brief return a copy of the Tensor Dim + * @retval TensorDim */ - const std::string &getName() const { return name; } + TensorDim getDim() const; /** - * @brief Set the memory buffer for the tensor - * - * @param buf the memory buffer - * @param init intialize the buffer + * @brief return Tensor Type */ - void setData(const std::shared_ptr buf, size_t off = 0, - bool init = false) { - if (buf) { - data = buf; - offset = off; - if (init) - initialize(); - } else { - data = nullptr; - offset = 0; - } - } + TensorDim::TensorType getTensorType() const; /** * @brief Get initializer for the tensor * * @return initializer of the tensor */ - Tensor::Initializer getInitializer() const { return initializer; } + Initializer getInitializer() const; /** * @brief Get format for the tensor - * * @return format of the tensor */ - TensorDim::Format getFormat() const { return dim.getFormat(); } + TensorDim::Format getFormat() const; /** * @brief Get data type for the tensor * * @return data type of the tensor */ - Tdatatype getDataType() const { return dim.getDataType(); } + Tdatatype getDataType() const; /** - * @brief Set fp32 scale factors of the tensor - * @param[in] scales fp32 scale factors + * @brief update batch size for this tensor + * @param batch size + * @note The batchsize of src_tensor need not be related with this + * tensor's batch size + * + * @note The memory for this tensor will re-allocated/re-assigned if the + * updated batch size is different than the current batch size. 
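// Sketch of the transpose() overloads documented above. In nntrainer the
// direction string orders the last three axes (channel:height:width), so
// "0:2:1" swaps height and width while the batch axis stays in place; that
// axis interpretation is an assumption of this sketch, not stated by the diff.
#include <string>
#include <tensor.h>

void transpose_sketch(const nntrainer::Tensor &in) {
  // Allocate-and-return overload.
  nntrainer::Tensor swapped = in.transpose("0:2:1");

  // Write-into overload: out is reshaped to the transposed dimensions and a
  // reference to it is returned.
  nntrainer::Tensor out(in.getDim());
  in.transpose("0:2:1", out);
}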
+ * + * @note If this tensor is/was the src_tensor for some other, then + * reduction in batch size can make the dependent tensors allocate fail due to + * memory smaller. Caller must handle this in their own end. + * + * @note If this tensor is re-allocated, then the memory might not be + * immediately freed as the tensor already depending on this tensor also + * share the same memory. So, the peak memory consumption in worst case can + * reach the total memory requirements of a model with old batchsize and the + * new batch size. It is recommended to first deallocate all the tensors, + * updateBatch and then allocate again to avoid such issues. */ - void setScaleFactors(std::vector scales) { - if (scales.empty()) { - throw std::invalid_argument("Error: invalid parameter"); - } - - scale_factors_fp32 = scales; - } + void updateBatch(unsigned int batch); /** - * @brief Get scale factors of the tensor - * - * @return scale factors of the tensor + * @brief return whether tensor is contiguous or not. + * @retval bool contiguous */ - std::vector getScaleFactors() const; + const bool getContiguous() const noexcept; /** - * @brief Set output axis of the tensor - * @param[in] zp zero points + * @brief return current stride of tensor. + * @retval int[MAXDIM] strides */ - void setZeroPoints(std::vector zp); + const std::array getStrides() const noexcept; -#ifdef ENABLE_FP16 /** - * @brief Set fp16 scale factors of the tensor - * @param[in] scales fp16 scale factors + * @brief Check if two given axes are contiguous + * @param[in] np1 first axis + * @param[in] np2 second axis to compare with first axis + * @retval bool continuous */ - void setScaleFactorsFP16(std::vector<_FP16> scales) { - if (scales.empty()) { - throw std::invalid_argument("Error: invalid parameter"); - } - - scale_factors_fp16 = scales; - } -#endif + bool checkContinuous(unsigned int np1, unsigned int np2) const; /** - * @brief Get zero points of the tensor - * - * @return zero points of the tensor + * @brief Set name of the tensor + * @param[in] name_ tensor name */ - std::vector getZeroPoints() const; + void setName(const std::string &name_); /** - * @brief Dequantize Tensor to output tensor datatype - * @param[out] output Tensor to store the result + * @brief Get name of the tensor + * @retval string name */ - void dequantize(Tensor &output, unsigned int axis) const; - - static constexpr float epsilon = 1e-5; - -private: - /**< handle the data as a std::shared_ptr type */ - TensorDim dim; - std::array strides; - bool contiguous; - Tensor::Initializer initializer; - std::string name; /**< name of the tensor */ - std::shared_ptr data; - size_t offset; - std::vector scale_factors_fp32; -#ifdef ENABLE_FP16 - std::vector<_FP16> scale_factors_fp16; -#endif - std::vector zero_points; + const std::string &getName() const; - /**< - * When using shared_data with tensor, this stores the ptr of the source - * tensor which handles the full memory. If tensor data is already allocated, - * this does not affect the tensor. 
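// Sketch of the re-batching pattern recommended in the updateBatch() note
// above: drop the buffers first so that old and new allocations are never
// alive at the same time, then update the dimension and re-allocate. The
// vector of tensors and the new batch size are illustrative.
#include <vector>
#include <tensor.h>

void rebatch_sketch(std::vector<nntrainer::Tensor> &tensors,
                    unsigned int new_batch) {
  for (auto &t : tensors)
    t.deallocate();           // free the current memory
  for (auto &t : tensors)
    t.updateBatch(new_batch); // only the TensorDim changes here
  for (auto &t : tensors)
    t.allocate();             // acquire memory at the new batch size
}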
If the tensor data is not allocated, and - * src_ptr is valid, this tensor will use the memory allocated by the src_ptr + /** + * @brief Get linear index given the n-d index + */ + size_t getIndex(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) const noexcept; + /** + * @brief Get size of current tensor + * @retval unsigned int size of the current tensor */ - std::shared_ptr src_tensor; + size_t size() const; - struct BroadcastInfo; + /** + * @brief Get if the tensor is empty + * @retval true if the tensor is empty + */ + bool empty() const; /** - * @brief Applies the given operator to the tensor with the passed argument - * @param[in] m Tensor - * @param[in] v_func vectorized function to apply - * @param e broadcast info. - * @param cur_axis current axis. pass default when calling outside. - * @param offset offset for this. pass default when calling outside. - * @param m_offset offset for m. pass default when calling outside. - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @brief Get size of the data in bytes + * @retval size_t Size in bytes */ - void - apply_broadcast_util(Tensor const &m, - std::function - v_func, - Tensor &output, const BroadcastInfo &e, - int cur_axis = -1, size_t offset = 0, - size_t m_offset = 0) const; + size_t bytes() const; /** - * @brief Applies the given operator to the tensor with the passed argument - * - * @param[in] m Tensor - * @param[in] v_func vectorized function to apply - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @brief return Tensor batch size + * @retval batch size */ - void apply_broadcast(Tensor const &m, - std::function - v_func, - Tensor &output) const; -#ifdef ENABLE_FP16 + size_t batch() const; + /** - * @brief Applies the given operator to the tensor with the passed argument - * @param[in] m Tensor - * @param[in] v_func vectorized function to apply - * @param e broadcast info. - * @param cur_axis current axis. pass default when calling outside. - * @param offset offset for this. pass default when calling outside. - * @param m_offset offset for m. pass default when calling outside. - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @brief return Tensor channel size + * @retval channel size */ - void - apply_broadcast_util(Tensor const &m, - std::function - v_func, - Tensor &output, const BroadcastInfo &e, - int cur_axis = -1, size_t offset = 0, - size_t m_offset = 0) const; + size_t channel() const; + /** - * @brief Applies the given operator to the tensor with the passed argument - * - * @param[in] m Tensor - * @param[in] v_func vectorized function to apply - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter + * @brief return Tensor height size + * @retval height size */ - void apply_broadcast(Tensor const &m, - std::function - v_func, - Tensor &output) const; -#endif + size_t height() const; + /** - * @brief compute Loop info for broadcasting and vectorization - * - * @param m target tensor to be calculated against. 
- * @return BroadcastInfo Loopinfo needed to run external loop + * @brief return Tensor width size + * @retval width size */ - BroadcastInfo computeBroadcastInfo(const Tensor &m) const; + size_t width() const; /** - * @brief copy a buffer to @a this, the caller has to ensure that @a this is - * initialized otherwise undefined behavior + * @brief Merge the given two axis for tensor at second axis inplace * - * @param buf buffer to copy from + * @param axis1 first axis to merge + * @param axis2 second axis to merge */ - void copy(const void *buf); + void mergeAxis(unsigned int axis1, unsigned int axis2); /** * @brief Update destination tensor to share memory with source tensor @@ -2167,57 +1464,52 @@ class Tensor { * @note New size added with offset must be less than the size of the original * tensor. */ - static void createSharedDataTensor(const Tensor &src, Tensor &dest, - size_t offset); - - /** - * @brief Reallocate memory for this tensor - * @note This will not necessary free the memory as tensors share memory - * @note This can increase the peak memory consumption when callled on all - * the tensors of a model sequentially. It is advised to first deallocate all - * the tensors and then allocate, than reallocate tensors one by one. - */ - void reallocate() { - deallocate(); - allocate(); - } + void createSharedDataTensor(const Tensor &src, Tensor &dest, + size_t offset) const; /** - * @brief Merge the given two axis for tensor at second axis inplace + * @brief Get new tensor which shares memory with current tensor but different + * shape * - * @param axis1 first axis to merge - * @param axis2 second axis to merge + * @param dim new dimension to be set for this tensor + * @param offset offset to be used from the start of the data in elements + * @note The new tensor will share the same data as the current tensor but + * can have different size. + * @note New size added with offset must be less than the size of the original + * tensor. 
*/ - void mergeAxis(unsigned int axis1, unsigned int axis2); + Tensor getSharedDataTensor(const TensorDim dim_, size_t offset, + bool reset_stride = true, + const std::string &name_ = "") const; /** - * @brief rotate 180 dgree - * @param[in] in input Tensor - * @retVal Tensor rotated tensor (180 degree) + * @brief Swaps Tensor lhs and rhs + * @param[in] lhs Tensor to be swapped + * @param[in] rhs Tensor to be swapped */ - Tensor rotate_180(Tensor in); + friend void swap(Tensor &lhs, Tensor &rhs) noexcept { + std::swap(lhs.itensor, rhs.itensor); + } - /** - * @brief Encode two int4 values to one int8 value - * @param[in] high value for first 4 bits - * @param[in] low value for last 4 bits - * @retval Encoded value - */ - uint8_t encode_qint(uint8_t high, uint8_t low) const; + static constexpr float epsilon = 1e-5; + +private: + std::shared_ptr itensor; /** - * @brief Decode int8 value to a int4 value - * @param[in] idx index to retrieve value - * @retval Decoded value + * @brief Set tensor variables + * + * @param[in] d TensorDim + * @param[in] buf buffer + * @param[in] offset offset to be used */ - uint8_t decode_qint(uint8_t val, bool isHigh) const; - -}; // namespace nntrainer + void setTensorVar(TensorDim d, void *buf, size_t offset); +}; /** * @brief Overriding output stream */ -std::ostream &operator<<(std::ostream &out, Tensor const &m); +std::ostream &operator<<(std::ostream &out, Tensor const &input); typedef std::shared_ptr sharedTensor; @@ -2227,7 +1519,7 @@ typedef std::vector sharedConstTensors; typedef std::vector sharedTensors; -} /* namespace nntrainer */ +} // namespace nntrainer #endif /* __cplusplus */ #endif /* __TENSOR_H__ */ diff --git a/nntrainer/tensor/tensor_base.cpp b/nntrainer/tensor/tensor_base.cpp index b2bcfd444e..ed34654d04 100644 --- a/nntrainer/tensor/tensor_base.cpp +++ b/nntrainer/tensor/tensor_base.cpp @@ -9,8 +9,8 @@ * @bug No known bugs except for NYI items */ +#include #include -#include namespace nntrainer { @@ -176,14 +176,14 @@ void TensorBase::getSharedDataTensor(const TensorDim dim_, size_t offset, createSharedDataTensor(this, ret, offset); } -TensorBase::BroadcastInfoV2 -TensorBase::computeBroadcastInfo(const TensorV2 &m) const { +TensorBase::BroadcastInfo +TensorBase::computeBroadcastInfo(const Tensor &m) const { if (m.size() > this->size()) throw exception::not_supported("broadcasting *this is not supported"); const TensorDim m_dim = m.getDim(); - BroadcastInfoV2 e; + BroadcastInfo e; e.tensor_type = getTensorType(); uint continuity[4] = {0, 1, 2, 3}; @@ -255,7 +255,7 @@ TensorBase::computeBroadcastInfo(const TensorV2 &m) const { } void TensorBase::calculateFlattenDot( - TensorV2 const &input, TensorV2 &output, bool trans, bool trans_in, + Tensor const &input, Tensor &output, bool trans, bool trans_in, unsigned int &first_three_flat, unsigned int &last_axis, unsigned int &input_first_three_flat, unsigned int &input_last_axis, unsigned int &M, unsigned int &N, unsigned int &K, unsigned int &lda, @@ -285,11 +285,11 @@ void TensorBase::calculateFlattenDot( N = input_last_axis; M = first_three_flat; if (getFormat() == Tformat::NHWC) { - CREATE_V2_IF_EMPTY_DIMS(output, batch(), N, height(), width(), - getTensorType()); // NHWC Result Tensor + CREATE_IF_EMPTY_DIMS(output, batch(), N, height(), width(), + getTensorType()); // NHWC Result Tensor } else { - CREATE_V2_IF_EMPTY_DIMS(output, batch(), channel(), height(), N, - getTensorType()); + CREATE_IF_EMPTY_DIMS(output, batch(), channel(), height(), N, + getTensorType()); } // We are not set zero the 
output because of performance reason. @@ -305,11 +305,11 @@ void TensorBase::calculateFlattenDot( N = input_first_three_flat; M = first_three_flat; if (getFormat() == Tformat::NHWC) { - CREATE_V2_IF_EMPTY_DIMS(output, batch(), N, height(), width(), - getTensorType()); + CREATE_IF_EMPTY_DIMS(output, batch(), N, height(), width(), + getTensorType()); } else { - CREATE_V2_IF_EMPTY_DIMS(output, batch(), channel(), height(), N, - getTensorType()); + CREATE_IF_EMPTY_DIMS(output, batch(), channel(), height(), N, + getTensorType()); } } else if (trans && !trans_in) { if (first_three_flat != input_first_three_flat) @@ -319,9 +319,9 @@ void TensorBase::calculateFlattenDot( N = input_last_axis; M = last_axis; if (getFormat() == Tformat::NHWC) { - CREATE_V2_IF_EMPTY_DIMS(output, 1, N, M, 1, getTensorType()); + CREATE_IF_EMPTY_DIMS(output, 1, N, M, 1, getTensorType()); } else { - CREATE_V2_IF_EMPTY_DIMS(output, 1, 1, M, N, getTensorType()); + CREATE_IF_EMPTY_DIMS(output, 1, 1, M, N, getTensorType()); } } else { if (first_three_flat != input_last_axis) @@ -331,9 +331,9 @@ void TensorBase::calculateFlattenDot( N = input_first_three_flat; M = last_axis; if (getFormat() == Tformat::NHWC) { - CREATE_V2_IF_EMPTY_DIMS(output, 1, N, M, 1, getTensorType()); + CREATE_IF_EMPTY_DIMS(output, 1, N, M, 1, getTensorType()); } else { - CREATE_V2_IF_EMPTY_DIMS(output, 1, 1, M, N, getTensorType()); + CREATE_IF_EMPTY_DIMS(output, 1, 1, M, N, getTensorType()); } } diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h index 5a18a7a1e7..c3b4bfb875 100644 --- a/nntrainer/tensor/tensor_base.h +++ b/nntrainer/tensor/tensor_base.h @@ -72,7 +72,7 @@ enum class Initializer { NONE /** No initialization */ }; -class TensorV2; +class Tensor; class SrcSharedTensorBase; /** @@ -114,6 +114,21 @@ class TensorBase { TensorBase(const TensorDim &d, const void *buf = nullptr) : TensorBase(d, true) {} + /** + * @brief Copy constructor of TensorBase. 
+ * @param[in] Tensor & + */ + TensorBase(const TensorBase &rhs) { + dim = rhs.dim; + strides = rhs.strides; + contiguous = rhs.contiguous; + initializer = rhs.initializer; + name = rhs.name; + data = rhs.data; + offset = rhs.offset; + src_tensor = rhs.src_tensor; + } + /** * @brief Comparison operator overload * @param[in] rhs Tensor to be compared with @@ -129,7 +144,7 @@ class TensorBase { bool operator!=(const TensorBase &rhs) const { return !(*this == rhs); } /** - * @copydoc TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset) + * @copydoc Tensor::setTensorVar(TensorDim d, void *buf, size_t offset) */ void setTensorVar(TensorDim d, void *buf, size_t offset); @@ -139,27 +154,27 @@ class TensorBase { virtual ~TensorBase() {} /** - * @copydoc TensorV2::allocate() + * @copydoc Tensor::allocate() */ virtual void allocate() = 0; /** - * @copydoc TensorV2::deallocate() + * @copydoc Tensor::deallocate() */ virtual void deallocate() = 0; /** - * @copydoc TensorV2::isAllocated() + * @copydoc Tensor::isAllocated() */ bool isAllocated() { return data != nullptr; } /** - * @copydoc TensorV2::getData() + * @copydoc Tensor::getData() */ virtual void *getData() const = 0; /** - * @copydoc TensorV2::getData(size_t idx) + * @copydoc Tensor::getData(size_t idx) */ virtual void *getData(size_t idx) const = 0; @@ -176,143 +191,155 @@ class TensorBase { virtual const void *getAddress(unsigned int i) const = 0; /** - * @copydoc TensorV2::setValue(float value) + * @copydoc Tensor::setValue(float value) */ virtual void setValue(float value) = 0; /** - * @copydoc TensorV2::setValue(b, c, h, w, value) + * @copydoc Tensor::setValue(b, c, h, w, value) */ virtual void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, float value) = 0; /** - * @copydoc TensorV2::addValue() + * @copydoc Tensor::addValue() */ virtual void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, float value, float beta) = 0; /** - * @copydoc TensorV2::setZero() + * @copydoc Tensor::setZero() */ virtual void setZero() = 0; /** - * @copydoc TensorV2::setRandNormal() + * @copydoc Tensor::setRandNormal() */ virtual void setRandNormal(float mean, float stddev) = 0; /** - * @copydoc TensorV2::setRandBernoulli() + * @copydoc Tensor::setRandBernoulli() */ virtual void setRandUniform(float min, float max) = 0; /** - * @copydoc TensorV2::setRandBernoulli() + * @copydoc Tensor::setRandBernoulli() */ virtual void setRandBernoulli(float probability) = 0; /** - * @copydoc TensorV2::initialize() + * @copydoc Tensor::initialize() */ virtual void initialize() = 0; /** - * @copydoc TensorV2::initialize(Initializer init) + * @copydoc Tensor::initialize(Initializer init) */ virtual void initialize(Initializer init) = 0; /** - * @copydoc TensorV2::multiply_strided(TensorV2 const &m, TensorV2 &output, + * @copydoc Tensor::multiply_strided(Tensor const &m, Tensor &output, * const float beta) */ - virtual TensorV2 multiply_strided(TensorV2 const &m, TensorV2 &output, - const float beta) const = 0; + virtual Tensor multiply_strided(Tensor const &m, Tensor &output, + const float beta) const = 0; /** - * @copydoc TensorV2::multiply_i(float const &value) + * @copydoc Tensor::multiply_i(float const &value) */ virtual int multiply_i(float const &value) = 0; /** - * @copydoc TensorV2::multiply(float const &value, TensorV2 &out) + * @copydoc Tensor::multiply(float const &value, Tensor &out) */ - virtual TensorV2 &multiply(float const &value, TensorV2 &out) const = 0; + virtual Tensor &multiply(float const &value, 
Tensor &out) const = 0; /** - * @copydoc TensorV2::multiply(TensorV2 const &m, TensorV2 &output, const + * @copydoc Tensor::multiply(Tensor const &m, Tensor &output, const * float beta = 0.0) */ - virtual TensorV2 &multiply(TensorV2 const &m, TensorV2 &output, - const float beta = 0.0) const = 0; + virtual Tensor &multiply(Tensor const &m, Tensor &output, + const float beta = 0.0) const = 0; /** - * @copydoc TensorV2::divide(float const &value, TensorV2 &output) + * @copydoc Tensor::divide(float const &value, Tensor &output) */ - virtual TensorV2 ÷(float const &value, TensorV2 &output) const = 0; + virtual Tensor ÷(float const &value, Tensor &output) const = 0; /** - * @copydoc TensorV2::divide(TensorV2 const &m, TensorV2 &output) + * @copydoc Tensor::divide(Tensor const &m, Tensor &output) */ - virtual TensorV2 ÷(TensorV2 const &m, TensorV2 &output) const = 0; + virtual Tensor ÷(Tensor const &m, Tensor &output) const = 0; /** - * @copydoc TensorV2::add_strided(TensorV2 const &input, TensorV2 &output, + * @copydoc Tensor::add_strided(Tensor const &input, Tensor &output, * const float beta) */ - virtual TensorV2 &add_strided(TensorV2 const &input, TensorV2 &output, - const float beta) const = 0; + virtual Tensor &add_strided(Tensor const &input, Tensor &output, + const float beta) const = 0; /** - * @copydoc TensorV2::add(float const &value, TensorV2 &output) + * @copydoc Tensor::add_i(Tensor const &m, float const alpha) */ - virtual TensorV2 &add(float const &value, TensorV2 &output) const = 0; + virtual int add_i(Tensor const &m, Tensor &output, float const alpha) = 0; /** - * @copydoc TensorV2::add(TensorV2 const &m, TensorV2 &output, float const + * @copydoc Tensor::add_i_partial() + */ + virtual int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m, + unsigned int incX, unsigned int incY, + const Tensor alphas, unsigned int alpha_idx) = 0; + + /** + * @copydoc Tensor::add(float const &value, Tensor &output) + */ + virtual Tensor &add(float const &value, Tensor &output) const = 0; + + /** + * @copydoc Tensor::add(Tensor const &m, Tensor &output, float const * alpha) */ - virtual TensorV2 &add(TensorV2 const &m, TensorV2 &output, - float const alpha) const = 0; + virtual Tensor &add(Tensor const &m, Tensor &output, + float const alpha) const = 0; /** - * @copydoc TensorV2::subtract(float const &value, TensorV2 &output) + * @copydoc Tensor::subtract(float const &value, Tensor &output) */ - virtual TensorV2 &subtract(float const &value, TensorV2 &output) const = 0; + virtual Tensor &subtract(float const &value, Tensor &output) const = 0; /** * @brief Sum all the Tensor elements according to the batch * @param[out] output Tensor(batch, 1, 1, 1) */ - virtual void sum_by_batch(TensorV2 &output) const = 0; + virtual void sum_by_batch(Tensor &output) const = 0; /** - * @copydoc TensorV2::sum(unsigned int axis, TensorV2 &output, float alpha, + * @copydoc Tensor::sum(unsigned int axis, Tensor &output, float alpha, * float beta) const */ - virtual TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha, - float beta) const = 0; + virtual Tensor &sum(unsigned int axis, Tensor &output, float alpha, + float beta) const = 0; /** - * @copydoc TensorV2::l2norm + * @copydoc Tensor::l2norm */ virtual float l2norm() const = 0; /** - * @copydoc TensorV2::pow(float exponent, TensorV2 &output) + * @copydoc Tensor::pow(float exponent, Tensor &output) */ - virtual TensorV2 &pow(float exponent, TensorV2 &output) const = 0; + virtual Tensor &pow(float exponent, Tensor &output) const = 0; /** 
- * @copydoc TensorV2::erf(TensorV2 &output) + * @copydoc Tensor::erf(Tensor &output) */ - virtual TensorV2 &erf(TensorV2 &output) const = 0; + virtual Tensor &erf(Tensor &output) const = 0; /** * @brief sin transform function * @param[out] out out to store the result */ - virtual void sin(TensorV2 &out, float alpha = 1.0) { + virtual void sin(Tensor &out, float alpha = 1.0) { throw std::invalid_argument( "Tensor::sin not supported in current tensor data type."); } @@ -321,11 +348,20 @@ class TensorBase { * @brief cos transform function * @param[out] out out to store the result */ - virtual void cos(TensorV2 &out, float alpha = 1.0) { + virtual void cos(Tensor &out, float alpha = 1.0) { throw std::invalid_argument( "Tensor::cos not supported in current tensor data type."); } + /** + * @brief inverse squared root function + * @param[out] out out to store the result + */ + virtual void inv_sqrt(Tensor &out) { + throw std::invalid_argument( + "Tensor::inv_sqrt not supported in current tensor data type."); + } + /** * @brief Dot Product of Tensor ( equal MxM ) * @details This applies dot of the last dimension of this and @@ -337,48 +373,51 @@ class TensorBase { * @param[in] beta beta * @retval Calculated Tensor */ - virtual TensorV2 &dot(TensorV2 const &input, TensorV2 &output, bool trans, - bool trans_in, float beta) const = 0; + virtual Tensor &dot(Tensor const &input, Tensor &output, bool trans, + bool trans_in, float beta) const = 0; /** - * @copydoc TensorV2::dropout_mask(float dropout) + * @copydoc Tensor::dropout_mask(float dropout) */ virtual void dropout_mask(float dropout) = 0; /** - * @copydoc TensorV2::filter_mask(const TensorV2 &mask_len, bool reverse) + * @copydoc Tensor::filter_mask(const Tensor &mask_len, bool reverse) */ - virtual void filter_mask(const TensorV2 &mask_len, bool reverse) = 0; + virtual void filter_mask(const Tensor &mask_len, bool reverse) = 0; /** - * @copydoc TensorV2::zoneout_mask(TensorV2 &opposite, float zoneout) + * @copydoc Tensor::zoneout_mask(Tensor &opposite, float zoneout) */ - virtual void zoneout_mask(TensorV2 &opposite, float zoneout) = 0; + virtual void zoneout_mask(Tensor &opposite, float zoneout) = 0; /** - * @copydoc TensorV2::split(std::vector sizes, int axis) + * @copydoc Tensor::split(std::vector sizes, int axis) */ - virtual std::vector split(std::vector sizes, int axis) = 0; + virtual std::vector split(std::vector sizes, int axis) = 0; /** - * @copydoc TensorV2::print(std::ostream &out) + * @copydoc Tensor::concat(const std::vector &tensors, int axis) + */ + virtual Tensor concat(const std::vector &tensors, int axis) = 0; + + /** + * @copydoc Tensor::print(std::ostream &out) */ virtual void print(std::ostream &out) const = 0; /** - * @copydoc TensorV2::apply(std::function f, TensorV2 &output) + * @copydoc Tensor::apply(std::function f, Tensor &output) */ - virtual TensorV2 &apply(std::function f, - TensorV2 &output) const { + virtual Tensor &apply(std::function f, Tensor &output) const { return output; } #ifdef ENABLE_FP16 /** - * @copydoc TensorV2::apply(std::function f, TensorV2 &output) + * @copydoc Tensor::apply(std::function f, Tensor &output) */ - virtual TensorV2 &apply(std::function<_FP16(_FP16)> f, - TensorV2 &output) const { + virtual Tensor &apply(std::function<_FP16(_FP16)> f, Tensor &output) const { return output; } #endif @@ -389,39 +428,46 @@ class TensorBase { * * @note copy can reshape the tensor to match the shape */ - virtual void copy(const TensorV2 &from) = 0; + virtual void copy(const Tensor &from) = 0; /** * @brief 
Copy the Tensor * @param[in] from Tensor to be copied */ - virtual void copyData(const TensorV2 &from) = 0; + virtual void copyData(const Tensor &from) = 0; + + /** + * @brief Copy the Tensor + * @param[in] input Tensor to be copied + * @param[out] output output Tensor + */ + virtual void copy_with_stride(const Tensor &input, Tensor &output) = 0; /** - * @copydoc TensorV2::argmax() + * @copydoc Tensor::argmax() */ virtual std::vector argmax() const = 0; /** - * @copydoc TensorV2::max_abs() + * @copydoc Tensor::max_abs() */ virtual float max_abs() const = 0; /** - * @copydoc TensorV2::maxValue() + * @copydoc Tensor::maxValue() */ virtual float maxValue() const = 0; /** - * @copydoc TensorV2::minValue() + * @copydoc Tensor::minValue() */ virtual float minValue() const = 0; /** - * @copydoc TensorV2::transpose(const std::string &direction, TensorV2 &out) + * @copydoc Tensor::transpose(const std::string &direction, Tensor &out) */ - virtual TensorV2 &transpose(const std::string &direction, - TensorV2 &out) const = 0; + virtual Tensor &transpose(const std::string &direction, + Tensor &out) const = 0; /** * @brief put data of Tensor @@ -634,12 +680,12 @@ class TensorBase { * @note This should better be implemented in iterator fashion before used * extensively. */ - struct BroadcastInfoV2 { + struct BroadcastInfo { /** * @brief Construct a new External Loop Info object */ - BroadcastInfoV2() : + BroadcastInfo() : buffer_size(0), buffer_axis(-1), strides{0, 0, 0, 0}, @@ -659,7 +705,7 @@ class TensorBase { * @param m target tensor to be calculated against. * @return BroadcastInfo Loopinfo needed to run external loop */ - BroadcastInfoV2 computeBroadcastInfo(const TensorV2 &m) const; + BroadcastInfo computeBroadcastInfo(const Tensor &m) const; /** * @brief Calcuates variables needed to perform tensor flatten dot product @@ -681,7 +727,7 @@ class TensorBase { * * @note op(X) is one of X or X**T */ - void calculateFlattenDot(TensorV2 const &input, TensorV2 &output, bool trans, + void calculateFlattenDot(Tensor const &input, Tensor &output, bool trans, bool trans_in, unsigned int &first_three_flat, unsigned int &last_axis, unsigned int &input_first_three_flat, diff --git a/nntrainer/tensor/tensor_pool.cpp b/nntrainer/tensor/tensor_pool.cpp index d41e293793..0a69f1dce9 100644 --- a/nntrainer/tensor/tensor_pool.cpp +++ b/nntrainer/tensor/tensor_pool.cpp @@ -32,8 +32,7 @@ namespace nntrainer { */ Tensor *TensorPool::request(const std::string &name, const TensorDim &dim, const std::vector &exec_order, - TensorLifespan lifespan, - const Tensor::Initializer &init, + TensorLifespan lifespan, const Initializer &init, bool is_weight_grad) { return registerRequestSpec( {is_weight_grad, std::make_unique(dim, false, init, name), @@ -101,8 +100,7 @@ Tensor *TensorPool::view(const std::string &name, const std::string &reference, /** @note default is_weight_grad for view is false. view is for the * activation. 
*/ return registerRequestSpec( - {false, - std::make_unique(dim, false, Tensor::Initializer::NONE, name), + {false, std::make_unique(dim, false, Initializer::NONE, name), TensorPool::DependentDetails{parent_idx, adjusted_offset}}); } @@ -365,7 +363,7 @@ Tensor *TensorPool::requestOrExtend(const std::string &name, const TensorDim &dim, const std::vector &exec_order, TensorLifespan lifespan, - const Tensor::Initializer &init) { + const Initializer &init) { NNTR_THROW_IF(lifespan == TensorLifespan::UNMANAGED, std::invalid_argument) << "unmanaged life span is not supported"; diff --git a/nntrainer/tensor/tensor_pool.h b/nntrainer/tensor/tensor_pool.h index 7ff49d790c..fd17db7cd2 100644 --- a/nntrainer/tensor/tensor_pool.h +++ b/nntrainer/tensor/tensor_pool.h @@ -43,8 +43,7 @@ class TensorPool { * @brief Constructor of TensorPool */ TensorPool() : - mem_pool(std::make_unique()), - cache_loader(nullptr) {} + mem_pool(std::make_unique()), cache_loader(nullptr) {} /** * @brief Constructor of TensorPool @@ -179,7 +178,7 @@ class TensorPool { Tensor *request(const std::string &name, const TensorDim &dim, const std::vector &exec_order, TensorLifespan lifespan, - const Tensor::Initializer &init = Tensor::Initializer::NONE, + const Initializer &init = Initializer::NONE, bool is_weight_grad = false); /** @@ -237,11 +236,10 @@ class TensorPool { * @return Tensor* ptr to either to the existing tensor or newly created * tensor */ - Tensor * - requestOrExtend(const std::string &name, const TensorDim &dim, - const std::vector &exec_order, - TensorLifespan lifespan, - const Tensor::Initializer &init = Tensor::Initializer::NONE); + Tensor *requestOrExtend(const std::string &name, const TensorDim &dim, + const std::vector &exec_order, + TensorLifespan lifespan, + const Initializer &init = Initializer::NONE); /** * @brief reidentify the source of already created tensor (or view). diff --git a/nntrainer/tensor/tensor_v2.cpp b/nntrainer/tensor/tensor_v2.cpp deleted file mode 100644 index 28cc2b1b67..0000000000 --- a/nntrainer/tensor/tensor_v2.cpp +++ /dev/null @@ -1,1082 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -/** - * @file tensor_v2.cpp - * @date 01 December 2023 - * @brief This is a TensorV2 class - * @see https://github.com/nnstreamer/nntrainer - * @author Jijoong Moon - * @author Donghyeon Jeong - * @bug No known bugs except for NYI items - */ - -#include -#include - -#ifdef ENABLE_FP16 -#include -#endif - -namespace nntrainer { - -TensorV2::TensorV2(std::string name_, Tformat fm, Tdatatype d_type) { - itensor = nullptr; - - if (d_type == Tdatatype::FP32) { - itensor = std::shared_ptr(new FloatTensor(name_, fm), - std::default_delete()); - } else if (d_type == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - itensor = std::shared_ptr(new HalfTensor(name_, fm), - std::default_delete()); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else { - throw std::invalid_argument( - "Error: TensorV2 cannot be constructed because the given d_type is not " - "compatible with itensor. 
The supported d_types are: FP32, FP16 " - "(if built with ENABLE_FP16)."); - } -} - -TensorV2::TensorV2(const TensorDim &d, bool alloc_now, Initializer init, - std::string name) { - itensor = nullptr; - - if (d.getDataType() == Tdatatype::FP32) { - itensor = - std::shared_ptr(new FloatTensor(d, alloc_now, init, name), - std::default_delete()); - } else if (d.getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - itensor = - std::shared_ptr(new HalfTensor(d, alloc_now, init, name), - std::default_delete()); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else { - throw std::invalid_argument( - "Error: TensorV2 cannot be constructed because the given d_type is not " - "compatible with itensor. The supported d_types are: FP32, FP16 " - "(if built with ENABLE_FP16)."); - } -} - -TensorV2::TensorV2(const TensorDim &d, const void *buf) { - itensor = nullptr; - - if (d.getDataType() == Tdatatype::FP32) { - itensor = std::shared_ptr(new FloatTensor(d, buf), - std::default_delete()); - } else if (d.getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - itensor = std::shared_ptr(new HalfTensor(d, buf), - std::default_delete()); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } else { - throw std::invalid_argument( - "Error: TensorV2 cannot be constructed because the given d_type is not " - "compatible with itensor. The supported d_types are: FP32, FP16 " - "(if built with ENABLE_FP16)."); - } -} - -TensorV2::TensorV2( - std::vector>>> const &d, - ml::train::TensorDim::TensorType t_type) { - itensor = std::shared_ptr(new FloatTensor(d, t_type.format), - std::default_delete()); -} - -#ifdef ENABLE_FP16 -TensorV2::TensorV2( - std::vector>>> const &d, - ml::train::TensorDim::TensorType t_type) { - itensor = std::shared_ptr(new HalfTensor(d, t_type.format), - std::default_delete()); -} -#endif - -bool TensorV2::operator==(const TensorV2 &rhs) const { - /// compares tensor information - if (*itensor == *rhs.itensor) { - /// compares tensor data - if (getDataType() == Tdatatype::FP32) { - return *std::dynamic_pointer_cast(itensor) == - *std::dynamic_pointer_cast(rhs.itensor); - } else if (getDataType() == Tdatatype::FP16) { -#ifdef ENABLE_FP16 - return *std::dynamic_pointer_cast(itensor) == - *std::dynamic_pointer_cast(rhs.itensor); -#else - throw std::invalid_argument( - "Error: HalfTensor cannot be created or used when FP16 is not enabled. 
" - "Please check if the tensor data type is set properly."); -#endif - } - } - return false; -} - -void TensorV2::allocate() { itensor->allocate(); } - -void TensorV2::deallocate() { itensor->deallocate(); } - -bool TensorV2::isAllocated() { return itensor->isAllocated(); } - -void TensorV2::setValue(float value) { itensor->setValue(value); } - -void TensorV2::setValue(unsigned int b, unsigned int c, unsigned int h, - unsigned int w, float value) { - itensor->setValue(b, c, h, w, value); -} - -void TensorV2::addValue(unsigned int b, unsigned int c, unsigned int h, - unsigned int w, float value, float beta) noexcept { - itensor->addValue(b, c, h, w, value, beta); -} - -void TensorV2::setZero() { itensor->setZero(); } - -void TensorV2::setRandNormal(float mean, float stddev) { - itensor->setRandNormal(mean, stddev); -} - -void TensorV2::setRandUniform(float min, float max) { - itensor->setRandUniform(min, max); -} - -void TensorV2::setRandBernoulli(float probability) { - itensor->setRandBernoulli(probability); -} - -void TensorV2::initialize() { itensor->initialize(); } - -void TensorV2::initialize(Initializer init) { itensor->initialize(init); } - -TensorV2 TensorV2::apply(std::function f) const { - return f(*this); -} - -TensorV2 &TensorV2::apply(std::function f, - TensorV2 &output) const { - return f(*this, output); -} - -int TensorV2::multiply_i_strided(TensorV2 const &m, const float beta) { - try { - this->multiply_strided(m, *this, beta); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::multiply_strided(TensorV2 const &m, const float beta) const { - TensorV2 t; - return this->multiply_strided(m, t, beta); -} - -TensorV2 &TensorV2::multiply_strided(TensorV2 const &m, TensorV2 &output, - const float beta) const { - itensor->multiply_strided(m, output, beta); - return output; -} - -int TensorV2::multiply_i(float const &value) { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot multiply"; - - return itensor->multiply_i(value); -} - -TensorV2 TensorV2::multiply(float const &value) const { - TensorV2 t; - return multiply(value, t); -} - -TensorV2 &TensorV2::multiply(float const &value, TensorV2 &out) const { - itensor->multiply(value, out); - return out; -} - -int TensorV2::multiply_i(TensorV2 const &m, const float beta) { - try { - this->multiply(m, *this, beta); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::multiply(TensorV2 const &m, const float beta) const { - TensorV2 t("", this->getFormat()); - return multiply(m, t, beta); -} - -TensorV2 &TensorV2::multiply(TensorV2 const &m, TensorV2 &output, - const float beta) const { - itensor->multiply(m, output, beta); - return output; -} - -int TensorV2::divide_i(float const &value) { - if (value == 0.0f) { - return ML_ERROR_INVALID_PARAMETER; - } - this->divide(value, *this); - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::divide(float const &value) const { - TensorV2 output("", getFormat(), getDataType()); - return divide(value, output); -} - -TensorV2 &TensorV2::divide(float const &value, TensorV2 &output) const { - /// @todo add unittest, ZeroDivisionError - if (value == 0.0f) { - std::stringstream ss; - ss << "[Tensor] divide by value failed, value: " << value; - throw std::invalid_argument(ss.str().c_str()); - } - 
itensor->divide(value, output); - return output; -} - -int TensorV2::divide_i(TensorV2 const &m) { - try { - this->divide(m, *this); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::divide(TensorV2 const &m) const { - TensorV2 output("", getFormat(), getDataType()); - return this->divide(m, output); -} - -TensorV2 &TensorV2::divide(TensorV2 const &m, TensorV2 &output) const { - NNTR_THROW_IF(!getContiguous() || !m.getContiguous() || - !output.getContiguous(), - std::invalid_argument) - << getName() << " is not contiguous, cannot divide"; - itensor->divide(m, output); - return output; -} - -int TensorV2::add_i_strided(TensorV2 const &input, const float beta) { - try { - this->add_strided(input, *this, beta); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::add_strided(TensorV2 const &input, const float beta) const { - TensorV2 output("", getFormat(), getDataType()); - return this->add_strided(input, output, beta); -} - -TensorV2 &TensorV2::add_strided(TensorV2 const &input, TensorV2 &output, - const float beta) const { - CREATE_V2_IF_EMPTY_DIMS(output, getDim(), nullptr); - - if (size() != input.size() || size() != output.size()) - throw std::invalid_argument( - "Strided addition does not support broadcasting"); - - itensor->add_strided(input, output, beta); - - return output; -} - -int TensorV2::add_i(float const &value) { - this->add(value, *this); - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::add(float const &value) const { - TensorV2 t("", getFormat(), getDataType()); - return add(value, t); -} - -TensorV2 &TensorV2::add(float const &value, TensorV2 &output) const { - itensor->add(value, output); - return output; -} - -int TensorV2::add_i(TensorV2 const &m, float const alpha) { - try { - this->add(m, *this, alpha); - } catch (std::exception &err) { - ml_loge("%s %s", typeid(err).name(), err.what()); - return ML_ERROR_INVALID_PARAMETER; - } - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::add(TensorV2 const &m, float const alpha) const { - TensorV2 t("", getFormat(), getDataType()); - return this->add(m, t, alpha); -} - -TensorV2 &TensorV2::add(TensorV2 const &m, TensorV2 &output, - float const alpha) const { - NNTR_THROW_IF(!itensor->getContiguous() || !m.getContiguous() || - !output.getContiguous(), - std::invalid_argument) - << getName() << " is not contiguous, cannot add"; - itensor->add(m, output, alpha); - return output; -} - -int TensorV2::subtract_i(float const &value) { - this->subtract(value, *this); - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::subtract(float const &value) const { - TensorV2 output("", getFormat(), getDataType()); - return subtract(value, output); -} - -TensorV2 &TensorV2::subtract(float const &value, TensorV2 &output) const { - itensor->subtract(value, output); - return output; -} - -int TensorV2::subtract_i(TensorV2 const &m) { return add_i(m, -1); } - -TensorV2 TensorV2::subtract(TensorV2 const &m) const { return add(m, -1); } - -TensorV2 &TensorV2::subtract(TensorV2 const &m, TensorV2 &output) const { - return add(m, output, -1); -} - -/** - * This is to sum the Tensor data according to the dim.batch(). - * Therefore the result has M(dim.batch(), 1, 1, 1) dimension. 
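// Illustrative sketch of the element-wise API implemented in this deleted
// hunk; it assumes the same methods live on the renamed nntrainer::Tensor
// with the signatures shown here. Scalars and shapes are arbitrary, and the
// in-place *_i variants report failure through an error code rather than by
// throwing.
#include <tensor.h>

void arithmetic_sketch(nntrainer::Tensor &a, const nntrainer::Tensor &b) {
  nntrainer::Tensor sum = a.add(b, 1.0f);       // sum = a + 1.0 * b
  nntrainer::Tensor prod = a.multiply(b, 0.0f); // prod = a * b
  nntrainer::Tensor quot = a.divide(b);         // element-wise a / b

  int status = a.multiply_i(0.5f);              // in-place scale of a
  (void)status;                                 // ML_ERROR_NONE on success
}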
- */ -TensorV2 TensorV2::sum_by_batch() const { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot sum"; - - TensorV2 output(batch(), 1, 1, 1, this->getFormat(), getDataType()); - itensor->sum_by_batch(output); - return output; -} - -TensorV2 TensorV2::sum(unsigned int axis, float alpha) const { - TensorV2 output("", this->getFormat(), this->getDataType()); - return sum(axis, output, alpha, 0); -} - -TensorV2 &TensorV2::sum(unsigned int axis, TensorV2 &output, float alpha, - float beta) const { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot sum"; - - itensor->sum(axis, output, alpha, beta); - return output; -} - -TensorV2 TensorV2::sum(const std::vector &axes, - float alpha) const { - TensorV2 output("", this->getFormat()); - return sum(axes, output, alpha); -} - -TensorV2 &TensorV2::sum(const std::vector &axes, TensorV2 &output, - float alpha) const { - if (axes.empty()) - throw std::invalid_argument("empty axes given"); - - if (axes.size() == 1) { - this->sum(axes[0], output, alpha); - } else { - - /** club axes together */ - TensorV2 new_reshaped = TensorV2(getDim()); - new_reshaped.copy(*this); - std::vector continuous_order = {0, 3, 1, 2}; - std::vector new_axes = {axes[0]}; - - for (unsigned int i = 1; i < axes.size(); ++i) { - if (checkContinuous(axes[i - 1], axes[i])) { - new_reshaped.mergeAxis(axes[i - 1], axes[i]); - new_axes.back() = axes[i]; - } else { - new_axes.push_back(axes[i]); - } - } - - TensorV2 ret = new_reshaped.sum(new_axes[0]); - for (unsigned int i = 1; i < new_axes.size() - 1; ++i) - ret = ret.sum(axes[i]); - ret.sum(new_axes.back(), output, alpha); - } - return output; -} - -TensorV2 TensorV2::average(unsigned int axis) const { - TensorV2 output("", this->getFormat(), this->getDataType()); - return average(axis, output); -} - -TensorV2 &TensorV2::average(unsigned int axis, TensorV2 &output) const { - if (axis >= TensorDim::MAXDIM) - throw std::out_of_range( - "negative axis or axis more then MAXDIM is invalid"); - - unsigned int axis_size = getDim()[axis]; - if (axis_size == 1) - output.copy(*this); - else - this->sum(axis, output, 1.0 / ((float)axis_size)); - - return output; -} - -TensorV2 TensorV2::average(const std::vector &axes) const { - TensorV2 output("", this->getFormat(), this->getDataType()); - return average(axes, output); -} - -TensorV2 &TensorV2::average(const std::vector &axes, - TensorV2 &output) const { - if (axes.empty()) - return this->average(output); - - TensorDim ret_shape(getTensorType()); - - for (const auto &idx : axes) { - if (idx >= TensorDim::MAXDIM) { - throw std::out_of_range("axis more then MAXDIM is invalid"); - } - ret_shape.setTensorDim(idx, getDim().getTensorDim(idx)); - } - - return this->sum(axes, output, 1.0 / (float)ret_shape.getDataLen()); -} - -TensorV2 TensorV2::average() const { - TensorV2 output = *this; - unsigned int axis = 0; - if (this->getFormat() == Tformat::NHWC) { - output.reshape({1, getDim().getDataLen(), 1, 1, this->getTensorType()}); - axis = 1; - } else { - output.reshape({1, 1, 1, getDim().getDataLen(), this->getTensorType()}); - axis = 3; - } - return output.average(axis); -} - -TensorV2 &TensorV2::average(TensorV2 &output) const { - TensorV2 result = *this; - result.reshape({1, 1, 1, getDim().getDataLen()}); - return result.average(3, output); -} - -int TensorV2::pow_i(float exponent) { - pow(exponent, *this); - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::pow(float exponent) const { - TensorV2 
output("", getFormat(), getDataType()); - return pow(exponent, output); -} - -TensorV2 &TensorV2::pow(float exponent, TensorV2 &output) const { - itensor->pow(exponent, output); - return output; -} - -int TensorV2::erf_i() { - erf(*this); - return ML_ERROR_NONE; -} - -TensorV2 TensorV2::erf() const { - TensorV2 output("", getFormat(), getDataType()); - return erf(output); -} - -TensorV2 &TensorV2::erf(TensorV2 &output) const { - itensor->erf(output); - return output; -} - -void TensorV2::sin(TensorV2 &out, float alpha) { - if (size() != out.size()) - throw std::invalid_argument("Error: Size of out of Tensor::sin must match"); - - itensor->sin(out, alpha); -} - -void TensorV2::cos(TensorV2 &out, float alpha) { - if (size() != out.size()) - throw std::invalid_argument("Error: Size of out of Tensor::cos must match"); - - itensor->cos(out, alpha); -} - -float TensorV2::l2norm() const { return itensor->l2norm(); } - -void TensorV2::normalization_i() { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot do normalization."; - - const float min = minValue(); - const float max = maxValue(); - - if (max == min) { - TensorV2 tmp = *this; - this->subtract_i(tmp); - } else { - this->subtract_i(min); - this->divide_i(max - min); - } -} - -void TensorV2::standardization_i() { - TensorV2 mean_by_batch = this->sum_by_batch(); - mean_by_batch.divide_i(getDim().getFeatureLen()); - - this->subtract_i(mean_by_batch); - TensorV2 std_dev_by_batch(batch(), 1, 1, 1, getFormat(), getDataType()); - std_dev_by_batch.setZero(); - - /// @todo remove conditional statement - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - float *std_dev = std_dev_by_batch.getData(); - - for (unsigned int k = 0; k < batch(); ++k) { - TensorV2 sub_this = this->getBatchSlice(k, 1); - std_dev[k] = sub_this.l2norm(); - } - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - _FP16 *std_dev = std_dev_by_batch.getData<_FP16>(); - - for (unsigned int k = 0; k < batch(); ++k) { - TensorV2 sub_this = this->getBatchSlice(k, 1); - std_dev[k] = static_cast<_FP16>(sub_this.l2norm()); - } -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - std_dev_by_batch.divide_i(getDim().getFeatureLen()); - this->divide_i(std_dev_by_batch); -} - -TensorV2 TensorV2::dot(TensorV2 const &input, bool trans, bool trans_in) const { - TensorV2 output("", this->getFormat(), this->getDataType()); - dot(input, output, trans, trans_in); - - return output; -} - -/** - * @note: This dot product flattens the fist 3 axis for the purpose of - * computation. So, while performing, these matrices are behaving as 2-D - * matrices. The dimensions are restored while returning back the tensor - * in case of trans is false. - */ -TensorV2 &TensorV2::dot(TensorV2 const &input, TensorV2 &output, bool trans, - bool trans_in, float beta) const { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous. 
Cannot dot product."; - - itensor->dot(input, output, trans, trans_in, beta); - return output; -} - -TensorV2 &TensorV2::dot_deriv_wrt_1(TensorV2 const &m, - TensorV2 const &output_deriv, bool trans, - bool trans_m, float beta) { - bool deriv_trans_m = true; - bool deriv_trans = false; - /** @todo handle all cases of trans and trans_m */ - if (!trans && trans_m) { - deriv_trans_m = false; - } - - return output_deriv.dot(m, *this, deriv_trans, deriv_trans_m, beta); -} - -/** - * @brief compute the derivative wrt m in the m tensor - * @note The caller tensor must be the same tensor as the one which called the - * dot() product. - */ -TensorV2 &TensorV2::dot_deriv_wrt_2(TensorV2 &m_deriv, - TensorV2 const &output_deriv, bool trans, - bool trans_m, float beta) const { - bool deriv_trans_m = false; - bool deriv_trans = true; - /** @todo handle all cases of trans and trans_m */ - - if (!trans && trans_m) { - output_deriv.dot(*this, m_deriv, deriv_trans, deriv_trans_m, beta); - return m_deriv; - } else { - return dot(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta); - } -} - -TensorV2 &TensorV2::dotBatched(TensorV2 const &m, TensorV2 &result, bool trans, - bool trans_m, float beta) const { - if (!result.isAllocated()) - throw std::invalid_argument( - "Output tensor must be preallocated for dotBatched operation"); - for (unsigned int b = 0; b < batch(); b++) { - /** @todo try using transpose to speedup the operation */ - const TensorV2 this_b = this->getBatchSlice(b, 1); - TensorV2 m_b = m.getBatchSlice(b, 1); - TensorV2 result_b = result.getBatchSlice(b, 1); - - this_b.dot(m_b, result_b, trans, trans_m, beta); - } - - return result; -} - -TensorV2 &TensorV2::dot_batched_deriv_wrt_1(TensorV2 const &m, - TensorV2 const &output_deriv, - bool trans, bool trans_m, - float beta) { - bool deriv_trans_m = true; - bool deriv_trans = false; - /** @todo handle all cases of trans and trans_m */ - if (!trans && trans_m) { - deriv_trans_m = false; - } - - return output_deriv.dotBatched(m, *this, deriv_trans, deriv_trans_m, beta); -} - -TensorV2 &TensorV2::dot_batched_deriv_wrt_2(TensorV2 &m_deriv, - TensorV2 const &output_deriv, - bool trans, bool trans_m, - float beta) const { - bool deriv_trans_m = false; - bool deriv_trans = true; - /** @todo handle all cases of trans and trans_m */ - - if (!trans && trans_m) { - output_deriv.dotBatched(*this, m_deriv, deriv_trans, deriv_trans_m, beta); - return m_deriv; - } else { - return dotBatched(output_deriv, m_deriv, deriv_trans, deriv_trans_m, beta); - } -} - -TensorV2 TensorV2::dropout_mask(float dropout) const { - TensorV2 output(getDim()); - output.dropout_mask(dropout); - return output; -} - -void TensorV2::dropout_mask(float dropout) { - /// @todo add unittest - NNTR_THROW_IF(dropout < 0 || dropout > 1, std::invalid_argument) - << "[Tensor::dropout_mask] Dropout rate should be between 0 and 1"; - - // if the rate is zero, no change is needed - if (std::fpclassify(dropout) == FP_ZERO) - return; - - setRandUniform(0.0, 1.0); - itensor->dropout_mask(dropout); -} - -void TensorV2::filter_mask(const TensorV2 &mask_len, bool reverse) { - /// @todo add unittest - itensor->filter_mask(mask_len, reverse); -} - -TensorV2 TensorV2::zoneout_mask(float zoneout) { - TensorV2 output(getDim()); - zoneout_mask(output, zoneout); - return output; -} - -void TensorV2::zoneout_mask(TensorV2 &opposite, float zoneout) { - NNTR_THROW_IF(getDim() != opposite.getDim(), std::invalid_argument) - << "[Tensor::zoneout_mask] opposite dimension does not match"; - - 
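// Hypothetical sketch of the dot()/dotBatched() calls implemented in this
// hunk, written against the renamed nntrainer::Tensor. The four-argument
// constructor is assumed from the TensorV2 API, transpose flags and beta are
// passed explicitly because their defaults are not visible here, and the
// shapes assume x is (B, 1, M, K) and w is (B, 1, K, N). dotBatched() needs a
// preallocated result, as the isAllocated() check above enforces.
#include <tensor.h>

void dot_sketch(const nntrainer::Tensor &x, const nntrainer::Tensor &w) {
  // Plain dot product: the first three axes of x are flattened for the GEMM.
  nntrainer::Tensor y = x.dot(w, false, false);

  // Per-batch matrix multiply: each batch slice of x is multiplied with the
  // matching slice of w and written into the matching slice of out.
  nntrainer::Tensor out(x.batch(), 1, x.height(), w.width());
  x.dotBatched(w, out, false, false, 0.0f);
}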
NNTR_THROW_IF(zoneout < 0 || zoneout > 1, std::invalid_argument) - << "[Tensor::zoneout_mask] Zoneout rate should be between 0 and 1"; - - // if the rate is zero, no change is needed - if (std::fpclassify(zoneout) == FP_ZERO) - return; - - itensor->zoneout_mask(opposite, zoneout); -} - -std::vector TensorV2::split(unsigned num_size, int axis) { - NNTR_THROW_IF(num_size == 0, std::invalid_argument) - << "num size cannot be zero"; - - if (axis == -1) { - axis = 3; - } - - NNTR_THROW_IF(!(0 <= axis && axis < 4), std::invalid_argument) - << "cannot split axis of axis: " << axis; - - NNTR_THROW_IF(getDim().getTensorDim(axis) % num_size != 0, - std::invalid_argument) - << "axis is not divisible by num_size, axis: " << axis - << " num size: " << num_size; - - std::vector sizes; - sizes.resize(num_size); - - unsigned int sz = getDim().getTensorDim(axis) / num_size; - std::fill(sizes.begin(), sizes.end(), sz); - - return split(sizes, axis); -} - -std::vector TensorV2::split(std::vector sizes, int axis) { - NNTR_THROW_IF(sizes.size() == 0, std::invalid_argument) - << "num size cannot be zero"; - - NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument) - << "cannot split axis of axis: " << axis; - - NNTR_THROW_IF( - std::any_of(sizes.begin(), sizes.end(), [](size_t sz) { return !sz; }), - std::invalid_argument) - << "among given sizes at least one of size is 0"; - - return itensor->split(sizes, axis); -} - -TensorV2 TensorV2::cat(const std::vector &tensors, int axis) { - NNTR_THROW_IF(!(-1 <= axis && axis < 4), std::invalid_argument) - << "cannot split axis of axis: " << axis; - - NNTR_THROW_IF(tensors.empty(), std::invalid_argument) - << "given tensor vector is empty"; - - TensorV2 output; - Tdatatype dtype = tensors.front().getDim().getDataType(); - - if (dtype == Tdatatype::FP32) { - output = FloatTensor::cat(tensors, axis); - } else if (dtype == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - output = HalfTensor::cat(tensors, axis); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - return output; -} - -void TensorV2::print(std::ostream &out) const { itensor->print(out); } - -void TensorV2::putData() const { itensor->putData(); } - -void TensorV2::setData(const std::shared_ptr buf, size_t off, - bool init) { - itensor->setMemoryData(buf, off); - - if (buf && init) { - initialize(); - } -} - -const std::shared_ptr TensorV2::getMemoryData() const { - return itensor->getMemoryData(); -} - -size_t TensorV2::getOffset() const { return itensor->getOffset(); } - -void TensorV2::copy(const TensorV2 &from) { - /// @todo enable copy to non-contiguous tensor - if (!itensor->getContiguous()) { - throw std::runtime_error("Cannot copy non-contiguous tensor"); - } - - if (from.size() != 0 && size() == from.size() && - getDataType() == from.getDataType()) { - // if tensor size and data type match, copy data - itensor->copy(from); - } else { - // replace with a new tensor that are the same with the given tensor - if (from.getDataType() == ml::train::TensorDim::DataType::FP32) { - TensorV2 t = TensorV2(from.getDim(), from.getData()); - swap(t, *this); - } else if (from.getDataType() == ml::train::TensorDim::DataType::FP16) { -#ifdef ENABLE_FP16 - TensorV2 t = TensorV2(from.getDim(), from.getData<_FP16>()); - swap(t, *this); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } -} - -void TensorV2::copyData(const TensorV2 &from) { itensor->copyData(from); } - -void TensorV2::copy_with_stride(const TensorV2 &from) { - 
if (itensor->getDim() == from.getDim()) { - // if the tensor dim matches, copy the data - copy(from); - } else { - // replace with a new tensor that has the same data as the given tensor - TensorV2 t = TensorV2(from.getDim(), true); - for (unsigned int b = 0; b < t.batch(); ++b) { - for (unsigned int c = 0; c < t.channel(); ++c) { - for (unsigned int h = 0; h < t.height(); ++h) { - for (unsigned int w = 0; w < t.width(); ++w) { - if (getDataType() == ml::train::TensorDim::DataType::FP32) { - t.setValue(b, c, h, w, from.getValue(b, c, h, w)); - } else if (getDataType() == ml::train::TensorDim::DataType::FP16) { - /// @todo remove #ifdef ENABLE_FP16 -#ifdef ENABLE_FP16 - t.setValue(b, c, h, w, from.getValue<_FP16>(b, c, h, w)); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - } - } - } - } - swap(t, *this); - } -} - -TensorV2 TensorV2::getBatchSlice(size_t offset, unsigned int size) const { - TensorDim dim_ = getDim(); - dim_.batch(size); - - return getSharedDataTensor(dim_, offset * this->getDim().getFeatureLen(), - true, ""); -} - -TensorV2 TensorV2::clone() const { - TensorV2 output(getName(), getFormat(), getDataType()); - output.copy(*this); - return output; -} - -void TensorV2::save(std::ostream &file) { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot save."; - - std::streamsize sz = static_cast(bytes()); - NNTR_THROW_IF(sz < 0, std::invalid_argument) - << "save size: " << bytes() - << " is too big. It cannot be represented by std::streamsize"; - - checkedWrite(file, getData(), sz, "[Tensor::save] operation failed"); - putData(); -} - -void TensorV2::read(std::ifstream &file) { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot read."; - - std::streamsize sz = static_cast(bytes()); - - NNTR_THROW_IF(sz < 0, std::invalid_argument) - << "read size: " << bytes() - << " is too big. It cannot be represented by std::streamsize"; - - checkedRead(file, getData(), sz, "[Tensor::read] operation failed"); - putData(); -} - -std::vector TensorV2::argmax() const { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot get argmax."; - return itensor->argmax(); -} - -float TensorV2::max_abs() const { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot get max_abs."; - return itensor->max_abs(); -} - -float TensorV2::maxValue() const { return itensor->maxValue(); } - -float TensorV2::minValue() const { return itensor->minValue(); } - -TensorV2 TensorV2::transpose(const std::string &direction) const { - TensorV2 output(getDim()); - transpose(direction, output); - return output; -} - -TensorV2 &TensorV2::transpose(const std::string &direction, - TensorV2 &output) const { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous. 
Cannot transpose."; - - if (output.getData() == getData()) { - TensorV2 result = clone(); - return result.transpose(direction, output); - } - - itensor->transpose(direction, output); - - return output; -} - -void TensorV2::reshape(const TensorDim &d) { itensor->reshape(d); } - -void TensorV2::fill(const TensorV2 &from, bool allocate) { - if (allocate && this->empty()) { - this->copy(from); - return; - } - - if (!from.getContiguous() || !getContiguous()) { - /// @todo enable this if needed - throw nntrainer::exception::not_supported( - "[Tensor::fill] non-contiguous tensors are not supported"); - } - - if (getDim() != from.getDim()) { - throw std::invalid_argument("[Tensor::fill] dimension must be the same"); - } - - if (getStrides() != from.getStrides()) { - /// @todo length does not represent buffer size, there should be way to - /// get the buffer size - throw std::invalid_argument("[Tensor::fill] buffer size must be the same"); - } - - copyData(from); -} - -TensorDim TensorV2::getDim() const { return itensor->getDim(); } - -TensorDim::TensorType TensorV2::getTensorType() const { - return itensor->getTensorType(); -}; - -Initializer TensorV2::getInitializer() const { - return itensor->getInitializer(); -} - -TensorDim::Format TensorV2::getFormat() const { return itensor->getFormat(); } - -Tdatatype TensorV2::getDataType() const { return itensor->getDataType(); } - -void TensorV2::updateBatch(unsigned int batch) { itensor->updateBatch(batch); } - -const bool TensorV2::getContiguous() const noexcept { - return itensor->getContiguous(); -} - -const std::array -TensorV2::getStrides() const noexcept { - return itensor->getStrides(); -} - -bool TensorV2::checkContinuous(unsigned int np1, unsigned int np2) const { - if (np1 > 3 || np2 > 3) { - throw std::invalid_argument( - "Error: Input value must be within the range of 0 to 3."); - } - - if (getFormat() == Tformat::NCHW) { - if (np1 + 1 == np2) - return true; - } else { - std::vector continuous_order_nhwc = {0, 3, 1, 2}; - if (continuous_order_nhwc[np2] == continuous_order_nhwc[np1] + 1) - return true; - } - - return false; -} - -void TensorV2::setName(const std::string &name_) { itensor->setName(name_); } - -const std::string &TensorV2::getName() const { return itensor->getName(); } - -size_t TensorV2::getIndex(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const noexcept { - return itensor->getIndex(b, c, h, w); -} - -size_t TensorV2::size() const { return itensor->size(); } - -bool TensorV2::empty() const { return itensor->empty(); } - -size_t TensorV2::bytes() const { return itensor->bytes(); } - -size_t TensorV2::batch() const { return itensor->batch(); } - -size_t TensorV2::channel() const { return itensor->channel(); } - -size_t TensorV2::height() const { return itensor->height(); } - -size_t TensorV2::width() const { return itensor->width(); } - -void TensorV2::mergeAxis(unsigned int axis1, unsigned int axis2) { - NNTR_THROW_IF(!getContiguous(), std::invalid_argument) - << getName() << " is not contiguous, cannot merge axis"; - - if (axis2 != axis1 + 1) - if (!checkContinuous(axis1, axis2)) - throw std::invalid_argument("axis2 must be axis1 + 1 for merging."); - - itensor->mergeAxis(axis1, axis2); -} - -void TensorV2::createSharedDataTensor(const TensorV2 &src, TensorV2 &dest, - size_t offset) const { - itensor->createSharedDataTensor(src.itensor.get(), dest.itensor.get(), - offset); -} - -TensorV2 TensorV2::getSharedDataTensor(const TensorDim dim_, size_t offset, - bool reset_stride, - const std::string &name_) 
const { - TensorV2 ret = *this; - itensor->getSharedDataTensor(dim_, offset, reset_stride, name_, - ret.itensor.get()); - return ret; -} - -void TensorV2::setTensorVar(TensorDim d, void *buf, size_t offset) { - itensor->setTensorVar(d, buf, offset); -} - -std::ostream &operator<<(std::ostream &out, TensorV2 const &input) { - input.print(out); - return out; -} - -} // namespace nntrainer diff --git a/nntrainer/tensor/tensor_v2.h b/nntrainer/tensor/tensor_v2.h deleted file mode 100644 index 21893475ad..0000000000 --- a/nntrainer/tensor/tensor_v2.h +++ /dev/null @@ -1,1467 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -/** - * @file tensor_v2.h - * @date 01 December 2023 - * @brief This is a TensorV2 class - * @see https://github.com/nnstreamer/nntrainer - * @author Jijoong Moon - * @author Donghyeon Jeong - * @bug No known bugs except for NYI items - */ - -#ifndef __TENSOR_V2_H__ -#define __TENSOR_V2_H__ -#ifdef __cplusplus - -#define CREATE_V2_IF_EMPTY_DIMS(tensor, ...) \ - do { \ - if (tensor.empty()) \ - tensor = TensorV2(__VA_ARGS__); \ - } while (0); - -#include - -#include -#include - -namespace nntrainer { - -/** - * @class TensorV2 Class - * @brief TensorV2 Class - */ -class TensorV2 { -public: - /** - * @brief Basic Constructor of Tensor - */ - TensorV2(std::string name_ = "", Tformat fm = Tformat::NCHW, - Tdatatype d_type = Tdatatype::FP32); - - /** - * @brief Constructor of Tensor with dimension, possibly lazily - * @param d Tensor dim for this tensor - * @param alloc_now If the memory of the tensor must be allocated - * @param init Initializer for the tensor - * @param name Name of the tensor - */ - TensorV2(const TensorDim &d, bool alloc_now, - Initializer init = Initializer::NONE, std::string name = ""); - - /** - * @brief Constructor of Tensor with dimension/buf - * @param d Tensor dim for this tensor - * @param buf buffer - * @note Memory for this tensor is instantaneously allocated - */ - TensorV2(const TensorDim &d, const void *buf = nullptr); - - /** - * @brief Constructor of Tensor - * @param[in] d0 Batch of Tensor - * @param[in] d1 Channel - * @param[in] d2 Height - * @param[in] d3 Width - * @param[in] fm Tensor Format - * @param[in] d_type Tensor Data Type - */ - TensorV2(size_t d0, size_t d1, size_t d2, size_t d3, - Tformat fm = Tformat::NCHW, Tdatatype d_type = Tdatatype::FP32) : - TensorV2(TensorDim(d0, d1, d2, d3, fm, d_type), nullptr){}; - - /** - * @brief Constructor of Tensor - * @param[in] d1 Channel - * @param[in] d2 Height - * @param[in] d3 Width - * @param[in] fm Tensor Format - * @param[in] d_type Tensor Data Type - */ - TensorV2(size_t d1, size_t d2, size_t d3, Tformat fm = Tformat::NCHW, - Tdatatype d_type = Tdatatype::FP32) : - TensorV2(1, d1, d2, d3, fm, d_type){}; - - /** - * @brief Constructor of Tensor with batch size one and d1 size one - * @param[in] d2 Height (NCHW) or Width (NHWC) - * @param[in] d3 Width (NCHW) or Channel (NHWC) - * @param[in] fm Tensor Format - * @param[in] d_type Tensor Data Type - */ - TensorV2(size_t d2, size_t d3, Tformat fm = Tformat::NCHW, - Tdatatype d_type = Tdatatype::FP32) : - TensorV2(1, 1, d2, d3, fm, d_type){}; - - /** - * @brief Constructor of Tensor with just Width or Channel - * @param[in] d3 Width (NCHW) or Channel (NHWC) - * @param[in] fm Tensor Format - * @param[in] d_type Tensor Data Type - */ - explicit TensorV2(size_t d3, Tformat fm = Tformat::NCHW, - Tdatatype d_type = Tdatatype::FP32) : - TensorV2(1, 1, 1, d3, fm, d_type){}; - - /** - * @brief Constructor of Tensor - * @param[in] d0 Batch of Tensor - * 
@param[in] d1 Channel (NCHW) or Height (NHWC) - * @param[in] d2 Height (NCHW) or Width (NHWC) - * @param[in] d3 Width (NCHW) or Channel (NHWC) - * @param[in] t_type Tensor Type - */ - TensorV2(size_t d0, size_t d1, size_t d2, size_t d3, - ml::train::TensorDim::TensorType t_type) : - TensorV2(TensorDim(d0, d1, d2, d3, t_type), nullptr){}; - - /** - * @brief Constructor of Tensor - * @param[in] d1 Channel - * @param[in] d2 Height - * @param[in] d3 Width - * @param[in] t_type Tensor Type - */ - TensorV2(size_t d1, size_t d2, size_t d3, - ml::train::TensorDim::TensorType t_type) : - TensorV2(1, d1, d2, d3, t_type){}; - - /** - * @brief Constructor of Tensor with batch size one and d1 size one - * @param[in] d2 Height (NCHW) or Width (NHWC) - * @param[in] d3 Width (NCHW) or Channel (NHWC) - * @param[in] t_type Tensor Type - */ - TensorV2(size_t d2, size_t d3, ml::train::TensorDim::TensorType t_type) : - TensorV2(1, (t_type.format == Tformat::NCHW) ? 1 : d3, - (t_type.format == Tformat::NCHW) ? d2 : 1, - (t_type.format == Tformat::NCHW) ? d3 : d2, t_type){}; - /** - * @brief Constructor of Tensor with just Width or Channel - * @param[in] d3 Width (NCHW) or Channel (NHWC) - * @param[in] t_type Tensor Type - */ - explicit TensorV2(size_t d3, ml::train::TensorDim::TensorType t_type) : - TensorV2(1, (t_type.format == Tformat::NCHW) ? 1 : d3, 1, - (t_type.format == Tformat::NCHW) ? d3 : 1, t_type){}; - - /** - * @brief Constructor of Tensor - * @param[in] d data for the Tensor. It needs to set format properly. - * @param[in] t_type Tensor Type - */ - TensorV2(std::vector>>> const &d, - ml::train::TensorDim::TensorType t_type); - - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor. It needs to set format properly. - * @param[in] t_type Tensor Type - */ - TensorV2(std::vector>> const &d, - ml::train::TensorDim::TensorType t_type) : - TensorV2(std::vector::type>{d}, t_type){}; - - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor with batch size one - * @param[in] t_type Tensor Type - */ - TensorV2(std::vector> const &d, - ml::train::TensorDim::TensorType t_type) : - TensorV2(std::vector::type>{d}, t_type){}; - -#ifdef ENABLE_FP16 - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor with batch size one - * @param[in] t_type Tensor Type - */ - TensorV2(std::vector>>> const &d, - ml::train::TensorDim::TensorType t_type); - - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor. It needs to set format properly. - * @param[in] t_type Tensor Type - */ - TensorV2(std::vector>> const &d, - ml::train::TensorDim::TensorType t_type) : - TensorV2(std::vector::type>{d}, t_type){}; - - /** - * @brief Constructor of Tensor - * @note This constructor copies vector again. needs refactoring - * @param[in] d data for the Tensor with batch size one - * @param[in] t_type Tensor Type - */ - TensorV2(std::vector> const &d, - ml::train::TensorDim::TensorType t_type) : - TensorV2(std::vector::type>{d}, t_type){}; - -#endif - - /** - * @brief Basic Destructor - */ - ~TensorV2() = default; - - /** - * @brief Copy constructor of Tensor. - * @param[in] Tensor & - */ - TensorV2(const TensorV2 &rhs) = default; - - /** - * @brief Move constructor of Tensor. 
- * @param[in] Tensor && - */ - TensorV2(TensorV2 &&rhs) noexcept = default; - - /** - * @brief Copy assignment operator. - * @param[in] rhs Tensor to be copied. - */ - TensorV2 &operator=(const TensorV2 &rhs) = default; - - /** - * @brief Move assignment operator. - * @parma[in] rhs Tensor to be moved. - */ - TensorV2 &operator=(TensorV2 &&rhs) noexcept = default; - - /** - * @brief Comparison operator overload - * @param[in] rhs Tensor to be compared with - */ - bool operator==(const TensorV2 &rhs) const; - - /** - * @brief Comparison operator overload - * @param[in] rhs Tensor to be compared with - */ - bool operator!=(const TensorV2 &rhs) const { return !(*this == rhs); } - - /** - * @brief Construct a new Tensor object from a buffer - * This will not copy buffer to a new tensor but directly uses it - * - * @param[in] buf buffer - * @param[in] bytes buffer size in bytes - * @param[in] d tensor dim - * @param[in] offset offset to be used from current - * @return Tensor object - * @throws std::invalid_argument if buf is null - * @note Note that the buffer is not owned by the mapped tensor - */ - template - static TensorV2 Map(T *buf, unsigned int bytes, const TensorDim &d, - size_t offset = 0) { - if (d.getDataLen() == 0 || buf == nullptr) { - throw std::invalid_argument( - "[Tensor::Map] empty tensor dim is not allowed"); - } - - if (d.getDataLen() * sizeof(T) + offset > bytes) { - throw std::invalid_argument( - "Creating shared tensor of size bigger than tensor memory."); - } - - TensorV2 output; - output.setTensorVar(d, buf, offset); - return output; - }; - - /** - * @brief Allocate memory for this tensor - */ - void allocate(); - - /** - * @brief Deallocate memory for this tensor - * @note This will not necessary free the memory as tensors share memory - */ - void deallocate(); - - /** - * @brief Check if the tensor has memory allocated/assigned/associated - */ - bool isAllocated(); - - /** - * @brief return Data pointer of TensorV2 - * @retval template T pointer - */ - template T *getData() const { - return (T *)itensor->getData(); - } - - /** - * @brief return Data pointer of TensorV2 - * @retval template T pointer - */ - template T *getData(size_t idx) const { - return (T *)itensor->getData(idx); - } - - /** - * @brief i data index - * @retval template T pointer (address of ith data) - */ - template T *getAddress(unsigned int i) { - return (T *)itensor->getAddress(i); - } - - /** - * @brief i data index - * @retval template T pointer (address of ith data) - */ - template const T *getAddress(unsigned int i) const { - return (T *)itensor->getAddress(i); - } - - /** - * @brief get address of n-d data - */ - template - T *getAddress(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) { - return getAddress(getIndex(b, c, h, w)); - } - - /** - * @brief get address of n-d data - */ - template - const T *getAddress(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const { - return getAddress(getIndex(b, c, h, w)); - } - - /** - * @brief return value at specific location - * @param[in] idx location - */ - template - const T &getValue(unsigned int idx) const noexcept { - return getData()[idx]; - } - - /** - * @brief return value at specific location - * @param[in] idx location - */ - template T &getValue(unsigned int idx) noexcept { - return getData()[idx]; - } - - /** - * @brief return value at specific location - * @param[in] b batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location - */ - template - 
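// [Editorial aside, not part of the patch] A sketch of the Map() helper shown
// above: it wraps an existing buffer as a TensorV2 without copying, so the
// caller keeps ownership and must keep the buffer alive. Include path assumed.
#include <tensor_v2.h>

void map_sketch() {
  float buf[12] = {0.0f};
  nntrainer::TensorDim dim(1, 1, 3, 4); // 12 elements

  // Map() throws std::invalid_argument if the dim is empty or if
  // dim.getDataLen() * sizeof(float) + offset exceeds the given byte size.
  nntrainer::TensorV2 view =
    nntrainer::TensorV2::Map<float>(buf, sizeof(buf), dim, /*offset=*/0);

  view.setValue(0, 0, 0, 0, 1.0f); // writes straight into buf[0]
}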
const T &getValue(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const noexcept { - return getValue(getIndex(b, c, h, w)); - } - - /** - * @brief return value at specific location - * @param[in] b batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location - */ - template - T &getValue(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) noexcept { - return getValue(getIndex(b, c, h, w)); - } - - /** - * @brief Fill the Tensor elements with value - * @param[in] value value to be stored - */ - void setValue(float value); - - /** - * @brief Set the element value - * @param[in] b batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location - * @param[in] value value to be stored - */ - void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, - float value); - - /** - * @brief Set the element value - * @param[in] offset offset from start location - * @param[in] value value to be stored - * - * @todo This is a temporary workout. Remove this - */ - void setValueInt(unsigned int offset, int value) noexcept { - int *data_int = (int *)getData(); - data_int[offset] = value; - } - - /** - * @brief add the element value to the location - * @param[in] b batch location - * @param[in] c channel location - * @param[in] h height location - * @param[in] w width location - * @param[in] value value to be stored - * @param[in] beta scalar to multiply output with and add - */ - void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, - float value, float beta) noexcept; - - /** - * @brief Fill the Tensor elements with zero - */ - void setZero(); - - /** - * @brief Set the tensor with random normal distribution - * @param[in] mean mean of the distribution - * @param[in] std standard deviation of the distribution - */ - void setRandNormal(float mean = 0.0f, float stddev = 0.05f); - - /** - * @brief Set the tensor with random uniform distribution - * @param[in] min minimum value for the distribution - * @param[in] max maximum value for the distribution - */ - void setRandUniform(float min = -0.05f, float max = 0.05f); - - /** - * @brief Set the tensor with random bernoulli distribution - * @param[in] probability probability value for the distribution - */ - void setRandBernoulli(float probability = 0.5f); - - /** - * @brief Initialize the memory of the given tensor - */ - void initialize(); - - /** - * @brief Initialize the memory of the given tensor - * @param init Initiailizer to use for the initialization - */ - void initialize(Initializer init); - - /** - * @brief Apply instantly to the element - * @param[in] *function function pointer applied - * @return int ML_ERROR_NONE if successful - */ - template int apply_i(std::function f) { - TensorV2 result = *this; - apply(f, result); - - return ML_ERROR_NONE; - }; - - /** - * @brief Apply function element by element - * @param[in] *function function pointer applied - * @retval Tensor - */ - template TensorV2 apply(std::function f) const { - TensorV2 result; - apply(f, result); - - return result; - }; - - /** - * @brief Apply function element by element - * @param[in] *function function pointer applied - * @param[out] output output tensor - * @retval Tensor - */ - template - TensorV2 &apply(std::function f, TensorV2 &output) const { - CREATE_V2_IF_EMPTY_DIMS( - output, {itensor->getFormat(), itensor->getDataType()}, nullptr); - - if (itensor->getFormat() != output.itensor->getFormat() || 
- itensor->getDataType() != itensor->getDataType()) { - /// @todo add unittest - throw std::invalid_argument( - "[Tensor::apply] output dimension does not match"); - } - - itensor->apply(f, output); - - return output; - } - - /** - * @brief Apply function to Tensor - * @param[in] *function function pointer applied - * @retval Tensor - */ - TensorV2 apply(std::function f) const; - - /** - * @brief Apply function to Tensor - * @param[in] *function function pointer applied - * @param[out] output output tensor - * @retval Tensor - */ - TensorV2 &apply(std::function f, - TensorV2 &output) const; - - /** - * @brief Multiply Tensor Elementwise - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval #ML_ERROR_NONE successful - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to multiply_i - */ - int multiply_i_strided(TensorV2 const &m, const float beta = 0.0); - - /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to multiply - */ - TensorV2 multiply_strided(TensorV2 const &m, const float beta = 0.0) const; - - /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[out] output Tensor to store the result - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to multiply - */ - TensorV2 &multiply_strided(TensorV2 const &m, TensorV2 &output, - const float beta = 0.0) const; - - /** - * @brief Multiply value element by element immediately - * @param[in] value multiplier - * @retval #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right - * @retval #ML_ERROR_NONE Successful - */ - int multiply_i(float const &value); - - /** - * @brief Multiply value element by element - * @param[in] value multiplier - * @retval Calculated Tensor - */ - TensorV2 multiply(float const &value) const; - - /** - * @brief multiply value element by element - * @param[in] value multiplier - * @param[out] out out tensor to store the result - * @retval Calculated Tensor - */ - TensorV2 &multiply(float const &value, TensorV2 &out) const; - - /** - * @brief Multiply Tensor Elementwise - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval #ML_ERROR_NONE successful - */ - int multiply_i(TensorV2 const &m, const float beta = 0.0); - - /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor - */ - TensorV2 multiply(TensorV2 const &m, const float beta = 0.0) const; - - /** - * @brief Multiply Tensor Element by Element ( Not the MxM ) - * @param[in] m Tensor to be multiplied - * @param[out] output Tensor to store the result - * @param[in] beta scalar to multiply output with and add - * @retval Calculated Tensor - */ - TensorV2 &multiply(TensorV2 const &m, TensorV2 &output, - const float beta = 0.0) const; - - /** - * @brief Divide value element by element immediately - * @param[in] value divisor - * @retval #ML_ERROR_INVALID_PARAMETER 
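// [Editorial aside, not part of the patch] A sketch of the element-wise
// apply() template and the multiply() family declared above, using the
// removed TensorV2 API; names and include path are assumptions.
#include <tensor_v2.h>
#include <functional>

void apply_multiply_sketch() {
  nntrainer::TensorV2 a(1, 1, 2, 3);
  a.setRandNormal();

  // apply<float>() maps a scalar function over every element into a new tensor.
  nntrainer::TensorV2 squared = a.apply<float>([](float x) { return x * x; });

  // multiply() returns a scaled copy; multiply_i() scales in place.
  nntrainer::TensorV2 half = squared.multiply(0.5f);
  half.multiply_i(2.0f); // back to the values of `squared`
}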
Tensor dimension is not right - * @retval #ML_ERROR_NONE Successful - */ - int divide_i(float const &value); - - /** - * @brief Divide value element by element - * @param[in] value Divisor - * @retval Calculated Tensor - */ - TensorV2 divide(float const &value) const; - - /** - * @brief Divide value element by element - * @param[in] value Divisor - * @param[out] output Tensor to store the result - * @retval Calculated Tensor - */ - TensorV2 &divide(float const &value, TensorV2 &output) const; - - /** - * @brief divide Tensor Elementwise - * @param[in] m Tensor to be multiplied - * @retval #ML_ERROR_NONE successful - */ - int divide_i(TensorV2 const &m); - - /** - * @brief Divide Tensor Element by Element - * @param[in] m Divisor Tensor - * @retval Calculated Tensor - */ - TensorV2 divide(TensorV2 const &m) const; - - /** - * @brief divide Tensor Elementwise - * @param[in] m Tensor to be multiplied - * @param[out] output Tensor to store the result - * @retval Calculated Tensor - */ - TensorV2 &divide(TensorV2 const &m, TensorV2 &output) const; - - /** - * @brief Add Tensor Elementwise - * @param[in] input Tensor to be added - * @param[in] beta scalar to add output with and add - * @retval #ML_ERROR_NONE successful - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to add_i - */ - int add_i_strided(TensorV2 const &input, const float beta = 0.0); - - /** - * @brief Add Tensor Element by Element - * @param[in] input Tensor to be added - * @param[in] beta Value to be scale the input tensor - * @retval Calculated Tensor - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to add - */ - TensorV2 add_strided(TensorV2 const &input, const float beta = 0.0) const; - - /** - * @brief Add Tensor Element by Element - * @param[in] input Tensor to be added - * @param[out] output Tensor to store the result - * @param[in] beta Value to be scale the input tensor - * @retval Calculated Tensor - * - * @note support different strided inputs and output - * @note does not support broadcasting - * - * @todo merge this to add - */ - TensorV2 &add_strided(TensorV2 const &input, TensorV2 &output, - const float beta = 0.0) const; - - /** - * @brief Add Tensor Element immediately to target tensor without mem copy - * @param[in] value value to be added - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter - */ - int add_i(float const &value); - - /** - * @brief Add value Element by Element - * @param[in] value value to be added - * @retval Calculated Tensor - */ - TensorV2 add(float const &value) const; - - /** - * @brief Add Tensor Element by Element - * @param[in] value value to be added - * @param[out] output Tensor to save output without allocating new memory - * @retval Calculated Tensor - */ - TensorV2 &add(float const &value, TensorV2 &output) const; - - /** - * @brief Add Tensor Element by Element without mem copy - * @param[in] m Tensor to be added - * @param[in] alpha Values to be scaled - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter - */ - int add_i(TensorV2 const &m, float const alpha = 1); - - /** - * @brief Add Tensor Element by Element - * @param[in] m Tensor to be added - * @param[in] alpha Values to be scaled - * @retval Calculated Tensor - */ - TensorV2 add(TensorV2 const &m, float const alpha = 1) const; - - /** - * @brief Add Tensor Element by Element - * @param[in] m Tensor to be
added - * @param[out] output Tensor to be out - * @param[in] alpha Values to be scaled - * @retval Calculated Tensor - */ - TensorV2 &add(TensorV2 const &m, TensorV2 &output, - float const alpha = 1) const; - - /** - * @brief memcpyless version of subtract - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter - */ - int subtract_i(float const &value); - - /** - * @brief subtract value Element by Element - * @param[in] value value to be subtracted - * @retval Calculated Tensor - */ - TensorV2 subtract(float const &value) const; - - /** - * @brief Subtract Tensor Element by Element - * @param[in] value value to be added - * @param[out] output Tensor to save output without allocating new memory - * @retval Calculated Tensor - */ - TensorV2 &subtract(float const &value, TensorV2 &output) const; - - /** - * @brief memcpyless version of subtract - * @param[in] m Tensor to be subtracted - * @retval #ML_ERROR_NONE Successful - * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter - */ - int subtract_i(TensorV2 const &m); - - /** - * @brief Substract Tensor Element by Element - * @param[in] m Tensor to be subtracted - * @retval Calculated Tensor - */ - TensorV2 subtract(TensorV2 const &m) const; - - /** - * @brief Subtract Tensor Element by Element - * @param[in] m Tensor to be added - * @param[out] output Tensor to be out - * @retval Calculated Tensor - */ - TensorV2 &subtract(TensorV2 const &m, TensorV2 &output) const; - - /** - * @brief sum all the Tensor elements according to the batch - * @retval Calculated Tensor(batch, 1, 1, 1) - */ - TensorV2 sum_by_batch() const; - - /** - * @brief sum all the Tensor elements according to the axis - * 0 : batch direction - * 1 : channel direction - * 2 : height direction - * 3 : width direction - * @param[in] axis Axis to calculate sum along - * @param[in] alpha Scale the sum by this value - * @retval Calculated Tensor - */ - TensorV2 sum(unsigned int axis, float alpha = 1.0) const; - - /** - * @brief sum all the Tensor elements according to the axis - * 0 : batch direction - * 1 : channel direction - * 2 : height direction - * 3 : width direction - * @param[in] axis Axis to calculate sum along - * @param[out] output output tensor - * @param[in] alpha Scale the sum by this value - * @retval Calculated Tensor - */ - TensorV2 &sum(unsigned int axis, TensorV2 &output, float alpha = 1.0, - float beta = 0.0) const; - - /** - * @brief sum all the Tensor by multiple axes - * - * @param axes axes to sum along - * @param alpha Scale the sum by this value - * @return Tensor - */ - TensorV2 sum(const std::vector &axes, float alpha = 1.0) const; - - /** - * @brief sum all the Tensor by multiple axes - * - * @param axes axes to sum along - * @param[out] output output tensor - * @param alpha Scale the sum by this value - * @return Tensor - */ - TensorV2 &sum(const std::vector &axes, TensorV2 &output, - float alpha = 1.0) const; - - /** - * @brief Averaging the Tensor elements according to the axis - * 0 : batch direction - * 1 : channel direction - * 2 : height direction - * 3 : width direction - * @retval Calculated Tensor - */ - TensorV2 average(unsigned int axis) const; - - /** - * @brief Averaging the Tensor elements according to the axis - * @retval Calculated Tensor - */ - TensorV2 &average(unsigned int axis, TensorV2 &output) const; - - /** - * @brief Average all the Tensor by multiple axes - * @param[in] axes axes to sum along - * @retval Calculated Tensor - */ - TensorV2 average(const std::vector &axes) const; - - /** 
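// [Editorial aside, not part of the patch] A sketch of the axis convention
// documented above for sum() (0: batch, 1: channel, 2: height, 3: width),
// using the removed TensorV2 API; include path assumed.
#include <tensor_v2.h>

void sum_sketch() {
  nntrainer::TensorV2 t(2, 1, 3, 4);
  t.setValue(1.0f); // fill with ones

  // Summing over the width axis collapses it to 1, giving a 2x1x3x1 tensor
  // whose entries all equal 4 (the original width).
  nntrainer::TensorV2 width_sum = t.sum(3);

  // sum_by_batch() keeps only the batch axis: a 2x1x1x1 tensor whose entries
  // equal 1 * 3 * 4 = 12 here.
  nntrainer::TensorV2 per_batch = t.sum_by_batch();
}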
- * @brief Average all the Tensor by multiple axes - * @param[in] axes axes to sum along - * @param[out] output output tensor - * @retval Calculated Tensor - */ - TensorV2 &average(const std::vector &axes, - TensorV2 &output) const; - - /** - * @brief Average the Tensor elements by all axis - * @retval Calculated Tensor - */ - TensorV2 average() const; - - /** - * @brief Averaging the Tensor elements by all axis - * @retval Calculated Tensor - */ - TensorV2 &average(TensorV2 &output) const; - - /** - * @brief Tensor power element without mem copy - * @param[in] exponent exponent - * @retval #ML_ERROR_NONE Successful - */ - int pow_i(float exponent); - - /** - * @brief Tensor power element by element - * @param[in] exponent exponent - * @retval Calculated Tensor - */ - TensorV2 pow(float exponent) const; - - /** - * @brief Tensor power element by element - * @param[in] exponent exponent - * @param[out] output out to store the result - * @retval Calculated Tensor - */ - TensorV2 &pow(float exponent, TensorV2 &output) const; - - /** - * @brief Gauss error function - * @retval #ML_ERROR_NONE Successful - */ - int erf_i(); - - /** - * @brief Gauss error function - * @retval Calculated Tensor - */ - TensorV2 erf() const; - - /** - * @brief Gauss error function - * @param[out] output out to store the result - * @retval Calculated Tensor - */ - TensorV2 &erf(TensorV2 &output) const; - - /** - * @brief sin transform function - * @param[out] out out to store the result - */ - void sin(TensorV2 &out, float alpha = 1.0); - - /** - * @brief cos transform function - * @param[out] out out to store the result - */ - void cos(TensorV2 &out, float alpha = 1.0); - - /** - * @brief l2norm the Tensor elements - * @retval Calculated l2norm - */ - float l2norm() const; - - /** - * @brief Normalize the Tensor elements - * @retval Calculated Tensor - */ - TensorV2 &normalization(TensorV2 &output) const; - - /** - * @brief Standardize the Tensor elements - * @retval Calculated Tensor - */ - TensorV2 &standardization(TensorV2 &output) const; - - /** - * @brief Normalize the Tensor elements in-place - * @retval Calculated Tensor - */ - void normalization_i(); - - /** - * @brief Standardize the Tensor elements in-place - * @retval Calculated Tensor - */ - void standardization_i(); - - /** - * @brief Dot Product of Tensor ( equal MxM ) - * @details This applies dot of the last dimension of this and second-last - * dimension of passed input tensor. - * @param[in] input Tensor - * @param[in] trans Transpose - * @param[in] trans_in Transpose input - * @retval Calculated Tensor - */ - TensorV2 dot(TensorV2 const &input, bool trans = false, - bool trans_in = false) const; - - /** - * @brief Dot Product of Tensor ( equal MxM ) - * @details This applies dot of the last dimension of this and - * second-last dimension of passed input tensor. 
- * @param[in] input Tensor - * @param[in] output output Tensor - * @param[in] trans Transpose - * @param[in] trans_in Transpose input - * @param[in] beta beta - * @retval Calculated Tensor - */ - TensorV2 &dot(TensorV2 const &input, TensorV2 &output, bool trans = false, - bool trans_in = false, float beta = 0.0f) const; - - /** - * @brief compute the derivative of this in the current tensor - * @param input same as given to the dot() - * @param output_deriv the derivative of the output - * @param[in] trans same as given to the dot() - * @param[in] trans_in same as given to the dot() - * @param[in] beta same as given to the dot() - * @note This will compute the derivative in-place and will overwrite - existing - * data in the tensor - */ - TensorV2 &dot_deriv_wrt_1(TensorV2 const &input, TensorV2 const &output_deriv, - bool trans = false, bool trans_in = false, - float beta = 0.0f); - - /** - * @brief compute the derivative wrt m in the input tensor - * @param input_deriv tensor where derivative wrt m will be stored - * @param output_deriv the derivative of the output - * @param[in] trans same as given to the dot() - * @param[in] trans_in same as given to the dot() - * @param[in] beta same as given to the dot() - * @note The caller tensor must be the same tensor as the one which called - the dot() product. - */ - TensorV2 &dot_deriv_wrt_2(TensorV2 &input_deriv, TensorV2 const &output_deriv, - bool trans = false, bool trans_in = false, - float beta = 0.0f) const; - - /** - * @copydoc Tensor::dot(Tensor const &input, Tensor &output, bool trans, - bool trans_in, float beta) const - * @details performs dot operation over a batch of inputs - */ - TensorV2 &dotBatched(TensorV2 const &input, TensorV2 &result, - bool trans = false, bool trans_in = false, - float beta = 0.0f) const; - - /** - * @copydoc Tensor::dot_deriv_wrt_1(Tensor const &input, Tensor const - &output_deriv, bool trans, bool trans_in, float beta) - */ - TensorV2 &dot_batched_deriv_wrt_1(TensorV2 const &input, - TensorV2 const &output_deriv, - bool trans = false, bool trans_in = false, - float beta = 0.0f); - - /** - * @brief Tensor::dot_deriv_wrt_2(Tensor const &input_deriv, Tensor const - &output_deriv, bool trans, bool trans_in, float beta) const - */ - TensorV2 &dot_batched_deriv_wrt_2(TensorV2 &input_deriv, - TensorV2 const &output_deriv, - bool trans = false, bool trans_in = false, - float beta = 0.0f) const; - - /** - * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) - * @param dropout drop out rate - * @retval Tensor& reference of drop out mask - */ - TensorV2 dropout_mask(float dropout) const; - - /** - * @brief Calculate Drop Out Mask : x * 1.0/(1.0-rate) inplace - * @param dropout drop out rate - */ - void dropout_mask(float dropout); - - /** - * @brief Calculate filter mask - * @param mask_len length of each mask along the last axis - * @param invert invert the mask - */ - void filter_mask(const TensorV2 &mask_len, bool reverse = false); - - /** - * @brief Calculate 2 Zone Out Mask - * @details Calculate zone out mask according to the bernoulli distribution. - * Zone out mask with rate @a zoneout for inplace and the other zone out mask - * with rate @a (1-zoneout). - * @param zoneout zone out rate - * @retval Tensor zone out mask for opposite tensor - */ - TensorV2 zoneout_mask(float zoneout); - - /** - * @brief Calculate 2 Zone Out Mask - * @details Calculate zone out mask according to the bernoulli distribution. 
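// [Editorial aside, not part of the patch] A sketch of the dot() overloads
// declared above: the last two axes act as a matrix, so (1,1,M,K) dot
// (1,1,K,N) gives (1,1,M,N); trans / trans_in transpose either operand.
// Uses the removed TensorV2 API; include path assumed.
#include <tensor_v2.h>

void dot_sketch() {
  nntrainer::TensorV2 a(1, 1, 2, 3); // M = 2, K = 3
  nntrainer::TensorV2 b(1, 1, 3, 4); // K = 3, N = 4
  a.setRandNormal();
  b.setRandNormal();

  nntrainer::TensorV2 c = a.dot(b); // 1x1x2x4
  nntrainer::TensorV2 gram = a.dot(a, /*trans=*/false, /*trans_in=*/true);
  // gram is a * a^T, a 1x1x2x2 tensor
}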
- * Zone out mask with rate @a zoneout for inplace and the other zone out mask - * with rate @a (1-zoneout). - * @param opposite opposite zone out mask - * @param zoneout zone out rate - */ - void zoneout_mask(TensorV2 &opposite, float zoneout); - - /** - * @brief split tensor along axis. - * - * @param num_size num_size - * @param axis axis - * @return Tensor splitted tensor - */ - std::vector split(unsigned num_size, int axis = 0); - - /** - * @brief split tensor along axis. - * - * @param sizes sizes - * @param axis axis - * @return Tensor splitted tensor - * @note if the given array sizes is just a 1 unsigned int value, assumes that - * it divide tensor by given size evenly - */ - std::vector split(std::vector sizes, int axis = 0); - - /** - * @brief concatenate tensors along axis - * - * @param tensors tensors to be concatenated to the first tensor - * @param axis axis - * @return Tensor concatenated tensor - */ - static TensorV2 cat(const std::vector &tensors, int axis = 0); - - /** - * @brief Print element - * @param[in] out out stream - */ - void print(std::ostream &out) const; - - /** - * @brief put data of Tensor - * @note It is only effective when memory_swap is used - */ - void putData() const; - - /** - * @brief Set the memory buffer for the tensor - * - * @param buf the memory buffer - * @param init intialize the buffer - */ - void setData(const std::shared_ptr buf, size_t off = 0, - bool init = false); - - /** - * @brief return Data pointer of Tensor - * @retval template T pointer (float pointer as default) - */ - const std::shared_ptr getMemoryData() const; - - /** - * @brief return offset - */ - size_t getOffset() const; - - /** - * @brief Copy the Tensor - * @param[in] from Tensor to be copied - * - * @note copy can reshape the tensor to match the shape - * @note support copying data from multiple data type - */ - void copy(const TensorV2 &from); - - /** - * @brief Copy the Tensor - * @param[in] from Tensor to be copied - * @note support copying data from multiple data type - */ - void copyData(const TensorV2 &from); - - /** - * @brief Copy the Tensor - * @param[in] from Tensor to be copied - * @note only support copying data from tensor with the same data type - */ - void copy_with_stride(const TensorV2 &from); - - /** - * @brief Get slice of the tensor, sliced by batch - * @param[in] offset offset in batch to start the slice - * @param[in] size size of the slice - * @retval slice of this tensor - * @note This function provides a slice of this tensor, and does not create a - * copy - */ - TensorV2 getBatchSlice(size_t offset, unsigned int size) const; - - /** - * @brief Convient wrapper for inplace copy of @a this. 
- * @retval Copied version of this - */ - TensorV2 clone() const; - - /** - * @brief Save the Tensor into file - * @param[in] file output file stream - */ - void save(std::ostream &file); - - /** - * @brief Read the Tensor from file - * @param[in] file input file stream - */ - void read(std::ifstream &file); - - /** - * @brief return argument index which value is max by batch - * @retval unsigned int argument indices - */ - std::vector argmax() const; - - /** - * @brief return max of the absolute values of the tensor - * @retval maximum absolute value - */ - float max_abs() const; - - /** - * @brief return maximum value - * @retval Maximum value of the tensor data - */ - float maxValue() const; - - /** - * @brief return minimum value - * @retval Minimum value of the tensor data - */ - float minValue() const; - - /** - * @brief Transpose Tensor - * @param direction to transpose ex) 0:2:1 - * @return Tensor - */ - TensorV2 transpose(const std::string &direction) const; - - /** - * @brief Transpose Tensor - * @param direction to transpose ex) 0:2:1 - * @param[out] Tensor to save to, dimension is always reshaped. - * @retval Tensor& reference to the out - */ - TensorV2 &transpose(const std::string &direction, TensorV2 &out) const; - - /** - * @brief set Tensor Dim - * @param[in] d TensorDim - * @note Throws std::invalid_argument if size mismatch - */ - void reshape(const TensorDim &d); - - /** - * @brief fill tensor data with current value, - * if dimension is not exactly same, it is a hard error in this function - * so, only stride is overriden to @a this - * - * @param from Tensor to fill the data from - * @param allocate if unallocated, allocate with from.getDim() - * @throws std::invalid_argument if dimension and stride does not match - */ - void fill(const TensorV2 &from, bool allocate = false); - - /** - * @brief return a copy of the Tensor Dim - * @retval TensorDim - */ - TensorDim getDim() const; - - /** - * @brief return Tensor Type - */ - TensorDim::TensorType getTensorType() const; - - /** - * @brief Get initializer for the tensor - * - * @return initializer of the tensor - */ - Initializer getInitializer() const; - - /** - * @brief Get format for the tensor - * @return format of the tensor - */ - TensorDim::Format getFormat() const; - - /** - * @brief Get data type for the tensor - * - * @return data type of the tensor - */ - Tdatatype getDataType() const; - - /** - * @brief update batch size for this tensor - * @param batch size - * @note The batchsize of src_tensor need not be related with this - * tensor's batch size - * - * @note The memory for this tensor will re-allocated/re-assigned if the - * updated batch size is different than the current batch size. - * - * @note If this tensor is/was the src_tensor for some other, then - * reduction in batch size can make the dependent tensors allocate fail due to - * memory smaller. Caller must handle this in their own end. - * - * @note If this tensor is re-allocated, then the memory might not be - * immediately freed as the tensor already depending on this tensor also - * share the same memory. So, the peak memory consumption in worst case can - * reach the total memory requirements of a model with old batchsize and the - * new batch size. It is recommended to first deallocate all the tensors, - * updateBatch and then allocate again to avoid such issues. - */ - void updateBatch(unsigned int batch); - - /** - * @brief return whether tensor is contiguous or not. 
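// [Editorial aside, not part of the patch] A sketch of the save()/read() pair
// declared above: both require a contiguous tensor and stream the raw buffer,
// so a round trip through a binary file restores the contents. File name and
// include path are assumptions.
#include <tensor_v2.h>
#include <fstream>

void save_read_sketch() {
  nntrainer::TensorV2 src(1, 1, 2, 2);
  src.setRandUniform();

  std::ofstream out("tensor.bin", std::ios::binary);
  src.save(out);
  out.close();

  nntrainer::TensorV2 dst(1, 1, 2, 2); // same dimensions as the saved tensor
  std::ifstream in("tensor.bin", std::ios::binary);
  dst.read(in);
}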
- * @retval bool contiguous - */ - const bool getContiguous() const noexcept; - - /** - * @brief return current stride of tensor. - * @retval int[MAXDIM] strides - */ - const std::array getStrides() const noexcept; - - /** - * @brief Check if two given axes are contiguous - * @param[in] np1 first axis - * @param[in] np2 second axis to compare with first axis - * @retval bool continuous - */ - bool checkContinuous(unsigned int np1, unsigned int np2) const; - - /** - * @brief Set name of the tensor - * @param[in] name_ tensor name - */ - void setName(const std::string &name_); - - /** - * @brief Get name of the tensor - * @retval string name - */ - const std::string &getName() const; - - /** - * @brief Get linear index given the n-d index - */ - size_t getIndex(unsigned int b, unsigned int c, unsigned int h, - unsigned int w) const noexcept; - /** - * @brief Get size of current tensor - * @retval unsigned int size of the current tensor - */ - size_t size() const; - - /** - * @brief Get if the tensor is empty - * @retval true if the tensor is empty - */ - bool empty() const; - - /** - * @brief Get size of the data in bytes - * @retval size_t Size in bytes - */ - size_t bytes() const; - - /** - * @brief return Tensor batch size - * @retval batch size - */ - size_t batch() const; - - /** - * @brief return Tensor channel size - * @retval channel size - */ - size_t channel() const; - - /** - * @brief return Tensor height size - * @retval height size - */ - size_t height() const; - - /** - * @brief return Tensor width size - * @retval width size - */ - size_t width() const; - - /** - * @brief Merge the given two axis for tensor at second axis inplace - * - * @param axis1 first axis to merge - * @param axis2 second axis to merge - */ - void mergeAxis(unsigned int axis1, unsigned int axis2); - - /** - * @brief Update destination tensor to share memory with source tensor - * - * @param src src tensor containing the memory - * @param dest destination tensor which will share the memory - * @param offset offset to be used from the start of the data in bytes - * @note The new tensor will share the same data as the current tensor but - * can have different size. - * @note New size added with offset must be less than the size of the original - * tensor. - */ - void createSharedDataTensor(const TensorV2 &src, TensorV2 &dest, - size_t offset) const; - - /** - * @brief Get new tensor which shares memory with current tensor but different - * shape - * - * @param dim new dimension to be set for this tensor - * @param offset offset to be used from the start of the data in elements - * @note The new tensor will share the same data as the current tensor but - * can have different size. - * @note New size added with offset must be less than the size of the original - * tensor. 
- */ - TensorV2 getSharedDataTensor(const TensorDim dim_, size_t offset, - bool reset_stride = true, - const std::string &name_ = "") const; - - /** - * @brief Swaps Tensor lhs and rhs - * @param[in] lhs Tensor to be swapped - * @param[in] rhs Tensor to be swapped - */ - friend void swap(TensorV2 &lhs, TensorV2 &rhs) noexcept { - std::swap(lhs.itensor, rhs.itensor); - } - -private: - std::shared_ptr itensor; - - /** - * @brief Set tensor variables - * - * @param[in] d TensorDim - * @param[in] buf buffer - * @param[in] offset offset to be used - */ - void setTensorVar(TensorDim d, void *buf, size_t offset); -}; - -/** - * @brief Overriding output stream - */ -std::ostream &operator<<(std::ostream &out, TensorV2 const &input); - -} // namespace nntrainer - -#endif /* __cplusplus */ -#endif /* __TENSOR_V2_H__ */ diff --git a/nntrainer/tensor/tensor_wrap_specs.h b/nntrainer/tensor/tensor_wrap_specs.h index 6a5195fef5..3f5f9b192d 100644 --- a/nntrainer/tensor/tensor_wrap_specs.h +++ b/nntrainer/tensor/tensor_wrap_specs.h @@ -75,9 +75,8 @@ enum class TensorLifespan { * regularizer_constant, decay, clip gradient constant, need_gradient property, * name, output axis of the tensor object and loss Scale Factor. */ -typedef std::tuple +typedef std::tuple WeightSpec; /** @@ -86,7 +85,7 @@ typedef std::tuple VarGradSpec; @@ -131,8 +130,7 @@ struct TensorSpecV2 { std::string name; /**< Identifier */ TensorDim dim; /**< dimension */ TensorLifespan ls; /**< lifespan */ - Tensor::Initializer initializer = - Tensor::Initializer::NONE; /**< initializer */ + Initializer initializer = Initializer::NONE; /**< initializer */ /** ONLY USED FOR READ_ONLY_VIEW, MAYBE_MODIFYING_VIEW */ unsigned int offset = 0u; /**< tensor offset */ diff --git a/nntrainer/tensor/var_grad.cpp b/nntrainer/tensor/var_grad.cpp index 09dbf6267e..e91c918a9f 100644 --- a/nntrainer/tensor/var_grad.cpp +++ b/nntrainer/tensor/var_grad.cpp @@ -18,7 +18,7 @@ namespace nntrainer { -Var_Grad::Var_Grad(const TensorDim &dim, const Tensor::Initializer init, +Var_Grad::Var_Grad(const TensorDim &dim, const Initializer init, bool need_gradient, bool alloc_now, const std::string &name) : is_dependent(false), @@ -32,15 +32,15 @@ Var_Grad::Var_Grad(const TensorDim &dim, const Tensor::Initializer init, * @todo gradient initializer should be none, and then they should be set * zero right before using by the user itself. */ - grad = std::make_shared(dim, alloc_now, Tensor::Initializer::ZEROS, - grad_name); + grad = + std::make_shared(dim, alloc_now, Initializer::ZEROS, grad_name); else grad = std::make_shared(grad_name); } Var_Grad::Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g, - const Tensor::Initializer init, bool need_gradient, - bool alloc_now, const std::string &name) : + const Initializer init, bool need_gradient, bool alloc_now, + const std::string &name) : is_dependent(false), is_first_access_gradient(false), is_last_access_gradient(false) { @@ -53,8 +53,8 @@ Var_Grad::Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g, * zero right before using by the user itself. 
*/ - grad = std::make_shared(dim_g, alloc_now, - Tensor::Initializer::ZEROS, grad_name); + grad = + std::make_shared(dim_g, alloc_now, Initializer::ZEROS, grad_name); else grad = std::make_shared(grad_name); } diff --git a/nntrainer/tensor/var_grad.h b/nntrainer/tensor/var_grad.h index 52cabbc055..48a4fcf261 100644 --- a/nntrainer/tensor/var_grad.h +++ b/nntrainer/tensor/var_grad.h @@ -55,9 +55,8 @@ class Var_Grad { * @param name Name for this Var_Grad */ explicit Var_Grad(const TensorDim &dim, - const Tensor::Initializer init = Tensor::Initializer::NONE, - bool ng = true, bool alloc_now = false, - const std::string &name = ""); + const Initializer init = Initializer::NONE, bool ng = true, + bool alloc_now = false, const std::string &name = ""); /** * @brief Construct a new Var_Grad object @@ -69,9 +68,8 @@ class Var_Grad { * @param name Name for this Var_Grad */ explicit Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g, - const Tensor::Initializer init = Tensor::Initializer::NONE, - bool ng = true, bool alloc_now = false, - const std::string &name = ""); + const Initializer init = Initializer::NONE, bool ng = true, + bool alloc_now = false, const std::string &name = ""); /** * @brief Construct a new Var_Grad object diff --git a/nntrainer/tensor/weight.cpp b/nntrainer/tensor/weight.cpp index f98c8c8356..b1b3990388 100644 --- a/nntrainer/tensor/weight.cpp +++ b/nntrainer/tensor/weight.cpp @@ -18,7 +18,7 @@ namespace nntrainer { -Weight::Weight(const TensorDim &dim, const Tensor::Initializer init, +Weight::Weight(const TensorDim &dim, const Initializer init, const WeightRegularizer reg, const float reg_const, const float decay_const, const float max_norm, bool train, bool alloc_now_, std::string name, unsigned int axis, @@ -30,14 +30,14 @@ Weight::Weight(const TensorDim &dim, const Tensor::Initializer init, clip_by_global_norm(max_norm), output_axis(axis), loss_scale(loss_scale_) { - if (init == Tensor::Initializer::NONE) + if (init == Initializer::NONE) throw std::invalid_argument("Weight initializer cannot be none"); if (regularizer == WeightRegularizer::UNKNOWN) throw std::invalid_argument("Weight regularizer unknown"); } Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g, - const Tensor::Initializer init, const WeightRegularizer reg, + const Initializer init, const WeightRegularizer reg, const float reg_const, const float decay_const, const float max_norm, bool train, bool alloc_now_, std::string name, unsigned int axis, float loss_scale_) : @@ -48,7 +48,7 @@ Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g, clip_by_global_norm(max_norm), output_axis(axis), loss_scale(loss_scale_) { - if (init == Tensor::Initializer::NONE) + if (init == Initializer::NONE) throw std::invalid_argument("Weight initializer cannot be none"); if (regularizer == WeightRegularizer::UNKNOWN) throw std::invalid_argument("Weight regularizer unknown"); diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h index 552f6d5739..36005eaee2 100644 --- a/nntrainer/tensor/weight.h +++ b/nntrainer/tensor/weight.h @@ -59,14 +59,13 @@ class Weight : public Var_Grad { * @param alloc_now The memory for the weight tensors be allocated upon init * @param name Name for this weight */ - explicit Weight( - const TensorDim &dim, - const Tensor::Initializer init = Tensor::Initializer::XAVIER_UNIFORM, - const WeightRegularizer reg = WeightRegularizer::NONE, - const float reg_const = 1.0f, const float decay = 0.0f, - const float clip_by_global_norm = 0.0f, bool ng = true, - bool alloc_now = 
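// [Editorial aside, not part of the patch] After this change, call sites spell
// the initializer enum as nntrainer::Initializer instead of
// nntrainer::Tensor::Initializer, e.g. when constructing a Weight directly as
// the updated unit tests below do. Include path assumed.
#include <weight.h>

void make_weight_sketch() {
  nntrainer::TensorDim dim({1, 1, 4, 4});
  nntrainer::Weight w(dim, nntrainer::Initializer::XAVIER_UNIFORM,
                      nntrainer::WeightRegularizer::NONE,
                      /*reg_const=*/1.0f, /*decay=*/0.0f,
                      /*clip_by_global_norm=*/0.0f,
                      /*ng=*/true, /*alloc_now=*/false, "demo_weight");
}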
false, std::string name = "", unsigned int axis = 3, - float loss_scale_ = 0.0); + explicit Weight(const TensorDim &dim, + const Initializer init = Initializer::XAVIER_UNIFORM, + const WeightRegularizer reg = WeightRegularizer::NONE, + const float reg_const = 1.0f, const float decay = 0.0f, + const float clip_by_global_norm = 0.0f, bool ng = true, + bool alloc_now = false, std::string name = "", + unsigned int axis = 3, float loss_scale_ = 0.0); /** * @brief Construct a new Weight object @@ -80,14 +79,13 @@ class Weight : public Var_Grad { * @param alloc_now The memory for the weight tensors be allocated upon init * @param name Name for this weight */ - explicit Weight( - const TensorDim &dim_v, const TensorDim &dim_g, - const Tensor::Initializer init = Tensor::Initializer::XAVIER_UNIFORM, - const WeightRegularizer reg = WeightRegularizer::NONE, - const float reg_const = 1.0f, const float decay = 0.0f, - const float clip_by_global_norm = 0.0f, bool ng = true, - bool alloc_now = false, std::string name = "", unsigned int axis = 3, - float loss_scale_ = 0.0); + explicit Weight(const TensorDim &dim_v, const TensorDim &dim_g, + const Initializer init = Initializer::XAVIER_UNIFORM, + const WeightRegularizer reg = WeightRegularizer::NONE, + const float reg_const = 1.0f, const float decay = 0.0f, + const float clip_by_global_norm = 0.0f, bool ng = true, + bool alloc_now = false, std::string name = "", + unsigned int axis = 3, float loss_scale_ = 0.0); /** * @brief Construct a new Weight object @@ -97,7 +95,7 @@ class Weight : public Var_Grad { explicit Weight(const Spec &spec, bool alloc_now = false) : Weight(std::get<0>(spec), // TensorDim for Variable std::get<1>(spec), // TensorDim for Gradient - std::get<2>(spec), // Tensor::Initializer + std::get<2>(spec), // Initializer std::get<3>(spec), // WeightRegularizer std::get<4>(spec), // WeightRegularizerConstant std::get<5>(spec), // weight decay constant diff --git a/packaging/nntrainer.spec b/packaging/nntrainer.spec index 36ba371d22..5788e708b5 100644 --- a/packaging/nntrainer.spec +++ b/packaging/nntrainer.spec @@ -527,7 +527,6 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/ # tensor headers %{_includedir}/nntrainer/memory_data.h %{_includedir}/nntrainer/tensor.h -%{_includedir}/nntrainer/tensor_v2.h %{_includedir}/nntrainer/tensor_base.h %{_includedir}/nntrainer/float_tensor.h %if 0%{?enable_fp16} diff --git a/test/include/nntrainer_test_util.h b/test/include/nntrainer_test_util.h index 74eef4abaa..94601938f9 100644 --- a/test/include/nntrainer_test_util.h +++ b/test/include/nntrainer_test_util.h @@ -38,7 +38,6 @@ #include #include #include -#include /** tolerance is reduced for packaging, but CI runs at full tolerance */ #ifdef REDUCE_TOLERANCE @@ -170,31 +169,6 @@ randUniform(unsigned int batch, unsigned channel, unsigned height, nntrainer::Tformat fm = nntrainer::Tformat::NCHW, nntrainer::Tdatatype d_type = nntrainer::Tdatatype::FP32); -/** - * @brief return a tensor filled with contant value with dimension - */ -nntrainer::TensorV2 -constantV2(float value, unsigned int d0, unsigned d1, unsigned d2, unsigned d3, - nntrainer::Tformat fm = nntrainer::Tformat::NCHW, - nntrainer::Tdatatype d_type = nntrainer::Tdatatype::FP32); - -/** - * @brief return a tensor filled with ranged value with given dimension - */ -nntrainer::TensorV2 -rangedV2(unsigned int batch, unsigned channel, unsigned height, unsigned width, - nntrainer::Tformat fm = nntrainer::Tformat::NCHW, - nntrainer::Tdatatype d_type = nntrainer::Tdatatype::FP32); - -/** - 
* @brief return a tensor filled with random value with given dimension - */ -nntrainer::TensorV2 -randUniformV2(unsigned int batch, unsigned channel, unsigned height, - unsigned width, float min = -1, float max = 1, - nntrainer::Tformat fm = nntrainer::Tformat::NCHW, - nntrainer::Tdatatype d_type = nntrainer::Tdatatype::FP32); - /** * @brief replace string and save in file * @param[in] from string to be replaced diff --git a/test/nntrainer_test_util.cpp b/test/nntrainer_test_util.cpp index bcc33e40c8..260727f212 100644 --- a/test/nntrainer_test_util.cpp +++ b/test/nntrainer_test_util.cpp @@ -213,45 +213,6 @@ nntrainer::Tensor randUniform(unsigned int batch, unsigned int channel, return t; } -nntrainer::TensorV2 constantV2(float value, unsigned int d0, unsigned int d1, - unsigned int d2, unsigned int d3, - nntrainer::Tformat fm, - nntrainer::Tdatatype d_type) { - nntrainer::TensorV2 t(d0, d1, d2, d3, {fm, d_type}); - t.setValue(value); - - return t; -} - -nntrainer::TensorV2 rangedV2(unsigned int batch, unsigned int channel, - unsigned int height, unsigned int width, - nntrainer::Tformat fm, - nntrainer::Tdatatype d_type) { - nntrainer::TensorV2 t(batch, channel, height, width, {fm, d_type}); - if (d_type == nntrainer::Tdatatype::FP32) { - float i = 0; - t = t.apply((std::function)[&](float in) { return i++; }); - } else if (d_type == nntrainer::Tdatatype::FP16) { -#ifdef ENABLE_FP16 - _FP16 i = 0; - t = t.apply((std::function<_FP16(_FP16)>)[&](_FP16 in) { return i++; }); -#else - throw std::invalid_argument("Error: enable-fp16 is not enabled"); -#endif - } - - return t; -} - -nntrainer::TensorV2 randUniformV2(unsigned int batch, unsigned int channel, - unsigned int height, unsigned int width, - float min, float max, nntrainer::Tformat fm, - nntrainer::Tdatatype d_type) { - nntrainer::TensorV2 t(batch, channel, height, width, {fm, d_type}); - t.setRandUniform(min, max); - return t; -} - const std::string getResPath(const std::string &filename, const std::initializer_list fallback_base) { diff --git a/test/unittest/layers/layers_golden_tests.cpp b/test/unittest/layers/layers_golden_tests.cpp index 56d591019b..152f9e0934 100644 --- a/test/unittest/layers/layers_golden_tests.cpp +++ b/test/unittest/layers/layers_golden_tests.cpp @@ -90,7 +90,7 @@ static TensorPacks prepareTensors(const InitLayerContext &context, vg.reserve(dims.size()); for (auto &dim : dims) { - vg.emplace_back(dim, Tensor::Initializer::NONE, true, true, "golden"); + vg.emplace_back(dim, Initializer::NONE, true, true, "golden"); sizeCheckedReadTensor(vg.back().getVariableRef(), file, vg.back().getName()); } @@ -113,8 +113,8 @@ static TensorPacks prepareTensors(const InitLayerContext &context, for (auto &spec : specs) { /// @todo initializer should be depending is as well - vg.emplace_back(spec.variable_spec.dim, Tensor::Initializer::NONE, true, - true, "golden"); + vg.emplace_back(spec.variable_spec.dim, Initializer::NONE, true, true, + "golden"); } return vg; }; diff --git a/test/unittest/layers/unittest_layer_node.cpp b/test/unittest/layers/unittest_layer_node.cpp index 3b41f02f30..9faf44e8af 100644 --- a/test/unittest/layers/unittest_layer_node.cpp +++ b/test/unittest/layers/unittest_layer_node.cpp @@ -123,9 +123,9 @@ TEST(nntrainer_LayerNode, finalize_04_p) { */ TEST(nntrainer_LayerNode, finalize_05_n) { std::unique_ptr lnode; - nntrainer::Var_Grad input = nntrainer::Var_Grad( - nntrainer::TensorDim({1, 1, 1, 1}), nntrainer::Tensor::Initializer::NONE, - true, false, "dummy"); + nntrainer::Var_Grad input = + 
nntrainer::Var_Grad(nntrainer::TensorDim({1, 1, 1, 1}), + nntrainer::Initializer::NONE, true, false, "dummy"); EXPECT_NO_THROW(lnode = nntrainer::createLayerNode(nntrainer::IdentityLayer::type)); @@ -284,16 +284,15 @@ TEST(nntrainer_LayerNode, setWeights_01_n) { */ TEST(nntrainer_LayerNode, setWeights_02_n) { std::unique_ptr lnode; - nntrainer::Weight weight = - nntrainer::Weight(nntrainer::TensorDim({1, 1, 1, 1}), - nntrainer::Tensor::Initializer::XAVIER_UNIFORM, - nntrainer::WeightRegularizer::NONE, 1.0f, 0.0f, 0.0f, - true, false, "weight"); + nntrainer::Weight weight = nntrainer::Weight( + nntrainer::TensorDim({1, 1, 1, 1}), nntrainer::Initializer::XAVIER_UNIFORM, + nntrainer::WeightRegularizer::NONE, 1.0f, 0.0f, 0.0f, true, false, + "weight"); float *float_ptr[2] = {nullptr, nullptr}; const std::vector new_weights({float_ptr[0], float_ptr[1]}); - nntrainer::Var_Grad input = nntrainer::Var_Grad( - nntrainer::TensorDim({1, 1, 1, 1}), nntrainer::Tensor::Initializer::NONE, - true, false, "dummy"); + nntrainer::Var_Grad input = + nntrainer::Var_Grad(nntrainer::TensorDim({1, 1, 1, 1}), + nntrainer::Initializer::NONE, true, false, "dummy"); EXPECT_NO_THROW(lnode = nntrainer::createLayerNode(nntrainer::IdentityLayer::type)); diff --git a/test/unittest/meson.build b/test/unittest/meson.build index b1977ea8d1..931570739a 100644 --- a/test/unittest/meson.build +++ b/test/unittest/meson.build @@ -39,7 +39,6 @@ test_target = [ ['unittest_nntrainer_internal', []], ['unittest_nntrainer_lazy_tensor', []], ['unittest_nntrainer_tensor', []], - ['unittest_nntrainer_tensor_v2', []], ['unittest_nntrainer_tensor_nhwc', []], ['unittest_util_func', []], ['unittest_nntrainer_modelfile', []], @@ -58,7 +57,6 @@ test_target = [ if get_option('enable-fp16') test_target += [['unittest_nntrainer_tensor_fp16', []]] test_target += [['unittest_nntrainer_tensor_pool_fp16', []]] - test_target += [['unittest_nntrainer_tensor_v2_fp16', []]] endif if get_option('enable-profile') diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp index 12c8873055..0f0fda6534 100644 --- a/test/unittest/unittest_nntrainer_tensor.cpp +++ b/test/unittest/unittest_nntrainer_tensor.cpp @@ -199,76 +199,76 @@ TEST(nntrainer_Tensor, Tensor_03_p) { EXPECT_EQ(status, ML_ERROR_NONE); } -TEST(nntrainer_Tensor, Tensor_04_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - std::vector>> in; - - for (int k = 0; k < batch; ++k) { - std::vector> ttv; - for (int i = 0; i < height; ++i) { - std::vector tv; - for (int j = 0; j < width; ++j) { - tv.push_back(k * height * width + i * width + j); - } - ttv.push_back(tv); - } - in.push_back(ttv); - } - - nntrainer::Tensor tensor = nntrainer::Tensor( - in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - ASSERT_NE(nullptr, tensor.getData()); - - if (tensor.getValue(0, 0, 0, 1) != 1) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); -} +// TEST(nntrainer_Tensor, Tensor_04_p) { +// int status = ML_ERROR_NONE; +// int batch = 3; +// int height = 3; +// int width = 10; +// std::vector>> in; + +// for (int k = 0; k < batch; ++k) { +// std::vector> ttv; +// for (int i = 0; i < height; ++i) { +// std::vector tv; +// for (int j = 0; j < width; ++j) { +// tv.push_back(k * height * width + i * width + j); +// } +// ttv.push_back(tv); +// } +// in.push_back(ttv); +// } -TEST(nntrainer_Tensor, Tensor_05_p) { - int status = ML_ERROR_NONE; - std::vector>> in = {{{0, 1}, {2, 3}}, - {{4, 5}, {6, 7}}, - {{8, 
9}, {10, 11}}, - {{12, 13}, {14, 15}}}; +// nntrainer::Tensor tensor = nntrainer::Tensor( +// in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// ASSERT_NE(nullptr, tensor.getData()); - nntrainer::Tensor tensor = nntrainer::Tensor( - in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); - ASSERT_NE(nullptr, tensor.getData()); - - for (size_t b = 0; b < tensor.batch(); ++b) { - for (size_t c = 0; c < tensor.channel(); ++c) { - for (size_t h = 0; h < tensor.height(); ++h) { - for (size_t w = 0; w < tensor.width(); ++w) { - size_t idx = tensor.getIndex(b, c, h, w); - ASSERT_EQ(idx, tensor.getValueQint4(idx)); - } - } - } - } -} - -TEST(nntrainer_Tensor, Tensor_06_p) { - int status = ML_ERROR_NONE; - nntrainer::Tensor tensor = nntrainer::Tensor( - 1, 4, 2, 2, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); - ASSERT_NE(nullptr, tensor.getData()); +// if (tensor.getValue(0, 0, 0, 1) != 1) +// status = ML_ERROR_INVALID_PARAMETER; +// EXPECT_EQ(status, ML_ERROR_NONE); +// } - tensor.setValue(2); +// TEST(nntrainer_Tensor, Tensor_05_p) { +// int status = ML_ERROR_NONE; +// std::vector>> in = {{{0, 1}, {2, 3}}, +// {{4, 5}, {6, 7}}, +// {{8, 9}, {10, 11}}, +// {{12, 13}, {14, 15}}}; + +// nntrainer::Tensor tensor = nntrainer::Tensor( +// in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); +// ASSERT_NE(nullptr, tensor.getData()); + +// for (size_t b = 0; b < tensor.batch(); ++b) { +// for (size_t c = 0; c < tensor.channel(); ++c) { +// for (size_t h = 0; h < tensor.height(); ++h) { +// for (size_t w = 0; w < tensor.width(); ++w) { +// size_t idx = tensor.getIndex(b, c, h, w); +// ASSERT_EQ(idx, tensor.getValueQint4(idx)); +// } +// } +// } +// } +// } - for (size_t b = 0; b < tensor.batch(); ++b) { - for (size_t c = 0; c < tensor.channel(); ++c) { - for (size_t h = 0; h < tensor.height(); ++h) { - for (size_t w = 0; w < tensor.width(); ++w) { - size_t idx = tensor.getIndex(b, c, h, w); - ASSERT_EQ(2, tensor.getValueQint4(idx)); - } - } - } - } -} +// TEST(nntrainer_Tensor, Tensor_06_p) { +// int status = ML_ERROR_NONE; +// nntrainer::Tensor tensor = nntrainer::Tensor( +// 1, 4, 2, 2, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); +// ASSERT_NE(nullptr, tensor.getData()); + +// tensor.setValue(2); + +// for (size_t b = 0; b < tensor.batch(); ++b) { +// for (size_t c = 0; c < tensor.channel(); ++c) { +// for (size_t h = 0; h < tensor.height(); ++h) { +// for (size_t w = 0; w < tensor.width(); ++w) { +// size_t idx = tensor.getIndex(b, c, h, w); +// ASSERT_EQ(2, tensor.getValueQint4(idx)); +// } +// } +// } +// } +// } TEST(nntrainer_Tensor, multiply_i_01_p) { int status = ML_ERROR_NONE; @@ -3217,19 +3217,19 @@ TEST(nntrainer_Tensor, print_small_size) { EXPECT_EQ(ss.str(), expected.str()); } -// TEST(nntrainer_Tensor, print_large_size) { -// nntrainer::Tensor target = constant(1.2, 3, 10, 10, 10); +TEST(nntrainer_Tensor, print_large_size) { + nntrainer::Tensor target = constant(1.2, 3, 10, 10, 10); -// std::stringstream ss, expected; + std::stringstream ss, expected; -// expected << '<' << typeid(target).name() << " at " << &target << ">\n" -// << "data addr: " << target.getData() << '\n' -// << "Shape: 3:10:10:10\n" -// << "[1.2 1.2 1.2 ... 1.2 1.2 1.2]\n"; -// ss << target; + expected << '<' << typeid(target).name() << " at " << &target << ">\n" + << "data addr: " << target.getData() << '\n' + << "Shape: 3:10:10:10 [ FP32 : NCHW ]\n" + << "[1.2 1.2 1.2 ... 
1.2 1.2 1.2]\n"; + ss << target; -// EXPECT_EQ(ss.str(), expected.str()); -// } + EXPECT_EQ(ss.str(), expected.str()); +} TEST(nntrainer_Tensor, DISABLED_equation_test_01_p) { nntrainer::Tensor a, b, c; @@ -3342,28 +3342,28 @@ TEST(nntrainer_Tensor, allocate_03_p) { EXPECT_TRUE(t.isAllocated()); } -TEST(nntrainer_Tensor, allocate_04_p) { - nntrainer::Tensor t( - {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}}, - true); - EXPECT_TRUE(t.isAllocated()); +// TEST(nntrainer_Tensor, allocate_04_p) { +// nntrainer::Tensor t( +// {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}}, +// true); +// EXPECT_TRUE(t.isAllocated()); - t.allocate(); - EXPECT_TRUE(t.isAllocated()); -} +// t.allocate(); +// EXPECT_TRUE(t.isAllocated()); +// } -TEST(nntrainer_Tensor, allocate_05_p) { - nntrainer::Tensor t( - {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, - true); - EXPECT_TRUE(t.isAllocated()); +// TEST(nntrainer_Tensor, allocate_05_p) { +// nntrainer::Tensor t( +// {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, +// true); +// EXPECT_TRUE(t.isAllocated()); - t.allocate(); - EXPECT_TRUE(t.isAllocated()); -} +// t.allocate(); +// EXPECT_TRUE(t.isAllocated()); +// } TEST(nntrainer_Tensor, initialize_01_p) { - nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4); golden.setValue(1); @@ -3379,13 +3379,12 @@ TEST(nntrainer_Tensor, initialize_02_p) { EXPECT_NE(golden, t); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } TEST(nntrainer_Tensor, initialize_03_p) { - nntrainer::Tensor t({1, 2, 3, 4}, false, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4}, false, nntrainer::Initializer::ONES); t.allocate(); nntrainer::Tensor golden(1, 2, 3, 4); @@ -3396,7 +3395,7 @@ TEST(nntrainer_Tensor, initialize_03_p) { TEST(nntrainer_Tensor, initialize_04_p) { nntrainer::Tensor t({1, 2, 3, 4}, false); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); t.allocate(); nntrainer::Tensor golden(1, 2, 3, 4); @@ -3417,23 +3416,22 @@ TEST(nntrainer_Tensor, initialize_05_p) { * EXPECT_NE(golden, t); */ - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } TEST(nntrainer_Tensor, initialize_06_n) { - nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Tensor::Initializer::ONES); - nntrainer::Tensor golden({1, 2, 3, 4}, true, - nntrainer::Tensor::Initializer::ZEROS); + nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); + nntrainer::Tensor golden({1, 2, 3, 4}, true, nntrainer::Initializer::ZEROS); EXPECT_NE(golden, t); - golden.initialize(nntrainer::Tensor::Initializer::ONES); + golden.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } TEST(nntrainer_Tensor, initialize_07_p) { - nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4); golden.setValue(1); @@ -3449,39 +3447,37 @@ TEST(nntrainer_Tensor, initialize_07_p) { } TEST(nntrainer_Tensor, initialize_08_p) { - nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 
3, 4); golden.setValue(1); EXPECT_EQ(golden, t); - t.initialize(nntrainer::Tensor::Initializer::HE_NORMAL); + t.initialize(nntrainer::Initializer::HE_NORMAL); EXPECT_NE(golden, t); t.initialize(); EXPECT_NE(golden, t); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); t.initialize(); EXPECT_EQ(golden, t); } -TEST(nntrainer_Tensor, initialize_09_p) { - nntrainer::Tensor t( - {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, true, - nntrainer::Tensor::Initializer::ONES); - nntrainer::Tensor golden( - {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, true, - nntrainer::Tensor::Initializer::ZEROS); - - EXPECT_NE(golden, t); - - golden.initialize(nntrainer::Tensor::Initializer::ONES); - EXPECT_EQ(golden, t); -} +// TEST(nntrainer_Tensor, initialize_09_p) { +// nntrainer::Tensor t( +// {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, +// true, nntrainer::Initializer::ONES); +// nntrainer::Tensor golden( +// {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, +// true, nntrainer::Initializer::ZEROS); +// EXPECT_NE(golden, t); +// golden.initialize(nntrainer::Initializer::ONES); +// EXPECT_EQ(golden, t); +// } TEST(nntrainer_Tensor, split_01_p) { { @@ -4070,22 +4066,6 @@ TEST(nntrainer_Tensor, TensorWrap_02_n) { EXPECT_THROW(nntrainer::Tensor::Map(dat, 3, {4}), std::invalid_argument); } -TEST(nntrainer_Tensor, TensorPaddedValue_p) { - nntrainer::Tensor a = ranged(1, 1, 3, 3); - float default_padded = -1; - - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < 5; ++j) { - float expected = default_padded; - if (1 <= i && i <= 3 && 1 <= j && j <= 3) { - expected = (i - 1) * 3 + (j - 1); - } - float actual = a.getValuePaddedVirtual(0, 0, i, j, 1, 1, default_padded); - EXPECT_FLOAT_EQ(actual, expected); - } - } -} - TEST(nntrainer_Tensor, add_strided_01_p) { int status = ML_ERROR_NONE; int batch = 3; @@ -4354,111 +4334,111 @@ TEST(nntrainer_Tensor, multiply_strided_06_p) { EXPECT_EQ(status, ML_ERROR_NONE); } -/** - * @brief dequantize FP32 tensor - */ -TEST(nntrainer_Tensor, dequantize_01_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; - - nntrainer::Tensor input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - input.setScaleFactors({1.5, 1.0, 0.5}); - input.setZeroPoints({1, 4, 7}); +// /** +// * @brief dequantize FP32 tensor +// */ +// TEST(nntrainer_Tensor, dequantize_01_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; - nntrainer::Tensor output(batch, channel, height, width); +// nntrainer::Tensor input(batch, channel, height, width); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// input.setScaleFactors({1.5, 1.0, 0.5}); +// input.setZeroPoints({1, 4, 7}); - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// nntrainer::Tensor output(batch, channel, height, width); -/** - * @brief dequantize tensor with different dimension - */ -TEST(nntrainer_Tensor, dequantize_02_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } - nntrainer::Tensor input( - batch + 1, channel, height + 1, width + 1, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - input.setScaleFactors({1.5, 1.0, 0.5}); - input.setZeroPoints({1, 4, 7}); 
+// /** +// * @brief dequantize tensor with different dimension +// */ +// TEST(nntrainer_Tensor, dequantize_02_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; - nntrainer::Tensor output(batch, channel, height, width); +// nntrainer::Tensor input( +// batch + 1, channel, height + 1, width + 1, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// input.setScaleFactors({1.5, 1.0, 0.5}); +// input.setZeroPoints({1, 4, 7}); - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// nntrainer::Tensor output(batch, channel, height, width); -/** - * @brief dequantize tensor with no scale factors - */ -TEST(nntrainer_Tensor, dequantize_03_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; - - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::Tensor output(batch, channel, height, width); +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// /** +// * @brief dequantize tensor with no scale factors +// */ +// TEST(nntrainer_Tensor, dequantize_03_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; -/** - * @brief dequantize tensor with incorrect number of scale factors - */ -TEST(nntrainer_Tensor, dequantize_04_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// nntrainer::Tensor output(batch, channel, height, width); - nntrainer::Tensor output( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } - input.setScaleFactors({2.0, 1.5, 1.0, 0.5}); - input.setZeroPoints({2, 3, 4, 5}); - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); - EXPECT_NO_THROW({ input.dequantize(output, 2); }); -} +// /** +// * @brief dequantize tensor with incorrect number of scale factors +// */ +// TEST(nntrainer_Tensor, dequantize_04_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; + +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); + +// nntrainer::Tensor output( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); + +// input.setScaleFactors({2.0, 1.5, 1.0, 0.5}); +// input.setZeroPoints({2, 3, 4, 5}); +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// EXPECT_NO_THROW({ input.dequantize(output, 2); }); +// } -/** - * @brief dequantize tensor to QINT8 - */ -TEST(nntrainer_Tensor, dequantize_05_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; +// /** +// * @brief dequantize tensor to QINT8 +// */ +// TEST(nntrainer_Tensor, dequantize_05_n) { +// int batch = 1; +// int channel = 
3; +// int height = 4; +// int width = 5; - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - input.setScaleFactors({1.5, 1.0, 0.5}); - input.setZeroPoints({1, 4, 7}); +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// input.setScaleFactors({1.5, 1.0, 0.5}); +// input.setZeroPoints({1, 4, 7}); - nntrainer::Tensor output( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// nntrainer::Tensor output( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } TEST(nntrainer_Tensor, sin_contiguous_p) { int batch = 1; @@ -4581,16 +4561,7 @@ TEST(nntrainer_Tensor, cos_uncontiguous_p) { shared_input.cos(shared_output); - for (int b = 0; b < batch; b++) { - for (int c = 0; c < channel; c++) { - for (int h = 0; h < height; h++) { - for (int w = 0; w < width; w++) { - EXPECT_NEAR(shared_output.getValue(b, c, h, w), - ground_truth.getValue(b, c, h, w), eps); - } - } - } - } + EXPECT_EQ(shared_output, ground_truth); } TEST(nntrainer_Tensor, sin_uncontiguous_p) { @@ -4612,6 +4583,7 @@ TEST(nntrainer_Tensor, sin_uncontiguous_p) { MOD); nntrainer::Tensor shared_input = input.getSharedDataTensor(dim, 0, false); + ground_truth.copy_with_stride(shared_input); for (int b = 0; b < batch; b++) { @@ -4627,16 +4599,7 @@ TEST(nntrainer_Tensor, sin_uncontiguous_p) { shared_input.sin(shared_output); - for (int b = 0; b < batch; b++) { - for (int c = 0; c < channel; c++) { - for (int h = 0; h < height; h++) { - for (int w = 0; w < width; w++) { - EXPECT_NEAR(shared_output.getValue(b, c, h, w), - ground_truth.getValue(b, c, h, w), eps); - } - } - } - } + EXPECT_EQ(shared_output, ground_truth); } TEST(nntrainer_Tensor, sin_unmatched_dim_n) { diff --git a/test/unittest/unittest_nntrainer_tensor_fp16.cpp b/test/unittest/unittest_nntrainer_tensor_fp16.cpp index c0b060108d..619aa77f3a 100644 --- a/test/unittest/unittest_nntrainer_tensor_fp16.cpp +++ b/test/unittest/unittest_nntrainer_tensor_fp16.cpp @@ -4960,8 +4960,7 @@ TEST(nntrainer_Tensor, initialize_01_p) { t_type.format = nntrainer::Tformat::NCHW; t_type.data_type = nntrainer::Tdatatype::FP16; - nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4, t_type); golden.setValue(1); @@ -4981,7 +4980,7 @@ TEST(nntrainer_Tensor, initialize_02_p) { EXPECT_NE(golden, t); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } @@ -4991,7 +4990,7 @@ TEST(nntrainer_Tensor, initialize_03_p) { t_type.data_type = nntrainer::Tdatatype::FP16; nntrainer::Tensor t({1, 2, 3, 4, t_type}, false, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); t.allocate(); nntrainer::Tensor golden(1, 2, 3, 4, t_type); @@ -5006,7 +5005,7 @@ TEST(nntrainer_Tensor, initialize_04_p) { t_type.data_type = nntrainer::Tdatatype::FP16; nntrainer::Tensor t({1, 2, 3, 4, t_type}, false); - t.initialize(nntrainer::Tensor::Initializer::ONES); + 
t.initialize(nntrainer::Initializer::ONES); t.allocate(); nntrainer::Tensor golden(1, 2, 3, 4, t_type); @@ -5031,7 +5030,7 @@ TEST(nntrainer_Tensor, initialize_05_p) { * EXPECT_NE(golden, t); */ - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } @@ -5040,14 +5039,13 @@ TEST(nntrainer_Tensor, initialize_06_n) { t_type.format = nntrainer::Tformat::NCHW; t_type.data_type = nntrainer::Tdatatype::FP16; - nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden({1, 2, 3, 4, t_type}, true, - nntrainer::Tensor::Initializer::ZEROS); + nntrainer::Initializer::ZEROS); EXPECT_NE(golden, t); - golden.initialize(nntrainer::Tensor::Initializer::ONES); + golden.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } @@ -5056,9 +5054,7 @@ TEST(nntrainer_Tensor, initialize_07_p) { t_type.format = nntrainer::Tformat::NCHW; t_type.data_type = nntrainer::Tdatatype::FP16; - nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, - nntrainer::Tensor::Initializer::ONES); - + nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4, t_type); golden.setValue(1); @@ -5077,8 +5073,7 @@ TEST(nntrainer_Tensor, initialize_08_p) { t_type.format = nntrainer::Tformat::NCHW; t_type.data_type = nntrainer::Tdatatype::FP16; - nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Tensor t({1, 2, 3, 4, t_type}, true, nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4, t_type); golden.setValue(1.f); @@ -5086,12 +5081,12 @@ TEST(nntrainer_Tensor, initialize_08_p) { /// @todo this test case is not valid anymore, since /// std::uniform_real_distribution does not support _FP16 - // t.initialize(nntrainer::Tensor::Initializer::HE_NORMAL); + // t.initialize(nntrainer::Initializer::HE_NORMAL); // EXPECT_NE(golden, t); // t.initialize(); // EXPECT_NE(golden, t); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); t.initialize(); @@ -5796,405 +5791,471 @@ TEST(nntrainer_Tensor, TensorWrap_02_n) { EXPECT_THROW(nntrainer::Tensor::Map(dat, 3, {4}), std::invalid_argument); } -TEST(nntrainer_Tensor, TensorPaddedValue_p) { - nntrainer::Tensor a = - ranged(1, 1, 3, 3, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16); - _FP16 default_padded = -1; - - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < 5; ++j) { - _FP16 expected = default_padded; - if (1 <= i && i <= 3 && 1 <= j && j <= 3) { - expected = (i - 1) * 3 + (j - 1); - } - _FP16 actual = - a.getValuePaddedVirtual<_FP16>(0, 0, i, j, 1, 1, default_padded); - EXPECT_FLOAT_EQ(actual, expected); - } - } -} +// TEST(nntrainer_Tensor, TensorPaddedValue_p) { +// nntrainer::Tensor a = +// ranged(1, 1, 3, 3, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16); +// _FP16 default_padded = -1; + +// for (int i = 0; i < 5; ++i) { +// for (int j = 0; j < 5; ++j) { +// _FP16 expected = default_padded; +// if (1 <= i && i <= 3 && 1 <= j && j <= 3) { +// expected = (i - 1) * 3 + (j - 1); +// } +// _FP16 actual = +// a.getValuePaddedVirtual<_FP16>(0, 0, i, j, 1, 1, default_padded); +// EXPECT_FLOAT_EQ(actual, expected); +// } +// } +// } -/** - * @brief dequantize FP16 tensor - */ -TEST(nntrainer_Tensor, dequantize_01_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; +// /** 
+// * @brief dequantize FP16 tensor +// */ +// TEST(nntrainer_Tensor, dequantize_01_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; - nntrainer::Tensor input(batch, channel, height, width, - nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// nntrainer::Tensor input(batch, channel, height, width, +// nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - input.setScaleFactorsFP16({static_cast<_FP16>(1.5), static_cast<_FP16>(1.0), - static_cast<_FP16>(0.5)}); - input.setZeroPoints({1, 4, 7}); +// input.setScaleFactorsFP16({static_cast<_FP16>(1.5), +// static_cast<_FP16>(1.0), +// static_cast<_FP16>(0.5)}); +// input.setZeroPoints({1, 4, 7}); - nntrainer::Tensor output(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); +// nntrainer::Tensor output(batch, channel, height, width, +// nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16); - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } -/** - * @brief dequantize tensor with different dimension - */ -TEST(nntrainer_Tensor, dequantize_02_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; - - nntrainer::Tensor input( - batch + 1, channel, height + 1, width + 1, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// /** +// * @brief dequantize tensor with different dimension +// */ +// TEST(nntrainer_Tensor, dequantize_02_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; - input.setScaleFactorsFP16({static_cast<_FP16>(1.5), static_cast<_FP16>(1.0), - static_cast<_FP16>(0.5)}); - input.setZeroPoints({1, 4, 7}); +// nntrainer::Tensor input( +// batch + 1, channel, height + 1, width + 1, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - nntrainer::Tensor output(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); +// input.setScaleFactorsFP16({static_cast<_FP16>(1.5), +// static_cast<_FP16>(1.0), +// static_cast<_FP16>(0.5)}); +// input.setZeroPoints({1, 4, 7}); - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// nntrainer::Tensor output(batch, channel, height, width, +// nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16); -/** - * @brief dequantize tensor with no scale factors - */ -TEST(nntrainer_Tensor, dequantize_03_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; - - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } - nntrainer::Tensor output(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); +// /** +// * @brief dequantize tensor with no scale factors +// */ +// TEST(nntrainer_Tensor, dequantize_03_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, 
nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); -/** - * @brief dequantize qint8 tensor to fp16 - */ -TEST(nntrainer_Tensor, dequantize_04_p) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; - - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); +// nntrainer::Tensor output(batch, channel, height, width, +// nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16); - input.setScaleFactorsFP16({static_cast<_FP16>(1.5), static_cast<_FP16>(1.0), - static_cast<_FP16>(0.5)}); - input.setZeroPoints({0, 0, 0}); - - nntrainer::Tensor output( - {1, 3, 4, 5, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, true); - - EXPECT_NO_THROW({ input.dequantize(output, 1); }); - - _FP16 answer_data[] = { - static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), - static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), static_cast<_FP16>(3), - static_cast<_FP16>(3), static_cast<_FP16>(3), static_cast<_FP16>(3), - static_cast<_FP16>(3), static_cast<_FP16>(4.5), static_cast<_FP16>(4.5), - static_cast<_FP16>(4.5), static_cast<_FP16>(4.5), static_cast<_FP16>(4.5), - static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), - static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), - static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), - static_cast<_FP16>(6), static_cast<_FP16>(7), static_cast<_FP16>(7), - static_cast<_FP16>(7), static_cast<_FP16>(7), static_cast<_FP16>(7), - static_cast<_FP16>(8), static_cast<_FP16>(8), static_cast<_FP16>(8), - static_cast<_FP16>(8), static_cast<_FP16>(8), static_cast<_FP16>(9), - static_cast<_FP16>(9), static_cast<_FP16>(9), static_cast<_FP16>(9), - static_cast<_FP16>(9), static_cast<_FP16>(5.5), static_cast<_FP16>(5.5), - static_cast<_FP16>(5.5), static_cast<_FP16>(5.5), static_cast<_FP16>(5.5), - static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), - static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6.5), - static_cast<_FP16>(6.5), static_cast<_FP16>(6.5), static_cast<_FP16>(6.5), - static_cast<_FP16>(6.5), static_cast<_FP16>(7), static_cast<_FP16>(7), - static_cast<_FP16>(7), static_cast<_FP16>(7), static_cast<_FP16>(7)}; - - nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data); - - EXPECT_EQ(output, answer); -} +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } -/** - * @brief dequantize qint8 tensor to fp16 - */ -TEST(nntrainer_Tensor, dequantize_05_p) { - size_t batch = 1; - size_t channel = 3; - size_t height = 4; - size_t width = 5; - - nntrainer::Tensor input( - {batch, - channel, - height, - width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}}, - true, nntrainer::Tensor::Initializer::ZEROS); - nntrainer::Tensor output(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); +// /** +// * @brief dequantize qint8 tensor to fp16 +// */ +// TEST(nntrainer_Tensor, dequantize_04_p) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; + +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); + +// 
input.setScaleFactorsFP16({static_cast<_FP16>(1.5), +// static_cast<_FP16>(1.0), +// static_cast<_FP16>(0.5)}); +// input.setZeroPoints({0, 0, 0}); + +// nntrainer::Tensor output( +// {1, 3, 4, 5, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, +// true); + +// EXPECT_NO_THROW({ input.dequantize(output, 1); }); + +// _FP16 answer_data[] = { +// static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), +// static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), +// static_cast<_FP16>(1.5), static_cast<_FP16>(3), static_cast<_FP16>(3), +// static_cast<_FP16>(3), static_cast<_FP16>(3), static_cast<_FP16>(3), +// static_cast<_FP16>(4.5), static_cast<_FP16>(4.5), +// static_cast<_FP16>(4.5), static_cast<_FP16>(4.5), +// static_cast<_FP16>(4.5), static_cast<_FP16>(6), static_cast<_FP16>(6), +// static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), +// static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), +// static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(7), +// static_cast<_FP16>(7), static_cast<_FP16>(7), static_cast<_FP16>(7), +// static_cast<_FP16>(7), static_cast<_FP16>(8), static_cast<_FP16>(8), +// static_cast<_FP16>(8), static_cast<_FP16>(8), static_cast<_FP16>(8), +// static_cast<_FP16>(9), static_cast<_FP16>(9), static_cast<_FP16>(9), +// static_cast<_FP16>(9), static_cast<_FP16>(9), static_cast<_FP16>(5.5), +// static_cast<_FP16>(5.5), static_cast<_FP16>(5.5), +// static_cast<_FP16>(5.5), static_cast<_FP16>(5.5), static_cast<_FP16>(6), +// static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6), +// static_cast<_FP16>(6), static_cast<_FP16>(6.5), +// static_cast<_FP16>(6.5), static_cast<_FP16>(6.5), +// static_cast<_FP16>(6.5), static_cast<_FP16>(6.5), static_cast<_FP16>(7), +// static_cast<_FP16>(7), static_cast<_FP16>(7), static_cast<_FP16>(7), +// static_cast<_FP16>(7)}; + +// nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16}), +// answer_data); + +// EXPECT_EQ(output, answer); +// } - // Dequantize by channel - EXPECT_NO_THROW(input.setScaleFactorsFP16( - {static_cast<_FP16>(2), static_cast<_FP16>(-2), static_cast<_FP16>(-4)})); - EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1})); - EXPECT_NO_THROW({ input.dequantize(output, 1); }); - - _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; - - nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data_1); - - EXPECT_EQ(output, answer1); - - // Dequantize by height - - EXPECT_NO_THROW(input.setScaleFactorsFP16( - {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2), - static_cast<_FP16>(-4.8)})); - EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1})); - EXPECT_NO_THROW({ input.dequantize(output, 2); }); - - _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(4.8), - static_cast<_FP16>(4.8), static_cast<_FP16>(4.8), - 
static_cast<_FP16>(4.8), static_cast<_FP16>(4.8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(4.8), - static_cast<_FP16>(4.8), static_cast<_FP16>(4.8), - static_cast<_FP16>(4.8), static_cast<_FP16>(4.8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(4.8), - static_cast<_FP16>(4.8), static_cast<_FP16>(4.8), - static_cast<_FP16>(4.8), static_cast<_FP16>(4.8)}; - nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data_2); - - EXPECT_EQ(output, answer2); - - // Dequantize by width - EXPECT_NO_THROW(input.setScaleFactorsFP16( - {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2), - static_cast<_FP16>(-4), static_cast<_FP16>(8)})); - EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1})); - EXPECT_NO_THROW({ input.dequantize(output, 3); }); - - _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8)}; - - nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data_3); - - EXPECT_EQ(output, answer3); -} +// /** +// * @brief dequantize qint8 tensor to fp16 +// */ +// TEST(nntrainer_Tensor, dequantize_05_p) { +// size_t batch = 1; +// size_t channel = 3; +// size_t height = 4; +// size_t width = 5; + +// nntrainer::Tensor input( +// {batch, +// channel, +// 
height, +// width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}}, +// true, nntrainer::Initializer::ZEROS); +// nntrainer::Tensor output(batch, channel, height, width, +// nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16); -/** - * @brief dequantize qint4 tensor - */ -TEST(nntrainer_Tensor, dequantize_06_p) { - size_t batch = 1; - size_t channel = 3; - size_t height = 4; - size_t width = 5; - - nntrainer::Tensor input( - {batch, - channel, - height, - width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, - true, nntrainer::Tensor::Initializer::ZEROS); - nntrainer::Tensor output(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); +// // Dequantize by channel +// EXPECT_NO_THROW(input.setScaleFactorsFP16( +// {static_cast<_FP16>(2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-4)})); +// EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1})); +// EXPECT_NO_THROW({ input.dequantize(output, 1); }); + +// _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, +// -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2, +// 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +// 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, +// 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; + +// nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16}), +// answer_data_1); + +// EXPECT_EQ(output, answer1); + +// // Dequantize by height + +// EXPECT_NO_THROW(input.setScaleFactorsFP16( +// {static_cast<_FP16>(4.2), static_cast<_FP16>(2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-4.8)})); +// EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1})); +// EXPECT_NO_THROW({ input.dequantize(output, 2); }); + +// _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8), +// static_cast<_FP16>(4.8)}; +// nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// 
nntrainer::Tdatatype::FP16}), +// answer_data_2); + +// EXPECT_EQ(output, answer2); + +// // Dequantize by width +// EXPECT_NO_THROW(input.setScaleFactorsFP16( +// {static_cast<_FP16>(4.2), static_cast<_FP16>(2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-4), static_cast<_FP16>(8)})); +// EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1})); +// EXPECT_NO_THROW({ input.dequantize(output, 3); }); + +// _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(2), static_cast<_FP16>(4), +// static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8)}; + +// nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16}), +// answer_data_3); + +// EXPECT_EQ(output, answer3); +// } - // Dequantize by channel - EXPECT_NO_THROW(input.setScaleFactorsFP16( - {static_cast<_FP16>(2), static_cast<_FP16>(-2), static_cast<_FP16>(-4)})); - EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1})); - EXPECT_NO_THROW({ input.dequantize(output, 1); }); - - _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; - - nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data_1); - - EXPECT_EQ(output, answer1); - - // Dequantize by height - EXPECT_NO_THROW(input.setScaleFactorsFP16( - {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2), - static_cast<_FP16>(-4)})); - EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1})); - EXPECT_NO_THROW({ input.dequantize(output, 2); }); - - _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - 
static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(4), static_cast<_FP16>(4), - static_cast<_FP16>(4), static_cast<_FP16>(4), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(4), static_cast<_FP16>(4), - static_cast<_FP16>(4), static_cast<_FP16>(4), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(-2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(4), static_cast<_FP16>(4), - static_cast<_FP16>(4), static_cast<_FP16>(4)}; - nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data_2); - - EXPECT_EQ(output, answer2); - - // Dequantize by width - EXPECT_NO_THROW(input.setScaleFactorsFP16( - {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2), - static_cast<_FP16>(-4), static_cast<_FP16>(8)})); - EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1})); - EXPECT_NO_THROW({ input.dequantize(output, 3); }); - - _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8), - static_cast<_FP16>(-4.2), static_cast<_FP16>(-2), - static_cast<_FP16>(2), static_cast<_FP16>(4), - static_cast<_FP16>(-8), static_cast<_FP16>(-4.2), - static_cast<_FP16>(-2), static_cast<_FP16>(2), - static_cast<_FP16>(4), static_cast<_FP16>(-8)}; - - nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width, - {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16}), - answer_data_3); - - EXPECT_EQ(output, answer3); -} +// /** +// * @brief dequantize qint4 tensor +// */ +// TEST(nntrainer_Tensor, dequantize_06_p) { +// size_t batch = 1; +// size_t channel 
= 3; +// size_t height = 4; +// size_t width = 5; + +// nntrainer::Tensor input( +// {batch, +// channel, +// height, +// width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, +// true, nntrainer::Initializer::ZEROS); +// nntrainer::Tensor output(batch, channel, height, width, +// nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16); +// // Dequantize by channel +// EXPECT_NO_THROW(input.setScaleFactorsFP16( +// {static_cast<_FP16>(2), static_cast<_FP16>(-2), +// static_cast<_FP16>(-4)})); +// EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1})); +// EXPECT_NO_THROW({ input.dequantize(output, 1); }); + +// _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, +// -2, +// -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2, +// 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +// 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, +// 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}; + +// nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16}), +// answer_data_1); + +// EXPECT_EQ(output, answer1); + +// // Dequantize by height +// EXPECT_NO_THROW(input.setScaleFactorsFP16( +// {static_cast<_FP16>(4.2), static_cast<_FP16>(2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-4)})); +// EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1})); +// EXPECT_NO_THROW({ input.dequantize(output, 2); }); + +// _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(4), +// static_cast<_FP16>(4), static_cast<_FP16>(4), +// static_cast<_FP16>(4), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(4), +// static_cast<_FP16>(4), static_cast<_FP16>(4), +// static_cast<_FP16>(4), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(4), +// static_cast<_FP16>(4), static_cast<_FP16>(4), +// static_cast<_FP16>(4)}; +// nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16}), +// answer_data_2); + +// EXPECT_EQ(output, answer2); + +// // Dequantize by width +// EXPECT_NO_THROW(input.setScaleFactorsFP16( +// {static_cast<_FP16>(4.2), static_cast<_FP16>(2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(-4), static_cast<_FP16>(8)})); +// EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1})); +// EXPECT_NO_THROW({ input.dequantize(output, 
3); }); + +// _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), +// static_cast<_FP16>(2), static_cast<_FP16>(4), +// static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), static_cast<_FP16>(-8), +// static_cast<_FP16>(-4.2), +// static_cast<_FP16>(-2), static_cast<_FP16>(2), +// static_cast<_FP16>(4), +// static_cast<_FP16>(-8)}; + +// nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, +// width, +// {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::FP16}), +// answer_data_3); + +// EXPECT_EQ(output, answer3); +// } GTEST_API_ int main(int argc, char **argv) { int result = -1; diff --git a/test/unittest/unittest_nntrainer_tensor_nhwc.cpp b/test/unittest/unittest_nntrainer_tensor_nhwc.cpp index f65e1b4eda..167acec29c 100644 --- a/test/unittest/unittest_nntrainer_tensor_nhwc.cpp +++ b/test/unittest/unittest_nntrainer_tensor_nhwc.cpp @@ -3592,7 +3592,7 @@ TEST(nntrainer_Tensor, allocate_03_nhwc_p) { TEST(nntrainer_Tensor, initialize_01_nhwc_p) { nntrainer::Tensor t(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4, NHWC_, FP32_); golden.setValue(1); @@ -3608,13 +3608,13 @@ TEST(nntrainer_Tensor, initialize_02_nhwc_p) { EXPECT_NE(golden, t); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } TEST(nntrainer_Tensor, initialize_03_nhwc_p) { nntrainer::Tensor t(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), false, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); t.allocate(); nntrainer::Tensor golden(1, 2, 3, 4, NHWC_, FP32_); @@ -3625,7 +3625,7 @@ TEST(nntrainer_Tensor, initialize_03_nhwc_p) { TEST(nntrainer_Tensor, initialize_04_nhwc_p) { nntrainer::Tensor t(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), false); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); t.allocate(); nntrainer::Tensor golden(1, 2, 3, 4, NHWC_, FP32_); @@ -3646,25 +3646,25 @@ TEST(nntrainer_Tensor, initialize_05_nhwc_p) { * EXPECT_NE(golden, t); */ - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } 
TEST(nntrainer_Tensor, initialize_06_nhwc_n) { nntrainer::Tensor t(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); nntrainer::Tensor golden(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), true, - nntrainer::Tensor::Initializer::ZEROS); + nntrainer::Initializer::ZEROS); EXPECT_NE(golden, t); - golden.initialize(nntrainer::Tensor::Initializer::ONES); + golden.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); } TEST(nntrainer_Tensor, initialize_07_nhwc_p) { nntrainer::Tensor t(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4, NHWC_, FP32_); golden.setValue(1); @@ -3681,20 +3681,20 @@ TEST(nntrainer_Tensor, initialize_07_nhwc_p) { TEST(nntrainer_Tensor, initialize_08_nhwc_p) { nntrainer::Tensor t(nntrainer::TensorDim(1, 2, 3, 4, NHWC_, FP32_), true, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); nntrainer::Tensor golden(1, 2, 3, 4, NHWC_, FP32_); golden.setValue(1); EXPECT_EQ(golden, t); - t.initialize(nntrainer::Tensor::Initializer::HE_NORMAL); + t.initialize(nntrainer::Initializer::HE_NORMAL); EXPECT_NE(golden, t); t.initialize(); EXPECT_NE(golden, t); - t.initialize(nntrainer::Tensor::Initializer::ONES); + t.initialize(nntrainer::Initializer::ONES); EXPECT_EQ(golden, t); t.initialize(); @@ -3973,21 +3973,21 @@ TEST(nntrainer_Tensor, TensorWrap_02_nhwc_n) { EXPECT_THROW(nntrainer::Tensor::Map(dat, 3, {4}), std::invalid_argument); } -TEST(nntrainer_Tensor, TensorPaddedValue_nhwc_p) { - nntrainer::Tensor a = ranged(1, 1, 3, 3, NHWC_, FP32_); - float default_padded = -1; +// TEST(nntrainer_Tensor, TensorPaddedValue_nhwc_p) { +// nntrainer::Tensor a = ranged(1, 1, 3, 3, NHWC_, FP32_); +// float default_padded = -1; - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < 5; ++j) { - float expected = default_padded; - if (1 <= i && i <= 3 && 1 <= j && j <= 3) { - expected = (i - 1) * 3 + (j - 1); - } - float actual = a.getValuePaddedVirtual(0, 0, i, j, 1, 1, default_padded); - EXPECT_FLOAT_EQ(actual, expected); - } - } -} +// for (int i = 0; i < 5; ++i) { +// for (int j = 0; j < 5; ++j) { +// float expected = default_padded; +// if (1 <= i && i <= 3 && 1 <= j && j <= 3) { +// expected = (i - 1) * 3 + (j - 1); +// } +// float actual = a.getValuePaddedVirtual(0, 0, i, j, 1, 1, +// default_padded); EXPECT_FLOAT_EQ(actual, expected); +// } +// } +// } TEST(nntrainer_Tensor, zoneout_mask_01_nhwc_n) { const float zoneout_rate = 0.3f; @@ -4702,51 +4702,51 @@ TEST(nntrainer_Tensor, tranpose_dimension_not_match_nhwc_n) { EXPECT_THROW(a.transpose("0:1:2", b), std::invalid_argument); } -/** - * @brief dequantize tensor with different format - */ -TEST(nntrainer_Tensor, dequantize_01_n) { - int batch = 1; - int channel = 3; - int height = 4; - int width = 5; - - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - input.setScaleFactors({1.5, 1.0, 0.5}); - input.setZeroPoints({1, 0, 3}); - - nntrainer::Tensor output( - batch, channel, height, width, - {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::FP32}); - - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} - -/** - * @brief dequantize tensor with different format - */ -TEST(nntrainer_Tensor, dequantize_02_n) { - int batch = 1; - int channel = 3; - int height = 4; - int 
width = 5; - - nntrainer::Tensor input( - batch, channel, height, width, - {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - input.setScaleFactors({1.5, 1.0, 0.5}); - input.setZeroPoints({1, 0, 3}); - - nntrainer::Tensor output( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); - - EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); -} +// /** +// * @brief dequantize tensor with different format +// */ +// TEST(nntrainer_Tensor, dequantize_01_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; + +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// input.setScaleFactors({1.5, 1.0, 0.5}); +// input.setZeroPoints({1, 0, 3}); + +// nntrainer::Tensor output( +// batch, channel, height, width, +// {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::FP32}); + +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } + +// /** +// * @brief dequantize tensor with different format +// */ +// TEST(nntrainer_Tensor, dequantize_02_n) { +// int batch = 1; +// int channel = 3; +// int height = 4; +// int width = 5; + +// nntrainer::Tensor input( +// batch, channel, height, width, +// {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8}); +// GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); +// input.setScaleFactors({1.5, 1.0, 0.5}); +// input.setZeroPoints({1, 0, 3}); + +// nntrainer::Tensor output( +// batch, channel, height, width, +// {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); + +// EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument); +// } int main(int argc, char **argv) { int result = -1; diff --git a/test/unittest/unittest_nntrainer_tensor_pool.cpp b/test/unittest/unittest_nntrainer_tensor_pool.cpp index fa57141c08..9035099a15 100644 --- a/test/unittest/unittest_nntrainer_tensor_pool.cpp +++ b/test/unittest/unittest_nntrainer_tensor_pool.cpp @@ -435,127 +435,137 @@ TEST(TensorPool, validate_memory) { EXPECT_NO_THROW(pool.deallocate()); } -/** - * @brief qint8 tensors reuse fp32 tensor memory space - */ -TEST(TensorPool, validate_memory_reuse_01_p) { - // |--------- t1 ---------| - // |-t2-||-t3-||-t4-||-t5-| - nntrainer::TensorPool pool; - nntrainer::Tensor *t1 = nullptr, *t2 = nullptr, *t3 = nullptr, *t4 = nullptr, - *t5 = nullptr; - - EXPECT_NO_THROW( - t1 = pool.request("t1", nntrainer::TensorDim({4}), {0}, - nntrainer::TensorLifespan::FORWARD_FUNC_LIFESPAN)); - EXPECT_NE(t1, nullptr); - EXPECT_FALSE(t1->isAllocated()); - - EXPECT_NO_THROW( - t2 = pool.request("t2", - nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT8}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t2, nullptr); - EXPECT_FALSE(t2->isAllocated()); - - EXPECT_NO_THROW( - t3 = pool.request("t3", - nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT8}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t3, nullptr); - EXPECT_FALSE(t3->isAllocated()); - - EXPECT_NO_THROW( - t4 = pool.request("t4", - nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT8}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t4, nullptr); - EXPECT_FALSE(t4->isAllocated()); - - EXPECT_NO_THROW( - t5 = pool.request("t5", - 
nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT8}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t5, nullptr); - EXPECT_FALSE(t5->isAllocated()); - - EXPECT_NO_THROW(pool.finalize(nntrainer::OptimizedV1Planner(), 0, 2)); - EXPECT_EQ(pool.minMemoryRequirement(), t1->bytes()); - - EXPECT_NO_THROW(pool.allocate()); - - EXPECT_EQ(t1->getAddress(0), (float *)t2->getAddress(0)); - EXPECT_EQ(t1->getAddress(1), (float *)t3->getAddress(0)); - EXPECT_EQ(t1->getAddress(2), (float *)t4->getAddress(0)); - EXPECT_EQ(t1->getAddress(3), (float *)t5->getAddress(0)); - - EXPECT_NO_THROW(pool.deallocate()); -} - -/** - * @brief qint4 tensors reuse fp32 tensor memory space - */ -TEST(TensorPool, validate_memory_reuse_02_p) { - // |--------- t1 ---------| - // |-t2-||-t3-||-t4-||-t5-| - nntrainer::TensorPool pool; - nntrainer::Tensor *t1 = nullptr, *t2 = nullptr, *t3 = nullptr, *t4 = nullptr, - *t5 = nullptr; - - EXPECT_NO_THROW( - t1 = pool.request("t1", nntrainer::TensorDim({4}), {0}, - nntrainer::TensorLifespan::FORWARD_FUNC_LIFESPAN)); - EXPECT_NE(t1, nullptr); - EXPECT_FALSE(t1->isAllocated()); - - EXPECT_NO_THROW( - t2 = pool.request("t2", - nntrainer::TensorDim({8}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT4}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t2, nullptr); - EXPECT_FALSE(t2->isAllocated()); - - EXPECT_NO_THROW( - t3 = pool.request("t3", - nntrainer::TensorDim({7}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT4}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t3, nullptr); - EXPECT_FALSE(t3->isAllocated()); - - EXPECT_NO_THROW( - t4 = pool.request("t4", - nntrainer::TensorDim({8}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT4}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t4, nullptr); - EXPECT_FALSE(t4->isAllocated()); - - EXPECT_NO_THROW( - t5 = pool.request("t5", - nntrainer::TensorDim({7}, {nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::QINT4}), - {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); - EXPECT_NE(t5, nullptr); - EXPECT_FALSE(t5->isAllocated()); - - EXPECT_NO_THROW(pool.finalize(nntrainer::OptimizedV1Planner(), 0, 2)); - EXPECT_EQ(pool.minMemoryRequirement(), t1->bytes()); - - EXPECT_NO_THROW(pool.allocate()); - - EXPECT_EQ(t1->getAddress(0), (float *)t2->getAddress(0)); - EXPECT_EQ(t1->getAddress(1), (float *)t3->getAddress(0)); - EXPECT_EQ(t1->getAddress(2), (float *)t4->getAddress(0)); - EXPECT_EQ(t1->getAddress(3), (float *)t5->getAddress(0)); - - EXPECT_NO_THROW(pool.deallocate()); -} +// /** +// * @brief qint8 tensors reuse fp32 tensor memory space +// */ +// TEST(TensorPool, validate_memory_reuse_01_p) { +// // |--------- t1 ---------| +// // |-t2-||-t3-||-t4-||-t5-| +// nntrainer::TensorPool pool; +// nntrainer::Tensor *t1 = nullptr, *t2 = nullptr, *t3 = nullptr, *t4 = +// nullptr, +// *t5 = nullptr; + +// EXPECT_NO_THROW( +// t1 = pool.request("t1", nntrainer::TensorDim({4}), {0}, +// nntrainer::TensorLifespan::FORWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t1, nullptr); +// EXPECT_FALSE(t1->isAllocated()); + +// EXPECT_NO_THROW( +// t2 = pool.request("t2", +// nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT8}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t2, nullptr); +// EXPECT_FALSE(t2->isAllocated()); + +// EXPECT_NO_THROW( +// t3 = pool.request("t3", +// nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, +// 
nntrainer::Tdatatype::QINT8}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t3, nullptr); +// EXPECT_FALSE(t3->isAllocated()); + +// EXPECT_NO_THROW( +// t4 = pool.request("t4", +// nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT8}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t4, nullptr); +// EXPECT_FALSE(t4->isAllocated()); + +// EXPECT_NO_THROW( +// t5 = pool.request("t5", +// nntrainer::TensorDim({4}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT8}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t5, nullptr); +// EXPECT_FALSE(t5->isAllocated()); + +// EXPECT_NO_THROW(pool.finalize(nntrainer::OptimizedV1Planner(), 0, 2)); +// EXPECT_EQ(pool.minMemoryRequirement(), t1->bytes()); + +// EXPECT_NO_THROW(pool.allocate()); + +// EXPECT_EQ(t1->getAddress(0), (float *)t2->getAddress(0)); +// EXPECT_EQ(t1->getAddress(1), (float *)t3->getAddress(0)); +// EXPECT_EQ(t1->getAddress(2), (float *)t4->getAddress(0)); +// EXPECT_EQ(t1->getAddress(3), (float *)t5->getAddress(0)); + +// EXPECT_NO_THROW(pool.deallocate()); +// } + +// /** +// * @brief qint4 tensors reuse fp32 tensor memory space +// */ +// TEST(TensorPool, validate_memory_reuse_02_p) { +// // |--------- t1 ---------| +// // |-t2-||-t3-||-t4-||-t5-| +// nntrainer::TensorPool pool; +// nntrainer::Tensor *t1 = nullptr, *t2 = nullptr, *t3 = nullptr, *t4 = +// nullptr, +// *t5 = nullptr; + +// EXPECT_NO_THROW( +// t1 = pool.request("t1", nntrainer::TensorDim({4}), {0}, +// nntrainer::TensorLifespan::FORWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t1, nullptr); +// EXPECT_FALSE(t1->isAllocated()); + +// EXPECT_NO_THROW( +// t2 = pool.request("t2", +// nntrainer::TensorDim({8}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT4}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t2, nullptr); +// EXPECT_FALSE(t2->isAllocated()); + +// EXPECT_NO_THROW( +// t3 = pool.request("t3", +// nntrainer::TensorDim({7}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT4}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t3, nullptr); +// EXPECT_FALSE(t3->isAllocated()); + +// EXPECT_NO_THROW( +// t4 = pool.request("t4", +// nntrainer::TensorDim({8}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT4}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t4, nullptr); +// EXPECT_FALSE(t4->isAllocated()); + +// EXPECT_NO_THROW( +// t5 = pool.request("t5", +// nntrainer::TensorDim({7}, {nntrainer::Tformat::NCHW, +// nntrainer::Tdatatype::QINT4}), +// {1}, +// nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN)); +// EXPECT_NE(t5, nullptr); +// EXPECT_FALSE(t5->isAllocated()); + +// EXPECT_NO_THROW(pool.finalize(nntrainer::OptimizedV1Planner(), 0, 2)); +// EXPECT_EQ(pool.minMemoryRequirement(), t1->bytes()); + +// EXPECT_NO_THROW(pool.allocate()); + +// EXPECT_EQ(t1->getAddress(0), (float *)t2->getAddress(0)); +// EXPECT_EQ(t1->getAddress(1), (float *)t3->getAddress(0)); +// EXPECT_EQ(t1->getAddress(2), (float *)t4->getAddress(0)); +// EXPECT_EQ(t1->getAddress(3), (float *)t5->getAddress(0)); + +// EXPECT_NO_THROW(pool.deallocate()); +// } /** * @brief check if data span of two tensor testOverlap @@ -863,10 +873,9 @@ TEST(TensorPool, createOrExtend_different_dim_n) { TEST(TensorPool, createOrExtend_init_n) { nntrainer::TensorPool pool; - pool.requestOrExtend("t", {10}, {0}, max_ls, - 
nntrainer::Tensor::Initializer::ONES); + pool.requestOrExtend("t", {10}, {0}, max_ls, nntrainer::Initializer::ONES); EXPECT_ANY_THROW(pool.requestOrExtend("t", {10}, {1}, max_ls, - nntrainer::Tensor::Initializer::ZEROS)); + nntrainer::Initializer::ZEROS)); } TEST(TensorPool, createOrExtend_unmanaged_n) { nntrainer::TensorPool pool; diff --git a/test/unittest/unittest_nntrainer_tensor_pool_fp16.cpp b/test/unittest/unittest_nntrainer_tensor_pool_fp16.cpp index 19ab760aa5..fa7ef82c11 100644 --- a/test/unittest/unittest_nntrainer_tensor_pool_fp16.cpp +++ b/test/unittest/unittest_nntrainer_tensor_pool_fp16.cpp @@ -1337,16 +1337,29 @@ static void testNoOverlap(nntrainer::Tensor *t1, nntrainer::Tensor *t2) { * @param t2 t2 tensor 2 */ static void testSubset(nntrainer::Tensor *t1, nntrainer::Tensor *t2) { - _FP16 *t1_start = t1->getData<_FP16>(); - _FP16 *t1_end = t1_start + t1->size(); - - _FP16 *t2_start = t2->getData<_FP16>(); - _FP16 *t2_end = t2_start + t2->size(); - - EXPECT_NE(t1_start, nullptr); - EXPECT_NE(t2_start, nullptr); - EXPECT_TRUE(t1_start <= t2_start && t2_end <= t1_end) - << "t2 is not subset of t1"; + if (t1->getDataType() == ml::train::TensorDim::DataType::FP32) { + float *t1_start = t1->getData(); + float *t1_end = t1_start + t1->size(); + + float *t2_start = t2->getData(); + float *t2_end = t2_start + t2->size(); + + EXPECT_NE(t1_start, nullptr); + EXPECT_NE(t2_start, nullptr); + EXPECT_TRUE(t1_start <= t2_start && t2_end <= t1_end) + << "t2 is not subset of t1"; + } else { + _FP16 *t1_start = t1->getData<_FP16>(); + _FP16 *t1_end = t1_start + t1->size(); + + _FP16 *t2_start = t2->getData<_FP16>(); + _FP16 *t2_end = t2_start + t2->size(); + + EXPECT_NE(t1_start, nullptr); + EXPECT_NE(t2_start, nullptr); + EXPECT_TRUE(t1_start <= t2_start && t2_end <= t1_end) + << "t2 is not subset of t1"; + } } TEST(TensorPool, create_allocate_has_data_01_p) { @@ -2069,21 +2082,21 @@ TEST(TensorPool, createOrExtend_different_type_02_n) { TEST(TensorPool, createOrExtend_init_01_n) { nntrainer::TensorPool pool; pool.requestOrExtend("t", {{10}, FP16_}, {0}, max_ls, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); EXPECT_ANY_THROW(pool.requestOrExtend("t", {{10}, FP16_}, {1}, max_ls, - nntrainer::Tensor::Initializer::ZEROS)); + nntrainer::Initializer::ZEROS)); } TEST(TensorPool, createOrExtend_init_02_n) { nntrainer::TensorPool pool; pool.requestOrExtend("t0", {{10}, FP16_}, {0}, max_ls, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); EXPECT_ANY_THROW(pool.requestOrExtend("t0", {{10}, FP16_}, {1}, max_ls, - nntrainer::Tensor::Initializer::ZEROS)); + nntrainer::Initializer::ZEROS)); pool.requestOrExtend("t1", {{10}, FP32_}, {0}, max_ls, - nntrainer::Tensor::Initializer::ONES); + nntrainer::Initializer::ONES); EXPECT_ANY_THROW(pool.requestOrExtend("t1", {{10}, FP32_}, {1}, max_ls, - nntrainer::Tensor::Initializer::ZEROS)); + nntrainer::Initializer::ZEROS)); } TEST(TensorPool, createOrExtend_unmanaged_01_n) { diff --git a/test/unittest/unittest_nntrainer_tensor_v2.cpp b/test/unittest/unittest_nntrainer_tensor_v2.cpp deleted file mode 100644 index de7d2d7935..0000000000 --- a/test/unittest/unittest_nntrainer_tensor_v2.cpp +++ /dev/null @@ -1,1860 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -/** - * Copyright (C) 2023 Donghyeon Jeong - * - * @file unittest_nntrainer_tensor_v2.cpp - * @date 16 November 2023 - * @brief Unit test utility for tensor v2. 
- * @see https://github.com/nnstreamer/nntrainer - * @author 2023 Donghyeon Jeong - * @bug No known bugs - */ -#include - -#include "nntrainer_test_util.h" -#include "util_func.h" -#include -#include -#include -#include - -TEST(nntrainer_Tensor, Tensor_01_p) { - int status = ML_ERROR_NONE; - nntrainer::TensorV2 tensor = nntrainer::TensorV2(1, 2, 3); - tensor.setZero(); - ASSERT_NE(nullptr, tensor.getData()); - - if (tensor.getValue(0, 0, 0, 0) != 0.0) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, Tensor_02_p) { - int status = ML_ERROR_NONE; - int height = 3; - int width = 10; - std::vector> in; - for (int i = 0; i < height; ++i) { - std::vector tv; - for (int j = 0; j < width; ++j) { - tv.push_back(i * 2.0 + j); - } - in.push_back(tv); - } - - nntrainer::TensorV2 tensor = nntrainer::TensorV2( - in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); - ASSERT_NE(nullptr, tensor.getData()); - - if (tensor.getValue(0, 0, 0, 1) != 1.0) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, Tensor_02_nhwc_p) { - int status = ML_ERROR_NONE; - int width = 10; - int channel = 3; - std::vector> in; - for (int i = 0; i < width; ++i) { - std::vector tv; - for (int j = 0; j < channel; ++j) { - tv.push_back(i * 2.0 + j); - } - in.push_back(tv); - } - - nntrainer::TensorV2 tensor = nntrainer::TensorV2( - in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); - ASSERT_NE(nullptr, tensor.getData()); - - if (tensor.getValue(0, 0, 0, 1) != 1.0) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, Tensor_03_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - std::vector>> in; - - for (int k = 0; k < batch; ++k) { - std::vector> ttv; - for (int i = 0; i < height; ++i) { - std::vector tv; - for (int j = 0; j < width; ++j) { - tv.push_back(k * height * width + i * width + j); - } - ttv.push_back(tv); - } - in.push_back(ttv); - } - - nntrainer::TensorV2 tensor = nntrainer::TensorV2( - in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); - ASSERT_NE(nullptr, tensor.getData()); - - if (tensor.getValue(0, 0, 0, 1) != 1.0) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, Tensor_04_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - std::vector>> in; - - for (int k = 0; k < batch; ++k) { - std::vector> ttv; - for (int i = 0; i < height; ++i) { - std::vector tv; - for (int j = 0; j < width; ++j) { - tv.push_back(k * height * width + i * width + j); - } - ttv.push_back(tv); - } - in.push_back(ttv); - } - - nntrainer::TensorV2 t0 = nntrainer::TensorV2( - in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32}); - - // copy assignment operator - nntrainer::TensorV2 t1 = t0; - - if (t1.getValue(0, 0, 0, 1) != 1.0) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); - - // comparison operator - EXPECT_EQ(t0, t1); -} - -TEST(nntrainer_Tensor, Tensor_05_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - std::vector>> in; - - for (int k = 0; k < batch; ++k) { - std::vector> ttv; - for (int i = 0; i < height; ++i) { - std::vector tv; - for (int j = 0; j < width; ++j) { - tv.push_back(k * height * width + i * width + j); - } - ttv.push_back(tv); - } - in.push_back(ttv); - } - - nntrainer::TensorV2 t0 = nntrainer::TensorV2( - in, {nntrainer::Tformat::NCHW, 
nntrainer::Tdatatype::FP32}); - - // copy assignment operator - nntrainer::TensorV2 t1 = nntrainer::TensorV2(batch, height, width); - t1.setRandNormal(2.3, 0.5); - - float val_t0 = t0.getValue(0, 0, 0, 1); - float val_t1 = t1.getValue(0, 0, 0, 1); - - swap(t0, t1); - - if (t0.getValue(0, 0, 0, 1) != val_t1) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); - - if (t1.getValue(0, 0, 0, 1) != val_t0) - status = ML_ERROR_INVALID_PARAMETER; - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, empty_01) { - nntrainer::TensorV2 t; - - EXPECT_TRUE(t.empty()); -} - -TEST(nntrainer_Tensor, empty_02) { - nntrainer::TensorV2 t({1, 2, 3, 4}, false); - - EXPECT_FALSE(t.empty()); -} - -TEST(nntrainer_Tensor, empty_03) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true); - - EXPECT_FALSE(t.empty()); -} - -TEST(nntrainer_Tensor, allocate_01_n) { - nntrainer::TensorV2 t; - EXPECT_FALSE(t.isAllocated()); - - t.allocate(); - EXPECT_FALSE(t.isAllocated()); -} - -TEST(nntrainer_Tensor, allocate_02_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, false); - EXPECT_FALSE(t.isAllocated()); - - t.allocate(); - EXPECT_TRUE(t.isAllocated()); -} - -TEST(nntrainer_Tensor, allocate_03_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true); - EXPECT_TRUE(t.isAllocated()); - - t.allocate(); - EXPECT_TRUE(t.isAllocated()); -} - -TEST(nntrainer_Tensor, initialize_01_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1); - - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_02_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1); - - EXPECT_NE(golden, t); - - t.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_03_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, false, nntrainer::Initializer::ONES); - t.allocate(); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1); - - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_04_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, false); - t.initialize(nntrainer::Initializer::ONES); - t.allocate(); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1); - - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_05_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, false); - t.allocate(); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1.f); - - /** - * Ideally, it should be NE, but it can be equal due to no initialization - * EXPECT_NE(golden, t); - */ - - t.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_06_n) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); - nntrainer::TensorV2 golden({1, 2, 3, 4}, true, nntrainer::Initializer::ZEROS); - - EXPECT_NE(golden, t); - - golden.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_07_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1); - - EXPECT_EQ(golden, t); - - t.setValue(0, 0, 0, 0, 0); - t.setValue(0, 0, 0, t.size() - 1, 0); - EXPECT_NE(golden, t); - - t.initialize(); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_08_p) { - nntrainer::TensorV2 t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); - - nntrainer::TensorV2 golden(1, 2, 3, 4); - golden.setValue(1); - - EXPECT_EQ(golden, t); - - 
t.initialize(nntrainer::Initializer::HE_NORMAL); - EXPECT_NE(golden, t); - - t.initialize(); - EXPECT_NE(golden, t); - - t.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); - - t.initialize(); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, multiply_i_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original; - original.copy(input); - - status = input.multiply_i(2.0); - EXPECT_EQ(status, ML_ERROR_NONE); - - float *data = original.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * channel * width * height; ++i) { - EXPECT_FLOAT_EQ(data[i] + data[i], indata[i]); - } -} - -TEST(nntrainer_Tensor, multiply_i_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original; - original.copy(input); - - status = input.multiply_i(input); - EXPECT_EQ(status, ML_ERROR_NONE); - - float *data = original.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * channel * width * height; ++i) { - EXPECT_FLOAT_EQ(data[i] * data[i], indata[i]); - } -} - -TEST(nntrainer_Tensor, multiply_i_03_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 target2(batch, channel, height - 2, width - 1); - status = input.multiply_i(target2); - - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, multiply_i_broadcast_01_p) { - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 2, 4, 5); - float answer_data[] = { - 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, - 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, - 576, 625, 676, 729, 784, 841, 900, 961, 1024, 1089, 1156, 1225, - 1296, 1369, 1444, 1521, 0, 41, 84, 129, 176, 225, 276, 329, - 384, 441, 500, 561, 624, 689, 756, 825, 896, 969, 1044, 1121, - 1200, 1281, 1364, 1449, 1536, 1625, 1716, 1809, 1904, 2001, 2100, 2201, - 2304, 2409, 2516, 2625, 2736, 2849, 2964, 3081, 0, 81, 164, 249, - 336, 425, 516, 609, 704, 801, 900, 1001, 1104, 1209, 1316, 1425, - 1536, 1649, 1764, 1881, 2000, 2121, 2244, 2369, 2496, 2625, 2756, 2889, - 3024, 3161, 3300, 3441, 3584, 3729, 3876, 4025, 4176, 4329, 4484, 4641}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 5); - float answer_data[] = { - 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, - 144, 169, 196, 225, 256, 289, 324, 361, 0, 21, 44, 69, - 96, 125, 156, 189, 224, 261, 300, 341, 384, 429, 476, 525, - 576, 629, 684, 741, 800, 861, 924, 989, 1056, 1125, 1196, 1269, - 1344, 1421, 1500, 1581, 1664, 1749, 1836, 1925, 2016, 2109, 2204, 2301, - 1200, 1281, 1364, 1449, 1536, 1625, 1716, 1809, 1904, 
2001, 2100, 2201, - 2304, 2409, 2516, 2625, 2736, 2849, 2964, 3081, 3200, 3321, 3444, 3569, - 3696, 3825, 3956, 4089, 4224, 4361, 4500, 4641, 4784, 4929, 5076, 5225, - 5376, 5529, 5684, 5841, 4000, 4141, 4284, 4429, 4576, 4725, 4876, 5029, - 5184, 5341, 5500, 5661, 5824, 5989, 6156, 6325, 6496, 6669, 6844, 7021}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 2, 4, 1); - float answer_data[] = { - 0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 20, 22, - 24, 26, 28, 45, 48, 51, 54, 57, 80, 84, 88, 92, - 96, 125, 130, 135, 140, 145, 180, 186, 192, 198, 204, 245, - 252, 259, 266, 273, 320, 328, 336, 344, 352, 405, 414, 423, - 432, 441, 500, 510, 520, 530, 540, 605, 616, 627, 638, 649, - 720, 732, 744, 756, 768, 845, 858, 871, 884, 897, 980, 994, - 1008, 1022, 1036, 1125, 1140, 1155, 1170, 1185, 1280, 1296, 1312, 1328, - 1344, 1445, 1462, 1479, 1496, 1513, 1620, 1638, 1656, 1674, 1692, 1805, - 1824, 1843, 1862, 1881, 2000, 2020, 2040, 2060, 2080, 2205, 2226, 2247, - 2268, 2289, 2420, 2442, 2464, 2486, 2508, 2645, 2668, 2691, 2714, 2737}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 5); - float answer_data[] = { - 0, 1, 4, 9, 16, 0, 6, 14, 24, 36, 0, 11, - 24, 39, 56, 0, 16, 34, 54, 76, 0, 21, 44, 69, - 96, 0, 26, 54, 84, 116, 0, 31, 64, 99, 136, 0, - 36, 74, 114, 156, 200, 246, 294, 344, 396, 225, 276, 329, - 384, 441, 250, 306, 364, 424, 486, 275, 336, 399, 464, 531, - 300, 366, 434, 504, 576, 325, 396, 469, 544, 621, 350, 426, - 504, 584, 666, 375, 456, 539, 624, 711, 800, 891, 984, 1079, - 1176, 850, 946, 1044, 1144, 1246, 900, 1001, 1104, 1209, 1316, 950, - 1056, 1164, 1274, 1386, 1000, 1111, 1224, 1339, 1456, 1050, 1166, 1284, - 1404, 1526, 1100, 1221, 1344, 1469, 1596, 1150, 1276, 1404, 1534, 1666}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 5); - float answer_data[] = { - 0, 1, 4, 9, 16, 0, 6, 14, 24, 36, 0, 11, 24, 39, - 56, 0, 16, 34, 54, 76, 100, 126, 154, 184, 216, 125, 156, 189, - 224, 261, 150, 186, 224, 264, 306, 175, 216, 259, 304, 351, 0, 41, - 84, 129, 176, 0, 46, 94, 144, 196, 0, 51, 104, 159, 216, 0, - 56, 114, 174, 236, 300, 366, 434, 504, 576, 325, 396, 469, 544, 621, - 350, 426, 504, 584, 666, 375, 456, 539, 624, 711, 0, 81, 164, 249, - 336, 0, 86, 174, 264, 356, 0, 91, 184, 279, 376, 0, 96, 194, - 294, 396, 500, 606, 714, 824, 936, 525, 636, 749, 864, 981, 550, 666, - 784, 904, 1026, 575, 696, 819, 944, 1071}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 1); - float answer_data[] = { - 0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 20, 22, - 24, 26, 28, 45, 48, 51, 54, 57, 0, 0, 0, 0, - 0, 25, 26, 27, 28, 29, 60, 62, 64, 66, 68, 105, - 108, 111, 114, 117, 
160, 164, 168, 172, 176, 225, 230, 235, - 240, 245, 300, 306, 312, 318, 324, 385, 392, 399, 406, 413, - 240, 244, 248, 252, 256, 325, 330, 335, 340, 345, 420, 426, - 432, 438, 444, 525, 532, 539, 546, 553, 640, 648, 656, 664, - 672, 765, 774, 783, 792, 801, 900, 910, 920, 930, 940, 1045, - 1056, 1067, 1078, 1089, 800, 808, 816, 824, 832, 945, 954, 963, - 972, 981, 1100, 1110, 1120, 1130, 1140, 1265, 1276, 1287, 1298, 1309}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 5); - float answer_data[] = { - 0, 1, 4, 9, 16, 0, 6, 14, 24, 36, 0, 11, 24, 39, 56, - 0, 16, 34, 54, 76, 0, 21, 44, 69, 96, 0, 26, 54, 84, 116, - 0, 31, 64, 99, 136, 0, 36, 74, 114, 156, 0, 41, 84, 129, 176, - 0, 46, 94, 144, 196, 0, 51, 104, 159, 216, 0, 56, 114, 174, 236, - 0, 61, 124, 189, 256, 0, 66, 134, 204, 276, 0, 71, 144, 219, 296, - 0, 76, 154, 234, 316, 0, 81, 164, 249, 336, 0, 86, 174, 264, 356, - 0, 91, 184, 279, 376, 0, 96, 194, 294, 396, 0, 101, 204, 309, 416, - 0, 106, 214, 324, 436, 0, 111, 224, 339, 456, 0, 116, 234, 354, 476}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 1); - float answer_data[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 1); - float answer_data[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 160, 162, 164, 166, - 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, - 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, - 224, 226, 228, 230, 232, 234, 236, 238}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 5, 1, 4); - nntrainer::TensorV2 t = rangedV2(3, 5, 1, 4); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 4); - float answer_data[] = {0, 1, 4, 9, 0, 5, 12, 21, 0, 9, - 20, 33, 0, 13, 28, 45, 0, 17, 36, 57, - 80, 105, 132, 161, 96, 125, 156, 189, 112, 145, - 180, 217, 128, 165, 204, 245, 144, 185, 228, 273, - 320, 369, 420, 473, 352, 405, 460, 517, 384, 441, - 500, 561, 416, 477, 540, 605, 448, 513, 580, 649}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - 
EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } -} - -TEST(nntrainer_Tensor, multiply_i_broadcast_not_supported_01_n) { - nntrainer::TensorV2 target(3, 1, 3, 1); - nntrainer::TensorV2 target2(3, 1, 3, 3); - - EXPECT_EQ(target.multiply_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, multiply_i_broadcast_not_broadcastable_02_n) { - nntrainer::TensorV2 target(3, 2, 4, 5); - nntrainer::TensorV2 target2(3, 2, 3, 1); - - EXPECT_EQ(target.multiply_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, multiply_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 result = input.multiply(0.0); - if (result.getValue(0, 0, 1, 1) != 0.0) - status = ML_ERROR_RESULT_OUT_OF_RANGE; - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, multiply_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.multiply(input); - - float *data = result.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != indata[i] * indata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, multiply_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, height - 1, width - 1); - - EXPECT_THROW({ input.multiply(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_input = - input.getSharedDataTensor(dim, 0, false, ""); - nntrainer::TensorV2 test(dim); - - EXPECT_THROW(shared_input.multiply(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - nntrainer::TensorV2 test(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_test = test.getSharedDataTensor(dim, 0, false, ""); - - EXPECT_THROW(input.multiply(shared_test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_06_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.multiply(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_07_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - 
nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.multiply(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_08_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 2); - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.multiply(test, output), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_float_01_p) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 expected(batch, channel, height, width); - GEN_TEST_INPUT(expected, (i * (batch * height) + j * (width) + k + 1) * 2); - - nntrainer::TensorV2 result = input.multiply(2.0); - - EXPECT_EQ(result, expected); -} - -TEST(nntrainer_Tensor, multiply_strided_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.multiply_strided(input); - - float *data = result.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - float *outdata = new float[(input.size())]; - - std::transform(indata, indata + batch * channel * height * width, indata, - outdata, std::multiplies()); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != outdata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - delete[] outdata; - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, multiply_strided_02_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, height - 1, width - 1); - - EXPECT_THROW({ input.multiply_strided(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - // input is not allocated now : alloc_now == false - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.multiply_strided(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - // test is not allocated. 
- nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.multiply_strided(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - // output is not allocated - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.multiply_strided(test, output), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_06_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 output(batch, channel, height, width); - GEN_TEST_INPUT(output, i * (batch * height) + j * (width) + k + 1); - - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - float *data = output.getData(); - ASSERT_NE(nullptr, data); - - float *outdata_beta = new float[(input.size())]; - float *indata_mul = new float[(input.size())]; - float *outdata = new float[(input.size())]; - - std::transform( - indata, indata + batch * channel * height * width, outdata_beta, - std::bind(std::multiplies(), std::placeholders::_1, 10.0)); - - std::transform(indata, indata + batch * channel * height * width, indata, - indata_mul, std::multiplies()); - std::transform(indata_mul, indata_mul + batch * channel * height * width, - outdata_beta, outdata, std::plus()); - - input.multiply_strided(input, output, 10.0); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != outdata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - delete[] outdata_beta; - delete[] indata_mul; - delete[] outdata; - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, divide_i_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original; - original.copy(input); - - status = input.divide_i((float)2.0); - EXPECT_EQ(status, ML_ERROR_NONE); - - float *data = original.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(data[i], indata[i] + indata[i]); - } -} - -TEST(nntrainer_Tensor, divide_i_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - status = input.divide_i(input); - EXPECT_EQ(status, ML_ERROR_NONE); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(indata[i], float(1.0)); - } -} - -TEST(nntrainer_Tensor, divide_i_01_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - status = input.divide_i((float)0); - EXPECT_EQ(status, 
ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, divide_i_02_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original(batch, channel, height - 2, width - 1); - - status = input.divide_i(original); - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, divide_01_p) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.divide(1.0); - - float *previous = input.getData(); - ASSERT_NE(nullptr, previous); - float *data = result.getData(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(data[i], previous[i]); - } -} - -TEST(nntrainer_Tensor, divide_02_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW({ input.divide(0.0); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_input = - input.getSharedDataTensor(dim, 0, false, ""); - nntrainer::TensorV2 test(dim); - - EXPECT_THROW(shared_input.divide(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - nntrainer::TensorV2 test(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_test = test.getSharedDataTensor(dim, 0, false, ""); - - EXPECT_THROW(input.divide(shared_test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_06_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.divide(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_07_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.divide(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_08_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 2); - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.divide(test, output), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_i_broadcast_01_p) { - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - 
nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 2, 4, 5); - m.add_i(1); - float answer_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 41.0, 21.0, - 14.333333, 11.0, 9.0, 7.6666665, 6.714286, 6.0, - 5.4444447, 5.0, 4.6363635, 4.3333335, 4.076923, 3.857143, - 3.6666667, 3.5, 3.3529413, 3.2222223, 3.1052632, 3.0, - 2.9047618, 2.8181818, 2.7391305, 2.6666667, 2.6, 2.5384614, - 2.4814816, 2.4285715, 2.3793104, 2.3333333, 2.2903225, 2.25, - 2.2121212, 2.1764705, 2.142857, 2.1111112, 2.0810812, 2.0526316, - 2.025641, 2.0, 81.0, 41.0, 27.666666, 21.0, - 17.0, 14.333333, 12.428572, 11.0, 9.888889, 9.0, - 8.272727, 7.6666665, 7.1538463, 6.714286, 6.3333335, 6.0, - 5.7058825, 5.4444447, 5.2105265, 5.0, 4.8095236, 4.6363635, - 4.478261, 4.3333335, 4.2, 4.076923, 3.9629629, 3.857143, - 3.7586207, 3.6666667, 3.580645, 3.5, 3.4242425, 3.3529413, - 3.2857144, 3.2222223, 3.162162, 3.1052632, 3.0512822, 3.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 5); - m.add_i(1); - float answer_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 21.0, 11.0, 7.6666665, 6.0, - 5.0, 4.3333335, 3.857143, 3.5, 3.2222223, 3.0, - 2.8181818, 2.6666667, 2.5384614, 2.4285715, 2.3333333, 2.25, - 2.1764705, 2.1111112, 2.0526316, 2.0, 1.9523809, 1.9090909, - 1.8695652, 1.8333334, 1.8, 1.7692307, 1.7407408, 1.7142857, - 1.6896552, 1.6666666, 1.6451613, 1.625, 1.6060606, 1.5882353, - 1.5714285, 1.5555556, 1.5405406, 1.5263158, 1.5128205, 1.5, - 2.9047618, 2.8181818, 2.7391305, 2.6666667, 2.6, 2.5384614, - 2.4814816, 2.4285715, 2.3793104, 2.3333333, 2.2903225, 2.25, - 2.2121212, 2.1764705, 2.142857, 2.1111112, 2.0810812, 2.0526316, - 2.025641, 2.0, 1.9756098, 1.9523809, 1.9302325, 1.9090909, - 1.8888888, 1.8695652, 1.8510638, 1.8333334, 1.8163265, 1.8, - 1.7843137, 1.7692307, 1.754717, 1.7407408, 1.7272727, 1.7142857, - 1.7017543, 1.6896552, 1.6779661, 1.6666666, 2.4634147, 2.4285715, - 2.3953488, 2.3636363, 2.3333333, 2.3043478, 2.2765958, 2.25, - 2.2244897, 2.2, 2.1764705, 2.1538463, 2.1320755, 2.1111112, - 2.090909, 2.0714285, 2.0526316, 2.0344827, 2.0169492, 2.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 2, 4, 1); - m.add_i(1); - float answer_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, - 3.5, 4.0, 4.5, 5.0, 3.6666667, 4.0, - 4.3333335, 4.6666665, 5.0, 4.0, 4.25, 4.5, - 4.75, 5.0, 4.2, 4.4, 4.6, 4.8, - 5.0, 4.3333335, 4.5, 4.6666665, 4.8333335, 5.0, - 4.428571, 4.571429, 4.714286, 4.857143, 5.0, 4.5, - 4.625, 4.75, 4.875, 5.0, 4.5555553, 4.6666665, - 4.7777777, 4.888889, 5.0, 4.6, 4.7, 4.8, - 4.9, 5.0, 4.6363635, 4.7272725, 4.818182, 4.909091, - 5.0, 4.6666665, 4.75, 4.8333335, 4.9166665, 5.0, - 4.6923075, 4.769231, 4.8461537, 4.923077, 5.0, 4.714286, - 4.785714, 4.857143, 4.928571, 5.0, 4.733333, 4.8, - 4.866667, 4.9333334, 5.0, 4.75, 
4.8125, 4.875, - 4.9375, 5.0, 4.7647057, 4.8235292, 4.882353, 4.9411764, - 5.0, 4.7777777, 4.8333335, 4.888889, 4.9444447, 5.0, - 4.7894735, 4.8421054, 4.894737, 4.9473686, 5.0, 4.8, - 4.85, 4.9, 4.95, 5.0, 4.8095236, 4.857143, - 4.904762, 4.952381, 5.0, 4.818182, 4.8636365, 4.909091, - 4.9545455, 5.0, 4.826087, 4.869565, 4.9130435, 4.9565215, - 5.0, 4.8333335, 4.875, 4.9166665, 4.9583335, 5.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 5); - m.add_i(1); - float answer_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, - 3.5, 2.6666667, 2.25, 2.0, 11.0, 6.0, - 4.3333335, 3.5, 3.0, 16.0, 8.5, 6.0, - 4.75, 4.0, 21.0, 11.0, 7.6666665, 6.0, - 5.0, 26.0, 13.5, 9.333333, 7.25, 6.0, - 31.0, 16.0, 11.0, 8.5, 7.0, 36.0, - 18.5, 12.666667, 9.75, 8.0, 6.8333335, 6.0, - 5.375, 4.888889, 4.5, 7.6666665, 6.714286, 6.0, - 5.4444447, 5.0, 8.5, 7.428571, 6.625, 6.0, - 5.5, 9.333333, 8.142858, 7.25, 6.5555553, 6.0, - 10.166667, 8.857142, 7.875, 7.111111, 6.5, 11.0, - 9.571428, 8.5, 7.6666665, 7.0, 11.833333, 10.285714, - 9.125, 8.222222, 7.5, 12.666667, 11.0, 9.75, - 8.777778, 8.0, 7.3636365, 6.8333335, 6.3846154, 6.0, - 5.6666665, 7.818182, 7.25, 6.769231, 6.357143, 6.0, - 8.272727, 7.6666665, 7.1538463, 6.714286, 6.3333335, 8.727273, - 8.083333, 7.5384617, 7.071429, 6.6666665, 9.181818, 8.5, - 7.923077, 7.428571, 7.0, 9.636364, 8.916667, 8.307693, - 7.785714, 7.3333335, 10.090909, 9.333333, 8.692307, 8.142858, - 7.6666665, 10.545455, 9.75, 9.076923, 8.5, 8.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 5); - m.add_i(1); - float answer_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, - 3.5, 2.6666667, 2.25, 2.0, 11.0, 6.0, - 4.3333335, 3.5, 3.0, 16.0, 8.5, 6.0, - 4.75, 4.0, 3.5, 3.142857, 2.875, 2.6666667, - 2.5, 4.3333335, 3.857143, 3.5, 3.2222223, 3.0, - 5.1666665, 4.571429, 4.125, 3.7777777, 3.5, 6.0, - 5.285714, 4.75, 4.3333335, 4.0, 41.0, 21.0, - 14.333333, 11.0, 9.0, 46.0, 23.5, 16.0, - 12.25, 10.0, 51.0, 26.0, 17.666666, 13.5, - 11.0, 56.0, 28.5, 19.333334, 14.75, 12.0, - 10.166667, 8.857142, 7.875, 7.111111, 6.5, 11.0, - 9.571428, 8.5, 7.6666665, 7.0, 11.833333, 10.285714, - 9.125, 8.222222, 7.5, 12.666667, 11.0, 9.75, - 8.777778, 8.0, 81.0, 41.0, 27.666666, 21.0, - 17.0, 86.0, 43.5, 29.333334, 22.25, 18.0, - 91.0, 46.0, 31.0, 23.5, 19.0, 96.0, - 48.5, 32.666668, 24.75, 20.0, 16.833334, 14.571428, - 12.875, 11.555555, 10.5, 17.666666, 15.285714, 13.5, - 12.111111, 11.0, 18.5, 16.0, 14.125, 12.666667, - 11.5, 19.333334, 16.714285, 14.75, 13.222222, 12.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 1); - m.add_i(1); - float answer_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, - 3.5, 4.0, 4.5, 5.0, 3.6666667, 4.0, - 4.3333335, 4.6666665, 5.0, 4.0, 4.25, 4.5, - 4.75, 5.0, 21.0, 22.0, 23.0, 24.0, - 25.0, 13.0, 13.5, 14.0, 14.5, 15.0, - 10.333333, 10.666667, 11.0, 
11.333333, 11.666667, 9.0, - 9.25, 9.5, 9.75, 10.0, 8.2, 8.4, - 8.6, 8.8, 9.0, 7.6666665, 7.8333335, 8.0, - 8.166667, 8.333333, 7.285714, 7.428571, 7.571429, 7.714286, - 7.857143, 7.0, 7.125, 7.25, 7.375, 7.5, - 12.2, 12.4, 12.6, 12.8, 13.0, 11.0, - 11.166667, 11.333333, 11.5, 11.666667, 10.142858, 10.285714, - 10.428572, 10.571428, 10.714286, 9.5, 9.625, 9.75, - 9.875, 10.0, 9.0, 9.111111, 9.222222, 9.333333, - 9.444445, 8.6, 8.7, 8.8, 8.9, 9.0, - 8.272727, 8.363636, 8.454545, 8.545455, 8.636364, 8.0, - 8.083333, 8.166667, 8.25, 8.333333, 11.222222, 11.333333, - 11.444445, 11.555555, 11.666667, 10.6, 10.7, 10.8, - 10.9, 11.0, 10.090909, 10.181818, 10.272727, 10.363636, - 10.454545, 9.666667, 9.75, 9.833333, 9.916667, 10.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 5); - m.add_i(1); - float answer_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 3.5, 2.6666667, 2.25, 2.0, - 11.0, 6.0, 4.3333335, 3.5, 3.0, 16.0, 8.5, 6.0, 4.75, 4.0, - 21.0, 11.0, 7.6666665, 6.0, 5.0, 26.0, 13.5, 9.333333, 7.25, 6.0, - 31.0, 16.0, 11.0, 8.5, 7.0, 36.0, 18.5, 12.666667, 9.75, 8.0, - 41.0, 21.0, 14.333333, 11.0, 9.0, 46.0, 23.5, 16.0, 12.25, 10.0, - 51.0, 26.0, 17.666666, 13.5, 11.0, 56.0, 28.5, 19.333334, 14.75, 12.0, - 61.0, 31.0, 21.0, 16.0, 13.0, 66.0, 33.5, 22.666666, 17.25, 14.0, - 71.0, 36.0, 24.333334, 18.5, 15.0, 76.0, 38.5, 26.0, 19.75, 16.0, - 81.0, 41.0, 27.666666, 21.0, 17.0, 86.0, 43.5, 29.333334, 22.25, 18.0, - 91.0, 46.0, 31.0, 23.5, 19.0, 96.0, 48.5, 32.666668, 24.75, 20.0, - 101.0, 51.0, 34.333332, 26.0, 21.0, 106.0, 53.5, 36.0, 27.25, 22.0, - 111.0, 56.0, 37.666668, 28.5, 23.0, 116.0, 58.5, 39.333332, 29.75, 24.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 1); - m.add_i(1); - float answer_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, - 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 10.5, 11.0, 11.5, 12.0, - 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5, 16.0, 16.5, 17.0, 17.5, 18.0, - 18.5, 19.0, 19.5, 20.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, - 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, - 30.5, 31.0, 31.5, 32.0, 32.5, 33.0, 33.5, 34.0, 34.5, 35.0, 35.5, 36.0, - 36.5, 37.0, 37.5, 38.0, 38.5, 39.0, 39.5, 40.0, 81.0, 82.0, 83.0, 84.0, - 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, - 97.0, 98.0, 99.0, 100.0, 50.5, 51.0, 51.5, 52.0, 52.5, 53.0, 53.5, 54.0, - 54.5, 55.0, 55.5, 56.0, 56.5, 57.0, 57.5, 58.0, 58.5, 59.0, 59.5, 60.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 1); - m.add_i(1); - float answer_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, - 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, - 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, - 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, - 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, - 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, - 37.0, 38.0, 39.0, 
40.0, 20.5, 21.0, - 21.5, 22.0, 22.5, 23.0, 23.5, 24.0, - 24.5, 25.0, 25.5, 26.0, 26.5, 27.0, - 27.5, 28.0, 28.5, 29.0, 29.5, 30.0, - 30.5, 31.0, 31.5, 32.0, 32.5, 33.0, - 33.5, 34.0, 34.5, 35.0, 35.5, 36.0, - 36.5, 37.0, 37.5, 38.0, 38.5, 39.0, - 39.5, 40.0, 27.0, 27.333334, 27.666666, 28.0, - 28.333334, 28.666666, 29.0, 29.333334, 29.666666, 30.0, - 30.333334, 30.666666, 31.0, 31.333334, 31.666666, 32.0, - 32.333332, 32.666668, 33.0, 33.333332, 33.666668, 34.0, - 34.333332, 34.666668, 35.0, 35.333332, 35.666668, 36.0, - 36.333332, 36.666668, 37.0, 37.333332, 37.666668, 38.0, - 38.333332, 38.666668, 39.0, 39.333332, 39.666668, 40.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 5, 1, 4); - nntrainer::TensorV2 t = rangedV2(3, 5, 1, 4); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 4); - m.add_i(1); - float answer_data[] = { - 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, - 2.3333333, 2.0, 9.0, 5.0, 3.6666667, 3.0, - 13.0, 7.0, 5.0, 4.0, 17.0, 9.0, - 6.3333335, 5.0, 4.2, 3.6666667, 3.2857144, 3.0, - 5.0, 4.3333335, 3.857143, 3.5, 5.8, 5.0, - 4.428571, 4.0, 6.6, 5.6666665, 5.0, 4.5, - 7.4, 6.3333335, 5.571429, 5.0, 4.5555553, 4.2, - 3.909091, 3.6666667, 5.0, 4.6, 4.2727275, 4.0, - 5.4444447, 5.0, 4.6363635, 4.3333335, 5.888889, 5.4, - 5.0, 4.6666665, 6.3333335, 5.8, 5.3636365, 5.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } -} - -TEST(nntrainer_Tensor, divide_i_broadcast_not_supported_01_n) { - nntrainer::TensorV2 target(3, 1, 3, 1); - nntrainer::TensorV2 target2(3, 1, 3, 3); - - EXPECT_EQ(target.divide_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, divide_i_broadcast_not_broadcastable_02_n) { - nntrainer::TensorV2 target(3, 2, 4, 5); - nntrainer::TensorV2 target2(3, 2, 3, 1); - - EXPECT_EQ(target.divide_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_i_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - int channel = 1; - - nntrainer::TensorV2 target(batch, channel, height, width); - GEN_TEST_INPUT(target, i * (batch * height) + j * (width) + k + 1 + channel); - - nntrainer::TensorV2 original(batch, channel, height, width); - original.copy(target); - - status = target.add_i(2.1); - EXPECT_EQ(status, ML_ERROR_NONE); - - float *previous = original.getData(); - ASSERT_NE(nullptr, previous); - float *data = target.getData(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width; ++i) { - EXPECT_FLOAT_EQ(data[i], previous[i] + (float)2.1); - } -} - -TEST(nntrainer_Tensor, add_i_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - int channel = 1; - - nntrainer::TensorV2 target(batch, channel, height, width); - GEN_TEST_INPUT(target, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 original(batch, height, width); - original.copy(target); - - status = target.add_i(target, 3.0); - EXPECT_EQ(status, ML_ERROR_NONE); - - float *previous = original.getData(); - ASSERT_NE(nullptr, previous); - float *data = target.getData(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width; ++i) { - EXPECT_FLOAT_EQ(data[i], previous[i] * 4.0); - } -} - -/** - * @brief operand dimension is not right - */ -TEST(nntrainer_Tensor, add_i_01_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int 
height = 3; - int width = 10; - int channel = 1; - - nntrainer::TensorV2 target(batch, channel, height, width); - GEN_TEST_INPUT(target, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 target2(batch, height - 2, width - 3); - - status = target.add_i(target2); - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_i_broadcast_01_p) { - nntrainer::TensorDim ref_dim{3, 2, 4, 5}; - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 2, 4, 5); - float answer_data[] = { - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, - 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, - 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 40, 42, - 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, - 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, - 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 80, 82, 84, 86, - 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, - 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, - 144, 146, 148, 150, 152, 154, 156, 158}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 5); - float answer_data[] = { - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, - 28, 30, 32, 34, 36, 38, 20, 22, 24, 26, 28, 30, 32, 34, - 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, - 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, - 92, 94, 96, 98, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, - 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, - 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, - 156, 158, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, - 164, 166, 168, 170, 172, 174, 176, 178}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 2, 4, 1); - float answer_data[] = { - 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, - 16, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 30, 31, 32, - 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 49, - 50, 51, 52, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66, - 67, 68, 69, 70, 72, 73, 74, 75, 76, 78, 79, 80, 81, 82, - 84, 85, 86, 87, 88, 90, 91, 92, 93, 94, 96, 97, 98, 99, - 100, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 114, 115, 116, - 117, 118, 120, 121, 122, 123, 124, 126, 127, 128, 129, 130, 132, 133, - 134, 135, 136, 138, 139, 140, 141, 142}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 5); - float answer_data[] = { - 0, 2, 4, 6, 8, 5, 7, 9, 11, 13, 10, 12, 14, 16, - 18, 15, 17, 19, 21, 23, 20, 22, 24, 26, 28, 25, 27, 29, - 31, 33, 30, 32, 34, 36, 38, 35, 37, 39, 41, 43, 45, 47, - 49, 51, 53, 50, 52, 54, 56, 58, 55, 57, 59, 61, 63, 60, - 62, 64, 66, 68, 65, 67, 69, 71, 73, 70, 72, 74, 76, 78, - 75, 77, 79, 81, 83, 80, 82, 84, 86, 88, 90, 92, 94, 96, - 98, 95, 97, 99, 101, 103, 100, 102, 104, 106, 108, 105, 107, 109, - 111, 113, 110, 112, 114, 116, 118, 115, 117, 119, 121, 123, 120, 122, - 124, 126, 128, 125, 127, 129, 131, 133}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = 
t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 5); - float answer_data[] = { - 0, 2, 4, 6, 8, 5, 7, 9, 11, 13, 10, 12, 14, 16, - 18, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 30, 32, 34, - 36, 38, 35, 37, 39, 41, 43, 40, 42, 44, 46, 48, 40, 42, - 44, 46, 48, 45, 47, 49, 51, 53, 50, 52, 54, 56, 58, 55, - 57, 59, 61, 63, 65, 67, 69, 71, 73, 70, 72, 74, 76, 78, - 75, 77, 79, 81, 83, 80, 82, 84, 86, 88, 80, 82, 84, 86, - 88, 85, 87, 89, 91, 93, 90, 92, 94, 96, 98, 95, 97, 99, - 101, 103, 105, 107, 109, 111, 113, 110, 112, 114, 116, 118, 115, 117, - 119, 121, 123, 120, 122, 124, 126, 128}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 1); - float answer_data[] = { - 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, - 16, 18, 19, 20, 21, 22, 20, 21, 22, 23, 24, 26, 27, 28, - 29, 30, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, - 46, 47, 48, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 62, - 63, 64, 65, 66, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, - 76, 77, 78, 79, 80, 82, 83, 84, 85, 86, 88, 89, 90, 91, - 92, 94, 95, 96, 97, 98, 100, 101, 102, 103, 104, 106, 107, 108, - 109, 110, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 120, 121, - 122, 123, 124, 126, 127, 128, 129, 130}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 5); - float answer_data[] = { - 0, 2, 4, 6, 8, 5, 7, 9, 11, 13, 10, 12, 14, 16, - 18, 15, 17, 19, 21, 23, 20, 22, 24, 26, 28, 25, 27, 29, - 31, 33, 30, 32, 34, 36, 38, 35, 37, 39, 41, 43, 40, 42, - 44, 46, 48, 45, 47, 49, 51, 53, 50, 52, 54, 56, 58, 55, - 57, 59, 61, 63, 60, 62, 64, 66, 68, 65, 67, 69, 71, 73, - 70, 72, 74, 76, 78, 75, 77, 79, 81, 83, 80, 82, 84, 86, - 88, 85, 87, 89, 91, 93, 90, 92, 94, 96, 98, 95, 97, 99, - 101, 103, 100, 102, 104, 106, 108, 105, 107, 109, 111, 113, 110, 112, - 114, 116, 118, 115, 117, 119, 121, 123}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 1); - float answer_data[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - 113, 114, 115, 116, 117, 118, 119, 120}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 1); - float answer_data[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 
- 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 83, 84, 85, - 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 1); - m.add_i(1.0); - float answer_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, - 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - 113, 114, 115, 116, 117, 118, 119, 120}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 5, 1, 4); - nntrainer::TensorV2 t = rangedV2(3, 5, 1, 4); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 4); - float answer_data[] = {0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, - 12, 14, 16, 18, 16, 18, 20, 22, 24, 26, 28, 30, - 28, 30, 32, 34, 32, 34, 36, 38, 36, 38, 40, 42, - 40, 42, 44, 46, 48, 50, 52, 54, 52, 54, 56, 58, - 56, 58, 60, 62, 60, 62, 64, 66, 64, 66, 68, 70}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(1, 1, 2, 1); - nntrainer::TensorV2 t = rangedV2(1, 1, 2, 1); - nntrainer::TensorV2 m = rangedV2(1, 1, 2, 1); - float answer_data[] = {0.0, 2.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(16, 1, 1, 1); - nntrainer::TensorV2 t = rangedV2(16, 1, 1, 1); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 1); - float answer_data[] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, - 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } -} - -TEST(nntrainer_Tensor, add_i_broadcast_not_supported_01_n) { - nntrainer::TensorV2 target(3, 1, 3, 1); - nntrainer::TensorV2 target2(3, 1, 3, 3); - - EXPECT_EQ(target.add_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_i_broadcast_not_broadcastable_02_n) { - nntrainer::TensorV2 target(3, 2, 4, 5); - nntrainer::TensorV2 target2(3, 2, 3, 1); - - EXPECT_EQ(target.add_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.add(1.0); - - float *data = result.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != indata[i] + (float)1.0) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } 
- - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, add_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.add(input); - - float *data = result.getData(); - ASSERT_NE(nullptr, data); - float *indata = input.getData(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != indata[i] + indata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, add_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, channel, height - 1, width - 1); - - EXPECT_THROW({ input.add(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_input = - input.getSharedDataTensor(dim, 0, false, ""); - nntrainer::TensorV2 test(dim); - - EXPECT_THROW(shared_input.add(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - nntrainer::TensorV2 test(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_test = test.getSharedDataTensor(dim, 0, false, ""); - - EXPECT_THROW(input.add(shared_test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_06_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.add(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_07_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.add(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_08_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 2); - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.add(test, output), std::invalid_argument); -} - -int main(int argc, char **argv) { - int result = -1; - - try { - testing::InitGoogleTest(&argc, argv); - } catch (...) { - std::cerr << "Error during InitGoogleTest" << std::endl; - return 0; - } - - try { - result = RUN_ALL_TESTS(); - } catch (...) 
{
-    std::cerr << "Error during RUN_ALL_TESTS()" << std::endl;
-  }
-
-  return result;
-}
diff --git a/test/unittest/unittest_nntrainer_tensor_v2_fp16.cpp b/test/unittest/unittest_nntrainer_tensor_v2_fp16.cpp
deleted file mode 100644
index d9b5743bd6..0000000000
--- a/test/unittest/unittest_nntrainer_tensor_v2_fp16.cpp
+++ /dev/null
@@ -1,2209 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2023 Donghyeon Jeong
- *
- * @file unittest_nntrainer_tensor_v2_fp16.cpp
- * @date 16 November 2023
- * @brief Unit test utility for tensor v2.
- * @see https://github.com/nnstreamer/nntrainer
- * @author 2023 Donghyeon Jeong
- * @bug No known bugs
- */
-#include
-
-#include "nntrainer_test_util.h"
-#include "util_func.h"
-#include
-#include
-#include
-#include
-
-TEST(nntrainer_Tensor, Tensor_01_p) {
-  int status = ML_ERROR_NONE;
-  nntrainer::TensorV2 tensor = nntrainer::TensorV2(
-    1, 2, 3, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16);
-  tensor.setZero();
-  ASSERT_NE(nullptr, tensor.getData<_FP16>());
-  if (tensor.getValue<_FP16>(0, 0, 0, 0) != 0.0)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-}
-
-TEST(nntrainer_Tensor, Tensor_02_p) {
-  int status = ML_ERROR_NONE;
-  int height = 3;
-  int width = 10;
-  std::vector<std::vector<_FP16>> in;
-  for (int i = 0; i < height; ++i) {
-    std::vector<_FP16> tv;
-    for (int j = 0; j < width; ++j) {
-      tv.push_back(static_cast<_FP16>(i * 2.0 + j));
-    }
-    in.push_back(tv);
-  }
-
-  nntrainer::TensorV2 tensor = nntrainer::TensorV2(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
-  ASSERT_NE(nullptr, tensor.getData<_FP16>());
-
-  if (tensor.getValue<_FP16>(0, 0, 0, 1) != 1.0)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-}
-
-TEST(nntrainer_Tensor, Tensor_02_nhwc_p) {
-  int status = ML_ERROR_NONE;
-  int width = 10;
-  int channel = 3;
-  std::vector<std::vector<_FP16>> in;
-  for (int i = 0; i < width; ++i) {
-    std::vector<_FP16> tv;
-    for (int j = 0; j < channel; ++j) {
-      tv.push_back(static_cast<_FP16>(i * 2.0 + j));
-    }
-    in.push_back(tv);
-  }
-
-  nntrainer::TensorV2 tensor = nntrainer::TensorV2(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
-  ASSERT_NE(nullptr, tensor.getData<_FP16>());
-
-  if (tensor.getValue<_FP16>(0, 0, 0, 1) != 1.0)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-}
-
-TEST(nntrainer_Tensor, Tensor_03_p) {
-  int status = ML_ERROR_NONE;
-  int batch = 3;
-  int height = 3;
-  int width = 10;
-  std::vector<std::vector<std::vector<_FP16>>> in;
-
-  for (int k = 0; k < batch; ++k) {
-    std::vector<std::vector<_FP16>> ttv;
-    for (int i = 0; i < height; ++i) {
-      std::vector<_FP16> tv;
-      for (int j = 0; j < width; ++j) {
-        tv.push_back(static_cast<_FP16>(k * height * width + i * width + j));
-      }
-      ttv.push_back(tv);
-    }
-    in.push_back(ttv);
-  }
-
-  nntrainer::TensorV2 tensor = nntrainer::TensorV2(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
-  ASSERT_NE(nullptr, tensor.getData<_FP16>());
-
-  if (tensor.getValue<_FP16>(0, 0, 0, 1) != 1.0)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-}
-
-TEST(nntrainer_Tensor, Tensor_04_p) {
-  int status = ML_ERROR_NONE;
-  int batch = 3;
-  int height = 3;
-  int width = 10;
-  std::vector<std::vector<std::vector<_FP16>>> in;
-
-  for (int k = 0; k < batch; ++k) {
-    std::vector<std::vector<_FP16>> ttv;
-    for (int i = 0; i < height; ++i) {
-      std::vector<_FP16> tv;
-      for (int j = 0; j < width; ++j) {
-        tv.push_back(k * height * width + i * width + j);
-      }
-      ttv.push_back(tv);
-    }
-    in.push_back(ttv);
-  }
-
-  nntrainer::TensorV2 t0 = nntrainer::TensorV2(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
-
-  // copy assignment operator
-  nntrainer::TensorV2 t1 = t0;
-
-  if (t1.getValue<_FP16>(0, 0, 0, 1) != 1.0)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-
-  // comparison operator
-  EXPECT_EQ(t0, t1);
-}
-
-TEST(nntrainer_Tensor, Tensor_05_p) {
-  int status = ML_ERROR_NONE;
-  int batch = 3;
-  int height = 3;
-  int width = 10;
-  std::vector<std::vector<std::vector<_FP16>>> in;
-
-  for (int k = 0; k < batch; ++k) {
-    std::vector<std::vector<_FP16>> ttv;
-    for (int i = 0; i < height; ++i) {
-      std::vector<_FP16> tv;
-      for (int j = 0; j < width; ++j) {
-        tv.push_back(k * height * width + i * width + j);
-      }
-      ttv.push_back(tv);
-    }
-    in.push_back(ttv);
-  }
-
-  nntrainer::TensorV2 t0 = nntrainer::TensorV2(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
-
-  // copy assignment operator
-  nntrainer::TensorV2 t1 = nntrainer::TensorV2(
-    batch, height, width, nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16);
-  t1.setRandNormal(2.3, 0.5);
-
-  _FP16 val_t0 = t0.getValue<_FP16>(0, 0, 0, 1);
-  _FP16 val_t1 = t1.getValue<_FP16>(0, 0, 0, 1);
-
-  swap(t0, t1);
-
-  if (t0.getValue<_FP16>(0, 0, 0, 1) != val_t1)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-
-  if (t1.getValue<_FP16>(0, 0, 0, 1) != val_t0)
-    status = ML_ERROR_INVALID_PARAMETER;
-  EXPECT_EQ(status, ML_ERROR_NONE);
-}
-
-TEST(nntrainer_Tensor, Tensor_06_p) {
-  int status = ML_ERROR_NONE;
-  int batch = 3;
-  int height = 3;
-  int width = 10;
-  std::vector<std::vector<std::vector<float>>> in;
-  std::vector<std::vector<std::vector<_FP16>>> in2;
-
-  for (int k = 0; k < batch; ++k) {
-    std::vector<std::vector<float>> ttv;
-    std::vector<std::vector<_FP16>> ttv2;
-    for (int i = 0; i < height; ++i) {
-      std::vector<float> tv;
-      std::vector<_FP16> tv2;
-      for (int j = 0; j < width; ++j) {
-        tv.push_back(k * height * width + i * width + j);
-        tv2.push_back(k * height * width + i * width + j);
-      }
-      ttv.push_back(tv);
-      ttv2.push_back(tv2);
-    }
-    in.push_back(ttv);
-    in2.push_back(ttv2);
-  }
-
-  nntrainer::TensorV2 t0 = nntrainer::TensorV2(
-    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32});
-  nntrainer::TensorV2 t1 = nntrainer::TensorV2(
-    in2, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16});
-
-  EXPECT_NE(t0, t1);
-}
-
-TEST(nntrainer_Tensor, empty_01) {
-  nntrainer::TensorV2 t("", nntrainer::Tformat::NCHW,
-                        nntrainer::Tdatatype::FP16);
-
-  EXPECT_TRUE(t.empty());
-}
-
-TEST(nntrainer_Tensor, empty_02) {
-  nntrainer::TensorV2 t(
-    {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}},
-    false);
-
-  EXPECT_FALSE(t.empty());
-}
-
-TEST(nntrainer_Tensor, empty_03) {
-  nntrainer::TensorV2 t(
-    {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}},
-    true);
-
-  EXPECT_FALSE(t.empty());
-}
-
-TEST(nntrainer_Tensor, allocate_01_n) {
-  nntrainer::TensorV2 t;
-  EXPECT_FALSE(t.isAllocated());
-
-  t.allocate();
-  EXPECT_FALSE(t.isAllocated());
-}
-
-TEST(nntrainer_Tensor, allocate_02_p) {
-  nntrainer::TensorV2 t(
-    {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}},
-    false);
-  EXPECT_FALSE(t.isAllocated());
-
-  t.allocate();
-  EXPECT_TRUE(t.isAllocated());
-}
-
-TEST(nntrainer_Tensor, allocate_03_p) {
-  nntrainer::TensorV2 t(
-    {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}},
-    true);
-  EXPECT_TRUE(t.isAllocated());
-
-  t.allocate();
-  EXPECT_TRUE(t.isAllocated());
-}
-
-TEST(nntrainer_Tensor, initialize_01_p) {
-  nntrainer::TensorV2 t(
-    {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}},
-    true, nntrainer::Initializer::ONES);
-
-  nntrainer::TensorV2 golden(1, 2, 3, 4,
nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - golden.setValue(1); - - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_02_p) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - true); - - nntrainer::TensorV2 golden(1, 2, 3, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - golden.setValue(1); - - EXPECT_NE(golden, t); - - t.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_03_p) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - false, nntrainer::Initializer::ONES); - t.allocate(); - - nntrainer::TensorV2 golden(1, 2, 3, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - golden.setValue(1); - - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_04_p) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - false); - t.initialize(nntrainer::Initializer::ONES); - t.allocate(); - - nntrainer::TensorV2 golden(1, 2, 3, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - ; - golden.setValue(1); - - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_05_p) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - false); - t.allocate(); - - nntrainer::TensorV2 golden(1, 2, 3, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - golden.setValue(1.f); - - /** - * Ideally, it should be NE, but it can be equal due to no initialization - * EXPECT_NE(golden, t); - */ - - t.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_06_n) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - true, nntrainer::Initializer::ONES); - nntrainer::TensorV2 golden( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - true, nntrainer::Initializer::ZEROS); - - EXPECT_NE(golden, t); - - golden.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_07_p) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - true, nntrainer::Initializer::ONES); - - nntrainer::TensorV2 golden(1, 2, 3, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - golden.setValue(1); - - EXPECT_EQ(golden, t); - - t.setValue(0, 0, 0, 0, 0); - t.setValue(0, 0, 0, t.size() - 1, 0); - EXPECT_NE(golden, t); - - t.initialize(); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, initialize_08_p) { - nntrainer::TensorV2 t( - {{1, 2, 3, 4}, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, - true, nntrainer::Initializer::ONES); - - nntrainer::TensorV2 golden(1, 2, 3, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - golden.setValue(1); - - EXPECT_EQ(golden, t); - - t.initialize(nntrainer::Initializer::HE_NORMAL); - EXPECT_NE(golden, t); - - t.initialize(); - EXPECT_NE(golden, t); - - t.initialize(nntrainer::Initializer::ONES); - EXPECT_EQ(golden, t); - - t.initialize(); - EXPECT_EQ(golden, t); -} - -TEST(nntrainer_Tensor, multiply_i_01_fp16_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original; - 
original.copy(input); - - status = input.multiply_i(2.0); - EXPECT_EQ(status, ML_ERROR_NONE); - - _FP16 *data = original.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(data[i] + data[i], indata[i]); - } -} - -TEST(nntrainer_Tensor, multiply_i_02_fp16_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original; - original.copy(input); - - status = input.multiply_i(input); - EXPECT_EQ(status, ML_ERROR_NONE); - - _FP16 *data = original.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(data[i] * data[i], indata[i]); - } -} - -TEST(nntrainer_Tensor, multiply_i_03_fp16_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 target2(batch, channel, height - 2, width - 1, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - status = input.multiply_i(target2); - - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, multiply_i_broadcast_01_fp16_p) { - unsigned int N = 120; - _FP16 *answer_data = new _FP16[N]; - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - float float_data[] = { - 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, - 144, 169, 196, 225, 256, 289, 324, 361, 400, 441, 484, 529, - 576, 625, 676, 729, 784, 841, 900, 961, 1024, 1089, 1156, 1225, - 1296, 1369, 1444, 1521, 0, 41, 84, 129, 176, 225, 276, 329, - 384, 441, 500, 561, 624, 689, 756, 825, 896, 969, 1044, 1121, - 1200, 1281, 1364, 1449, 1536, 1625, 1716, 1809, 1904, 2001, 2100, 2201, - 2304, 2409, 2516, 2625, 2736, 2849, 2964, 3081, 0, 81, 164, 249, - 336, 425, 516, 609, 704, 801, 900, 1001, 1104, 1209, 1316, 1425, - 1536, 1649, 1764, 1881, 2000, 2121, 2244, 2369, 2496, 2625, 2756, 2889, - 3024, 3161, 3300, 3441, 3584, 3729, 3876, 4025, 4176, 4329, 4484, 4641}; - - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, - 144, 169, 196, 225, 256, 289, 324, 361, 0, 21, 44, 69, - 96, 125, 156, 189, 224, 261, 300, 341, 384, 429, 476, 525, - 576, 629, 684, 741, 800, 861, 924, 
989, 1056, 1125, 1196, 1269, - 1344, 1421, 1500, 1581, 1664, 1749, 1836, 1925, 2016, 2109, 2204, 2301, - 1200, 1281, 1364, 1449, 1536, 1625, 1716, 1809, 1904, 2001, 2100, 2201, - 2304, 2409, 2516, 2625, 2736, 2849, 2964, 3081, 3200, 3321, 3444, 3569, - 3696, 3825, 3956, 4089, 4224, 4361, 4500, 4641, 4784, 4929, 5076, 5225, - 5376, 5529, 5684, 5841, 4000, 4141, 4284, 4429, 4576, 4725, 4876, 5029, - 5184, 5341, 5500, 5661, 5824, 5989, 6156, 6325, 6496, 6669, 6844, 7021}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 2, 4, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 20, 22, - 24, 26, 28, 45, 48, 51, 54, 57, 80, 84, 88, 92, - 96, 125, 130, 135, 140, 145, 180, 186, 192, 198, 204, 245, - 252, 259, 266, 273, 320, 328, 336, 344, 352, 405, 414, 423, - 432, 441, 500, 510, 520, 530, 540, 605, 616, 627, 638, 649, - 720, 732, 744, 756, 768, 845, 858, 871, 884, 897, 980, 994, - 1008, 1022, 1036, 1125, 1140, 1155, 1170, 1185, 1280, 1296, 1312, 1328, - 1344, 1445, 1462, 1479, 1496, 1513, 1620, 1638, 1656, 1674, 1692, 1805, - 1824, 1843, 1862, 1881, 2000, 2020, 2040, 2060, 2080, 2205, 2226, 2247, - 2268, 2289, 2420, 2442, 2464, 2486, 2508, 2645, 2668, 2691, 2714, 2737}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 4, 9, 16, 0, 6, 14, 24, 36, 0, 11, - 24, 39, 56, 0, 16, 34, 54, 76, 0, 21, 44, 69, - 96, 0, 26, 54, 84, 116, 0, 31, 64, 99, 136, 0, - 36, 74, 114, 156, 200, 246, 294, 344, 396, 225, 276, 329, - 384, 441, 250, 306, 364, 424, 486, 275, 336, 399, 464, 531, - 300, 366, 434, 504, 576, 325, 396, 469, 544, 621, 350, 426, - 504, 584, 666, 375, 456, 539, 624, 711, 800, 891, 984, 1079, - 1176, 850, 946, 1044, 1144, 1246, 900, 1001, 1104, 1209, 1316, 950, - 1056, 1164, 1274, 1386, 1000, 1111, 1224, 1339, 1456, 1050, 1166, 1284, - 1404, 1526, 1100, 1221, 1344, 1469, 1596, 1150, 1276, 1404, 1534, 1666}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 4, 9, 16, 0, 6, 14, 24, 36, 0, 11, 24, 39, - 56, 0, 16, 34, 54, 76, 100, 126, 154, 184, 216, 125, 156, 189, - 
224, 261, 150, 186, 224, 264, 306, 175, 216, 259, 304, 351, 0, 41, - 84, 129, 176, 0, 46, 94, 144, 196, 0, 51, 104, 159, 216, 0, - 56, 114, 174, 236, 300, 366, 434, 504, 576, 325, 396, 469, 544, 621, - 350, 426, 504, 584, 666, 375, 456, 539, 624, 711, 0, 81, 164, 249, - 336, 0, 86, 174, 264, 356, 0, 91, 184, 279, 376, 0, 96, 194, - 294, 396, 500, 606, 714, 824, 936, 525, 636, 749, 864, 981, 550, 666, - 784, 904, 1026, 575, 696, 819, 944, 1071}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 0, 0, 0, 0, 5, 6, 7, 8, 9, 20, 22, - 24, 26, 28, 45, 48, 51, 54, 57, 0, 0, 0, 0, - 0, 25, 26, 27, 28, 29, 60, 62, 64, 66, 68, 105, - 108, 111, 114, 117, 160, 164, 168, 172, 176, 225, 230, 235, - 240, 245, 300, 306, 312, 318, 324, 385, 392, 399, 406, 413, - 240, 244, 248, 252, 256, 325, 330, 335, 340, 345, 420, 426, - 432, 438, 444, 525, 532, 539, 546, 553, 640, 648, 656, 664, - 672, 765, 774, 783, 792, 801, 900, 910, 920, 930, 940, 1045, - 1056, 1067, 1078, 1089, 800, 808, 816, 824, 832, 945, 954, 963, - 972, 981, 1100, 1110, 1120, 1130, 1140, 1265, 1276, 1287, 1298, 1309}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 4, 9, 16, 0, 6, 14, 24, 36, 0, 11, 24, 39, 56, - 0, 16, 34, 54, 76, 0, 21, 44, 69, 96, 0, 26, 54, 84, 116, - 0, 31, 64, 99, 136, 0, 36, 74, 114, 156, 0, 41, 84, 129, 176, - 0, 46, 94, 144, 196, 0, 51, 104, 159, 216, 0, 56, 114, 174, 236, - 0, 61, 124, 189, 256, 0, 66, 134, 204, 276, 0, 71, 144, 219, 296, - 0, 76, 154, 234, 316, 0, 81, 164, 249, 336, 0, 86, 174, 264, 356, - 0, 91, 184, 279, 376, 0, 96, 194, 294, 396, 0, 101, 204, 309, 416, - 0, 106, 214, 324, 436, 0, 111, 224, 339, 456, 0, 116, 234, 354, 476}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 
76, 77, 78, 79, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 160, 162, 164, 166, - 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, - 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, - 224, 226, 228, 230, 232, 234, 236, 238}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 5, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 5, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = {0, 1, 4, 9, 0, 5, 12, 21, 0, 9, - 20, 33, 0, 13, 28, 45, 0, 17, 36, 57, - 80, 105, 132, 161, 96, 125, 156, 189, 112, 145, - 180, 217, 128, 165, 204, 245, 144, 185, 228, 273, - 320, 369, 420, 473, 352, 405, 460, 517, 384, 441, - 500, 561, 416, 477, 540, 605, 448, 513, 580, 649}; - std::transform(float_data, float_data + 60, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.multiply_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - delete[] answer_data; -} - -TEST(nntrainer_Tensor, multiply_i_broadcast_not_supported_01_n) { - - nntrainer::TensorV2 target(3, 1, 3, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 target2(3, 1, 3, 3, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_EQ(target.multiply_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, multiply_i_broadcast_not_broadcastable_02_n) { - nntrainer::TensorV2 target(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 target2(3, 2, 3, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_EQ(target.multiply_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, multiply_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 result = input.multiply(0.0); - if (result.getValue<_FP16>(0, 0, 1, 1) != 0.0) - status = ML_ERROR_RESULT_OUT_OF_RANGE; - EXPECT_EQ(status, 
ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, multiply_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.multiply(input); - - _FP16 *data = result.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != indata[i] * indata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, multiply_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, height - 1, width - 1, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_THROW({ input.multiply(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(batch, channel, height, 2 * width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 shared_input = - input.getSharedDataTensor(dim, 0, false, ""); - nntrainer::TensorV2 test(dim); - - EXPECT_THROW(shared_input.multiply(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - nntrainer::TensorV2 test(batch, channel, height, 2 * width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 shared_test = test.getSharedDataTensor(dim, 0, false, ""); - - EXPECT_THROW(input.multiply(shared_test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_06_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.multiply(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_07_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.multiply(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_08_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 2); - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.multiply(test, output), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_float_01_p) { - int batch 
= 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 expected(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(expected, (i * (batch * height) + j * (width) + k + 1) * 2); - - nntrainer::TensorV2 result = input.multiply(2.0); - - EXPECT_EQ(result, expected); -} - -TEST(nntrainer_Tensor, multiply_strided_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.multiply_strided(input); - - _FP16 *data = result.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - _FP16 *outdata = new _FP16[(input.size())]; - - std::transform(indata, indata + batch * height * width * channel, indata, - outdata, std::multiplies<_FP16>()); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != outdata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - delete[] outdata; - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, multiply_strided_02_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, height - 1, width - 1); - - EXPECT_THROW({ input.multiply_strided(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - // input is not allocated now : alloc_now == false - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.multiply_strided(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - // test is not allocated. 
- nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.multiply_strided(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - // output is not allocated - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.multiply_strided(test, output), std::invalid_argument); -} - -TEST(nntrainer_Tensor, multiply_strided_06_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 output( - batch, channel, height, width, - {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}); - GEN_TEST_INPUT(output, i * (batch * height) + j * (width) + k + 1); - - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - _FP16 *outdata_beta = new _FP16[(input.size())]; - _FP16 *indata_mul = new _FP16[(input.size())]; - _FP16 *outdata = new _FP16[(input.size())]; - - std::transform(indata, indata + batch * height * width * channel, - outdata_beta, - std::bind(std::multiplies<_FP16>(), std::placeholders::_1, - static_cast<_FP16>(10.0))); - - std::transform(indata, indata + batch * height * width * channel, indata, - indata_mul, std::multiplies<_FP16>()); - std::transform(indata_mul, indata_mul + batch * height * width * channel, - outdata_beta, outdata, std::plus<_FP16>()); - - input.multiply_strided(input, output, 10.0); - - _FP16 *data = output.getData<_FP16>(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != outdata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - delete[] outdata_beta; - delete[] indata_mul; - delete[] outdata; - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, divide_i_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original; - original.copy(input); - - status = input.divide_i(2.0f); - EXPECT_EQ(status, ML_ERROR_NONE); - - _FP16 *data = original.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(data[i], indata[i] + indata[i]); - } -} - -TEST(nntrainer_Tensor, divide_i_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - status = input.divide_i(input); - EXPECT_EQ(status, ML_ERROR_NONE); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width * channel; ++i) { - 
EXPECT_FLOAT_EQ(indata[i], _FP16(1.0)); - } -} - -TEST(nntrainer_Tensor, divide_i_01_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - status = input.divide_i((_FP16)0); - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, divide_i_02_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k); - - nntrainer::TensorV2 original(batch, channel, height - 2, width - 1, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - status = input.divide_i(original); - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, divide_01_p) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.divide(1.0); - - _FP16 *previous = input.getData<_FP16>(); - ASSERT_NE(nullptr, previous); - _FP16 *data = result.getData<_FP16>(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width * channel; ++i) { - EXPECT_FLOAT_EQ(data[i], previous[i]); - } -} - -TEST(nntrainer_Tensor, divide_02_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW({ input.divide(0.0); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, channel, height - 1, width - 1, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_THROW({ input.divide(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(batch, channel, height, 2 * width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 shared_input = - input.getSharedDataTensor(dim, 0, false, ""); - nntrainer::TensorV2 test(dim); - - EXPECT_THROW(shared_input.divide(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - nntrainer::TensorV2 test(batch, channel, height, 2 * width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 shared_test = test.getSharedDataTensor(dim, 0, false, ""); - - EXPECT_THROW(input.divide(shared_test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_06_n) { - int batch = 3; - int channel = 1; - 
int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.divide(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_07_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.divide(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_08_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 2); - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.divide(test, output), std::invalid_argument); -} - -TEST(nntrainer_Tensor, divide_i_broadcast_01_p) { - unsigned int N = 120; - _FP16 *answer_data = new _FP16[N]; - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 41.0, 21.0, - 14.333333, 11.0, 9.0, 7.6666665, 6.714286, 6.0, - 5.4444447, 5.0, 4.6363635, 4.3333335, 4.076923, 3.857143, - 3.6666667, 3.5, 3.3529413, 3.2222223, 3.1052632, 3.0, - 2.9047618, 2.8181818, 2.7391305, 2.6666667, 2.6, 2.5384614, - 2.4814816, 2.4285715, 2.3793104, 2.3333333, 2.2903225, 2.25, - 2.2121212, 2.1764705, 2.142857, 2.1111112, 2.0810812, 2.0526316, - 2.025641, 2.0, 81.0, 41.0, 27.666666, 21.0, - 17.0, 14.333333, 12.428572, 11.0, 9.888889, 9.0, - 8.272727, 7.6666665, 7.1538463, 6.714286, 6.3333335, 6.0, - 5.7058825, 5.4444447, 5.2105265, 5.0, 4.8095236, 4.6363635, - 4.478261, 4.3333335, 4.2, 4.076923, 3.9629629, 3.857143, - 3.7586207, 3.6666667, 3.580645, 3.5, 3.4242425, 3.3529413, - 3.2857144, 3.2222223, 3.162162, 3.1052632, 3.0512822, 3.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 21.0, 11.0, 7.6666665, 6.0, - 5.0, 4.3333335, 3.857143, 3.5, 3.2222223, 3.0, - 2.8181818, 2.6666667, 2.5384614, 2.4285715, 2.3333333, 2.25, - 2.1764705, 2.1111112, 2.0526316, 2.0, 1.9523809, 1.9090909, - 1.8695652, 1.8333334, 1.8, 1.7692307, 
1.7407408, 1.7142857, - 1.6896552, 1.6666666, 1.6451613, 1.625, 1.6060606, 1.5882353, - 1.5714285, 1.5555556, 1.5405406, 1.5263158, 1.5128205, 1.5, - 2.9047618, 2.8181818, 2.7391305, 2.6666667, 2.6, 2.5384614, - 2.4814816, 2.4285715, 2.3793104, 2.3333333, 2.2903225, 2.25, - 2.2121212, 2.1764705, 2.142857, 2.1111112, 2.0810812, 2.0526316, - 2.025641, 2.0, 1.9756098, 1.9523809, 1.9302325, 1.9090909, - 1.8888888, 1.8695652, 1.8510638, 1.8333334, 1.8163265, 1.8, - 1.7843137, 1.7692307, 1.754717, 1.7407408, 1.7272727, 1.7142857, - 1.7017543, 1.6896552, 1.6779661, 1.6666666, 2.4634147, 2.4285715, - 2.3953488, 2.3636363, 2.3333333, 2.3043478, 2.2765958, 2.25, - 2.2244897, 2.2, 2.1764705, 2.1538463, 2.1320755, 2.1111112, - 2.090909, 2.0714285, 2.0526316, 2.0344827, 2.0169492, 2.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 2, 4, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, - 3.5, 4.0, 4.5, 5.0, 3.6666667, 4.0, - 4.3333335, 4.6666665, 5.0, 4.0, 4.25, 4.5, - 4.75, 5.0, 4.2, 4.4, 4.6, 4.8, - 5.0, 4.3333335, 4.5, 4.6666665, 4.8333335, 5.0, - 4.428571, 4.571429, 4.714286, 4.857143, 5.0, 4.5, - 4.625, 4.75, 4.875, 5.0, 4.5555553, 4.6666665, - 4.7777777, 4.888889, 5.0, 4.6, 4.7, 4.8, - 4.9, 5.0, 4.6363635, 4.7272725, 4.818182, 4.909091, - 5.0, 4.6666665, 4.75, 4.8333335, 4.9166665, 5.0, - 4.6923075, 4.769231, 4.8461537, 4.923077, 5.0, 4.714286, - 4.785714, 4.857143, 4.928571, 5.0, 4.733333, 4.8, - 4.866667, 4.9333334, 5.0, 4.75, 4.8125, 4.875, - 4.9375, 5.0, 4.7647057, 4.8235292, 4.882353, 4.9411764, - 5.0, 4.7777777, 4.8333335, 4.888889, 4.9444447, 5.0, - 4.7894735, 4.8421054, 4.894737, 4.9473686, 5.0, 4.8, - 4.85, 4.9, 4.95, 5.0, 4.8095236, 4.857143, - 4.904762, 4.952381, 5.0, 4.818182, 4.8636365, 4.909091, - 4.9545455, 5.0, 4.826087, 4.869565, 4.9130435, 4.9565215, - 5.0, 4.8333335, 4.875, 4.9166665, 4.9583335, 5.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, - 3.5, 2.6666667, 2.25, 2.0, 11.0, 6.0, - 4.3333335, 3.5, 3.0, 16.0, 8.5, 6.0, - 4.75, 4.0, 21.0, 11.0, 7.6666665, 6.0, - 5.0, 26.0, 13.5, 9.333333, 7.25, 6.0, - 31.0, 16.0, 11.0, 8.5, 7.0, 36.0, - 18.5, 12.666667, 9.75, 8.0, 6.8333335, 6.0, - 5.375, 4.888889, 4.5, 7.6666665, 6.714286, 6.0, - 5.4444447, 5.0, 8.5, 7.428571, 6.625, 6.0, - 5.5, 9.333333, 8.142858, 7.25, 6.5555553, 6.0, - 10.166667, 8.857142, 7.875, 7.111111, 6.5, 11.0, - 9.571428, 8.5, 7.6666665, 7.0, 11.833333, 10.285714, - 9.125, 8.222222, 7.5, 12.666667, 11.0, 9.75, - 8.777778, 8.0, 7.3636365, 6.8333335, 6.3846154, 6.0, - 5.6666665, 7.818182, 7.25, 6.769231, 6.357143, 6.0, - 8.272727, 7.6666665, 7.1538463, 6.714286, 6.3333335, 8.727273, - 8.083333, 7.5384617, 7.071429, 
6.6666665, 9.181818, 8.5, - 7.923077, 7.428571, 7.0, 9.636364, 8.916667, 8.307693, - 7.785714, 7.3333335, 10.090909, 9.333333, 8.692307, 8.142858, - 7.6666665, 10.545455, 9.75, 9.076923, 8.5, 8.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, - 3.5, 2.6666667, 2.25, 2.0, 11.0, 6.0, - 4.3333335, 3.5, 3.0, 16.0, 8.5, 6.0, - 4.75, 4.0, 3.5, 3.142857, 2.875, 2.6666667, - 2.5, 4.3333335, 3.857143, 3.5, 3.2222223, 3.0, - 5.1666665, 4.571429, 4.125, 3.7777777, 3.5, 6.0, - 5.285714, 4.75, 4.3333335, 4.0, 41.0, 21.0, - 14.333333, 11.0, 9.0, 46.0, 23.5, 16.0, - 12.25, 10.0, 51.0, 26.0, 17.666666, 13.5, - 11.0, 56.0, 28.5, 19.333334, 14.75, 12.0, - 10.166667, 8.857142, 7.875, 7.111111, 6.5, 11.0, - 9.571428, 8.5, 7.6666665, 7.0, 11.833333, 10.285714, - 9.125, 8.222222, 7.5, 12.666667, 11.0, 9.75, - 8.777778, 8.0, 81.0, 41.0, 27.666666, 21.0, - 17.0, 86.0, 43.5, 29.333334, 22.25, 18.0, - 91.0, 46.0, 31.0, 23.5, 19.0, 96.0, - 48.5, 32.666668, 24.75, 20.0, 16.833334, 14.571428, - 12.875, 11.555555, 10.5, 17.666666, 15.285714, 13.5, - 12.111111, 11.0, 18.5, 16.0, 14.125, 12.666667, - 11.5, 19.333334, 16.714285, 14.75, 13.222222, 12.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 3.0, - 3.5, 4.0, 4.5, 5.0, 3.6666667, 4.0, - 4.3333335, 4.6666665, 5.0, 4.0, 4.25, 4.5, - 4.75, 5.0, 21.0, 22.0, 23.0, 24.0, - 25.0, 13.0, 13.5, 14.0, 14.5, 15.0, - 10.333333, 10.666667, 11.0, 11.333333, 11.666667, 9.0, - 9.25, 9.5, 9.75, 10.0, 8.2, 8.4, - 8.6, 8.8, 9.0, 7.6666665, 7.8333335, 8.0, - 8.166667, 8.333333, 7.285714, 7.428571, 7.571429, 7.714286, - 7.857143, 7.0, 7.125, 7.25, 7.375, 7.5, - 12.2, 12.4, 12.6, 12.8, 13.0, 11.0, - 11.166667, 11.333333, 11.5, 11.666667, 10.142858, 10.285714, - 10.428572, 10.571428, 10.714286, 9.5, 9.625, 9.75, - 9.875, 10.0, 9.0, 9.111111, 9.222222, 9.333333, - 9.444445, 8.6, 8.7, 8.8, 8.9, 9.0, - 8.272727, 8.363636, 8.454545, 8.545455, 8.636364, 8.0, - 8.083333, 8.166667, 8.25, 8.333333, 11.222222, 11.333333, - 11.444445, 11.555555, 11.666667, 10.6, 10.7, 10.8, - 10.9, 11.0, 10.090909, 10.181818, 10.272727, 10.363636, - 10.454545, 9.666667, 9.75, 9.833333, 9.916667, 10.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - 
float float_data[] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 6.0, 3.5, 2.6666667, 2.25, 2.0, - 11.0, 6.0, 4.3333335, 3.5, 3.0, 16.0, 8.5, 6.0, 4.75, 4.0, - 21.0, 11.0, 7.6666665, 6.0, 5.0, 26.0, 13.5, 9.333333, 7.25, 6.0, - 31.0, 16.0, 11.0, 8.5, 7.0, 36.0, 18.5, 12.666667, 9.75, 8.0, - 41.0, 21.0, 14.333333, 11.0, 9.0, 46.0, 23.5, 16.0, 12.25, 10.0, - 51.0, 26.0, 17.666666, 13.5, 11.0, 56.0, 28.5, 19.333334, 14.75, 12.0, - 61.0, 31.0, 21.0, 16.0, 13.0, 66.0, 33.5, 22.666666, 17.25, 14.0, - 71.0, 36.0, 24.333334, 18.5, 15.0, 76.0, 38.5, 26.0, 19.75, 16.0, - 81.0, 41.0, 27.666666, 21.0, 17.0, 86.0, 43.5, 29.333334, 22.25, 18.0, - 91.0, 46.0, 31.0, 23.5, 19.0, 96.0, 48.5, 32.666668, 24.75, 20.0, - 101.0, 51.0, 34.333332, 26.0, 21.0, 106.0, 53.5, 36.0, 27.25, 22.0, - 111.0, 56.0, 37.666668, 28.5, 23.0, 116.0, 58.5, 39.333332, 29.75, 24.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, - 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 10.5, 11.0, 11.5, 12.0, - 12.5, 13.0, 13.5, 14.0, 14.5, 15.0, 15.5, 16.0, 16.5, 17.0, 17.5, 18.0, - 18.5, 19.0, 19.5, 20.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, - 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0, - 30.5, 31.0, 31.5, 32.0, 32.5, 33.0, 33.5, 34.0, 34.5, 35.0, 35.5, 36.0, - 36.5, 37.0, 37.5, 38.0, 38.5, 39.0, 39.5, 40.0, 81.0, 82.0, 83.0, 84.0, - 85.0, 86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, - 97.0, 98.0, 99.0, 100.0, 50.5, 51.0, 51.5, 52.0, 52.5, 53.0, 53.5, 54.0, - 54.5, 55.0, 55.5, 56.0, 56.5, 57.0, 57.5, 58.0, 58.5, 59.0, 59.5, 60.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, - 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, - 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, - 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, - 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, - 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, - 37.0, 38.0, 39.0, 40.0, 20.5, 21.0, - 21.5, 22.0, 22.5, 23.0, 23.5, 24.0, - 24.5, 25.0, 25.5, 26.0, 26.5, 27.0, - 27.5, 28.0, 28.5, 29.0, 29.5, 30.0, - 30.5, 31.0, 31.5, 32.0, 32.5, 33.0, - 33.5, 34.0, 34.5, 35.0, 35.5, 36.0, - 36.5, 37.0, 37.5, 38.0, 38.5, 39.0, - 39.5, 40.0, 27.0, 27.333334, 27.666666, 28.0, - 28.333334, 28.666666, 29.0, 29.333334, 29.666666, 30.0, - 30.333334, 30.666666, 31.0, 31.333334, 31.666666, 32.0, - 32.333332, 32.666668, 33.0, 33.333332, 33.666668, 34.0, - 34.333332, 34.666668, 35.0, 35.333332, 35.666668, 36.0, - 36.333332, 36.666668, 37.0, 37.333332, 37.666668, 38.0, - 38.333332, 38.666668, 39.0, 39.333332, 39.666668, 40.0}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - 
nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 5, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 5, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - t.add_i(1); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1); - float float_data[] = { - 1.0, 1.0, 1.0, 1.0, 5.0, 3.0, - 2.3333333, 2.0, 9.0, 5.0, 3.6666667, 3.0, - 13.0, 7.0, 5.0, 4.0, 17.0, 9.0, - 6.3333335, 5.0, 4.2, 3.6666667, 3.2857144, 3.0, - 5.0, 4.3333335, 3.857143, 3.5, 5.8, 5.0, - 4.428571, 4.0, 6.6, 5.6666665, 5.0, 4.5, - 7.4, 6.3333335, 5.571429, 5.0, 4.5555553, 4.2, - 3.909091, 3.6666667, 5.0, 4.6, 4.2727275, 4.0, - 5.4444447, 5.0, 4.6363635, 4.3333335, 5.888889, 5.4, - 5.0, 4.6666665, 6.3333335, 5.8, 5.3636365, 5.0}; - std::transform(float_data, float_data + 60, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.divide_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - delete[] answer_data; -} - -TEST(nntrainer_Tensor, divide_i_broadcast_not_supported_01_n) { - nntrainer::TensorV2 target(3, 1, 3, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 target2(3, 1, 3, 3, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_EQ(target.divide_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, divide_i_broadcast_not_broadcastable_02_n) { - nntrainer::TensorV2 target(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 target2(3, 2, 3, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_EQ(target.divide_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_i_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - int channel = 1; - - nntrainer::TensorV2 target(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(target, i * (batch * height) + j * (width) + k + 1 + channel); - - nntrainer::TensorV2 original(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - original.copy(target); - - status = target.add_i((_FP16)2.1); - EXPECT_EQ(status, ML_ERROR_NONE); - - _FP16 *previous = original.getData<_FP16>(); - ASSERT_NE(nullptr, previous); - _FP16 *data = target.getData<_FP16>(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width; ++i) { - EXPECT_FLOAT_EQ(data[i], (_FP16)(previous[i] + (_FP16)2.1)); - } -} - -TEST(nntrainer_Tensor, add_i_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - int channel = 1; - - nntrainer::TensorV2 target(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(target, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 original(batch, height, width, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - original.copy(target); - - status = target.add_i(target, 3.0); - EXPECT_EQ(status, ML_ERROR_NONE); - - _FP16 *previous = original.getData<_FP16>(); - ASSERT_NE(nullptr, previous); - _FP16 *data = target.getData<_FP16>(); - ASSERT_NE(nullptr, data); - - for (int i = 0; i < batch * height * width; ++i) { - EXPECT_FLOAT_EQ(data[i], previous[i] * 4.0); - } -} - -// /** 
-// * @brief operand dimension is not right -// */ -TEST(nntrainer_Tensor, add_i_01_n) { - int status = ML_ERROR_NONE; - int batch = 3; - int height = 3; - int width = 10; - int channel = 1; - - nntrainer::TensorV2 target(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(target, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 target2(batch, height - 2, width - 3, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - status = target.add_i(target2); - EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_i_broadcast_01_p) { - unsigned int N = 120; - _FP16 *answer_data = new _FP16[N]; - nntrainer::TensorDim ref_dim(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, - 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, - 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 40, 42, - 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, - 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, - 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 80, 82, 84, 86, - 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, - 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, - 144, 146, 148, 150, 152, 154, 156, 158}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, - 28, 30, 32, 34, 36, 38, 20, 22, 24, 26, 28, 30, 32, 34, - 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, - 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, - 92, 94, 96, 98, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, - 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, - 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, - 156, 158, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, - 164, 166, 168, 170, 172, 174, 176, 178}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 2, 4, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, - 16, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 30, 31, 32, - 33, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 49, - 50, 51, 52, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66, - 67, 68, 69, 70, 72, 73, 74, 75, 76, 78, 79, 80, 81, 82, - 84, 85, 86, 87, 88, 90, 91, 92, 93, 94, 96, 97, 98, 99, - 100, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 114, 115, 116, - 117, 118, 120, 121, 122, 123, 124, 126, 127, 128, 129, 130, 132, 133, - 134, 135, 136, 
138, 139, 140, 141, 142}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 2, 4, 6, 8, 5, 7, 9, 11, 13, 10, 12, 14, 16, - 18, 15, 17, 19, 21, 23, 20, 22, 24, 26, 28, 25, 27, 29, - 31, 33, 30, 32, 34, 36, 38, 35, 37, 39, 41, 43, 45, 47, - 49, 51, 53, 50, 52, 54, 56, 58, 55, 57, 59, 61, 63, 60, - 62, 64, 66, 68, 65, 67, 69, 71, 73, 70, 72, 74, 76, 78, - 75, 77, 79, 81, 83, 80, 82, 84, 86, 88, 90, 92, 94, 96, - 98, 95, 97, 99, 101, 103, 100, 102, 104, 106, 108, 105, 107, 109, - 111, 113, 110, 112, 114, 116, 118, 115, 117, 119, 121, 123, 120, 122, - 124, 126, 128, 125, 127, 129, 131, 133}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 2, 4, 6, 8, 5, 7, 9, 11, 13, 10, 12, 14, 16, - 18, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 30, 32, 34, - 36, 38, 35, 37, 39, 41, 43, 40, 42, 44, 46, 48, 40, 42, - 44, 46, 48, 45, 47, 49, 51, 53, 50, 52, 54, 56, 58, 55, - 57, 59, 61, 63, 65, 67, 69, 71, 73, 70, 72, 74, 76, 78, - 75, 77, 79, 81, 83, 80, 82, 84, 86, 88, 80, 82, 84, 86, - 88, 85, 87, 89, 91, 93, 90, 92, 94, 96, 98, 95, 97, 99, - 101, 103, 105, 107, 109, 111, 113, 110, 112, 114, 116, 118, 115, 117, - 119, 121, 123, 120, 122, 124, 126, 128}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 4, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, - 16, 18, 19, 20, 21, 22, 20, 21, 22, 23, 24, 26, 27, 28, - 29, 30, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45, - 46, 47, 48, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 62, - 63, 64, 65, 66, 64, 65, 66, 67, 68, 70, 71, 72, 73, 74, - 76, 77, 78, 79, 80, 82, 83, 84, 85, 86, 88, 89, 90, 91, - 92, 94, 95, 96, 97, 98, 100, 101, 102, 103, 104, 106, 107, 108, - 109, 110, 108, 109, 110, 111, 112, 114, 115, 116, 117, 118, 120, 121, - 122, 123, 124, 126, 127, 128, 129, 130}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 2, 4, 6, 8, 5, 7, 9, 11, 13, 10, 12, 14, 16, - 18, 15, 17, 19, 21, 23, 20, 22, 24, 26, 28, 25, 
27, 29, - 31, 33, 30, 32, 34, 36, 38, 35, 37, 39, 41, 43, 40, 42, - 44, 46, 48, 45, 47, 49, 51, 53, 50, 52, 54, 56, 58, 55, - 57, 59, 61, 63, 60, 62, 64, 66, 68, 65, 67, 69, 71, 73, - 70, 72, 74, 76, 78, 75, 77, 79, 81, 83, 80, 82, 84, 86, - 88, 85, 87, 89, 91, 93, 90, 92, 94, 96, 98, 95, 97, 99, - 101, 103, 100, 102, 104, 106, 108, 105, 107, 109, 111, 113, 110, 112, - 114, 116, 118, 115, 117, 119, 121, 123}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 2, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - 113, 114, 115, 116, 117, 118, 119, 120}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 82, 83, 84, 85, - 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorV2 t = rangedV2(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - m.add_i(1.0); - float float_data[] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, - 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - 113, 114, 115, 116, 117, 118, 119, 120}; - std::transform(float_data, float_data + N, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, 
ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(3, 5, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(3, 5, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(3, 1, 1, 4, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - float float_data[] = {0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, - 12, 14, 16, 18, 16, 18, 20, 22, 24, 26, 28, 30, - 28, 30, 32, 34, 32, 34, 36, 38, 36, 38, 40, 42, - 40, 42, 44, 46, 48, 50, 52, 54, 52, 54, 56, 58, - 56, 58, 60, 62, 60, 62, 64, 66, 64, 66, 68, 70}; - std::transform(float_data, float_data + 60, answer_data, - static_cast_func<_FP16>()); - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(1, 1, 2, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(1, 1, 2, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 1, 2, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - _FP16 answer_data[] = {static_cast<_FP16>(0.0), static_cast<_FP16>(2.0)}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - { - nntrainer::TensorDim ref_dim(16, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 t = rangedV2(16, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 m = rangedV2(1, 1, 1, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - _FP16 answer_data[] = {static_cast<_FP16>(0.0), static_cast<_FP16>(1.0), - static_cast<_FP16>(2.0), static_cast<_FP16>(3.0), - static_cast<_FP16>(4.0), static_cast<_FP16>(5.0), - static_cast<_FP16>(6.0), static_cast<_FP16>(7.0), - static_cast<_FP16>(8.0), static_cast<_FP16>(9.0), - static_cast<_FP16>(10.0), static_cast<_FP16>(11.0), - static_cast<_FP16>(12.0), static_cast<_FP16>(13.0), - static_cast<_FP16>(14.0), static_cast<_FP16>(15.0)}; - nntrainer::TensorV2 answer(ref_dim, answer_data); - int status = t.add_i(m); - EXPECT_EQ(status, ML_ERROR_NONE); - EXPECT_EQ(t, answer); - } - delete[] answer_data; -} - -TEST(nntrainer_Tensor, add_i_broadcast_not_supported_01_n) { - nntrainer::TensorV2 target(3, 1, 3, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 target2(3, 1, 3, 3, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_EQ(target.add_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_i_broadcast_not_broadcastable_02_n) { - nntrainer::TensorV2 target(3, 2, 4, 5, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 target2(3, 2, 3, 1, nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_EQ(target.add_i(target2), ML_ERROR_INVALID_PARAMETER); -} - -TEST(nntrainer_Tensor, add_01_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.add(1.0); - - _FP16 *data = result.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width; ++i) { 
- if (data[i] != (_FP16)(indata[i] + (_FP16)1.0)) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, add_02_p) { - int status = ML_ERROR_NONE; - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 result = input.add(input); - - _FP16 *data = result.getData<_FP16>(); - ASSERT_NE(nullptr, data); - _FP16 *indata = input.getData<_FP16>(); - ASSERT_NE(nullptr, indata); - - for (int i = 0; i < batch * height * width; ++i) { - if (data[i] != indata[i] + indata[i]) { - status = ML_ERROR_RESULT_OUT_OF_RANGE; - break; - } - } - - EXPECT_EQ(status, ML_ERROR_NONE); -} - -TEST(nntrainer_Tensor, add_03_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorV2 input(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - - nntrainer::TensorV2 test(batch - 1, channel, height - 1, width - 1, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - EXPECT_THROW({ input.add(test); }, std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_04_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - nntrainer::TensorV2 input(batch, channel, height, 2 * width); - nntrainer::TensorV2 shared_input = - input.getSharedDataTensor(dim, 0, false, ""); - nntrainer::TensorV2 test(dim); - - EXPECT_THROW(shared_input.add(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_05_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - nntrainer::TensorV2 input(dim); - nntrainer::TensorV2 test(batch, channel, height, 2 * width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - nntrainer::TensorV2 shared_test = test.getSharedDataTensor(dim, 0, false, ""); - - EXPECT_THROW(input.add(shared_test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_06_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - nntrainer::TensorV2 input(dim, false); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 1); - - EXPECT_THROW(input.add(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_07_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim, false); - - EXPECT_THROW(input.add(test), std::invalid_argument); -} - -TEST(nntrainer_Tensor, add_08_n) { - int batch = 3; - int channel = 1; - int height = 3; - int width = 10; - - nntrainer::TensorDim dim(batch, channel, height, width, - nntrainer::Tformat::NCHW, - nntrainer::Tdatatype::FP16); - - nntrainer::TensorV2 input(dim); - GEN_TEST_INPUT(input, i * (batch * 
height) + j * (width) + k + 1); - nntrainer::TensorV2 test(dim); - GEN_TEST_INPUT(test, i * (batch * height) + j * (width) + k + 2); - nntrainer::TensorV2 output(dim, false); - - EXPECT_THROW(input.add(test, output), std::invalid_argument); -} - -int main(int argc, char **argv) { - int result = -1; - - try { - testing::InitGoogleTest(&argc, argv); - } catch (...) { - std::cerr << "Error during InitGoogleTest" << std::endl; - return 0; - } - - try { - result = RUN_ALL_TESTS(); - } catch (...) { - std::cerr << "Error during RUN_ALL_TESTS()" << std::endl; - } - - return result; -}