From c75d847d92f301bd05d0b49fb65cca2311d883c0 Mon Sep 17 00:00:00 2001
From: Donghak PARK
Date: Wed, 28 Feb 2024 12:19:19 +0900
Subject: [PATCH] [util] Add numpy file reader

In deep learning data feeding, there are often tasks that involve reading
and processing numpy (.npy) files. Therefore, it would be much more
convenient to develop by entering the file name and returning the data in
a vector instead of manually writing code to read it every time.

Function Signature
```
void read_npy_file(const char *file_path, std::vector<int> &dims,
                   std::vector<float> &values);
```

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK
---
 Applications/utils/meson.build               |   1 +
 Applications/utils/npy_reader/meson.build    |   2 +
 Applications/utils/npy_reader/npy_reader.cpp | 190 +++++++++++++++++++
 Applications/utils/npy_reader/npy_reader.h   |  73 +++++++
 4 files changed, 266 insertions(+)
 create mode 100644 Applications/utils/npy_reader/meson.build
 create mode 100644 Applications/utils/npy_reader/npy_reader.cpp
 create mode 100644 Applications/utils/npy_reader/npy_reader.h

diff --git a/Applications/utils/meson.build b/Applications/utils/meson.build
index a43d473208..7e4cdfee5d 100644
--- a/Applications/utils/meson.build
+++ b/Applications/utils/meson.build
@@ -1,2 +1,3 @@
 subdir('jni')
 subdir('datagen/cifar')
+subdir('npy_reader')
diff --git a/Applications/utils/npy_reader/meson.build b/Applications/utils/npy_reader/meson.build
new file mode 100644
index 0000000000..a8c175fa0a
--- /dev/null
+++ b/Applications/utils/npy_reader/meson.build
@@ -0,0 +1,2 @@
+npy_path = meson.current_source_dir()
+npy_include_dir = include_directories('.')
diff --git a/Applications/utils/npy_reader/npy_reader.cpp b/Applications/utils/npy_reader/npy_reader.cpp
new file mode 100644
index 0000000000..84c1f2edd5
--- /dev/null
+++ b/Applications/utils/npy_reader/npy_reader.cpp
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Donghak Park
+ *
+ * @file   npy_reader.cpp
+ * @date   28 Feb 2024
+ * @brief  reader for npy file
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Donghak Park
+ * @bug    No known bugs except for NYI items
+ */
+
+#include "npy_reader.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <nntrainer_log.h>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+namespace nntrainer::util {
+
+namespace {
+
+/**
+ * @brief Deleter that lets std::unique_ptr close a FILE handle
+ *
+ */
+struct FileCloser {
+  void operator()(FILE *fp) const {
+    if (fp != nullptr) {
+      fclose(fp);
+    }
+  }
+};
+
+/**
+ * @brief Parse the tuple stored under the 'shape' key of a npy header
+ *
+ * @param header header dictionary text, e.g.
+ *               {'descr': '<f4', 'fortran_order': False, 'shape': (3, 4), }
+ * @param shape receives one entry per dimension; stays empty for "()"
+ * @return true when a well-formed shape tuple was found
+ */
+bool parse_shape(const std::string &header, std::vector<int> &shape) {
+  const std::string key = "'shape'";
+  const std::size_t key_pos = header.find(key);
+  if (key_pos == std::string::npos) {
+    return false;
+  }
+
+  // Only ':' and blanks may sit between the key and the opening parenthesis.
+  std::size_t pos = header.find_first_not_of(": ", key_pos + key.size());
+  if (pos == std::string::npos || header[pos] != '(') {
+    return false;
+  }
+
+  const std::size_t close = header.find(')', pos);
+  if (close == std::string::npos) {
+    return false;
+  }
+
+  int value = 0;
+  bool has_digit = false;
+  for (++pos; pos <= close; ++pos) {
+    const char c = header[pos];
+    if (c >= '0' && c <= '9') {
+      const int digit = c - '0';
+      if (value > (std::numeric_limits<int>::max() - digit) / 10) {
+        return false; // dimension does not fit in int
+      }
+      value = value * 10 + digit;
+      has_digit = true;
+    } else if (c == ',' || c == ')') {
+      // "(3,)" and "()" end with an empty token that is not a dimension.
+      if (has_digit) {
+        shape.push_back(value);
+      }
+      value = 0;
+      has_digit = false;
+    } else if (c != ' ') {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+} // namespace
+
+/**
+ * @brief read numpy file from file_path
+ *
+ * A file that cannot be opened, lacks the npy magic string or has a malformed
+ * header is reported and leaves dims/values untouched. A file that is
+ * truncated while being read raises std::runtime_error. The payload is read
+ * as 4-byte floats in file order; the 'descr' and 'fortran_order' header
+ * entries are not inspected.
+ */
+void NpyReader::read_npy_file(const char *file_path) {
+  static_assert(sizeof(float) == 4, "npy payload is read as 4-byte floats");
+
+  std::unique_ptr<FILE, FileCloser> file(
+    file_path != nullptr ? fopen(file_path, "rb") : nullptr);
+  if (!file) {
+    std::cerr << "Error: Failed to open file" << std::endl;
+    return;
+  }
+
+  char magic[6] = {};
+  if (fread(magic, sizeof(magic), 1, file.get()) != 1 ||
+      memcmp(magic, "\x93NUMPY", sizeof(magic)) != 0) {
+    std::cerr << "Error: Failed : this file is not a numpy file" << std::endl;
+    return;
+  }
+
+  unsigned char major_version = 0;
+  if (fread(&major_version, 1, 1, file.get()) != 1) {
+    throw std::runtime_error("Failed to read major version");
+  }
+
+  unsigned char minor_version = 0;
+  if (fread(&minor_version, 1, 1, file.get()) != 1) {
+    throw std::runtime_error("Failed to read minor version");
+  }
+
+  // Format 1.x stores the header length in 2 bytes, 2.x and later in 4 bytes;
+  // it is little-endian in the file whatever the host byte order is.
+  const std::size_t len_bytes = (major_version >= 2) ? 4 : 2;
+  unsigned char raw_len[4] = {};
+  if (fread(raw_len, len_bytes, 1, file.get()) != 1) {
+    throw std::runtime_error("Failed to read header length");
+  }
+
+  std::uint32_t header_len = 0;
+  for (std::size_t i = len_bytes; i-- > 0;) {
+    header_len = (header_len << 8) | raw_len[i];
+  }
+
+  std::string header;
+  try {
+    header.resize(header_len);
+  } catch (const std::exception &) {
+    throw std::runtime_error("Failed to allocate memory for header");
+  }
+
+  if (fread(&header[0], header_len, 1, file.get()) != 1) {
+    throw std::runtime_error("Failed to read header");
+  }
+
+  std::vector<int> shape;
+  if (header[0] != '{' || !parse_shape(header, shape)) {
+    ml_loge("Failed to read numpy file");
+    return;
+  }
+  dims.insert(dims.end(), shape.begin(), shape.end());
+
+  std::size_t remaining = 1;
+  for (const int dim : dims) {
+    const std::size_t extent = static_cast<std::size_t>(dim);
+    if (extent != 0 &&
+        remaining > std::numeric_limits<std::size_t>::max() / extent) {
+      throw std::runtime_error("Failed to read data");
+    }
+    remaining *= extent;
+  }
+
+  // Read in bounded chunks so that a bogus shape cannot force a huge
+  // allocation before the end of the file is detected.
+  constexpr std::size_t chunk_len = 1024;
+  float chunk[chunk_len];
+  while (remaining > 0) {
+    const std::size_t want = remaining < chunk_len ? remaining : chunk_len;
+    const std::size_t got = fread(chunk, sizeof(float), want, file.get());
+    values.insert(values.end(), chunk, chunk + got);
+    if (got != want) {
+      throw std::runtime_error("Failed to read data");
+    }
+    remaining -= want;
+  }
+}
+
+} // namespace nntrainer::util
diff --git a/Applications/utils/npy_reader/npy_reader.h b/Applications/utils/npy_reader/npy_reader.h
new file mode 100644
index 0000000000..f9003a885f
--- /dev/null
+++ b/Applications/utils/npy_reader/npy_reader.h
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Donghak Park
+ *
+ * @file   npy_reader.h
+ * @date   28 Feb 2024
+ * @brief  reader for npy file
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Donghak Park
+ * @bug    No known bugs except for NYI items
+ */
+
+#ifndef NPY_READER_H_
+#define NPY_READER_H_
+
+#include <string>
+#include <vector>
+
+namespace nntrainer::util {
+
+/**
+ * @brief NpyReader class for read numpy format file
+ *
+ */
+class NpyReader {
+private:
+  std::string file_path;
+  std::vector<int> dims;
+  std::vector<float> values;
+
+  /**
+   * @brief read numpy file from file_path
+   */
+  void read_npy_file(const char *file_path);
+
+public:
+  /**
+   * @brief Construct a new Npy Reader object
+   *
+   * @param file_path file path for numpy
+   */
+  NpyReader(const char *file_path) :
+    file_path(file_path != nullptr ? file_path : "") {
+    read_npy_file(file_path);
+  }
+
+  /**
+   * @brief Construct a new Npy Reader object
+   *
+   * @param dims The dimension of the file you want to read.
+   * @param values A vector containing the data you want to use.
+   */
+  NpyReader(const std::vector<int> &dims, const std::vector<float> &values) :
+    file_path(), dims(dims), values(values) {}
+
+  /**
+   * @brief Get the dims object
+   *
+   * @return const std::vector<int>& dims
+   */
+  const std::vector<int> &get_dims() const { return dims; }
+
+  /**
+   * @brief Get the values object
+   *
+   * @return const std::vector<float>& values
+   */
+  const std::vector<float> &get_values() const { return values; }
+};
+
+} // namespace nntrainer::util
+
+#endif // NPY_READER_H_