-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feeding data to deep learning models often involves reading and processing numpy (.npy) files. It is therefore much more convenient to pass in a file name and get the data back in vectors than to hand-write the reading code every time. Function signature: ``` void read_npy_file(const char *file_path, std::vector<int> &dims, std::vector<float> &values); ``` **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Donghak PARK <[email protected]>
- Loading branch information
1 parent
f0c51be
commit c75d847
Showing
4 changed files
with
235 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# Process the meson.build of each listed subdirectory, in this order.
subdir('jni') | ||
subdir('datagen/cifar') | ||
subdir('npy_reader') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Path of the source directory that contains this meson.build.
npy_path = meson.current_source_dir() | ||
# Include-directory object for this directory.
# NOTE(review): presumably consumed by targets that include npy_reader.h - confirm against users.
npy_include_dir = include_directories('.') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 Donghak Park <[email protected]> | ||
* | ||
* @file npy_reader.cpp | ||
* @date 28 Feb 2024 | ||
* @brief reader for npy file | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Donghak Park <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
*/ | ||
|
||
#include "npy_reader.h" | ||
|
||
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <limits>
#include <memory>
#include <new>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>
|
||
namespace nntrainer::util { | ||
/** | ||
* @brief read numpy file from file_path | ||
* | ||
*/ | ||
void NpyReader::read_npy_file(const char *file_path) { | ||
FILE *file = fopen(file_path, "rb"); | ||
|
||
char magic[7] = {}; | ||
try { | ||
if (!file) { | ||
throw std::runtime_error("Failed to open file"); | ||
} | ||
|
||
int bytes_read = fread(magic, 6, 1, file); | ||
|
||
if (bytes_read != 1 || strcmp(magic, "\x93NUMPY") != 0) { | ||
fclose(file); | ||
throw std::runtime_error("Failed : this file is not a numpy file"); | ||
} | ||
} catch (const std::exception &e) { | ||
std::cerr << "Error: " << e.what() << std::endl; | ||
return; | ||
} | ||
|
||
char major = 0; | ||
if (fread(&major, 1, 1, file) != 1) { | ||
fclose(file); | ||
}; | ||
|
||
char major = 0; | ||
if (fread(&major, 1, 1, file) != 1) { | ||
fclose(file); | ||
throw std::runtime_error("Failed to read major version"); | ||
} | ||
|
||
char minor = 0; | ||
if (fread(&minor, 1, 1, file) != 1) { | ||
fclose(file); | ||
throw std::runtime_error("Failed to read minor version"); | ||
} | ||
|
||
uint16_t header_len; | ||
if (fread(&header_len, 2, 1, file) != 1) { | ||
fclose(file); | ||
throw std::runtime_error("Failed to read header length"); | ||
} | ||
|
||
char *header = (char *)malloc(header_len); | ||
if (header == nullptr) { | ||
fclose(file); | ||
throw std::runtime_error("Failed to allocate memory for header"); | ||
} | ||
|
||
if (fread(header, header_len, 1, file) != 1) { | ||
free(header); | ||
fclose(file); | ||
throw std::runtime_error("Failed to read header"); | ||
} | ||
|
||
char *header_pos = header; | ||
if (*header_pos != '{') { | ||
ml_loge("Filed to read numpy file"); | ||
return; | ||
} | ||
|
||
++header_pos; | ||
char buffer[1024]; | ||
int buffer_pos = 0; | ||
|
||
while (*header_pos != '}') { | ||
if (*header_pos == '\'') { | ||
++header_pos; | ||
while (*header_pos != '\'') { | ||
buffer[buffer_pos++] = *header_pos++; | ||
} | ||
|
||
buffer[buffer_pos++] = '\0'; | ||
|
||
if (strcmp(buffer, "shape") == 0) { | ||
header_pos += 3; | ||
if (*header_pos = !'(') { | ||
ml_loge("File to read numpy file"); | ||
return; | ||
} | ||
++header_pos; | ||
|
||
while (*header_pos != ')') { | ||
buffer_pos = 0; | ||
while (*header_pos != ',' && *header_pos != ')') { | ||
buffer[buffer_pos++] = *header_pos++; | ||
} | ||
|
||
int mul = 1; | ||
int value = 0; | ||
for (int i = buffer_pos - 1; i >= 0; --i) { | ||
value += static_cast<int>(buffer[i] - '0') * mul; | ||
mul *= 10; | ||
} | ||
dims.push_back(value); | ||
|
||
if (*header_pos != ')') { | ||
header_pos += 2; | ||
} | ||
} | ||
|
||
header_pos += 3; | ||
} else { | ||
while (*header_pos != ',') { | ||
++header_pos; | ||
} | ||
header_pos += 2; | ||
buffer_pos = 0; | ||
} | ||
} | ||
} | ||
|
||
free(header); | ||
header = nullptr; | ||
|
||
int total_entries = 1; | ||
for (int i = 0; i < dims.size(); ++i) { | ||
total_entries *= dims[i]; | ||
} | ||
|
||
for (int i = 0; i < total_entries; ++i) { | ||
float value; | ||
if (fread(&value, 4, 1, file) != 1) { | ||
fclose(file); | ||
throw std::runtime_error("Failed to read data"); | ||
} | ||
values.push_back(value); | ||
} | ||
|
||
fclose(file); | ||
} | ||
|
||
} // namespace nntrainer::util |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 Donghak Park <[email protected]> | ||
* | ||
* @file npy_reader.h | ||
* @date 28 Feb 2024 | ||
* @brief reader for npy file | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Donghak Park <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
*/ | ||
|
||
#ifndef NPY_READER_H_ | ||
#define NPY_READER_H_ | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
namespace nntrainer::util { | ||
|
||
/**
 * @brief NpyReader class for reading a numpy format file
 *
 * Holds the dimensions and the float values either loaded from a file or
 * handed in directly by the caller.
 */
class NpyReader {
private:
  std::string file_path;     /**< source path; empty when built from vectors */
  std::vector<int> dims;     /**< dimensions of the data */
  std::vector<float> values; /**< data values */

  /**
   * @brief read numpy file from file_path
   *
   * @param file_path file path for numpy
   */
  void read_npy_file(const char *file_path);

public:
  /**
   * @brief Construct a new Npy Reader object by reading a file
   *
   * A null file_path is stored as an empty path, because constructing a
   * std::string from a null pointer is undefined behaviour.
   *
   * @param file_path file path for numpy
   */
  NpyReader(const char *file_path) :
    file_path(file_path != nullptr ? file_path : "") {
    // Pass the stored copy so the reader never receives a null pointer.
    read_npy_file(this->file_path.c_str());
  }

  /**
   * @brief Construct a new Npy Reader object from data already in memory
   *
   * Both vectors are copied; no file is read and the stored path is empty.
   *
   * @param dims The dimension of the data you want to hold.
   * @param values A vector containing the data you want to use.
   */
  NpyReader(const std::vector<int> &dims, const std::vector<float> &values) :
    file_path(), dims(dims), values(values) {}

  /**
   * @brief Get the dims object
   *
   * @return const std::vector<int>& dims
   */
  const std::vector<int> &get_dims() const { return dims; }

  /**
   * @brief Get the values object
   *
   * @return const std::vector<float>& values
   */
  const std::vector<float> &get_values() const { return values; }
};
|
||
} // namespace nntrainer::util | ||
|
||
#endif // NPY_READER_H_ |