Skip to content

Commit

Permalink
[util] Add numpy file reader
Browse files Browse the repository at this point in the history
In deep learning data feeding, there are often tasks that involve reading and processing numpy (.npy) files.

A reader that takes a file name and returns the data in a vector is therefore much more convenient than hand-writing the file-reading code every time.

Function Signature
```
void read_npy_file(const char *file_path, std::vector<int> &dims,
                   std::vector<float> &values);
```

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <[email protected]>
  • Loading branch information
DonghakPark committed Mar 4, 2024
1 parent f0c51be commit c75d847
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 0 deletions.
1 change: 1 addition & 0 deletions Applications/utils/meson.build
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Descend into each utility subdirectory and evaluate its meson.build.
subdir('jni')
subdir('datagen/cifar')
# Evaluates npy_reader/meson.build.
subdir('npy_reader')
2 changes: 2 additions & 0 deletions Applications/utils/npy_reader/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Path of the directory that contains this meson.build (the npy_reader sources).
# NOTE(review): the consumers of npy_path are not visible in this file.
npy_path = meson.current_source_dir()
# Include-directory object for this directory, so that npy_reader.h can be
# found by whichever target adds it - presumably the applications using the
# reader; confirm against their meson.build files.
npy_include_dir = include_directories('.')
160 changes: 160 additions & 0 deletions Applications/utils/npy_reader/npy_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Donghak Park <[email protected]>
*
* @file npy_reader.cpp
* @date 28 Feb 2024
* @brief reader for npy file
* @see https://github.com/nnstreamer/nntrainer
* @author Donghak Park <[email protected]>
* @bug No known bugs except for NYI items
*/

#include "npy_reader.h"

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <limits>
#include <memory>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>

namespace nntrainer::util {
/**
* @brief read numpy file from file_path
*
*/
void NpyReader::read_npy_file(const char *file_path) {
FILE *file = fopen(file_path, "rb");

char magic[7] = {};
try {
if (!file) {
throw std::runtime_error("Failed to open file");
}

int bytes_read = fread(magic, 6, 1, file);

if (bytes_read != 1 || strcmp(magic, "\x93NUMPY") != 0) {
fclose(file);
throw std::runtime_error("Failed : this file is not a numpy file");
}
} catch (const std::exception &e) {
std::cerr << "Error: " << e.what() << std::endl;
return;
}

char major = 0;
if (fread(&major, 1, 1, file) != 1) {
fclose(file);
};

char major = 0;
if (fread(&major, 1, 1, file) != 1) {
fclose(file);
throw std::runtime_error("Failed to read major version");
}

char minor = 0;
if (fread(&minor, 1, 1, file) != 1) {
fclose(file);
throw std::runtime_error("Failed to read minor version");
}

uint16_t header_len;
if (fread(&header_len, 2, 1, file) != 1) {
fclose(file);
throw std::runtime_error("Failed to read header length");
}

char *header = (char *)malloc(header_len);
if (header == nullptr) {
fclose(file);
throw std::runtime_error("Failed to allocate memory for header");
}

if (fread(header, header_len, 1, file) != 1) {
free(header);
fclose(file);
throw std::runtime_error("Failed to read header");
}

char *header_pos = header;
if (*header_pos != '{') {
ml_loge("Filed to read numpy file");
return;
}

++header_pos;
char buffer[1024];
int buffer_pos = 0;

while (*header_pos != '}') {
if (*header_pos == '\'') {
++header_pos;
while (*header_pos != '\'') {
buffer[buffer_pos++] = *header_pos++;
}

buffer[buffer_pos++] = '\0';

if (strcmp(buffer, "shape") == 0) {
header_pos += 3;
if (*header_pos = !'(') {
ml_loge("File to read numpy file");
return;
}
++header_pos;

while (*header_pos != ')') {
buffer_pos = 0;
while (*header_pos != ',' && *header_pos != ')') {
buffer[buffer_pos++] = *header_pos++;
}

int mul = 1;
int value = 0;
for (int i = buffer_pos - 1; i >= 0; --i) {
value += static_cast<int>(buffer[i] - '0') * mul;
mul *= 10;
}
dims.push_back(value);

if (*header_pos != ')') {
header_pos += 2;
}
}

header_pos += 3;
} else {
while (*header_pos != ',') {
++header_pos;
}
header_pos += 2;
buffer_pos = 0;
}
}
}

free(header);
header = nullptr;

int total_entries = 1;
for (int i = 0; i < dims.size(); ++i) {
total_entries *= dims[i];
}

for (int i = 0; i < total_entries; ++i) {
float value;
if (fread(&value, 4, 1, file) != 1) {
fclose(file);
throw std::runtime_error("Failed to read data");
}
values.push_back(value);
}

fclose(file);
}

} // namespace nntrainer::util
72 changes: 72 additions & 0 deletions Applications/utils/npy_reader/npy_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Donghak Park <[email protected]>
*
* @file npy_reader.h
* @date 28 Feb 2024
* @brief reader for npy file
* @see https://github.com/nnstreamer/nntrainer
* @author Donghak Park <[email protected]>
* @bug No known bugs except for NYI items
*/

#ifndef NPY_READER_H_
#define NPY_READER_H_

#include <string>
#include <vector>

namespace nntrainer::util {

/**
 * @brief NpyReader class for reading numpy (.npy) format files
 *
 * An instance holds the dimensions and the float values of one array, either
 * loaded from a file or supplied directly by the caller.
 */
class NpyReader {
private:
  std::string file_path;     /**< path of the source file, empty if none */
  std::vector<int> dims;     /**< dimensions of the array */
  std::vector<float> values; /**< array elements */

  /**
   * @brief read numpy file from file_path
   *
   * Implemented out of line; fills dims and values.
   *
   * @param file_path path of the .npy file to read
   */
  void read_npy_file(const char *file_path);

public:
  /**
   * @brief Construct a new Npy Reader object and load the given file
   *
   * A null file_path is treated as an empty path instead of being passed to
   * the std::string constructor, which would be undefined behaviour.
   *
   * @param file_path file path for numpy
   */
  NpyReader(const char *file_path) : file_path(file_path ? file_path : "") {
    read_npy_file(this->file_path.c_str());
  }

  /**
   * @brief Construct a new Npy Reader object from in-memory data
   *
   * The vectors are copied; file_path is left empty.
   *
   * @param dims The dimension of the data.
   * @param values A vector containing the data you want to use.
   */
  NpyReader(const std::vector<int> &dims, const std::vector<float> &values) :
    file_path(), dims(dims), values(values) {}

  /**
   * @brief Get the dims object
   *
   * @return const std::vector<int>& dims
   */
  const std::vector<int> &get_dims() const { return dims; }

  /**
   * @brief Get the values object
   *
   * @return const std::vector<float>& values
   */
  const std::vector<float> &get_values() const { return values; }
};

} // namespace nntrainer::util

#endif // NPY_READER_H_

0 comments on commit c75d847

Please sign in to comment.