Skip to content

Commit

Permalink
[util] Add numpy file reader
Browse files Browse the repository at this point in the history
In deep learning data feeding, there are often tasks that involve reading and processing numpy (.npy) files.

Therefore, it is much more convenient to have a utility function that takes a file name and returns the data in a vector, instead of manually writing the reading code every time.

Function Signature
```
void read_npy_file(const char *file_path, std::vector<int> &dims,
                   std::vector<float> &values);
```

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <[email protected]>
  • Loading branch information
DonghakPark committed Feb 28, 2024
1 parent f0c51be commit 5a4bca3
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 0 deletions.
1 change: 1 addition & 0 deletions Applications/utils/meson.build
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Descend into each utility sub-directory so its meson.build is processed.
subdir('jni')
subdir('datagen/cifar')
subdir('npy_reader')
2 changes: 2 additions & 0 deletions Applications/utils/npy_reader/meson.build
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Path of this source directory (the one holding npy_reader.cpp / npy_reader.h).
npy_path = meson.current_source_dir()
# Include-directory object for this directory, so npy_reader.h can be found.
npy_include_dir = include_directories('.')
119 changes: 119 additions & 0 deletions Applications/utils/npy_reader/npy_reader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Donghak Park <[email protected]>
*
* @file npy_reader.cpp
* @date 28 Feb 2024
* @brief reader for npy file
* @see https://github.com/nnstreamer/nntrainer
* @author Donghak Park <[email protected]>
* @bug No known bugs except for NYI items
*/

#include "npy_reader.h"

#include <assert.h>

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>

namespace nntrainer::util {
namespace {

/**
 * @brief Largest textual header accepted, in bytes
 *
 * A float32 array header is on the order of a hundred bytes; the cap only
 * protects against allocating gigabytes for a corrupt length field.
 */
constexpr std::size_t kMaxHeaderLen = static_cast<std::size_t>(1) << 20;

/**
 * @brief Number of elements fetched per fread call
 *
 * Reading in bounded chunks keeps a corrupt 'shape' entry from triggering one
 * huge up-front allocation before the file turns out to be too short.
 */
constexpr std::size_t kReadChunk = static_cast<std::size_t>(1) << 16;

/**
 * @brief Deleter that lets std::unique_ptr own a C FILE handle
 */
struct FileCloser {
  void operator()(std::FILE *file) const noexcept {
    if (file != nullptr) {
      std::fclose(file);
    }
  }
};

using FilePtr = std::unique_ptr<std::FILE, FileCloser>;

/**
 * @brief Read exactly @a size bytes or throw
 *
 * @param file stream to read from
 * @param dst destination buffer of at least @a size bytes
 * @param size number of bytes to read
 * @param what name of the section being read, used in the error message
 * @throws std::runtime_error if fewer than @a size bytes could be read
 */
void read_exact(std::FILE *file, void *dst, std::size_t size,
                const char *what) {
  if (size != 0 && std::fread(dst, 1, size, file) != size) {
    throw std::runtime_error(
      std::string("npy_reader: truncated file while reading ") + what);
  }
}

/**
 * @brief Locate the value that follows a quoted key in the header dictionary
 *
 * @param header header text
 * @param key dictionary key, without quotes
 * @return index of the first non-blank character after the key's ':'
 * @throws std::runtime_error if the key or its value is missing
 */
std::size_t find_value(const std::string &header, const char *key) {
  const std::string quoted = std::string("'") + key + "'";

  std::size_t pos = header.find(quoted);
  if (pos != std::string::npos) {
    pos = header.find(':', pos + quoted.size());
  }
  if (pos != std::string::npos) {
    pos = header.find_first_not_of(" \t", pos + 1);
  }
  if (pos == std::string::npos) {
    throw std::runtime_error(std::string("npy_reader: header has no '") + key +
                             "' entry");
  }
  return pos;
}

/**
 * @brief Check that the stored dtype is float32 and report its byte order
 *
 * @param header header text
 * @return true if the data is big-endian ('>f4'), false if little-endian
 *         ('<f4')
 * @throws std::runtime_error for any other dtype description
 */
bool is_big_endian_float32(const std::string &header) {
  const std::size_t open = find_value(header, "descr");
  const std::size_t close = (header[open] == '\'')
                              ? header.find('\'', open + 1)
                              : std::string::npos;
  if (close == std::string::npos) {
    throw std::runtime_error(
      "npy_reader: unsupported dtype description (only float32 is supported)");
  }

  const std::string descr = header.substr(open + 1, close - open - 1);
  if (descr == "<f4") {
    return false;
  }
  if (descr == ">f4") {
    return true;
  }
  throw std::runtime_error("npy_reader: unsupported dtype '" + descr +
                           "' (only float32 is supported)");
}

/**
 * @brief Parse the 'shape' tuple of the header
 *
 * Accepts "()", "(3,)" and "(2, 3)" style tuples; a trailing comma and an
 * 'L' suffix after a number are tolerated.
 *
 * @param header header text
 * @return one entry per dimension (empty for a 0-d array)
 * @throws std::runtime_error if the tuple is malformed or a size exceeds int
 */
std::vector<int> parse_shape(const std::string &header) {
  const std::size_t open = find_value(header, "shape");
  const std::size_t close =
    (header[open] == '(') ? header.find(')', open) : std::string::npos;
  if (close == std::string::npos) {
    throw std::runtime_error("npy_reader: malformed 'shape' entry");
  }

  const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
  const auto skip_blanks = [&](std::size_t at) {
    while (at < close && (header[at] == ' ' || header[at] == '\t')) {
      ++at;
    }
    return at;
  };

  std::vector<int> shape;
  std::size_t at = skip_blanks(open + 1);
  while (at < close) {
    if (!is_digit(header[at])) {
      throw std::runtime_error("npy_reader: malformed 'shape' entry");
    }

    long long size = 0;
    for (; at < close && is_digit(header[at]); ++at) {
      size = size * 10 + (header[at] - '0');
      if (size > std::numeric_limits<int>::max()) {
        throw std::runtime_error("npy_reader: dimension does not fit in int");
      }
    }
    if (at < close && header[at] == 'L') {
      ++at;
    }
    shape.push_back(static_cast<int>(size));

    at = skip_blanks(at);
    if (at < close) {
      if (header[at] != ',') {
        throw std::runtime_error("npy_reader: malformed 'shape' entry");
      }
      at = skip_blanks(at + 1);
    }
  }
  return shape;
}

/**
 * @brief Multiply the dimensions, guarding against overflow
 *
 * @param shape dimensions, all non-negative
 * @return number of elements (1 for an empty shape)
 * @throws std::runtime_error if the product does not fit in std::size_t
 */
std::size_t element_count(const std::vector<int> &shape) {
  std::size_t count = 1;
  for (const int dim : shape) {
    const std::size_t extent = static_cast<std::size_t>(dim);
    if (extent != 0 &&
        count > std::numeric_limits<std::size_t>::max() / extent) {
      throw std::runtime_error("npy_reader: element count overflows");
    }
    count *= extent;
  }
  return count;
}

/**
 * @brief Tell whether the host stores the least significant byte first
 */
bool host_is_little_endian() {
  const std::uint16_t probe = 1;
  unsigned char first = 0;
  std::memcpy(&first, &probe, 1);
  return first == 1;
}

/**
 * @brief Reverse the byte order of @a count floats in place
 */
void swap_float_bytes(float *first, std::size_t count) {
  for (std::size_t i = 0; i < count; ++i) {
    unsigned char raw[sizeof(float)];
    std::memcpy(raw, first + i, sizeof(float));
    std::reverse(raw, raw + sizeof(float));
    std::memcpy(first + i, raw, sizeof(float));
  }
}

} // namespace

/**
 * @brief read numpy file from file_path
 *
 * Supports .npy format versions 1.x, 2.x and 3.x holding float32 data
 * ('<f4' or '>f4'). Elements are returned in the order they are stored in the
 * file; the header's 'fortran_order' flag is not interpreted.
 *
 * Both output vectors are appended to, never cleared, and are left untouched
 * when an exception is thrown.
 *
 * @param file_path file_path that want to read
 * @param dims data shape, one entry appended per dimension
 * @param values return value that contain npy's contents, appended
 * @throws std::invalid_argument if @a file_path is null
 * @throws std::runtime_error if the file cannot be opened, is truncated, is
 *         not a valid .npy file, or does not hold float32 data
 */
void read_npy_file(const char *file_path, std::vector<int> &dims,
                   std::vector<float> &values) {
  static_assert(sizeof(float) == 4, "npy float32 data needs a 4-byte float");

  if (file_path == nullptr) {
    throw std::invalid_argument("npy_reader: file_path is null");
  }

  FilePtr file(std::fopen(file_path, "rb"));
  if (!file) {
    throw std::runtime_error(std::string("npy_reader: cannot open ") +
                             file_path);
  }

  // 6 magic bytes followed by the major and minor format version.
  unsigned char preamble[8] = {};
  read_exact(file.get(), preamble, sizeof(preamble), "magic string");
  if (std::memcmp(preamble, "\x93NUMPY", 6) != 0) {
    throw std::runtime_error(std::string("npy_reader: not an npy file: ") +
                             file_path);
  }

  // Version 1.x stores the header length in 2 bytes, 2.x and 3.x in 4 bytes;
  // it is little-endian in the file regardless of the host.
  const unsigned major = preamble[6];
  if (major < 1 || major > 3) {
    throw std::runtime_error("npy_reader: unsupported npy format version");
  }
  const std::size_t len_bytes = (major == 1) ? 2 : 4;
  unsigned char raw_len[4] = {};
  read_exact(file.get(), raw_len, len_bytes, "header length");
  std::size_t header_len = 0;
  for (std::size_t i = 0; i < len_bytes; ++i) {
    header_len |= static_cast<std::size_t>(raw_len[i]) << (8 * i);
  }
  if (header_len > kMaxHeaderLen) {
    throw std::runtime_error("npy_reader: header is implausibly large");
  }

  std::string header(header_len, '\0');
  read_exact(file.get(), header.data(), header_len, "header");

  const bool file_big_endian = is_big_endian_float32(header);
  const std::vector<int> shape = parse_shape(header);
  const std::size_t count = element_count(shape);

  // Grow `values` chunk by chunk; roll back on a short read so the caller
  // never observes partially loaded data.
  const std::size_t base = values.size();
  try {
    for (std::size_t done = 0; done < count;) {
      const std::size_t want = std::min(kReadChunk, count - done);
      values.resize(base + done + want);
      const std::size_t got = std::fread(values.data() + base + done,
                                         sizeof(float), want, file.get());
      if (got != want) {
        throw std::runtime_error(
          "npy_reader: truncated file while reading data");
      }
      done += got;
    }
  } catch (...) {
    values.resize(base);
    throw;
  }

  if (file_big_endian == host_is_little_endian()) {
    // File and host byte order differ.
    swap_float_bytes(values.data() + base, count);
  }

  dims.insert(dims.end(), shape.begin(), shape.end());
}

} // namespace nntrainer::util
31 changes: 31 additions & 0 deletions Applications/utils/npy_reader/npy_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Donghak Park <[email protected]>
*
* @file npy_reader.h
* @date 28 Feb 2024
* @brief reader for npy file
* @see https://github.com/nnstreamer/nntrainer
* @author Donghak Park <[email protected]>
* @bug No known bugs except for NYI items
*/

#ifndef NPY_READER_H_
#define NPY_READER_H_

#include <vector>

namespace nntrainer::util {
/**
 * @brief Read the shape and the elements of a NumPy .npy file
 *
 * @param file_path path of the .npy file to read
 * @param dims output: the sizes listed in the header's 'shape' entry are
 *             pushed onto this vector, one entry per dimension
 * @param values output: the array elements, pushed onto this vector as
 *               4-byte floats in the order they are stored in the file
 */
void read_npy_file(const char *file_path, std::vector<int> &dims,
std::vector<float> &values);

} // namespace nntrainer::util

#endif // NPY_READER_H_

0 comments on commit 5a4bca3

Please sign in to comment.