diff --git a/meson_options.txt b/meson_options.txt index e904d9de3..968dc214f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -45,6 +45,7 @@ option('enable-avx', type: 'boolean', value: true) option('enable-opencl', type: 'boolean', value: false) option('enable-biqgemm', type: 'boolean', value: false) option('enable-benchmarks', type: 'boolean', value : false) +option('enable-qnn', type: 'boolean', value: true) # ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api ) # To inter-operate with nnstreamer and ML-API packages, you need to enable this. @@ -57,3 +58,4 @@ option('nnstreamer-subplugin-install-path', type: 'string', value: '/usr/lib/nns # application related options option('enable_encoder', type: 'boolean', value: false) + diff --git a/nntrainer/meson.build b/nntrainer/meson.build index ed15b8f2a..383e144be 100644 --- a/nntrainer/meson.build +++ b/nntrainer/meson.build @@ -50,6 +50,11 @@ if get_option('enable-opencl') nntrainer_elements += 'layers/cl_layers' endif +if get_option('enable-qnn') + message ('QNN build is enabled. Will work only if Qualcomm NPU is available.') + nntrainer_elements += 'npu' +endif + foreach elem : nntrainer_elements subdir(elem) nntrainer_inc += include_directories(elem) diff --git a/nntrainer/npu/meson.build b/nntrainer/npu/meson.build new file mode 100644 index 000000000..0773800f4 --- /dev/null +++ b/nntrainer/npu/meson.build @@ -0,0 +1 @@ +subdir('qnn') diff --git a/nntrainer/npu/qnn/PAL/include/PAL/Debug.hpp b/nntrainer/npu/qnn/PAL/include/PAL/Debug.hpp new file mode 100644 index 000000000..d03331c26 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/Debug.hpp @@ -0,0 +1,21 @@ +//============================================================================ +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================ + +#pragma once + +#define DEBUG_ON 0 + +#if DEBUG_ON +#define DEBUG_MSG(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } +#else +#define DEBUG_MSG(...) +#endif diff --git a/nntrainer/npu/qnn/PAL/include/PAL/Directory.hpp b/nntrainer/npu/qnn/PAL/include/PAL/Directory.hpp new file mode 100644 index 000000000..6ebef2288 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/Directory.hpp @@ -0,0 +1,81 @@ +//============================================================================== +// +// Copyright (c) 2008-2014, 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +//--------------------------------------------------------------------------- +/// @file +/// This file includes APIs for directory operations on supported platforms +//--------------------------------------------------------------------------- + +#pragma once + +#include + +#include "PAL/FileOp.hpp" + +namespace pal { +class Directory; +} + +class pal::Directory { +public: + using DirMode = pal::FileOp::FileMode; + //--------------------------------------------------------------------------- + /// @brief + /// Creates a directory in the file system. + /// @param path + /// Name of directory to create. + /// @param dirmode + /// Directory mode + /// @return + /// True if + /// 1. create a directory successfully + /// 2. 
or directory exist already + /// False otherwise + /// + /// For example: + /// + /// - Create a directory in default. + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// pal::Directory::Create(path, pal::Directory::DirMode::S_DEFAULT_); + /// pal::Directory::Create(path); + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// + /// - Create a directory with specific permission. + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// pal::Directory::Create(path, pal::Directory::DirMode::S_IRWXU_| + /// pal::Directory::DirMode::S_IRWXG_| + /// pal::Directory::DirMode::S_IRWXO_); + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// + /// @note For windows, dirmode is not used. + /// @note For linux, dirmode is used to set the permission of the folder. + //--------------------------------------------------------------------------- + static bool + create(const std::string &path, + pal::Directory::DirMode dirmode = pal::Directory::DirMode::S_DEFAULT_); + + //--------------------------------------------------------------------------- + /// @brief + /// Removes the entire directory whether it's empty or not. + /// @param path + /// Name of directory to delete. + /// @return + /// True if the directory was successfully deleted, false otherwise. + //--------------------------------------------------------------------------- + static bool remove(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief + /// Creates a directory and all parent directories required. + /// @param path + /// Path of directory to create. + /// @return + /// True if the directory was successfully created, false otherwise. + //--------------------------------------------------------------------------- + static bool makePath(const std::string &path); +}; diff --git a/nntrainer/npu/qnn/PAL/include/PAL/DynamicLoading.hpp b/nntrainer/npu/qnn/PAL/include/PAL/DynamicLoading.hpp new file mode 100644 index 000000000..1d1a13393 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/DynamicLoading.hpp @@ -0,0 +1,101 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
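// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// a minimal example of how the pal::Directory helpers declared above could be
// combined. The path "/tmp/qnn_cache" style argument is a placeholder.
#include <string>
#include "PAL/Directory.hpp"

static bool prepareCacheDir(const std::string &root) {
  // Build root and any missing parent directories first.
  if (!pal::Directory::makePath(root)) {
    return false;
  }
  // Create a subdirectory restricted to the current user.
  return pal::Directory::create(root + "/graphs",
                                pal::Directory::DirMode::S_IRWXU_);
}
// Cleanup of the whole tree, empty or not, would be:
//   pal::Directory::remove(root);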
+// +//============================================================================== + +//--------------------------------------------------------------------------- +/// @file +/// This file includes APIs for dynamic loading on supported platforms +//--------------------------------------------------------------------------- + +#pragma once + +#include + +namespace pal { +namespace dynamicloading { +// we only support subset of POSIX of dlopen/dlsym/dladdr/dlerror/dlclose +// except the following flags for dlopen, others should be done only +// when we really need them +// DL_NOW is MUST +// DL_LOCAL is enabled if not specified +enum { + DL_NOW = 0x0001, + DL_LOCAL = 0x0002, + DL_GLOBAL = 0x0004, +}; + +// specify this address to distingiush from NULL pointer +#define DL_DEFAULT (void *)(0x4) + +//--------------------------------------------------------------------------- +/// @brief +/// Loads the dynamic shared object +/// @param filename +/// If contains path separators, treat it as relative or absolute pathname +/// or search it for the rule of dynamic linker +/// @param flags +/// - DL_NOW: resolve undefined symbols before return. MUST be specified. +/// - DL_LOCAL: optional, but the default specified. Symbols defined in this +/// shared object are not made available to resolve references in +/// subsequently loaded shared objects +/// - DL_GLOBAL: optional, resolve symbol globally +/// @return +/// On success, a non-NULL handle for the loaded library. +/// On error, NULL +//--------------------------------------------------------------------------- +void *dlOpen(const char *filename, int flags); + +//--------------------------------------------------------------------------- +/// @brief +/// Obtain address of a symbol in a shared object or executable +/// @param handle +/// A handle of a dynamic loaded shared object returned by dlopen +/// @param symbol +/// A null-terminated symbol name +/// @return +/// On success, return the address associated with symbol +/// On error, NULL +//--------------------------------------------------------------------------- +void *dlSym(void *handle, const char *symbol); + +//--------------------------------------------------------------------------- +/// @brief +/// Translate the address of a symbol to the path of the belonging shared +/// object +/// @param addr +/// Address of symbol in a shared object +/// @param path +/// Full name of shared object that contains address, usually it is an +/// absolute path +/// @return +/// On success, return a non-zero value +/// On error, return 0 +//--------------------------------------------------------------------------- +int dlAddrToLibName(void *addr, std::string &name); + +//--------------------------------------------------------------------------- +/// @brief +/// Decrements the reference count on the dynamically loaded shared object +/// referred to by handle. If the reference count drops to 0, then the +/// object is unloaded. +/// @return +/// On success, 0; on error, a nonzero value +//--------------------------------------------------------------------------- +int dlClose(void *handle); + +//--------------------------------------------------------------------------- +/// @brief +/// Obtain error diagnostic for functions in the dl-family APIs. +/// @return +/// Returns a human-readable, null-terminated string describing the most +/// recent error that occurred from a call to one of the functions in the +/// dl-family APIs. 
+//--------------------------------------------------------------------------- +char *dlError(void); + +} // namespace dynamicloading +} // namespace pal diff --git a/nntrainer/npu/qnn/PAL/include/PAL/FileOp.hpp b/nntrainer/npu/qnn/PAL/include/PAL/FileOp.hpp new file mode 100644 index 000000000..1b25a1d57 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/FileOp.hpp @@ -0,0 +1,246 @@ +//============================================================================== +// +// Copyright (c) 2008-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +//------------------------------------------------------------------------------ +/// @file +/// This file includes APIs for file operations on the supported platforms +//------------------------------------------------------------------------------ + +#pragma once + +#include + +#include +#include + +namespace pal { +class FileOp; +} + +//------------------------------------------------------------------------------ +/// @brief +/// FileOp contains OS Specific file system functionality. +//------------------------------------------------------------------------------ +class pal::FileOp { +public: + // enum for symbolic constants mode, strictly follow linux usage + // windows or another OS user should transfer the usage + // ref : http://man7.org/linux/man-pages/man2/open.2.html + enum class FileMode : uint32_t { + S_DEFAULT_ = 0777, + S_IRWXU_ = 0700, + S_IRUSR_ = 0400, + S_IWUSR_ = 0200, + S_IXUSR_ = 0100, + S_IRWXG_ = 0070, + S_IRGRP_ = 0040, + S_IWGRP_ = 0020, + S_IXGRP_ = 0010, + S_IRWXO_ = 0007, + S_IROTH_ = 0004, + S_IWOTH_ = 0002, + S_IXOTH_ = 0001 + }; + + //--------------------------------------------------------------------------- + /// @brief + /// Copies a file from one location to another, overwrites if the + /// destination already exists. + /// @param source + /// File name of the source file. + /// @param target + /// File name of the target file. + /// @return + /// True on success, otherwise false. + //--------------------------------------------------------------------------- + static bool copyOverFile(const std::string &source, + const std::string &target); + + //--------------------------------------------------------------------------- + /// @brief + /// Checks whether the file exists or not. + /// @param fileName + /// File name of the source file, including its complete path. + /// @return + /// True on success, otherwise false. + //--------------------------------------------------------------------------- + static bool checkFileExists(const std::string &fileName); + + //--------------------------------------------------------------------------- + /// @brief + /// Renames an existing file. If the file with target name exists, this call + /// overwrites it with the file with source name. + /// @param source + /// Current File name. + /// @param target + /// New name of the file. + /// @param overwrite + /// Flag indicating to overwrite existing file with newName + /// @return + /// True if successful, otherwise false. + /// @warning + /// Does not work if source and target are on different filesystems. 
+ //--------------------------------------------------------------------------- + static bool move(const std::string &source, const std::string &target, + bool overwrite); + + //--------------------------------------------------------------------------- + /// @brief + /// Delete an existing file + /// @param fileName + /// File name of the file to be deleted. + /// @return + /// True if successful, otherwise false. + //--------------------------------------------------------------------------- + static bool deleteFile(const std::string &fileName); + + //--------------------------------------------------------------------------- + /// @brief + /// Check if path is a directory or not + /// @param path + /// Path to check + /// @return + /// True if successful, otherwise false. + //--------------------------------------------------------------------------- + static bool checkIsDir(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Data type representing parts of a filename + //--------------------------------------------------------------------------- + typedef struct { + //--------------------------------------------------------------------------- + /// @brief Name of the file without the extension (i.e., basename) + //--------------------------------------------------------------------------- + std::string basename; + + //--------------------------------------------------------------------------- + /// @brief Name of the file extension (i.e., .txt or .hlnd, .html) + //--------------------------------------------------------------------------- + std::string extension; + + //--------------------------------------------------------------------------- + /// @brief + /// Location of the file (i.e., /abc/xyz/foo.bar <-- /abc/xyz/). + /// If the file name has no location then the Directory points to + /// empty string + //--------------------------------------------------------------------------- + std::string directory; + } FilenamePartsType_t; + + //--------------------------------------------------------------------------- + /// @brief + /// Determines the components of a given filename, being the directory, + /// basename and extension. 
If the file has no location or extension, these + /// components remain empty + /// @param filename + /// Path of the file for which the components are to be determined + /// @param filenameParts + /// Will contain the file name components when this function returns + /// @return + /// True if successful, false otherwise + //--------------------------------------------------------------------------- + static bool getFileInfo(const std::string &filename, + FilenamePartsType_t &filenameParts); + + //--------------------------------------------------------------------------- + /// @brief + /// Typedef for a vector of FilenamePartsType_t + //--------------------------------------------------------------------------- + typedef std::vector FilenamePartsListType_t; + + //--------------------------------------------------------------------------- + /// @brief + /// Typedef for a vector of FilenamePartsType_t const iterator + //--------------------------------------------------------------------------- + typedef std::vector::const_iterator + FilenamePartsListTypeIter_t; + + //--------------------------------------------------------------------------- + /// @brief + /// Returns a vector of FilenamePartsType_t objects for a given directory + /// @param path + /// Path to scan for files + /// @return + /// True if successful, false otherwise + //--------------------------------------------------------------------------- + static bool getFileInfoList(const std::string &path, + FilenamePartsListType_t &filenamePartsList); + + //--------------------------------------------------------------------------- + /// @brief + /// Returns a vector of FilenamePartsType_t objects for a given directory + /// and the child directories inside. + /// @param path + /// Path to directory to scan for files for + /// @note if path is not a directory - the function will return false + /// @param filenamePartList + /// List to append to + /// @param ignoreDirs + /// If this flag is set to true, directories (and symbolic links to + /// directories) are not included in the list. Only actual files below the + /// specified directory path will be appended. + /// @return True if successful, false otherwise + /// @note Directories in list only populate Directory member variable of the + /// struct. + /// That is Basename and Extension will be empty strings. + /// @note Symbolic links to directories are not followed. This is to avoid + /// possible + /// infinite recursion. However the initial call to this method can have + /// path to be a symbolic link to a directory. If ignoreDirs is true, + /// symbolic links to directories are also ignored. + /// @note The order in which the files/directories are listed is platform + /// dependent. However files inside a directory always come before the + /// directory itself. 
+ //--------------------------------------------------------------------------- + static bool + getFileInfoListRecursive(const std::string &path, + FilenamePartsListType_t &filenamePartsList, + const bool ignoreDirs); + + //--------------------------------------------------------------------------- + /// @brief + /// Create an absolute path from the supplied path + /// @param path + /// Path should not contain trailing '/' or '\\' + /// @return + /// Return absolute path without trailing '/' or '\\' + //--------------------------------------------------------------------------- + static std::string getAbsolutePath(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Get the file name from a path + //--------------------------------------------------------------------------- + static std::string getFileName(const std::string &file); + + //--------------------------------------------------------------------------- + /// @brief Get the directory path to a file + //--------------------------------------------------------------------------- + static std::string getDirectory(const std::string &file); + + //--------------------------------------------------------------------------- + /// @brief Get the current working directory. + /// @returns The absolute CWD or empty string if the path could not be + /// retrieved (because it was too long or deleted for example). + //--------------------------------------------------------------------------- + static std::string getCurrentWorkingDirectory(); + + //--------------------------------------------------------------------------- + /// @brief Set the current working directory + //--------------------------------------------------------------------------- + static bool setCurrentWorkingDirectory(const std::string &workingDir); + + //--------------------------------------------------------------------------- + /// @brief Returns true if the file contains any extension or false. + //--------------------------------------------------------------------------- + static bool hasFileExtension(const std::string &file); + + //--------------------------------------------------------------------------- + /// @brief Returns full path of file, Directory/Basename(.Extension, if any) + //--------------------------------------------------------------------------- + static std::string partsToString(const FilenamePartsType_t &filenameParts); +}; diff --git a/nntrainer/npu/qnn/PAL/include/PAL/GetOpt.hpp b/nntrainer/npu/qnn/PAL/include/PAL/GetOpt.hpp new file mode 100644 index 000000000..c54ac1966 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/GetOpt.hpp @@ -0,0 +1,92 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
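// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// splitting a path with pal::FileOp::getFileInfo() and re-assembling it with
// partsToString(). The file name used below is only an example.
#include <iostream>
#include <string>
#include "PAL/FileOp.hpp"

static void describe(const std::string &file) {
  pal::FileOp::FilenamePartsType_t parts;
  if (pal::FileOp::getFileInfo(file, parts)) {
    // For "/opt/qnn/libQnnHtp.so": directory="/opt/qnn",
    // basename="libQnnHtp", extension="so".
    std::cout << "dir=" << parts.directory << " base=" << parts.basename
              << " ext=" << parts.extension << '\n';
    std::cout << "round-trip: " << pal::FileOp::partsToString(parts) << '\n';
  }
}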
+// +//============================================================================== + +//-------------------------------------------------------------------------------- +/// @file +/// This file includes APIs for the command line parsing on supported +/// platforms +//-------------------------------------------------------------------------------- + +#pragma once + +namespace pal { +// we implement a similar API for POSIX.2 +// so that some global var are necessary + +extern const char *g_optArg; +extern int g_optInd; + +enum { + no_argument = 0, + required_argument = 1, + optional_argument = 2, +}; + +//-------------------------------------------------------------------------------------------------- +/// @brief +/// This structure describes a single long option name for the sake of +/// getopt_long. The argument longopts must be an array of these structures, +/// one for each long option. Terminate the array with an element containing +/// all zeros. +//-------------------------------------------------------------------------------------------------- +struct Option { + //-------------------------------------------------------------------------------------------------- + /// @brief The name of the long option. + //-------------------------------------------------------------------------------------------------- + const char *name; + + //-------------------------------------------------------------------------------------------------- + /// @brief + /// If the option does not take an argument, no_argument (or 0). + /// If the option requires an argument, required_argument (or 1). + //-------------------------------------------------------------------------------------------------- + int hasArg; + + //-------------------------------------------------------------------------------------------------- + /// @brief + /// Specifies how results are returned for a long option. + /// If flag is NULL, then GetOptLongOnly() returns val. Otherwise, it + /// returns 0, and flag points to a variable which is set to val if the + /// option is found, but left unchanged if the option is not found. + //-------------------------------------------------------------------------------------------------- + int *flag; + + //-------------------------------------------------------------------------------------------------- + /// @brief + /// The value to return, or to load into the variable pointed to by flag. + /// The last element of the array has to be filled with zeros. + //-------------------------------------------------------------------------------------------------- + int val; +}; + +//-------------------------------------------------------------------------------------------------- +/// @brief +/// This parses command-line options as POSIX getopt_long_only() +/// but we don't support optstring and optonal_argument now +/// @param argc +/// Argument count +/// @param argv +/// Argument array +/// @param optstring +/// Legitimate option characters, short options, don't support now +/// @param longopts +/// A pointer to the first element of an array of struct option, +/// has_arg field in the struct option indicates 3 possibilities, +/// no_argument, required_argument or optional_argument. we don't +/// support optional_argument now +/// @param longindex +/// If longindex is not NULL, it points to a variable which is set +/// to the index of the long option relative to longopts +/// @return +/// -1 for parsing done, '?' 
for non-recognized arguments, 0 for +/// flag in longopts is not NULL and saved the val to it +//-------------------------------------------------------------------------------------------------- +int getOptLongOnly(int argc, const char *const argv[], const char *optstring, + const struct Option *longopts, int *longindex); + +} // namespace pal diff --git a/nntrainer/npu/qnn/PAL/include/PAL/Path.hpp b/nntrainer/npu/qnn/PAL/include/PAL/Path.hpp new file mode 100644 index 000000000..374aead6a --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/Path.hpp @@ -0,0 +1,50 @@ +//============================================================================== +// +// Copyright (c) 2008-2014, 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +//============================================================================== + +//------------------------------------------------------------------------------ +/// @file +/// The file includes APIs for path related operations on supported platforms +//------------------------------------------------------------------------------ + +#pragma once + +#include +#include + +namespace pal { +class Path; +} + +class pal::Path { +public: + //--------------------------------------------------------------------------- + /// @brief Returns path separator for the system + //--------------------------------------------------------------------------- + static char getSeparator(); + + //--------------------------------------------------------------------------- + /// @brief Concatenate s1 and s2 + //--------------------------------------------------------------------------- + static std::string combine(const std::string &s1, const std::string &s2); + + //--------------------------------------------------------------------------- + /// @brief Get the directory name + //--------------------------------------------------------------------------- + static std::string getDirectoryName(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Get absolute path + //--------------------------------------------------------------------------- + static std::string getAbsolute(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Check if the input path is absolute path + //--------------------------------------------------------------------------- + static bool isAbsolute(const std::string &path); + +private: +}; diff --git a/nntrainer/npu/qnn/PAL/include/PAL/StringOp.hpp b/nntrainer/npu/qnn/PAL/include/PAL/StringOp.hpp new file mode 100644 index 000000000..f3da64319 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/StringOp.hpp @@ -0,0 +1,61 @@ +//============================================================================== +// +// Copyright (c) 2018-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
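// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// parsing "--model <path> --verbose" with pal::getOptLongOnly(). The option
// names and the OPT_* values are made up for this example.
#include <string>
#include "PAL/GetOpt.hpp"

static void parseArgs(int argc, const char *const argv[]) {
  enum { OPT_MODEL = 1, OPT_VERBOSE = 2 };
  const pal::Option longOpts[] = {
    {"model",   pal::required_argument, nullptr, OPT_MODEL},
    {"verbose", pal::no_argument,       nullptr, OPT_VERBOSE},
    {nullptr, 0, nullptr, 0}  // all-zero terminator is required
  };

  std::string modelPath;
  bool verbose = false;
  int longIndex = 0;
  int opt;
  while ((opt = pal::getOptLongOnly(argc, argv, "", longOpts, &longIndex)) != -1) {
    switch (opt) {
    case OPT_MODEL:   modelPath = pal::g_optArg; break;  // argument of --model
    case OPT_VERBOSE: verbose = true;            break;
    default:          /* '?' means an unrecognized token */ break;
    }
  }
  (void)modelPath; (void)verbose;
}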
+// +//============================================================================== + +//----------------------------------------------------------------------------- +/// @file +/// The file inludes APIs for string operations on supported platforms +//----------------------------------------------------------------------------- + +#pragma once + +#include + +namespace pal { +class StringOp; +} + +//------------------------------------------------------------------------------ +/// @brief +/// FileOp contains OS Specific file system functionality. +//------------------------------------------------------------------------------ +class pal::StringOp { +public: + //--------------------------------------------------------------------------- + /// @brief + /// Copy copy_size bytes from buffer src to buffer dst. Behaviour of the + /// function is undefined if src and dst overlap. + /// @param dst + /// Destination buffer + /// @param dst_size + /// Size of destination buffer + /// @param src + /// Source buffer + /// @param copy_size + /// Number of bytes to copy + /// @return + /// Number of bytes copied + //--------------------------------------------------------------------------- + static size_t memscpy(void *dst, size_t dstSize, const void *src, + size_t copySize); + + //--------------------------------------------------------------------------- + /// @brief + /// Returns a pointer to a null-terminated byte string, which contains + /// copies of at most size bytes from the string pointed to by str. If the + /// null terminator is not encountered in the first size bytes, it is added + /// to the duplicated string. + /// @param source + /// Source string + /// @param maxlen + /// Max number of bytes to copy from str + /// @return + /// A pointer to the newly allocated string, or a null pointer if an error + /// occurred. + //--------------------------------------------------------------------------- + static char *strndup(const char *source, size_t maxlen); +}; diff --git a/nntrainer/npu/qnn/PAL/src/common/GetOpt.cpp b/nntrainer/npu/qnn/PAL/src/common/GetOpt.cpp new file mode 100644 index 000000000..cb3f0176d --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/common/GetOpt.cpp @@ -0,0 +1,150 @@ +//============================================================================= +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================= + +#include + +#include + +#include "PAL/GetOpt.hpp" + +using namespace std; + +namespace pal { + +const char *g_optArg = nullptr; +int g_optInd = 1; + +static const struct Option * +findOpt(const string str, const struct Option *longopts, int *longindex) { + const struct Option *opt = nullptr; + int idx = 0; + size_t searchEnd = str.find_first_of("="); + + for (opt = longopts; opt->name && strlen(opt->name) > 0; opt++, idx++) { + if (str.substr(0, searchEnd) == opt->name) { + if (longindex) { + *longindex = idx; + } + break; + } + } + // if not found, opt would point to the last element of longopts + // whose name MUST be empty + return opt->name ? 
opt : nullptr; +} + +int getOptLongOnly(int argc, const char *const argv[], const char *, + const struct Option *longopts, int *longindex) { + const struct Option *opt; + int argLen = 0; + bool isShort = false; + const char *arg = ""; + + g_optArg = nullptr; + // no arg, means the end of command + if (g_optInd >= argc) { + return -1; + } + + arg = argv[g_optInd]; + + if (arg[0] != '-') { + g_optInd += 1; + return '?'; + } + + argLen = strlen(arg); + + if (argLen < 2) { + g_optInd += 1; + return '?'; + } + + if (!longopts) { + g_optInd += 1; + return '?'; + } + + // check short options with this form, -a arg + if (argLen == 2) { + isShort = true; + // check short options with this form, -a=arg + } else if (argLen > 3 && arg[2] == '=') { + isShort = true; + // check for long options, can be used for both forms + } else if (argLen > 2 && arg[1] != '=') { + if (arg[1] != '-') { + g_optInd += 1; + return '?'; + } + isShort = false; + } + + // start after -- to find the option + const char *const optStr = isShort ? &arg[1] : &arg[2]; + opt = findOpt(optStr, longopts, longindex); + if (!opt) { + g_optInd += 1; + return '?'; + } + + if (opt->hasArg == no_argument) { + g_optInd += 1; + + if (!opt->flag) { + return opt->val; + } else { + *(opt->flag) = opt->val; + return 0; + } + } + + if (opt->hasArg == required_argument) { + string optStr = argv[g_optInd]; + size_t assignIdx = optStr.find_first_of("="); + bool advance = (assignIdx == string::npos); + + // if it is --opt arg form, this will be true, + // so we need to advance one step to get arg + // otherwise, need to stop advance step & extract arg from argv[g_optInd] + if (advance) { + g_optInd += 1; + } + + if (g_optInd >= argc) { + return '?'; + } else { + // if advance, means it is the form --opt arg + // otherwise, the form, --opt=arg + if (advance) { + // since g_optInd is advanced, g_optArg can be assigned directly + g_optArg = argv[g_optInd]; + } else { + if (assignIdx == optStr.size()) { + return '?'; + } + // for not advanced form, + // g_optArg should point to the address right after "=" + g_optArg = &argv[g_optInd][assignIdx + 1]; + } + // OK, now we are ready to handle the next pair + g_optInd += 1; + + if (!opt->flag) { + return opt->val; + } else { + *(opt->flag) = opt->val; + return 0; + } + } + } + + return '?'; +} // end of getOptLongOnly + +} // namespace pal diff --git a/nntrainer/npu/qnn/PAL/src/common/StringOp.cpp b/nntrainer/npu/qnn/PAL/src/common/StringOp.cpp new file mode 100644 index 000000000..eb917681b --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/common/StringOp.cpp @@ -0,0 +1,48 @@ +//============================================================================== +// +// Copyright (c) 2018-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include + +#include "PAL/StringOp.hpp" + +//--------------------------------------------------------------------------- +// pal::StringOp::memscpy +//--------------------------------------------------------------------------- +size_t pal::StringOp::memscpy(void *dst, size_t dstSize, const void *src, + size_t copySize) { + if (!dst || !src || !dstSize || !copySize) + return 0; + + size_t minSize = dstSize < copySize ? 
dstSize : copySize; + + memcpy(dst, src, minSize); + + return minSize; +} + +//--------------------------------------------------------------------------- +// pal::StringOp::strndup +//--------------------------------------------------------------------------- +char *pal::StringOp::strndup(const char *source, size_t maxlen) { +#ifdef _WIN32 + size_t length = ::strnlen(source, maxlen); + + char *destination = (char *)malloc((length + 1) * sizeof(char)); + if (destination == nullptr) + return nullptr; + + // copy length bytes to destination and leave destination[length] to be + // null terminator + strncpy_s(destination, length + 1, source, length); + + return destination; +#else + return ::strndup(source, maxlen); +#endif +} diff --git a/nntrainer/npu/qnn/PAL/src/linux/Directory.cpp b/nntrainer/npu/qnn/PAL/src/linux/Directory.cpp new file mode 100644 index 000000000..5819e6ec1 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/Directory.cpp @@ -0,0 +1,155 @@ +//============================================================================== +// +// Copyright (c) 2008-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#ifndef __QNXNTO__ +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +#ifdef __QNXNTO__ +static bool is_qnx_dir(const struct dirent *ep) { + struct dirent_extra *exp; + bool is_dir = false; + + for (exp = _DEXTRA_FIRST(ep); _DEXTRA_VALID(exp, ep); + exp = _DEXTRA_NEXT(exp)) { + if (exp->d_type == _DTYPE_STAT || exp->d_type == _DTYPE_LSTAT) { + struct stat *statbuff = &((dirent_extra_stat *)exp)->d_stat; + if (statbuff && S_ISDIR(statbuff->st_mode)) { + is_dir = true; + break; + } + } + } + return is_dir; +} +#endif + +// ------------------------------------------------------------------------------ +// pal::Directory::create +// ------------------------------------------------------------------------------ +bool pal::Directory::create(const std::string &path, + pal::Directory::DirMode dirmode) { + struct stat st; + int status = 0; + if (stat(path.c_str(), &st) != 0) { + // Directory does not exist + status = mkdir(path.c_str(), static_cast(dirmode)); + } else if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + status = -1; + } + return (status == 0); +} + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +bool pal::Directory::remove(const std::string &dirName) { + DIR *dir; + struct dirent *entry; + + dir = opendir(dirName.c_str()); + if (dir == nullptr) { + // If the directory doesn't exist then just return true. + if (errno == ENOENT) { + return true; + } + return false; + } + +#ifdef __QNXNTO__ + if (dircntl(dir, D_SETFLAG, D_FLAG_STAT) == -1) { + return false; + } +#endif + + // Recursively traverse the directory tree. 
+ while ((entry = readdir(dir)) != nullptr) { + if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) { + std::stringstream ss; + ss << dirName << Path::getSeparator() << entry->d_name; + std::string path = ss.str(); +#ifdef __QNXNTO__ + if (is_qnx_dir(entry)) +#else + if (entry->d_type == DT_DIR) +#endif + { + // It's a directory so we need to drill down into it and delete + // its contents. + if (!remove(path)) { + return false; + } + } else { + if (::remove(path.c_str())) { + return false; + } + } + } + } + + closedir(dir); + + if (::remove(dirName.c_str())) { + return false; + } + + return true; +} + +bool pal::Directory::makePath(const std::string &path) { + struct stat st; + bool rc = false; + + if (path == ".") { + rc = true; + } else if (stat(path.c_str(), &st) == 0) { + if (st.st_mode & S_IFDIR) { + rc = true; + } + } else { + size_t offset = path.find_last_of(Path::getSeparator()); + if (offset != std::string::npos) { + std::string newPath = path.substr(0, offset); + if (!makePath(newPath)) { + return false; + } + } + + // There is a possible race condition, where a file/directory can be + // created in between the stat() above, and the mkdir() call here. + // So, ignore the return code from the mkdir() call, and then re-check + // for existence of the directory after it. Ensure both that it exists + // and that it is a directory - just like above. + mkdir(path.c_str(), 0777); + + if ((stat(path.c_str(), &st) == 0) && (st.st_mode & S_IFDIR)) { + rc = true; + } + } + + return rc; +} diff --git a/nntrainer/npu/qnn/PAL/src/linux/DynamicLoading.cpp b/nntrainer/npu/qnn/PAL/src/linux/DynamicLoading.cpp new file mode 100644 index 000000000..4b2f00823 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/DynamicLoading.cpp @@ -0,0 +1,90 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
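// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// pal::StringOp::memscpy (declared in StringOp.hpp above) clamps the copy to
// the destination size, so truncation is reported via the return value
// instead of overflowing the buffer.
#include "PAL/StringOp.hpp"

static void memscpyDemo() {
  const char src[] = "0123456789";
  char dst[4];
  // Copies only sizeof(dst) == 4 bytes even though 11 were requested.
  size_t copied = pal::StringOp::memscpy(dst, sizeof(dst), src, sizeof(src));
  (void)copied;  // copied == 4 here
}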
+// +//============================================================================== + +#include +#include +#include + +//#include "Log.h" +#include "PAL/Debug.hpp" +#include "PAL/DynamicLoading.hpp" +const std::vector LIB_PREFIX = { + "/system/lib64/", "/odm/lib64/", "/vendor/lib64/", + "/data/local/tmp/mllm/qnn-lib/", "/system_ext/lib64/"}; +void *pal::dynamicloading::dlOpen(const char *filename, int flags) { + int realFlags = 0; + + if (flags & DL_NOW) { + realFlags |= RTLD_NOW; + } + + if (flags & DL_LOCAL) { + realFlags |= RTLD_LOCAL; + } + + if (flags & DL_GLOBAL) { + realFlags |= RTLD_GLOBAL; + } + + auto res = ::dlopen(filename, realFlags); + if (!res) { + for (auto prefix_ : LIB_PREFIX) { + std::string prefix = prefix_ + filename; + res = ::dlopen(prefix.c_str(), realFlags); + if (res) { + break; + } + // MLLM_LOG_ERROR("{} not found", prefix); + } + } + return res; +} + +void *pal::dynamicloading::dlSym(void *handle, const char *symbol) { + if (handle == DL_DEFAULT) { + handle = RTLD_DEFAULT; + } + + return ::dlsym(handle, symbol); +} + +int pal::dynamicloading::dlAddrToLibName(void *addr, std::string &name) { + // Clean the output buffer + name = std::string(); + + // If the address is empty, return zero as treating failure + if (!addr) { + DEBUG_MSG("Input address is nullptr."); + return 0; + } + + // Dl_info do not maintain the lifetime of its string members, + // it would be maintained by dlopen() and dlclose(), + // so we do not need to release it manually + Dl_info info; + int result = ::dladdr(addr, &info); + + // If dladdr() successes, set name to the library name + if (result) { + name = std::string(info.dli_fname); + } else { + DEBUG_MSG("Input address could not be matched to a shared object."); + } + + return result; +} + +int pal::dynamicloading::dlClose(void *handle) { + if (!handle) { + return 0; + } + + return ::dlclose(handle); +} + +char *pal::dynamicloading::dlError(void) { return ::dlerror(); } diff --git a/nntrainer/npu/qnn/PAL/src/linux/FileOp.cpp b/nntrainer/npu/qnn/PAL/src/linux/FileOp.cpp new file mode 100644 index 000000000..baebafbea --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/FileOp.cpp @@ -0,0 +1,362 @@ +//============================================================================== +// +// Copyright (c) 2008-2013,2015,2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
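// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// resolving a symbol from a backend library with the pal dynamic-loading
// wrappers defined above. Note that dlOpen() falls back to the LIB_PREFIX
// search paths when the bare name cannot be opened. The library name matches
// a typical QNN backend; the "BackendInit" symbol is a placeholder.
#include <string>
#include "PAL/DynamicLoading.hpp"

typedef int (*BackendInitFn_t)(void);

static bool loadBackend() {
  namespace dl = pal::dynamicloading;
  // DL_NOW is mandatory; DL_LOCAL keeps the symbols private to this handle.
  void *handle = dl::dlOpen("libQnnHtp.so", dl::DL_NOW | dl::DL_LOCAL);
  if (!handle) {
    // dl::dlError() reports the most recent dl-family failure
    return false;
  }
  auto init = reinterpret_cast<BackendInitFn_t>(dl::dlSym(handle, "BackendInit"));
  bool ok = false;
  if (init) {
    ok = (init() == 0);
    std::string libName;
    dl::dlAddrToLibName(reinterpret_cast<void *>(init), libName);  // which .so resolved it
  }
  dl::dlClose(handle);
  return ok;
}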
+// +//============================================================================== + +#include +#include +#include +#ifndef __QNXNTO__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "PAL/Debug.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +typedef struct stat Stat_t; + +//--------------------------------------------------------------------------- +// pal::FileOp::HasFileExtension +//--------------------------------------------------------------------------- +bool pal::FileOp::checkFileExists(const std::string &fileName) { + Stat_t sb; + + if (stat(fileName.c_str(), &sb) == -1) { + return false; + } else { + return true; + } +} + +//--------------------------------------------------------------------------- +// pal::FileOp::move +//--------------------------------------------------------------------------- +bool pal::FileOp::move(const std::string ¤tName, + const std::string &newName, bool overwrite) { + if (overwrite) { + remove(newName.c_str()); + } + return (rename(currentName.c_str(), newName.c_str()) == 0); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::deleteFile +//--------------------------------------------------------------------------- +bool pal::FileOp::deleteFile(const std::string &fileName) { + return (remove(fileName.c_str()) == 0); +} + +//------------------------------------------------------------------------------ +// pal::FileOp::checkIsDir +//------------------------------------------------------------------------------ +bool pal::FileOp::checkIsDir(const std::string &fileName) { + bool retVal = false; + Stat_t sb; + if (stat(fileName.c_str(), &sb) == 0) { + if (sb.st_mode & S_IFDIR) { + retVal = true; + } + } + return retVal; +} + +//------------------------------------------------------------------------------ +// pal::FileOp::getFileInfo +//------------------------------------------------------------------------------ +bool pal::FileOp::getFileInfo(const std::string &filename, + pal::FileOp::FilenamePartsType_t &filenameParts) { + std::string name; + + // Clear the result + filenameParts.basename.clear(); + filenameParts.extension.clear(); + filenameParts.directory.clear(); + + size_t lastPathSeparator = filename.find_last_of(Path::getSeparator()); + if (lastPathSeparator == std::string::npos) { + // No directory + name = filename; + } else { + // has a directory part + filenameParts.directory = filename.substr(0, lastPathSeparator); + name = filename.substr(lastPathSeparator + 1); + } + + size_t ext = name.find_last_of("."); + if (ext == std::string::npos) { + // no extension + filenameParts.basename = name; + } else { + // has extension + filenameParts.basename = name.substr(0, ext); + filenameParts.extension = name.substr(ext + 1); + } + + return true; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::copyOverFile +//--------------------------------------------------------------------------- +bool pal::FileOp::copyOverFile(const std::string &fromFile, + const std::string &toFile) { + bool rc = false; + int readFd; + int writeFd; + struct stat statBuf; + + // Open the input file. + readFd = ::open(fromFile.c_str(), O_RDONLY); + if (readFd == -1) { + close(readFd); + return false; + } + + // Stat the input file to obtain its size. 
*/ + if (fstat(readFd, &statBuf) != 0) { + close(readFd); + return false; + } + + // Open the output file for writing, with the same permissions as the input + writeFd = + ::open(toFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, statBuf.st_mode); + if (writeFd == -1) { + close(readFd); + return false; + } + + // Copy the file in a non-kernel specific way */ + char fileBuf[8192]; + ssize_t rBytes, wBytes; + while (true) { + rBytes = read(readFd, fileBuf, sizeof(fileBuf)); + + if (!rBytes) { + rc = true; + break; + } + + if (rBytes < 0) { + rc = false; + break; + } + + wBytes = write(writeFd, fileBuf, (size_t)rBytes); + + if (!wBytes) { + rc = true; + break; + } + + if (wBytes < 0) { + rc = false; + break; + } + } + + /* Close up. */ + close(readFd); + close(writeFd); + return rc; +} + +static bool getFileInfoListRecursiveImpl( + const std::string &path, + pal::FileOp::FilenamePartsListType_t &filenamePartsList, + const bool ignoreDirs, size_t maxDepth) { + struct dirent **namelist = nullptr; + int entryCount = 0; + + // Base case + if (maxDepth == 0) { + return true; + } + +#ifdef __ANDROID__ + // android dirent.h has the wrong signature for alphasort so it had to be + // disabled or fixed + entryCount = scandir(path.c_str(), &namelist, 0, 0); +#else + entryCount = scandir(path.c_str(), &namelist, 0, alphasort); +#endif + if (entryCount < 0) { + return false; + } else { + while (entryCount--) { + const std::string dName(namelist[entryCount]->d_name); + free(namelist[entryCount]); + + // skip current directory, prev directory and empty string + if (dName.empty() || dName == "." || dName == "..") { + continue; + } + + std::string curPath = path; + curPath += pal::Path::getSeparator(); + curPath += dName; + + // recurse if directory but avoid symbolic links to directories + if (pal::FileOp::checkIsDir(curPath)) { + Stat_t sb; + if (lstat(curPath.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) { + if (!getFileInfoListRecursiveImpl(curPath, filenamePartsList, + ignoreDirs, maxDepth - 1)) { + return false; + } + } + + if (ignoreDirs) { + continue; + } + + // Append training / to make this path look like a directory for + // getFileInfo() + if (curPath.back() != pal::Path::getSeparator()) { + curPath += pal::Path::getSeparator(); + } + } + + // add to vector + pal::FileOp::FilenamePartsType_t filenameParts; + if (pal::FileOp::getFileInfo(curPath, filenameParts)) { + filenamePartsList.push_back(filenameParts); + } + } + + free(namelist); + } + + return true; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getFileInfoList +//--------------------------------------------------------------------------- +bool pal::FileOp::getFileInfoList(const std::string &path, + FilenamePartsListType_t &filenamePartsList) { + return getFileInfoListRecursiveImpl(path, filenamePartsList, false, 1); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getFileInfoListRecursive +//--------------------------------------------------------------------------- +bool pal::FileOp::getFileInfoListRecursive( + const std::string &path, FilenamePartsListType_t &filenamePartsList, + const bool ignoreDirs) { + return getFileInfoListRecursiveImpl(path, filenamePartsList, ignoreDirs, + std::numeric_limits::max()); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getAbsolutePath +//--------------------------------------------------------------------------- +std::string 
pal::FileOp::getAbsolutePath(const std::string &path) { + // NOTE: This implementation is broken currently when a path with + // non-existant components is passed! NEO-19723 was created to address. + char absPath[PATH_MAX + 1] = {0}; + + if (realpath(path.c_str(), absPath) == NULL) { + DEBUG_MSG("GetAbsolute path fail! Error code : %d", errno); + return std::string(); + } + return std::string(absPath); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::setCWD +//--------------------------------------------------------------------------- +bool pal::FileOp::setCurrentWorkingDirectory(const std::string &workingDir) { + return chdir(workingDir.c_str()) == 0; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getDirectory +//--------------------------------------------------------------------------- +std::string pal::FileOp::getDirectory(const std::string &file) { + std::string rc = file; + size_t offset = file.find_last_of(Path::getSeparator()); + if (offset != std::string::npos) { + rc = file.substr(0, offset); + } + return rc; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getFileName +//--------------------------------------------------------------------------- +std::string pal::FileOp::getFileName(const std::string &file) { + std::string rc = file; + size_t offset = file.find_last_of(Path::getSeparator()); + if (offset != std::string::npos) { + rc = file.substr(offset + 1); // +1 to skip path separator + } + return rc; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::hasFileExtension +//--------------------------------------------------------------------------- +bool pal::FileOp::hasFileExtension(const std::string &file) { + FilenamePartsType_t parts; + getFileInfo(file, parts); + + return !parts.extension.empty(); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getCWD +//--------------------------------------------------------------------------- +std::string pal::FileOp::getCurrentWorkingDirectory() { + char buffer[PATH_MAX + 1]; + buffer[0] = '\0'; + + // If there is any failure return empty string. It is technically possible + // to handle paths exceeding PATH_MAX on some flavors of *nix but platforms + // like Android (Bionic) do no provide such capability. For consistency we + // will not handle extra long path names. 
+ if (nullptr == getcwd(buffer, PATH_MAX)) { + return std::string(); + } else { + return std::string(buffer); + } +} + +//--------------------------------------------------------------------------- +// pal::FileOp::partsToString +//--------------------------------------------------------------------------- +std::string +pal::FileOp::partsToString(const FilenamePartsType_t &filenameParts) { + std::string path; + + if (!filenameParts.directory.empty()) { + path += filenameParts.directory; + path += Path::getSeparator(); + } + if (!filenameParts.basename.empty()) { + path += filenameParts.basename; + } + if (!filenameParts.extension.empty()) { + path += "."; + path += filenameParts.extension; + } + return path; +} diff --git a/nntrainer/npu/qnn/PAL/src/linux/Path.cpp b/nntrainer/npu/qnn/PAL/src/linux/Path.cpp new file mode 100644 index 000000000..bc40117d0 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/Path.cpp @@ -0,0 +1,48 @@ +//============================================================================== +// +// Copyright (c) 2008-2014, 2015, 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include + +#include +#ifndef PATH_MAX +#include +#endif + +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +char pal::Path::getSeparator() { return '/'; } + +std::string pal::Path::combine(const std::string &s1, const std::string &s2) { + std::stringstream ss; + ss << s1; + if (s1.size() > 0 && s1[s1.size() - 1] != getSeparator()) { + ss << getSeparator(); + } + ss << s2; + return ss.str(); +} + +std::string pal::Path::getDirectoryName(const std::string &path) { + std::string rc = path; + size_t index = path.find_last_of(pal::Path::getSeparator()); + if (index != std::string::npos) { + rc = path.substr(0, index); + } + return rc; +} + +std::string pal::Path::getAbsolute(const std::string &path) { + // Functionality was duplicated of function in FileOp + // Just call that function directly instead + return pal::FileOp::getAbsolutePath(path); +} + +bool pal::Path::isAbsolute(const std::string &path) { + return path.size() > 0 && path[0] == getSeparator(); +} diff --git a/nntrainer/npu/qnn/QNN.hpp b/nntrainer/npu/qnn/QNN.hpp new file mode 100644 index 000000000..3f61030b1 --- /dev/null +++ b/nntrainer/npu/qnn/QNN.hpp @@ -0,0 +1,37 @@ +//============================================================================== +// +// Copyright (c) 2020-2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
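// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// composing a library path with pal::Path, as implemented above. combine()
// inserts the separator only when needed, and isAbsolute() simply checks for
// a leading '/'. The directory and file names are placeholders.
#include <string>
#include "PAL/Path.hpp"

static std::string resolveLib(const std::string &dir, const std::string &lib) {
  std::string path = pal::Path::combine(dir, lib);  // e.g. "/vendor/lib64" + "libQnnHtp.so"
  if (!pal::Path::isAbsolute(path)) {
    path = pal::Path::getAbsolute(path);  // delegates to FileOp::getAbsolutePath()
  }
  return path;
}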
+// +//============================================================================== + +#pragma once + +#include "QnnInterface.h" +#include "System/QnnSystemInterface.h" +#include "WrapperUtils/QnnWrapperUtils.hpp" + +namespace qnn { +namespace tools { +namespace sample_app { + +// Graph Related Function Handle Types +typedef qnn_wrapper_api::ModelError_t (*ComposeGraphsFnHandleType_t)( + Qnn_BackendHandle_t, QNN_INTERFACE_VER_TYPE, Qnn_ContextHandle_t, + const qnn_wrapper_api::GraphConfigInfo_t **, const uint32_t, + qnn_wrapper_api::GraphInfo_t ***, uint32_t *, bool, QnnLog_Callback_t, + QnnLog_Level_t); +typedef qnn_wrapper_api::ModelError_t (*FreeGraphInfoFnHandleType_t)( + qnn_wrapper_api::GraphInfo_t ***, uint32_t); + +typedef struct QnnFunctionPointers { + ComposeGraphsFnHandleType_t composeGraphsFnHandle; + FreeGraphInfoFnHandleType_t freeGraphInfoFnHandle; + QNN_INTERFACE_VER_TYPE qnnInterface; + QNN_SYSTEM_INTERFACE_VER_TYPE qnnSystemInterface; +} QnnFunctionPointers; + +} // namespace sample_app +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/QnnTypeMacros.hpp b/nntrainer/npu/qnn/QnnTypeMacros.hpp new file mode 100644 index 000000000..bcd561ac3 --- /dev/null +++ b/nntrainer/npu/qnn/QnnTypeMacros.hpp @@ -0,0 +1,546 @@ +//============================================================================== +// +// Copyright (c) 2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include +#include +#include + +#include "QnnTypes.h" +#include "WrapperUtils/QnnWrapperUtils.hpp" + +namespace qnn_wrapper_api { + +/** + * @brief Verifies the tensor object passed is of supported Qnn_Tensor_t API + * version + * + * @param[in] tensor Qnn_Tensor_t object to validate + * + * @return Error code + */ +inline ModelError_t validateTensorVersion(Qnn_Tensor_t tensor) { + if (tensor.version != QNN_TENSOR_VERSION_1) { + PRINT_ERROR( + "validateTensorVersion() tensor %s, got unsupported version %d.", + tensor.v1.name, tensor.version); + return MODEL_TENSOR_ERROR; + } + return MODEL_NO_ERROR; +} + +/** + * @brief Verifies the tensor object passed is of supported Qnn_OpConfig_t API + * version + * + * @param[in] tensor Qnn_OpConfig_t object to validate + * + * @return Error code + */ +inline ModelError_t validateOpConfigVersion(Qnn_OpConfig_t opConfig) { + if (opConfig.version != QNN_OPCONFIG_VERSION_1) { + PRINT_ERROR("validateOpConfigVersion() op %s, got unsupported version %d.", + opConfig.v1.name, opConfig.version); + return MODEL_NODES_ERROR; + } + return MODEL_NO_ERROR; +} + +inline const char *getQnnOpConfigName(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.name; + } + return nullptr; +} + +inline const char *getQnnOpConfigName(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigName(*opConfig); +} + +inline const char *getQnnOpConfigPackageName(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.packageName; + } + return nullptr; +} + +inline const char *getQnnOpConfigPackageName(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigPackageName(*opConfig); +} + +inline const char *getQnnOpConfigTypeName(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.typeName; + } + return nullptr; +} + +inline const char 
*getQnnOpConfigTypeName(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigTypeName(*opConfig); +} + +inline uint32_t getQnnOpConfigNumParams(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfParams; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumParams(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigNumParams(*opConfig); +} + +inline const Qnn_Param_t *getQnnOpConfigParams(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.params; + } + return nullptr; +} + +inline const Qnn_Param_t *getQnnOpConfigParams(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigParams(*opConfig); +} + +inline uint32_t getQnnOpConfigNumInputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfInputs; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumInputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigNumInputs(*opConfig); +} + +inline const Qnn_Tensor_t * +getQnnOpConfigInputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.inputTensors; + } + return nullptr; +} + +inline const Qnn_Tensor_t * +getQnnOpConfigInputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigInputs(*opConfig); +} + +inline uint32_t getQnnOpConfigNumOutputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfOutputs; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumOutputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigNumOutputs(*opConfig); +} + +inline const Qnn_Tensor_t * +getQnnOpConfigOutputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.outputTensors; + } + return nullptr; +} + +inline const Qnn_Tensor_t * +getQnnOpConfigOutputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigOutputs(*opConfig); +} + +inline void setQnnOpConfigName(Qnn_OpConfig_t &opConfig, const char *name) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.name = name; + } +} + +inline void setQnnOpConfigName(Qnn_OpConfig_t *opConfig, const char *name) { + setQnnOpConfigName(*opConfig, name); +} + +inline void setQnnOpConfigPackageName(Qnn_OpConfig_t &opConfig, + const char *packageName) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.packageName = packageName; + } +} + +inline void setQnnOpConfigPackageName(Qnn_OpConfig_t *opConfig, + const char *packageName) { + setQnnOpConfigPackageName(*opConfig, packageName); +} + +inline void setQnnOpConfigTypeName(Qnn_OpConfig_t &opConfig, + const char *typeName) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.typeName = typeName; + } +} + +inline void setQnnOpConfigTypeName(Qnn_OpConfig_t *opConfig, + const char *typeName) { + setQnnOpConfigTypeName(*opConfig, typeName); +} + +inline void setQnnOpConfigParams(Qnn_OpConfig_t &opConfig, uint32_t numOfParams, + Qnn_Param_t *params) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfParams = numOfParams; + opConfig.v1.params = params; + } +} + +inline void setQnnOpConfigParams(Qnn_OpConfig_t *opConfig, uint32_t numOfParams, + Qnn_Param_t *params) { + setQnnOpConfigParams(*opConfig, numOfParams, params); +} + +inline void setQnnOpConfigInputs(Qnn_OpConfig_t &opConfig, uint32_t numOfInputs, + Qnn_Tensor_t *inputTensors) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { 
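+    // The input count and the tensor array are assigned together so the v1 layout stays self-consistent.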
+ opConfig.v1.numOfInputs = numOfInputs; + opConfig.v1.inputTensors = inputTensors; + } +} + +inline void setQnnOpConfigInputs(Qnn_OpConfig_t *opConfig, uint32_t numOfInputs, + Qnn_Tensor_t *inputTensors) { + setQnnOpConfigInputs(*opConfig, numOfInputs, inputTensors); +} + +inline void setQnnOpConfigOutputs(Qnn_OpConfig_t &opConfig, + uint32_t numOfOutputs, + Qnn_Tensor_t *outputTensors) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfOutputs = numOfOutputs; + opConfig.v1.outputTensors = outputTensors; + } +} + +inline void setQnnOpConfigOutputs(Qnn_OpConfig_t *opConfig, + uint32_t numOfOutputs, + Qnn_Tensor_t *outputTensors) { + setQnnOpConfigOutputs(*opConfig, numOfOutputs, outputTensors); +} + +// inline Qnn_OpConfig_t + +inline uint32_t getQnnTensorId(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.id; + } + return 0u; +} + +inline uint32_t getQnnTensorId(const Qnn_Tensor_t *tensor) { + return getQnnTensorId(*tensor); +} + +inline const char *getQnnTensorName(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.name; + } + return 0u; +} + +inline const char *getQnnTensorName(const Qnn_Tensor_t *tensor) { + return getQnnTensorName(*tensor); +} + +inline Qnn_TensorType_t getQnnTensorType(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.type; + } + return QNN_TENSOR_TYPE_UNDEFINED; +} + +inline Qnn_TensorType_t getQnnTensorType(const Qnn_Tensor_t *tensor) { + return getQnnTensorType(*tensor); +} + +inline Qnn_TensorDataFormat_t +getQnnTensorDataFormat(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.dataFormat; + } + return QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER; +} + +inline Qnn_TensorDataFormat_t +getQnnTensorDataFormat(const Qnn_Tensor_t *tensor) { + return getQnnTensorDataFormat(*tensor); +} + +inline Qnn_DataType_t getQnnTensorDataType(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.dataType; + } + return QNN_DATATYPE_UNDEFINED; +} + +inline Qnn_DataType_t getQnnTensorDataType(const Qnn_Tensor_t *tensor) { + return getQnnTensorDataType(*tensor); +} + +inline Qnn_QuantizeParams_t +getQnnTensorQuantParams(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.quantizeParams; + } + return QNN_QUANTIZE_PARAMS_INIT; +} + +inline Qnn_QuantizeParams_t +getQnnTensorQuantParams(const Qnn_Tensor_t *tensor) { + return getQnnTensorQuantParams(*tensor); +} + +inline uint32_t getQnnTensorRank(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.rank; + } + return 0u; +} + +inline uint32_t getQnnTensorRank(const Qnn_Tensor_t *tensor) { + return getQnnTensorRank(*tensor); +} + +inline uint32_t *getQnnTensorDimensions(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.dimensions; + } + return nullptr; +} + +inline uint32_t *getQnnTensorDimensions(const Qnn_Tensor_t *tensor) { + return getQnnTensorDimensions(*tensor); +} + +inline Qnn_TensorMemType_t getQnnTensorMemType(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.memType; + } + return QNN_TENSORMEMTYPE_UNDEFINED; +} + +inline Qnn_TensorMemType_t getQnnTensorMemType(const Qnn_Tensor_t *tensor) { + return getQnnTensorMemType(*tensor); +} + +inline Qnn_ClientBuffer_t getQnnTensorClientBuf(const Qnn_Tensor_t &tensor) { 
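+  // Only the QNN_TENSOR_VERSION_1 layout is handled by these accessors; any other version falls back to an empty QNN_CLIENT_BUFFER_INIT.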
+ if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.clientBuf; + } + return QNN_CLIENT_BUFFER_INIT; +} + +inline Qnn_ClientBuffer_t getQnnTensorClientBuf(const Qnn_Tensor_t *tensor) { + return getQnnTensorClientBuf(*tensor); +} + +inline Qnn_MemHandle_t getQnnTensorMemHandle(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.memHandle; + } + return nullptr; +} + +inline Qnn_MemHandle_t getQnnTensorMemHandle(const Qnn_Tensor_t *tensor) { + return getQnnTensorMemHandle(*tensor); +} + +inline void setQnnTensorId(Qnn_Tensor_t &tensor, uint32_t id) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.id = id; + } +} + +inline void setQnnTensorId(Qnn_Tensor_t *tensor, uint32_t id) { + setQnnTensorId(*tensor, id); +} + +inline void setQnnTensorName(Qnn_Tensor_t &tensor, const char *name) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.name = name; + } +} + +inline void setQnnTensorName(Qnn_Tensor_t *tensor, const char *name) { + setQnnTensorName(*tensor, name); +} + +inline void setQnnTensorType(Qnn_Tensor_t &tensor, Qnn_TensorType_t type) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.type = type; + } +} + +inline void setQnnTensorType(Qnn_Tensor_t *tensor, Qnn_TensorType_t type) { + setQnnTensorType(*tensor, type); +} + +inline void setQnnTensorDataFormat(Qnn_Tensor_t &tensor, + Qnn_TensorDataFormat_t format) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.dataFormat = format; + } +} + +inline void setQnnTensorDataFormat(Qnn_Tensor_t *tensor, + Qnn_TensorDataFormat_t format) { + setQnnTensorDataFormat(*tensor, format); +} + +inline void setQnnTensorDataType(Qnn_Tensor_t &tensor, + Qnn_DataType_t dataType) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.dataType = dataType; + } +} + +inline void setQnnTensorDataType(Qnn_Tensor_t *tensor, + Qnn_DataType_t dataType) { + setQnnTensorDataType(*tensor, dataType); +} + +inline void setQnnTensorQuantParams(Qnn_Tensor_t &tensor, + Qnn_QuantizeParams_t params) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.quantizeParams = params; + } +} + +inline void setQnnTensorQuantParams(Qnn_Tensor_t *tensor, + Qnn_QuantizeParams_t params) { + setQnnTensorQuantParams(*tensor, params); +} + +inline void setQnnTensorRank(Qnn_Tensor_t &tensor, uint32_t rank) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.rank = rank; + } +} + +inline void setQnnTensorRank(Qnn_Tensor_t *tensor, uint32_t rank) { + setQnnTensorRank(*tensor, rank); +} + +inline void setQnnTensorDimensions(Qnn_Tensor_t &tensor, uint32_t *dims) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.dimensions = dims; + } +} + +inline void setQnnTensorDimensions(Qnn_Tensor_t *tensor, uint32_t *dims) { + setQnnTensorDimensions(*tensor, dims); +} + +inline void setQnnTensorMemType(Qnn_Tensor_t &tensor, + Qnn_TensorMemType_t memType) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.memType = memType; + } +} + +inline void setQnnTensorMemType(Qnn_Tensor_t *tensor, + Qnn_TensorMemType_t memType) { + setQnnTensorMemType(*tensor, memType); +} + +inline void setQnnTensorClientBuf(Qnn_Tensor_t &tensor, + Qnn_ClientBuffer_t clientBuf) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.clientBuf = clientBuf; + } +} + +inline void setQnnTensorClientBuf(Qnn_Tensor_t *tensor, + Qnn_ClientBuffer_t clientBuf) { + setQnnTensorClientBuf(*tensor, clientBuf); +} + +inline void setQnnTensorMemHandle(Qnn_Tensor_t &tensor, + Qnn_MemHandle_t 
handle) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.memHandle = handle; + } +} + +inline void setQnnTensorMemHandle(Qnn_Tensor_t *tensor, + Qnn_MemHandle_t handle) { + setQnnTensorMemHandle(*tensor, handle); +} + +// Validation +#define VALIDATE_TENSOR_VERSION(tensor, err) \ + VALIDATE(validateTensorVersion(tensor), err) +#define VALIDATE_OP_CONFIG_VERSION(op, err) \ + VALIDATE(validateOpConfigVersion(op), err) + +// Accessors for QNN Op Config +#define QNN_OP_CFG_GET_NAME(opConfig) getQnnOpConfigName(opConfig) +#define QNN_OP_CFG_GET_PACKAGE_NAME(opConfig) \ + getQnnOpConfigPackageName(opConfig) +#define QNN_OP_CFG_GET_TYPE_NAME(opConfig) getQnnOpConfigTypeName(opConfig) +#define QNN_OP_CFG_GET_NUM_PARAMS(opConfig) getQnnOpConfigNumParams(opConfig) +#define QNN_OP_CFG_GET_PARAMS(opConfig) getQnnOpConfigParams(opConfig) +#define QNN_OP_CFG_GET_NUM_INPUTS(opConfig) getQnnOpConfigNumInputs(opConfig) +#define QNN_OP_CFG_GET_INPUTS(opConfig) getQnnOpConfigInputs(opConfig) +#define QNN_OP_CFG_GET_NUM_OUTPUTS(opConfig) getQnnOpConfigNumOutputs(opConfig) +#define QNN_OP_CFG_GET_OUTPUTS(opConfig) getQnnOpConfigOutputs(opConfig) + +// Modifiers for QNN Op Config +#define QNN_OP_CFG_SET_NAME(opConfig, value) setQnnOpConfigName(opConfig, value) +#define QNN_OP_CFG_SET_PACKAGE_NAME(opConfig, value) \ + setQnnOpConfigPackageName(opConfig, value) +#define QNN_OP_CFG_SET_TYPE_NAME(opConfig, value) \ + setQnnOpConfigTypeName(opConfig, value) +#define QNN_OP_CFG_SET_PARAMS(opConfig, numOfParams, params) \ + setQnnOpConfigParams(opConfig, numOfParams, params) +#define QNN_OP_CFG_SET_INPUTS(opConfig, numOfInputs, inputTensors) \ + setQnnOpConfigInputs(opConfig, numOfInputs, inputTensors) +#define QNN_OP_CFG_SET_OUTPUTS(opConfig, numOfOutputs, outputTensors) \ + setQnnOpConfigOutputs(opConfig, numOfOutputs, outputTensors) + +// Accessors for QNN Tensor +#define QNN_TENSOR_GET_ID(tensor) getQnnTensorId(tensor) +#define QNN_TENSOR_GET_NAME(tensor) getQnnTensorName(tensor) +#define QNN_TENSOR_GET_TYPE(tensor) getQnnTensorType(tensor) +#define QNN_TENSOR_GET_DATA_FORMAT(tensor) getQnnTensorDataFormat(tensor) +#define QNN_TENSOR_GET_DATA_TYPE(tensor) getQnnTensorDataType(tensor) +#define QNN_TENSOR_GET_QUANT_PARAMS(tensor) getQnnTensorQuantParams(tensor) +#define QNN_TENSOR_GET_RANK(tensor) getQnnTensorRank(tensor) +#define QNN_TENSOR_GET_DIMENSIONS(tensor) getQnnTensorDimensions(tensor) +#define QNN_TENSOR_GET_MEM_TYPE(tensor) getQnnTensorMemType(tensor) +#define QNN_TENSOR_GET_CLIENT_BUF(tensor) getQnnTensorClientBuf(tensor) +#define QNN_TENSOR_GET_MEM_HANDLE(tensor) getQnnTensorMemHandle(tensor) + +// Modifiers for QNN Tensor +#define QNN_TENSOR_SET_ID(tensor, value) setQnnTensorId(tensor, value) +#define QNN_TENSOR_SET_NAME(tensor, value) setQnnTensorName(tensor, value) +#define QNN_TENSOR_SET_TYPE(tensor, value) setQnnTensorType(tensor, value) +#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value) \ + setQnnTensorDataFormat(tensor, value) +#define QNN_TENSOR_SET_DATA_TYPE(tensor, value) \ + setQnnTensorDataType(tensor, value) +#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) \ + setQnnTensorQuantParams(tensor, value) +#define QNN_TENSOR_SET_RANK(tensor, value) setQnnTensorRank(tensor, value) +#define QNN_TENSOR_SET_DIMENSIONS(tensor, value) \ + setQnnTensorDimensions(tensor, value) +#define QNN_TENSOR_SET_MEM_TYPE(tensor, value) \ + setQnnTensorMemType(tensor, value) +#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) \ + setQnnTensorClientBuf(tensor, value) +#define 
QNN_TENSOR_SET_MEM_HANDLE(tensor, value) \ + setQnnTensorMemHandle(tensor, value) + +} // namespace qnn_wrapper_api diff --git a/nntrainer/npu/qnn/Utils/BuildId.hpp b/nntrainer/npu/qnn/Utils/BuildId.hpp new file mode 100644 index 000000000..9e6e29152 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/BuildId.hpp @@ -0,0 +1,19 @@ +//============================================================================== +// +// Copyright (c) 2020 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +namespace qnn { +namespace tools { + +inline std::string getBuildId() { + return std::string("v2.16.4.231110151339_60331"); +} + +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/DataUtil.cpp b/nntrainer/npu/qnn/Utils/DataUtil.cpp new file mode 100644 index 000000000..885520dc6 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DataUtil.cpp @@ -0,0 +1,417 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== +#include +#include +#include +#include +#include + +#include "DataUtil.hpp" +//#include "Log.h" +#include "Logger.hpp" +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +#define __fp16 _Float16 + +using namespace qnn; +using namespace qnn::tools; + +std::tuple +datautil::getDataTypeSizeInBytes(Qnn_DataType_t dataType) { + if (g_dataTypeToSize.find(dataType) == g_dataTypeToSize.end()) { + // MLLM_LOG_ERROR_LEGACY("Invalid qnn data type provided"); + return std::make_tuple(StatusCode::INVALID_DATA_TYPE, 0); + } + return std::make_tuple(StatusCode::SUCCESS, + g_dataTypeToSize.find(dataType)->second); +} + +size_t datautil::calculateElementCount(std::vector dims) { + if (dims.size() == 0) { + return 0; + } + return std::accumulate(dims.begin(), dims.end(), 1, + std::multiplies()); +} + +std::tuple +datautil::calculateLength(std::vector dims, Qnn_DataType_t dataType) { + if (dims.size() == 0) { + // MLLM_LOG_ERROR_LEGACY("dims.size() is zero"); + return std::make_tuple(StatusCode::INVALID_DIMENSIONS, 0); + } + StatusCode returnStatus{StatusCode::SUCCESS}; + size_t length{0}; + std::tie(returnStatus, length) = getDataTypeSizeInBytes(dataType); + if (StatusCode::SUCCESS != returnStatus) { + return std::make_tuple(returnStatus, 0); + } + length *= calculateElementCount(dims); + return std::make_tuple(StatusCode::SUCCESS, length); +} + +datautil::StatusCode datautil::readDataFromFile(std::string filePath, + std::vector dims, + Qnn_DataType_t dataType, + uint8_t *buffer) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + std::ifstream in(filePath, std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", filePath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + in.seekg(0, in.end); + const size_t length = in.tellg(); + in.seekg(0, in.beg); + StatusCode err{StatusCode::SUCCESS}; + size_t l{0}; + std::tie(err, l) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return err; + } + if (length != l) { + // MLLM_LOG_ERROR_LEGACY("Input file %s: file size in bytes (%d), should be + // equal to: %d", 
filePath.c_str(), length, l); + return StatusCode::DATA_SIZE_MISMATCH; + } + + if (!in.read(reinterpret_cast(buffer), length)) { + // MLLM_LOG_ERROR_LEGACY("Failed to read the contents of: %s", + // filePath.c_str()); + return StatusCode::DATA_READ_FAIL; + } + return StatusCode::SUCCESS; +} + +datautil::ReadBatchDataRetType_t datautil::readBatchDataAndUpdateQueue( + std::queue &filePaths, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return std::make_tuple(StatusCode::INVALID_BUFFER, 0, 0); + } + StatusCode err{StatusCode::SUCCESS}; + size_t l{0}; + std::tie(err, l) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return std::make_tuple(err, 0, 0); + } + size_t numInputsCopied = 0; + size_t numBatchSize = 0; + size_t totalLength = 0; + do { + if (filePaths.empty()) { + numBatchSize += (l - totalLength) / (totalLength / numBatchSize); + // pad the vector with zeros + memset(buffer + totalLength, 0, (l - totalLength) * sizeof(char)); + totalLength = l; + } else { + std::ifstream in(filePaths.front(), std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", + // filePaths.front().c_str()); + return std::make_tuple(StatusCode::FILE_OPEN_FAIL, numInputsCopied, + numBatchSize); + } + in.seekg(0, in.end); + const size_t length = in.tellg(); + in.seekg(0, in.beg); + if ((l % length) != 0 || length > l || length == 0) { + // MLLM_LOG_ERROR_LEGACY("Input file %s: file size in bytes (%d), should + // be multiples of: %d", + // filePaths.front().c_str(), + // length, + // l); + return std::make_tuple(StatusCode::DATA_SIZE_MISMATCH, numInputsCopied, + numBatchSize); + } + if (!in.read( + reinterpret_cast(buffer + (numInputsCopied * length)), + length)) { + // MLLM_LOG_ERROR_LEGACY("Failed to read the contents of: %s", + // filePaths.front().c_str()); + return std::make_tuple(StatusCode::DATA_READ_FAIL, numInputsCopied, + numBatchSize); + } + QNN_VERBOSE("Return from readDataFromFile()"); + totalLength += length; + numInputsCopied += 1; + numBatchSize += 1; + filePaths.pop(); + } + } while (totalLength < l); + return std::make_tuple(StatusCode::SUCCESS, numInputsCopied, numBatchSize); +} + +std::tuple +datautil::getFileSize(std::string filePath) { + std::ifstream in(filePath, std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", filePath.c_str()); + return std::make_tuple(StatusCode::FILE_OPEN_FAIL, 0); + } + in.seekg(0, in.end); + const size_t length = in.tellg(); + in.seekg(0, in.beg); + return std::make_tuple(StatusCode::SUCCESS, length); +} + +datautil::StatusCode datautil::readBinaryFromFile(std::string filePath, + uint8_t *buffer, + size_t bufferSize) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + std::ifstream in(filePath, std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", filePath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + if (!in.read(reinterpret_cast(buffer), bufferSize)) { + // MLLM_LOG_ERROR_LEGACY("Failed to read the contents of: %s", + // filePath.c_str()); + return StatusCode::DATA_READ_FAIL; + } + return StatusCode::SUCCESS; +} + +datautil::StatusCode datautil::writeDataToFile(std::string fileDir, + std::string fileName, + std::vector dims, + Qnn_DataType_t dataType, + uint8_t *buffer) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is 
nullptr"); + return StatusCode::INVALID_BUFFER; + } + if (!pal::Directory::makePath(fileDir)) { + // MLLM_LOG_ERROR_LEGACY("Failed to create output directory: %s", + // fileDir.c_str()); + return StatusCode::DIRECTORY_CREATE_FAIL; + } + const std::string outputPath(fileDir + pal::Path::getSeparator() + fileName); + std::ofstream os(outputPath, std::ofstream::binary); + if (!os) { + // MLLM_LOG_ERROR_LEGACY("Failed to open output file for writing: %s", + // outputPath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + StatusCode err{StatusCode::SUCCESS}; + size_t length{0}; + std::tie(err, length) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return err; + } + for (size_t l = 0; l < length; l++) { + os.write(reinterpret_cast(&(*(buffer + l))), 1); + } + return StatusCode::SUCCESS; +} + +datautil::StatusCode +datautil::writeBatchDataToFile(std::vector fileDirs, + std::string fileName, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer, + const size_t batchSize) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + StatusCode err{StatusCode::SUCCESS}; + size_t length{0}; + std::tie(err, length) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return err; + } + auto outputSize = (length / batchSize); + for (size_t batchIndex = 0; batchIndex < fileDirs.size(); batchIndex++) { + std::string fileDir = fileDirs[batchIndex]; + if (!pal::Directory::makePath(fileDir)) { + // MLLM_LOG_ERROR_LEGACY("Failed to create output directory: %s", + // fileDir.c_str()); + return StatusCode::DIRECTORY_CREATE_FAIL; + } + const std::string outputPath(fileDir + pal::Path::getSeparator() + + fileName); + std::ofstream os(outputPath, std::ofstream::binary); + if (!os) { + // MLLM_LOG_ERROR_LEGACY("Failed to open output file for writing: %s", + // outputPath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + for (size_t l = 0; l < outputSize; l++) { + size_t bufferIndex = l + (batchIndex * outputSize); + os.write(reinterpret_cast(&(*(buffer + bufferIndex))), 1); + } + } + return StatusCode::SUCCESS; +} + +datautil::StatusCode datautil::writeBinaryToFile(std::string fileDir, + std::string fileName, + uint8_t *buffer, + size_t bufferSize) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + if (!pal::Directory::makePath(fileDir)) { + // MLLM_LOG_ERROR_LEGACY("Failed to create output directory: %s", + // fileDir.c_str()); + return StatusCode::DIRECTORY_CREATE_FAIL; + } + const std::string outputPath(fileDir + pal::Path::getSeparator() + fileName); + std::ofstream os(outputPath, std::ofstream::binary); + if (!os) { + // MLLM_LOG_ERROR_LEGACY("Failed to open output file for writing: %s", + // outputPath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + os.write(reinterpret_cast(buffer), bufferSize); + return StatusCode::SUCCESS; +} + +template +datautil::StatusCode datautil::floatToTfN(T_QuantType *out, float *in, + int32_t offset, float scale, + size_t numElements) { + static_assert(std::is_unsigned::value, + "floatToTfN supports unsigned only!"); + + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + + size_t dataTypeSizeInBytes = sizeof(T_QuantType); + size_t bitWidth = dataTypeSizeInBytes * g_bitsPerByte; + double trueBitWidthMax = pow(2, bitWidth) - 1; + double encodingMin = offset * scale; + double encodingMax = 
(trueBitWidthMax + offset) * scale; + double encodingRange = encodingMax - encodingMin; + + for (size_t i = 0; i < numElements; ++i) { + int quantizedValue = + round(trueBitWidthMax * (in[i] - encodingMin) / encodingRange); + if (quantizedValue < 0) + quantizedValue = 0; + else if (quantizedValue > (int)trueBitWidthMax) + quantizedValue = (int)trueBitWidthMax; + out[i] = static_cast(quantizedValue); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::floatToTfN(uint8_t *out, float *in, int32_t offset, + float scale, size_t numElements); + +template datautil::StatusCode +datautil::floatToTfN(uint16_t *out, float *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode datautil::tfNToFloat(float *out, T_QuantType *in, + int32_t offset, float scale, + size_t numElements) { + static_assert(std::is_unsigned::value, + "tfNToFloat supports unsigned only!"); + + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + for (size_t i = 0; i < numElements; i++) { + double quantizedValue = static_cast(in[i]); + double offsetDouble = static_cast(offset); + out[i] = static_cast((quantizedValue + offsetDouble) * scale); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::tfNToFloat(float *out, uint8_t *in, int32_t offset, + float scale, size_t numElements); + +template datautil::StatusCode +datautil::tfNToFloat(float *out, uint16_t *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode datautil::castToFloat(float *out, T_QuantType *in, + size_t numElements) { + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + for (size_t i = 0; i < numElements; i++) { + out[i] = static_cast(in[i]); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::castToFloat(float *out, uint8_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, uint16_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, uint32_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, int8_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, int16_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, int32_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat<__fp16>(float *out, __fp16 *in, size_t numElements); + +template +datautil::StatusCode datautil::castFromFloat(T_QuantType *out, float *in, + size_t numElements) { + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + for (size_t i = 0; i < numElements; i++) { + out[i] = static_cast(in[i]); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::castFromFloat(uint8_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(uint16_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(uint32_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(int8_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(int16_t *out, float *in, size_t numElements); + +template datautil::StatusCode 
+datautil::castFromFloat(int32_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat<__fp16>(__fp16 *out, float *in, size_t numElements); diff --git a/nntrainer/npu/qnn/Utils/DataUtil.hpp b/nntrainer/npu/qnn/Utils/DataUtil.hpp new file mode 100644 index 000000000..31ed12064 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DataUtil.hpp @@ -0,0 +1,108 @@ +//============================================================================== +// +// Copyright (c) 2019-2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== +#pragma once + +#include +#include +#include + +#include "QnnTypes.h" + +namespace qnn { +namespace tools { +namespace datautil { +enum class StatusCode { + SUCCESS, + DATA_READ_FAIL, + DATA_WRITE_FAIL, + FILE_OPEN_FAIL, + DIRECTORY_CREATE_FAIL, + INVALID_DIMENSIONS, + INVALID_DATA_TYPE, + DATA_SIZE_MISMATCH, + INVALID_BUFFER, +}; + +const size_t g_bitsPerByte = 8; + +using ReadBatchDataRetType_t = std::tuple; + +std::tuple getDataTypeSizeInBytes(Qnn_DataType_t dataType); + +std::tuple calculateLength(std::vector dims, + Qnn_DataType_t dataType); + +size_t calculateElementCount(std::vector dims); + +std::tuple getFileSize(std::string filePath); + +StatusCode readDataFromFile(std::string filePath, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer); + +/* + * Read data in batches from Queue and try to matches the model input's + * batches. If the queue is empty while matching the batch size of model, + * pad the remaining buffer with zeros + * @param filePathsQueue image paths queue + * @param dims model input dimensions + * @param dataType to create input buffer from file + * @param buffer to fill the input image data + * + * @return ReadBatchDataRetType_t returns numFilesCopied and batchSize along + * with status + */ +ReadBatchDataRetType_t +readBatchDataAndUpdateQueue(std::queue &filePaths, + std::vector dims, Qnn_DataType_t dataType, + uint8_t *buffer); + +StatusCode readBinaryFromFile(std::string filePath, uint8_t *buffer, + size_t bufferSize); + +StatusCode writeDataToFile(std::string fileDir, std::string fileName, + std::vector dims, Qnn_DataType_t dataType, + uint8_t *buffer); + +StatusCode writeBatchDataToFile(std::vector fileDirs, + std::string fileName, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer, + const size_t batchSize); + +StatusCode writeBinaryToFile(std::string fileDir, std::string fileName, + uint8_t *buffer, size_t bufferSize); + +template +datautil::StatusCode floatToTfN(T_QuantType *out, float *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode tfNToFloat(float *out, T_QuantType *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode castToFloat(float *out, T_QuantType *in, + size_t numElements); + +template +datautil::StatusCode castFromFloat(T_QuantType *out, float *in, + size_t numElements); + +const std::map g_dataTypeToSize = { + {QNN_DATATYPE_INT_8, 1}, {QNN_DATATYPE_INT_16, 2}, + {QNN_DATATYPE_INT_32, 4}, {QNN_DATATYPE_INT_64, 8}, + {QNN_DATATYPE_UINT_8, 1}, {QNN_DATATYPE_UINT_16, 2}, + {QNN_DATATYPE_UINT_32, 4}, {QNN_DATATYPE_UINT_64, 8}, + {QNN_DATATYPE_FLOAT_16, 2}, {QNN_DATATYPE_FLOAT_32, 4}, + {QNN_DATATYPE_FLOAT_64, 8}, {QNN_DATATYPE_SFIXED_POINT_8, 1}, + {QNN_DATATYPE_SFIXED_POINT_16, 2}, {QNN_DATATYPE_SFIXED_POINT_32, 4}, + {QNN_DATATYPE_UFIXED_POINT_8, 1}, 
{QNN_DATATYPE_UFIXED_POINT_16, 2}, + {QNN_DATATYPE_UFIXED_POINT_32, 4}, {QNN_DATATYPE_BOOL_8, 1}, +}; +} // namespace datautil +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/DynamicLoadUtil.cpp b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.cpp new file mode 100644 index 000000000..8dbab14c9 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.cpp @@ -0,0 +1,192 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include + +#include "DynamicLoadUtil.hpp" +//#include "Log.h" +#include "Logger.hpp" +#include "PAL/DynamicLoading.hpp" + +using namespace qnn; +using namespace qnn::tools; + +typedef Qnn_ErrorHandle_t (*QnnInterfaceGetProvidersFn_t)( + const QnnInterface_t ***providerList, uint32_t *numProviders); + +typedef Qnn_ErrorHandle_t (*QnnSystemInterfaceGetProvidersFn_t)( + const QnnSystemInterface_t ***providerList, uint32_t *numProviders); + +template +static inline T resolveSymbol(void *libHandle, const char *sym) { + T ptr = (T)pal::dynamicloading::dlSym(libHandle, sym); + if (ptr == nullptr) { + // MLLM_LOG_ERROR_LEGACY("Unable to access symbol [%s]. + // pal::dynamicloading::dlError(): %s", + // sym, + // pal::dynamicloading::dlError()); + } + return ptr; +} + +dynamicloadutil::StatusCode dynamicloadutil::getQnnFunctionPointers( + std::string backendPath, std::string modelPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers, void **backendHandleRtn, + bool loadModelLib, void **modelHandleRtn) { + void *libBackendHandle = pal::dynamicloading::dlOpen( + backendPath.c_str(), + pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_GLOBAL); + if (nullptr == libBackendHandle) { + // MLLM_LOG_ERROR_LEGACY("Unable to load backend. 
+ // pal::dynamicloading::dlError(): %s", + // pal::dynamicloading::dlError()); + return StatusCode::FAIL_LOAD_BACKEND; + } + if (nullptr != backendHandleRtn) { + *backendHandleRtn = libBackendHandle; + } + // Get QNN Interface + QnnInterfaceGetProvidersFn_t getInterfaceProviders{nullptr}; + getInterfaceProviders = resolveSymbol( + libBackendHandle, "QnnInterface_getProviders"); + if (nullptr == getInterfaceProviders) { + return StatusCode::FAIL_SYM_FUNCTION; + } + QnnInterface_t **interfaceProviders{nullptr}; + uint32_t numProviders{0}; + if (QNN_SUCCESS != + getInterfaceProviders((const QnnInterface_t ***)&interfaceProviders, + &numProviders)) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (nullptr == interfaceProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers: null interface + // providers received."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (0 == numProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers: 0 interface + // providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + bool foundValidInterface{false}; + for (size_t pIdx = 0; pIdx < numProviders; pIdx++) { + if (QNN_API_VERSION_MAJOR == + interfaceProviders[pIdx]->apiVersion.coreApiVersion.major && + QNN_API_VERSION_MINOR <= + interfaceProviders[pIdx]->apiVersion.coreApiVersion.minor) { + foundValidInterface = true; + qnnFunctionPointers->qnnInterface = + interfaceProviders[pIdx]->QNN_INTERFACE_VER_NAME; + break; + } + } + if (!foundValidInterface) { + // MLLM_LOG_ERROR_LEGACY("Unable to find a valid interface."); + libBackendHandle = nullptr; + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + + if (true == loadModelLib) { + QNN_INFO("Loading model shared library ([model].so)"); + void *libModelHandle = pal::dynamicloading::dlOpen( + modelPath.c_str(), + pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_LOCAL); + if (nullptr == libModelHandle) { + // MLLM_LOG_ERROR_LEGACY("Unable to load model. + // pal::dynamicloading::dlError(): %s", pal::dynamicloading::dlError()); + return StatusCode::FAIL_LOAD_MODEL; + } + if (nullptr != modelHandleRtn) { + *modelHandleRtn = libModelHandle; + } + + std::string modelPrepareFunc = "QnnModel_composeGraphs"; + qnnFunctionPointers->composeGraphsFnHandle = + resolveSymbol( + libModelHandle, modelPrepareFunc.c_str()); + if (nullptr == qnnFunctionPointers->composeGraphsFnHandle) { + return StatusCode::FAIL_SYM_FUNCTION; + } + + std::string modelFreeFunc = "QnnModel_freeGraphsInfo"; + qnnFunctionPointers->freeGraphInfoFnHandle = + resolveSymbol( + libModelHandle, modelFreeFunc.c_str()); + if (nullptr == qnnFunctionPointers->freeGraphInfoFnHandle) { + return StatusCode::FAIL_SYM_FUNCTION; + } + } else { + QNN_INFO("Model wasn't loaded from a shared library."); + } + return StatusCode::SUCCESS; +} + +dynamicloadutil::StatusCode dynamicloadutil::getQnnSystemFunctionPointers( + std::string systemLibraryPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers) { + QNN_FUNCTION_ENTRY_LOG; + if (!qnnFunctionPointers) { + // MLLM_LOG_ERROR_LEGACY("nullptr provided for qnnFunctionPointers"); + return StatusCode::FAILURE; + } + void *systemLibraryHandle = pal::dynamicloading::dlOpen( + systemLibraryPath.c_str(), + pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_LOCAL); + if (nullptr == systemLibraryHandle) { + // MLLM_LOG_ERROR_LEGACY("Unable to load system library. 
+ // pal::dynamicloading::dlError(): %s", + // pal::dynamicloading::dlError()); + return StatusCode::FAIL_LOAD_SYSTEM_LIB; + } + QnnSystemInterfaceGetProvidersFn_t getSystemInterfaceProviders{nullptr}; + getSystemInterfaceProviders = + resolveSymbol( + systemLibraryHandle, "QnnSystemInterface_getProviders"); + if (nullptr == getSystemInterfaceProviders) { + return StatusCode::FAIL_SYM_FUNCTION; + } + QnnSystemInterface_t **systemInterfaceProviders{nullptr}; + uint32_t numProviders{0}; + if (QNN_SUCCESS != + getSystemInterfaceProviders( + (const QnnSystemInterface_t ***)&systemInterfaceProviders, + &numProviders)) { + // MLLM_LOG_ERROR_LEGACY("Failed to get system interface providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (nullptr == systemInterfaceProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get system interface providers: null + // interface providers received."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (0 == numProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers: 0 interface + // providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + bool foundValidSystemInterface{false}; + for (size_t pIdx = 0; pIdx < numProviders; pIdx++) { + if (QNN_SYSTEM_API_VERSION_MAJOR == + systemInterfaceProviders[pIdx]->systemApiVersion.major && + QNN_SYSTEM_API_VERSION_MINOR <= + systemInterfaceProviders[pIdx]->systemApiVersion.minor) { + foundValidSystemInterface = true; + qnnFunctionPointers->qnnSystemInterface = + systemInterfaceProviders[pIdx]->QNN_SYSTEM_INTERFACE_VER_NAME; + break; + } + } + if (!foundValidSystemInterface) { + // MLLM_LOG_ERROR_LEGACY("Unable to find a valid system interface."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + QNN_FUNCTION_EXIT_LOG; + return StatusCode::SUCCESS; +} diff --git a/nntrainer/npu/qnn/Utils/DynamicLoadUtil.hpp b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.hpp new file mode 100644 index 000000000..b8fd2493c --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.hpp @@ -0,0 +1,36 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "QNN.hpp" + +namespace qnn { +namespace tools { +namespace dynamicloadutil { +enum class StatusCode { + SUCCESS, + FAILURE, + FAIL_LOAD_BACKEND, + FAIL_LOAD_MODEL, + FAIL_SYM_FUNCTION, + FAIL_GET_INTERFACE_PROVIDERS, + FAIL_LOAD_SYSTEM_LIB, +}; + +StatusCode +getQnnFunctionPointers(std::string backendPath, std::string modelPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers, + void **backendHandle, bool loadModelLib, + void **modelHandleRtn); +StatusCode getQnnSystemFunctionPointers( + std::string systemLibraryPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers); +} // namespace dynamicloadutil +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/IOTensor.cpp b/nntrainer/npu/qnn/Utils/IOTensor.cpp new file mode 100644 index 000000000..49f346981 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/IOTensor.cpp @@ -0,0 +1,972 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#include +#include +#include +#include +#include +#include + +#include "DataUtil.hpp" +#include "IOTensor.hpp" +// #include "Log.h" +#include "Logger.hpp" +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" +#include "PAL/StringOp.hpp" +#include "QnnTypeMacros.hpp" +#include "QnnTypes.h" + +#define __fp16 _Float16 + +using namespace qnn; +using namespace qnn::tools; +using namespace qnn_wrapper_api; + +// Helper method to read data from files to a buffer. +iotensor::StatusCode iotensor::IOTensor::readDataAndAllocateBuffer( + std::queue &filePaths, std::vector dims, + Qnn_DataType_t dataType, uint8_t **bufferToCopy) { + StatusCode returnStatus = StatusCode::SUCCESS; + *bufferToCopy = nullptr; + returnStatus = allocateBuffer(bufferToCopy, dims, dataType); + if (StatusCode::SUCCESS == returnStatus) { + datautil::StatusCode status; + std::tie(status, m_numFilesPopulated, m_batchSize) = + datautil::readBatchDataAndUpdateQueue( + filePaths, dims, dataType, reinterpret_cast(*bufferToCopy)); + if (datautil::StatusCode::SUCCESS != status) { + QNN_DEBUG("Failure in datautil::readBatchDataAndUpdateQueue"); + returnStatus = StatusCode::FAILURE; + } + } + if (StatusCode::SUCCESS != returnStatus) { + if (nullptr != *bufferToCopy) { + free(*bufferToCopy); + *bufferToCopy = nullptr; + } + } + return returnStatus; +} + +// Helper method to copy a float buffer, quantize it, and copy +// it to a tensor (Qnn_Tensor_t) buffer. +iotensor::StatusCode +iotensor::IOTensor::copyFromFloatToNative(float *floatBuffer, + Qnn_Tensor_t *tensor) { + if (nullptr == floatBuffer || nullptr == tensor) { + // MLLM_LOG_ERROR_LEGACY("copyFromFloatToNative(): received a nullptr"); + return StatusCode::FAILURE; + } + + StatusCode returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_RANK(tensor)); + + switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) { + case QNN_DATATYPE_UFIXED_POINT_8: + datautil::floatToTfN( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + datautil::calculateElementCount(dims)); + break; + + case QNN_DATATYPE_UFIXED_POINT_16: + datautil::floatToTfN( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + datautil::calculateElementCount(dims)); + break; + + case QNN_DATATYPE_FLOAT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat<__fp16>( + static_cast<__fp16 *>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat<__fp16>"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure 
in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_BOOL_8: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + default: + // MLLM_LOG_ERROR_LEGACY("Datatype not supported yet!"); + returnStatus = StatusCode::FAILURE; + break; + } + return returnStatus; +} + +// Helper method to populate an input tensor in the graph during execution. +// It relies on reading data from files provided during app creation. +iotensor::StatusCode +iotensor::IOTensor::populateInputTensor(std::queue &filePaths, + Qnn_Tensor_t *input, + iotensor::InputDataType inputDataType) { + if (nullptr == input) { + // MLLM_LOG_ERROR_LEGACY("input is nullptr"); + return StatusCode::FAILURE; + } + + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(input), QNN_TENSOR_GET_RANK(input)); + + if (inputDataType == InputDataType::FLOAT && + QNN_TENSOR_GET_DATA_TYPE(input) != QNN_DATATYPE_FLOAT_32) { + uint8_t *fileToBuffer = nullptr; + returnStatus = readDataAndAllocateBuffer( + filePaths, dims, QNN_DATATYPE_FLOAT_32, &fileToBuffer); + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("readDataFromFileToBuffer successful"); + returnStatus = + copyFromFloatToNative(reinterpret_cast(fileToBuffer), input); + } + if (nullptr != fileToBuffer) { + free(fileToBuffer); + fileToBuffer = nullptr; + } + } else { + datautil::StatusCode status; + std::tie(status, m_numFilesPopulated, m_batchSize) = + datautil::readBatchDataAndUpdateQueue( + filePaths, dims, QNN_TENSOR_GET_DATA_TYPE(input), + static_cast(QNN_TENSOR_GET_CLIENT_BUF(input).data)); + if (datautil::StatusCode::SUCCESS != status) { + QNN_DEBUG("Failure in datautil::readBatchDataAndUpdateQueue"); + returnStatus = StatusCode::FAILURE; + } + } + return returnStatus; +} + +// Helper method to populate all input tensors during execution. 
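+// The per-input file queues must line up one-to-one with
+// graphInfo.numInputTensors. A minimal usage sketch (illustrative only;
+// names such as "ioTensor", "inputs" and "graphInfo" are assumptions, not
+// part of this change):
+//
+//   std::vector<std::queue<std::string>> filePathsQueue(graphInfo.numInputTensors);
+//   // push one raw-data file path per batch into each per-input queue ...
+//   if (iotensor::StatusCode::SUCCESS !=
+//       ioTensor.populateInputTensors(0, filePathsQueue, inputs, graphInfo,
+//                                     iotensor::InputDataType::FLOAT)) {
+//     // handle the error
+//   }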
+iotensor::StatusCode iotensor::IOTensor::populateInputTensors( + uint32_t graphIdx, std::vector> &filePathsQueue, + Qnn_Tensor_t *inputs, qnn_wrapper_api::GraphInfo_t graphInfo, + iotensor::InputDataType inputDataType) { + QNN_DEBUG("populateInputTensors() graphIndx %d", graphIdx); + if (nullptr == inputs) { + // MLLM_LOG_ERROR_LEGACY("inputs is nullptr"); + return StatusCode::FAILURE; + } + auto inputCount = graphInfo.numInputTensors; + if (filePathsQueue.size() != inputCount) { + // MLLM_LOG_ERROR_LEGACY( + // "Incorrect amount of Input files for graphIdx: %d. Expected: %d, " + // "received: %d", + // graphIdx, + // inputCount, + // filePathsQueue.size()); + return StatusCode::FAILURE; + } + + for (size_t inputIdx = 0; inputIdx < inputCount; inputIdx++) { + if (StatusCode::SUCCESS != populateInputTensor(filePathsQueue[inputIdx], + &(inputs[inputIdx]), + inputDataType)) { + QNN_DEBUG("populateInputTensor() failure for input: %d", inputIdx); + return StatusCode::FAILURE; + } + } + return StatusCode::SUCCESS; +} + +// Helper method to populate an input tensor in the graph during execution. +// It relies on reading data from buffer provided during executeGraph() call. +iotensor::StatusCode +iotensor::IOTensor::populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, + iotensor::InputDataType inputDataType) { + if (nullptr == input) { + // MLLM_LOG_ERROR_LEGACY("input is nullptr"); + return StatusCode::FAILURE; + } + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(input), QNN_TENSOR_GET_RANK(input)); + if (inputDataType == InputDataType::FLOAT && + QNN_TENSOR_GET_DATA_TYPE(input) != QNN_DATATYPE_FLOAT_32) { + QNN_DEBUG("Received FLOAT input, but model needs non-float input"); + if (StatusCode::SUCCESS != + copyFromFloatToNative(reinterpret_cast(buffer), input)) { + QNN_DEBUG("copyFromFloatToNative failure"); + return StatusCode::FAILURE; + } + } else { + size_t length; + datautil::StatusCode returnStatus; + std::tie(returnStatus, length) = + datautil::calculateLength(dims, QNN_TENSOR_GET_DATA_TYPE(input)); + if (datautil::StatusCode::SUCCESS != returnStatus) { + return StatusCode::FAILURE; + } + pal::StringOp::memscpy( + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(input).data), + length, buffer, length); + } + return StatusCode::SUCCESS; +} + +// Helper method to populate all input tensors. +iotensor::StatusCode iotensor::IOTensor::populateInputTensors( + uint32_t graphIdx, std::vector inputBuffers, Qnn_Tensor_t *inputs, + qnn_wrapper_api::GraphInfo_t graphInfo, + iotensor::InputDataType inputDataType) { + if (nullptr == inputs) { + // MLLM_LOG_ERROR_LEGACY("inputs is nullptr"); + return StatusCode::FAILURE; + } + auto inputCount = graphInfo.numInputTensors; + if (inputBuffers.size() != inputCount) { + // MLLM_LOG_ERROR_LEGACY("Incorrect amount of Input Buffers for graphIdx: + // %d. Expected: %d, received: %d", + // graphIdx, + // inputCount, + // inputBuffers.size()); + return StatusCode::FAILURE; + } + for (size_t inputIdx = 0; inputIdx < inputCount; inputIdx++) { + if (StatusCode::SUCCESS != populateInputTensor(inputBuffers[inputIdx], + &(inputs[inputIdx]), + inputDataType)) { + QNN_DEBUG("populateInputTensor() failure for input: %d", inputIdx); + return StatusCode::FAILURE; + } + } + return StatusCode::SUCCESS; +} + +// Setup details for Qnn_Tensor_t for execution +// based on information in Qnn_TensorWrapper_t provided by model.so. 
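+// Roughly: a tensorCount-sized Qnn_Tensor_t array is calloc'd, each
+// wrapper's metadata is deep-copied via sample_app::deepCopyQnnTensorInfo,
+// the memory type is set to QNN_TENSORMEMTYPE_RAW, and a client buffer is
+// allocated with its size taken from datautil::calculateLength(dims, dataType).
+// On failure the partially built array is released with tearDownTensors().
+// Callers normally reach this through setupInputAndOutputTensors() below
+// rather than invoking it directly.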
+iotensor::StatusCode +iotensor::IOTensor::setupTensors(Qnn_Tensor_t **tensors, uint32_t tensorCount, + Qnn_Tensor_t *tensorWrappers) { + if (nullptr == tensorWrappers) { + // MLLM_LOG_ERROR_LEGACY("tensorWrappers is nullptr"); + return StatusCode::FAILURE; + } + if (0 == tensorCount) { + QNN_INFO("tensor count is 0. Nothing to setup."); + return StatusCode::SUCCESS; + } + auto returnStatus = StatusCode::SUCCESS; + *tensors = (Qnn_Tensor_t *)calloc(1, tensorCount * sizeof(Qnn_Tensor_t)); + if (nullptr == *tensors) { + // MLLM_LOG_ERROR_LEGACY("mem alloc failed for *tensors"); + returnStatus = StatusCode::FAILURE; + return returnStatus; + } + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + Qnn_Tensor_t wrapperTensor = tensorWrappers[tensorIdx]; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(wrapperTensor), + QNN_TENSOR_GET_RANK(wrapperTensor)); + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("allocateBuffer successful"); + (*tensors)[tensorIdx] = QNN_TENSOR_INIT; + returnStatus = (sample_app::deepCopyQnnTensorInfo( + ((*tensors) + tensorIdx), &wrapperTensor) == true + ? StatusCode::SUCCESS + : StatusCode::FAILURE); + } + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("deepCopyQnnTensorInfo successful"); + QNN_TENSOR_SET_MEM_TYPE(((*tensors) + tensorIdx), QNN_TENSORMEMTYPE_RAW); + } + Qnn_ClientBuffer_t clientBuffer = QNN_CLIENT_BUFFER_INIT; + returnStatus = + allocateBuffer(reinterpret_cast(&clientBuffer.data), dims, + QNN_TENSOR_GET_DATA_TYPE((*tensors) + tensorIdx)); + datautil::StatusCode datautilStatus{datautil::StatusCode::SUCCESS}; + size_t length{0}; + std::tie(datautilStatus, length) = datautil::calculateLength( + dims, QNN_TENSOR_GET_DATA_TYPE((*tensors) + tensorIdx)); + if (datautilStatus != datautil::StatusCode::SUCCESS) { + returnStatus = StatusCode::FAILURE; + } + clientBuffer.dataSize = length; + QNN_TENSOR_SET_CLIENT_BUF(((*tensors) + tensorIdx), clientBuffer); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("Failure in setupTensors, cleaning up + // resources"); + if (nullptr != (QNN_TENSOR_GET_CLIENT_BUF((*tensors) + tensorIdx)).data) { + free(QNN_TENSOR_GET_CLIENT_BUF((*tensors) + tensorIdx).data); + } + tearDownTensors(*tensors, tensorIdx); + *tensors = nullptr; + returnStatus = StatusCode::FAILURE; + // MLLM_LOG_ERROR_LEGACY("Failure in setupTensors, done cleaning up + // resources"); + return returnStatus; + } + } + return returnStatus; +} + +iotensor::StatusCode iotensor::IOTensor::setupTensorsNoCopy( + Qnn_Tensor_t **tensors, uint32_t tensorCount, Qnn_Tensor_t *tensorWrappers) { + if (nullptr == tensorWrappers) { + // MLLM_LOG_ERROR_LEGACY("tensorWrappers is nullptr"); + return StatusCode::FAILURE; + } + if (0 == tensorCount) { + QNN_INFO("tensor count is 0. 
Nothing to setup."); + return StatusCode::SUCCESS; + } + auto returnStatus = StatusCode::SUCCESS; + *tensors = (Qnn_Tensor_t *)calloc(1, tensorCount * sizeof(Qnn_Tensor_t)); + if (nullptr == *tensors) { + // MLLM_LOG_ERROR_LEGACY("mem alloc failed for *tensors"); + returnStatus = StatusCode::FAILURE; + return returnStatus; + } + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + Qnn_Tensor_t wrapperTensor = tensorWrappers[tensorIdx]; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(wrapperTensor), + QNN_TENSOR_GET_RANK(wrapperTensor)); + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("allocateBuffer successful"); + (*tensors)[tensorIdx] = QNN_TENSOR_INIT; + returnStatus = (sample_app::deepCopyQnnTensorInfo( + ((*tensors) + tensorIdx), &wrapperTensor) == true + ? StatusCode::SUCCESS + : StatusCode::FAILURE); + } + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("deepCopyQnnTensorInfo successful"); + QNN_TENSOR_SET_MEM_TYPE(((*tensors) + tensorIdx), + QNN_TENSORMEMTYPE_MEMHANDLE); + } + } + return returnStatus; +} + +// Setup details for all input and output tensors for graph execution. +iotensor::StatusCode iotensor::IOTensor::setupInputAndOutputTensors( + Qnn_Tensor_t **inputs, Qnn_Tensor_t **outputs, + qnn_wrapper_api::GraphInfo_t graphInfo) { + auto returnStatus = StatusCode::SUCCESS; +#ifdef QNN_ARM + if (StatusCode::SUCCESS != setupTensorsNoCopy(inputs, + graphInfo.numInputTensors, + (graphInfo.inputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up input tensors"); + returnStatus = StatusCode::FAILURE; + } + if (StatusCode::SUCCESS != setupTensorsNoCopy(outputs, + graphInfo.numOutputTensors, + (graphInfo.outputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up output tensors"); + returnStatus = StatusCode::FAILURE; + } +#else + if (StatusCode::SUCCESS != setupTensors(inputs, graphInfo.numInputTensors, + (graphInfo.inputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up input tensors"); + returnStatus = StatusCode::FAILURE; + } + if (StatusCode::SUCCESS != setupTensors(outputs, graphInfo.numOutputTensors, + (graphInfo.outputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up output tensors"); + returnStatus = StatusCode::FAILURE; + } +#endif + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("Failure in setupInputAndOutputTensors, cleaning up + // resources"); + if (nullptr != *inputs) { + QNN_DEBUG("cleaning up input tensors"); + tearDownTensors(*inputs, graphInfo.numInputTensors); + *inputs = nullptr; + } + if (nullptr != *outputs) { + QNN_DEBUG("cleaning up output tensors"); + tearDownTensors(*outputs, graphInfo.numOutputTensors); + *outputs = nullptr; + } + // MLLM_LOG_ERROR_LEGACY("Failure in setupInputAndOutputTensors, done + // cleaning up resources"); + } + return returnStatus; +} + +// Clean up all tensors related data after execution. 
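+// Frees each tensor's dimensions array and clientBuf.data before releasing
+// the Qnn_Tensor_t array itself, mirroring the allocations made in
+// setupTensors(). A hedged pairing sketch (variable names are assumptions,
+// not part of this change):
+//
+//   ioTensor.setupInputAndOutputTensors(&inputs, &outputs, graphInfo);
+//   // ... execute the graph on the populated inputs ...
+//   ioTensor.tearDownInputAndOutputTensors(inputs, outputs,
+//                                          graphInfo.numInputTensors,
+//                                          graphInfo.numOutputTensors);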
+iotensor::StatusCode iotensor::IOTensor::tearDownTensors(Qnn_Tensor_t *tensors, + uint32_t tensorCount) { + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + QNN_DEBUG("freeing resources for tensor: %d", tensorIdx); + if (nullptr != QNN_TENSOR_GET_DIMENSIONS(tensors[tensorIdx])) { + QNN_DEBUG("freeing dimensions"); + free(QNN_TENSOR_GET_DIMENSIONS(tensors[tensorIdx])); + } + if (nullptr != QNN_TENSOR_GET_CLIENT_BUF(tensors[tensorIdx]).data) { + QNN_DEBUG("freeing clientBuf.data"); + free(QNN_TENSOR_GET_CLIENT_BUF(tensors[tensorIdx]).data); + } + } + free(tensors); + return StatusCode::SUCCESS; +} + +// Clean up all input and output tensors after execution. +iotensor::StatusCode iotensor::IOTensor::tearDownInputAndOutputTensors( + Qnn_Tensor_t *inputs, Qnn_Tensor_t *outputs, size_t numInputTensors, + size_t numOutputTensors) { + if (nullptr != inputs) { + QNN_INFO("cleaning up resources for input tensors"); + tearDownTensors(inputs, numInputTensors); + inputs = nullptr; + } + if (nullptr != outputs) { + QNN_INFO("cleaning up resources for output tensors"); + tearDownTensors(outputs, numOutputTensors); + outputs = nullptr; + } + return StatusCode::SUCCESS; +} + +// Helper method to allocate a buffer. +iotensor::StatusCode +iotensor::IOTensor::allocateBuffer(uint8_t **buffer, std::vector dims, + Qnn_DataType_t dataType) { + size_t elementCount = datautil::calculateElementCount(dims); + auto returnStatus = StatusCode::SUCCESS; + switch (dataType) { + case QNN_DATATYPE_FLOAT_32: + QNN_DEBUG("allocating float buffer"); + returnStatus = + allocateBuffer(reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_FLOAT_16: + QNN_DEBUG("allocating fp16 buffer"); + returnStatus = + allocateBuffer<__fp16>(reinterpret_cast<__fp16 **>(buffer), elementCount); + break; + + case QNN_DATATYPE_UINT_8: + case QNN_DATATYPE_UFIXED_POINT_8: + QNN_DEBUG("allocating uint8_t buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + case QNN_DATATYPE_UINT_16: + case QNN_DATATYPE_UFIXED_POINT_16: + QNN_DEBUG("allocating uint16_t buffer"); + returnStatus = allocateBuffer( + reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_UINT_32: + QNN_DEBUG("allocating uint32_t buffer"); + returnStatus = allocateBuffer( + reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_INT_8: + QNN_DEBUG("allocating int8_t buffer"); + returnStatus = + allocateBuffer(reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_INT_16: + QNN_DEBUG("allocating int16_t buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + case QNN_DATATYPE_INT_32: + QNN_DEBUG("allocating int32_t buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + case QNN_DATATYPE_BOOL_8: + QNN_DEBUG("allocating bool buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + default: + // MLLM_LOG_ERROR_LEGACY("Datatype not supported yet!"); + returnStatus = StatusCode::FAILURE; + break; + } + return returnStatus; +} + +// Helper method to allocate a buffer. 
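The datatype switch above and the templated allocateBuffer that follows both come down to "element count times element width". A standalone sketch of that arithmetic, for reference; elementCountOf is a hypothetical name, and the real datautil helpers (calculateElementCount/calculateLength) also propagate a status code, which is omitted here.

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical helper: product of all dimensions, treating an empty shape as
// a scalar with one element.
static size_t elementCountOf(const std::vector<size_t> &dims) {
  return std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1),
                         std::multiplies<size_t>());
}

// Example: a {1, 32, 128} tensor of QNN_DATATYPE_FLOAT_32 holds 4096 elements,
// so the QNN_DATATYPE_FLOAT_32 case above ends up asking malloc for
// 4096 * sizeof(float) = 16384 bytes.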
+template +iotensor::StatusCode iotensor::IOTensor::allocateBuffer(T **buffer, + size_t &elementCount) { + QNN_DEBUG("ElementCount: %d, sizeof(T): %d, total size: %d", elementCount, + sizeof(T), elementCount * sizeof(T)); + *buffer = (T *)malloc(elementCount * sizeof(T)); + if (nullptr == *buffer) { + // MLLM_LOG_ERROR_LEGACY("mem alloc failed for *buffer"); + return StatusCode::FAILURE; + } + return StatusCode::SUCCESS; +} + +// Convert data to float or de-quantization. This is used when +// user requests for float output and the model produces +// non-float output. +iotensor::StatusCode iotensor::IOTensor::convertToFloat(float **out, + Qnn_Tensor_t *tensor) { + if (nullptr == tensor) { + // MLLM_LOG_ERROR_LEGACY("tensors is nullptr"); + return StatusCode::FAILURE; + } + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_RANK(tensor)); + auto returnStatus = StatusCode::SUCCESS; + size_t elementCount = datautil::calculateElementCount(dims); + returnStatus = allocateBuffer(out, elementCount); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("failure in allocateBuffer"); + return returnStatus; + } + switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) { + case QNN_DATATYPE_UFIXED_POINT_8: + if (datautil::StatusCode::SUCCESS != + datautil::tfNToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in tfNToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UFIXED_POINT_16: + if (datautil::StatusCode::SUCCESS != + datautil::tfNToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in tfNToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_FLOAT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat<__fp16>( + *out, + reinterpret_cast<__fp16 *>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat<__fp16>"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_16: + 
if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_BOOL_8: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + default: + // MLLM_LOG_ERROR_LEGACY("Datatype not supported yet!"); + returnStatus = StatusCode::FAILURE; + break; + } + if (StatusCode::SUCCESS != returnStatus) { + QNN_DEBUG("freeing *out"); + if (*out != nullptr) { + free(*out); + *out = nullptr; + } + } + return returnStatus; +} + +// Helper method to convert Output tensors to float and write them +// out to files. +iotensor::StatusCode iotensor::IOTensor::convertAndWriteOutputTensorInFloat( + Qnn_Tensor_t *output, std::vector outputPaths, + std::string fileName) { + if (nullptr == output) { + // MLLM_LOG_ERROR_LEGACY("output is nullptr"); + return StatusCode::FAILURE; + } + + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(output), + QNN_TENSOR_GET_RANK(output)); + float *floatBuffer = nullptr; + returnStatus = convertToFloat(&floatBuffer, output); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("failure in convertToFloat"); + return StatusCode::FAILURE; + } + uint8_t *bufferToWrite = reinterpret_cast(floatBuffer); + if (datautil::StatusCode::SUCCESS != + datautil::writeBatchDataToFile(outputPaths, fileName, dims, + QNN_DATATYPE_FLOAT_32, bufferToWrite, + m_batchSize)) { + // MLLM_LOG_ERROR_LEGACY("failure in writeBatchDataToFile"); + returnStatus = StatusCode::FAILURE; + } + if (nullptr != floatBuffer) { + QNN_DEBUG("freeing floatBuffer"); + free(floatBuffer); + floatBuffer = nullptr; + } + return returnStatus; +} + +// Helper method to write out output. There is no de-quantization here. +// Just write output as is to files. +iotensor::StatusCode +iotensor::IOTensor::writeOutputTensor(Qnn_Tensor_t *output, + std::vector outputPaths, + std::string fileName) { + if (nullptr == output) { + // MLLM_LOG_ERROR_LEGACY("output is nullptr"); + return StatusCode::FAILURE; + } + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(output), + QNN_TENSOR_GET_RANK(output)); + uint8_t *bufferToWrite = + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(output).data); + if (datautil::StatusCode::SUCCESS != + datautil::writeBatchDataToFile(outputPaths, fileName, dims, + QNN_TENSOR_GET_DATA_TYPE(output), + bufferToWrite, m_batchSize)) { + // MLLM_LOG_ERROR_LEGACY("failure in writeBatchDataToFile"); + returnStatus = StatusCode::FAILURE; + } + return returnStatus; +} + +// Helper method to write out output. There is no de-quantization here. +// Just write output as is to files. 
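Note that, unlike the file-writing overload just described, the buffer-writing overload that follows converts the tensor to float through convertToFloat() before copying into the caller's buffer, so quantized outputs are de-quantized first. A hedged, standalone sketch of the 8-bit unsigned fixed-point case handled by datautil::tfNToFloat, assuming the usual QNN scale/offset convention; the real helper may differ in rounding and error handling.

#include <cstddef>
#include <cstdint>

// Hypothetical sketch: affine de-quantization, float ~= scale * (q + offset).
static void dequantizeTfN8(float *out, const uint8_t *in, int32_t offset,
                           double scale, size_t elementCount) {
  for (size_t i = 0; i < elementCount; ++i) {
    out[i] = static_cast<float>(scale * (static_cast<double>(in[i]) + offset));
  }
}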
+iotensor::StatusCode +iotensor::IOTensor::writeOutputTensor(Qnn_Tensor_t *output, + uint8_t *output_buffer) { + if (nullptr == output) { + // MLLM_LOG_ERROR_LEGACY("output is nullptr"); + return StatusCode::FAILURE; + } + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(output), + QNN_TENSOR_GET_RANK(output)); + float *floatBuffer = nullptr; + returnStatus = convertToFloat(&floatBuffer, output); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("failure in convertToFloat"); + return StatusCode::FAILURE; + } + uint8_t *bufferToWrite = reinterpret_cast(floatBuffer); + + datautil::StatusCode err{datautil::StatusCode::SUCCESS}; + size_t length{0}; + std::tie(err, length) = + datautil::calculateLength(dims, QNN_DATATYPE_FLOAT_32); + if (datautil::StatusCode::SUCCESS != err) { + return StatusCode::FAILURE; + } + + memcpy(output_buffer, bufferToWrite, length); + + return returnStatus; +} + +// Write out all output tensors to files. If output_data_type is float, +// then all outputs will be raw floats regardless of what the model outputs. +// If the output_data_type is native, then output is written as produced by the +// model. Also, for native option, a json with quantization parameters is +// written out. If output_data_type is float_and_native, both above are done. If +// the output in the graph is float, then output_data_type has no effect. +iotensor::StatusCode iotensor::IOTensor::writeOutputTensors( + uint32_t graphIdx, size_t startIdx, char *graphName, Qnn_Tensor_t *outputs, + uint32_t numOutputs, iotensor::OutputDataType outputDatatype, + uint32_t graphsCount, std::string outputPath) { + if (nullptr == outputs) { + // MLLM_LOG_ERROR_LEGACY("Received nullptr"); + return StatusCode::FAILURE; + } + if (graphsCount > 1) { + if (nullptr != graphName && strlen(graphName) > 0) { + outputPath += (pal::Path::getSeparator() + std::string(graphName)); + } else { + outputPath += (pal::Path::getSeparator() + std::string("Graph_") + + std::to_string(graphIdx)); + } + } + auto returnStatus = StatusCode::SUCCESS; + std::vector outputPaths; + for (size_t idx = 0; idx < m_numFilesPopulated; idx++) { + std::string output = + outputPath + (pal::Path::getSeparator() + std::string("Result_") + + std::to_string(startIdx + idx)); + outputPaths.push_back(output); + } + for (size_t outputIdx = 0; outputIdx < numOutputs; outputIdx++) { + QNN_DEBUG("Writing output for outputIdx: %d", outputIdx); + std::string outputFilePrefix; + if (nullptr != QNN_TENSOR_GET_NAME(outputs[outputIdx]) && + strlen(QNN_TENSOR_GET_NAME(outputs[outputIdx])) > 0) { + outputFilePrefix = std::string(QNN_TENSOR_GET_NAME(outputs[outputIdx])); + } else { + outputFilePrefix = std::string("Output_") + std::to_string(outputIdx); + } + auto outputFile = outputFilePrefix + std::string(".raw"); + auto outputFileNative = outputFilePrefix + std::string("_native.raw"); + if (QNN_TENSOR_GET_DATA_TYPE(outputs[outputIdx]) == QNN_DATATYPE_FLOAT_32) { + QNN_DEBUG("Writing in output->dataType == QNN_DATATYPE_FLOAT_32"); + returnStatus = + writeOutputTensor(&(outputs[outputIdx]), outputPaths, outputFile); + } else if (outputDatatype == OutputDataType::FLOAT_ONLY) { + QNN_DEBUG("Writing in output->dataType == OutputDataType::FLOAT_ONLY"); + returnStatus = convertAndWriteOutputTensorInFloat( + &(outputs[outputIdx]), outputPaths, outputFile); + } else if (outputDatatype == OutputDataType::NATIVE_ONLY) { + QNN_DEBUG("Writing in output->dataType == OutputDataType::NATIVE_ONLY"); + 
returnStatus = + writeOutputTensor(&(outputs[outputIdx]), outputPaths, outputFileNative); + } else if (outputDatatype == OutputDataType::FLOAT_AND_NATIVE) { + QNN_DEBUG( + "Writing in output->dataType == OutputDataType::FLOAT_AND_NATIVE"); + returnStatus = convertAndWriteOutputTensorInFloat( + &(outputs[outputIdx]), outputPaths, outputFile); + if (StatusCode::SUCCESS == returnStatus) { + returnStatus = writeOutputTensor(&(outputs[outputIdx]), outputPaths, + outputFileNative); + } + } + } + return returnStatus; +} + +// Helper method to allocate a buffer and copy data to it. +iotensor::StatusCode +iotensor::IOTensor::allocateAndCopyBuffer(uint8_t **buffer, + Qnn_Tensor_t *tensor) { + if (nullptr == tensor) { + return StatusCode::FAILURE; + } + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_RANK(tensor)); + datautil::StatusCode datautilStatus; + size_t length; + std::tie(datautilStatus, length) = + datautil::calculateLength(dims, QNN_TENSOR_GET_DATA_TYPE(tensor)); + if (datautilStatus != datautil::StatusCode::SUCCESS) { + return StatusCode::FAILURE; + } + if (StatusCode::SUCCESS != + allocateBuffer(buffer, dims, QNN_TENSOR_GET_DATA_TYPE(tensor))) { + // MLLM_LOG_ERROR_LEGACY("failure in allocateBuffer"); + return StatusCode::FAILURE; + } + pal::StringOp::memscpy(*buffer, length * sizeof(uint8_t), + QNN_TENSOR_GET_CLIENT_BUF(tensor).data, + length * sizeof(uint8_t)); + return StatusCode::SUCCESS; +} + +iotensor::StatusCode iotensor::IOTensor::fillDims(std::vector &dims, + uint32_t *inDimensions, + uint32_t rank) { + if (nullptr == inDimensions) { + // MLLM_LOG_ERROR_LEGACY("input dimensions is nullptr"); + return StatusCode::FAILURE; + } + for (size_t r = 0; r < rank; r++) { + dims.push_back(inDimensions[r]); + } + return StatusCode::SUCCESS; +} + +iotensor::OutputDataType +iotensor::parseOutputDataType(std::string dataTypeString) { + std::transform(dataTypeString.begin(), dataTypeString.end(), + dataTypeString.begin(), ::tolower); + OutputDataType parsedDataType = OutputDataType::INVALID; + if (dataTypeString == "float_only") { + parsedDataType = OutputDataType::FLOAT_ONLY; + } else if (dataTypeString == "native_only") { + parsedDataType = OutputDataType::NATIVE_ONLY; + } else if (dataTypeString == "float_and_native") { + parsedDataType = OutputDataType::FLOAT_AND_NATIVE; + } + return parsedDataType; +} + +iotensor::InputDataType +iotensor::parseInputDataType(std::string dataTypeString) { + std::transform(dataTypeString.begin(), dataTypeString.end(), + dataTypeString.begin(), ::tolower); + InputDataType parsedDataType = InputDataType::INVALID; + if (dataTypeString == "float") { + parsedDataType = InputDataType::FLOAT; + } else if (dataTypeString == "native") { + parsedDataType = InputDataType::NATIVE; + } + return parsedDataType; +} diff --git a/nntrainer/npu/qnn/Utils/IOTensor.hpp b/nntrainer/npu/qnn/Utils/IOTensor.hpp new file mode 100644 index 000000000..7d07f28ea --- /dev/null +++ b/nntrainer/npu/qnn/Utils/IOTensor.hpp @@ -0,0 +1,122 @@ +//============================================================================== +// +// Copyright (c) 2020, 2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#pragma once + +#include +#include + +#include "QnnBackend.h" +#include "QnnCommon.h" +#include "QnnContext.h" +#include "QnnGraph.h" +#include "QnnProperty.h" +#include "QnnSampleAppUtils.hpp" +#include "QnnTensor.h" +#include "QnnTypes.h" +#include "WrapperUtils/QnnWrapperUtils.hpp" + +namespace qnn { +namespace tools { +namespace iotensor { + +enum class StatusCode { SUCCESS, FAILURE }; +enum class OutputDataType { + FLOAT_ONLY, + NATIVE_ONLY, + FLOAT_AND_NATIVE, + INVALID +}; +enum class InputDataType { FLOAT, NATIVE, INVALID }; + +OutputDataType parseOutputDataType(std::string dataTypeString); +InputDataType parseInputDataType(std::string dataTypeString); + +class IOTensor { +public: + IOTensor() : m_batchSize(1), m_numFilesPopulated(0) {} + + StatusCode setupInputAndOutputTensors(Qnn_Tensor_t **inputs, + Qnn_Tensor_t **outputs, + qnn_wrapper_api::GraphInfo_t graphInfo); + + StatusCode writeOutputTensors(uint32_t graphIdx, size_t startIdx, + char *graphName, Qnn_Tensor_t *outputs, + uint32_t numOutputs, + OutputDataType outputDatatype, + uint32_t graphsCount, std::string outputPath); + + StatusCode populateInputTensors( + uint32_t graphIdx, std::vector> &filePathsQueue, + Qnn_Tensor_t *inputs, qnn_wrapper_api::GraphInfo_t graphInfo, + iotensor::InputDataType inputDataType); + + StatusCode populateInputTensors(uint32_t graphIdx, + std::vector inputBuffers, + Qnn_Tensor_t *inputs, + qnn_wrapper_api::GraphInfo_t graphInfo, + InputDataType inputDataType); + + StatusCode tearDownInputAndOutputTensors(Qnn_Tensor_t *inputs, + Qnn_Tensor_t *outputs, + size_t numInputTensors, + size_t numOutputTensors); + + StatusCode writeOutputTensor(Qnn_Tensor_t *output, uint8_t *output_buffer); + +private: + size_t m_batchSize; + size_t m_numFilesPopulated; + + StatusCode populateInputTensor(std::queue &filePaths, + Qnn_Tensor_t *input, + InputDataType inputDataType); + + StatusCode populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, + InputDataType inputDataType); + + StatusCode readDataAndAllocateBuffer(std::queue &filePaths, + std::vector dims, + Qnn_DataType_t dataType, + uint8_t **bufferToCopy); + + template + StatusCode allocateBuffer(T **buffer, size_t &elementCount); + + StatusCode convertToFloat(float **out, Qnn_Tensor_t *output); + + StatusCode + convertAndWriteOutputTensorInFloat(Qnn_Tensor_t *output, + std::vector outputPaths, + std::string fileName); + + StatusCode writeOutputTensor(Qnn_Tensor_t *output, + std::vector outputPaths, + std::string fileName); + + StatusCode allocateAndCopyBuffer(uint8_t **buffer, Qnn_Tensor_t *tensor); + + StatusCode tearDownTensors(Qnn_Tensor_t *tensors, uint32_t tensorCount); + + StatusCode allocateBuffer(uint8_t **buffer, std::vector dims, + Qnn_DataType_t dataType); + + StatusCode copyFromFloatToNative(float *floatBuffer, Qnn_Tensor_t *tensor); + + StatusCode setupTensors(Qnn_Tensor_t **tensors, uint32_t tensorCount, + Qnn_Tensor_t *tensorsInfo); + // just set the tensor info, no buffer allocation + // used when enable qnn shared buffer for input and output + StatusCode setupTensorsNoCopy(Qnn_Tensor_t **tensors, uint32_t tensorCount, + Qnn_Tensor_t *tensorsInfo); + + StatusCode fillDims(std::vector &dims, uint32_t *inDimensions, + uint32_t rank); +}; +} // namespace iotensor +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.cpp b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.cpp new file mode 100644 index 000000000..76c035e04 
--- /dev/null +++ b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.cpp @@ -0,0 +1,358 @@ +//============================================================================== +// +// Copyright (c) 2019-2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include + +#include "Log/Logger.hpp" +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" +#include "PAL/StringOp.hpp" +#include "QnnTypeMacros.hpp" +#include "Utils/QnnSampleAppUtils.hpp" + +using namespace qnn; +using namespace qnn::tools; +using namespace qnn_wrapper_api; + +void sample_app::split(std::vector &splitString, + const std::string &tokenizedString, + const char separator) { + splitString.clear(); + std::istringstream tokenizedStringStream(tokenizedString); + while (!tokenizedStringStream.eof()) { + std::string value; + getline(tokenizedStringStream, value, separator); + if (!value.empty()) { + splitString.push_back(value); + } + } +} + +void sample_app::parseInputFilePaths(std::vector &inputFilePaths, + std::vector &paths, + std::string separator) { + for (auto &inputInfo : inputFilePaths) { + auto position = inputInfo.find(separator); + if (position != std::string::npos) { + auto path = inputInfo.substr(position + separator.size()); + paths.push_back(path); + } else { + paths.push_back(inputInfo); + } + } +} + +sample_app::ReadInputListsRetType_t +sample_app::readInputLists(std::vector inputFileListPaths) { + std::vector>> filePathsLists; + for (auto const &path : inputFileListPaths) { + bool readSuccess; + std::vector> filePathList; + std::tie(filePathList, readSuccess) = readInputList(path); + if (!readSuccess) { + filePathsLists.clear(); + return std::make_tuple(filePathsLists, false); + } + filePathsLists.push_back(filePathList); + } + return std::make_tuple(filePathsLists, true); +} + +sample_app::ReadInputListRetType_t +sample_app::readInputList(const std::string inputFileListPath) { + std::queue lines; + std::ifstream fileListStream(inputFileListPath); + if (!fileListStream) { + QNN_ERROR("Failed to open input file: %s", inputFileListPath.c_str()); + std::vector> result; + return std::make_tuple(result, false); + } + std::string fileLine; + while (std::getline(fileListStream, fileLine)) { + if (fileLine.empty()) + continue; + lines.push(fileLine); + } + if (!lines.empty() && lines.front().compare(0, 1, "#") == 0) { + lines.pop(); + } + std::string separator = ":="; + std::vector> filePathsList; + while (!lines.empty()) { + std::vector paths{}; + std::vector inputFilePaths; + split(inputFilePaths, lines.front(), ' '); + parseInputFilePaths(inputFilePaths, paths, separator); + // TODO: multi input support + filePathsList.reserve(paths.size()); + for (size_t idx = 0; idx < paths.size(); idx++) { + if (idx >= filePathsList.size()) { + filePathsList.push_back(std::queue()); + } + filePathsList.back().push(paths[idx]); + } + lines.pop(); + } + return std::make_tuple(filePathsList, true); +} + +sample_app::ProfilingLevel +sample_app::parseProfilingLevel(std::string profilingLevelString) { + std::transform(profilingLevelString.begin(), profilingLevelString.end(), + profilingLevelString.begin(), ::tolower); + ProfilingLevel parsedProfilingLevel = ProfilingLevel::INVALID; + if (profilingLevelString == "off") { + parsedProfilingLevel = ProfilingLevel::OFF; + } else if (profilingLevelString == 
"basic") { + parsedProfilingLevel = ProfilingLevel::BASIC; + } else if (profilingLevelString == "detailed") { + parsedProfilingLevel = ProfilingLevel::DETAILED; + } + return parsedProfilingLevel; +} + +bool sample_app::deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, + const Qnn_Tensor_t *src) { + if (nullptr == dst || nullptr == src) { + QNN_ERROR("Received nullptr"); + return false; + } + // set tensor.version before using QNN_TENSOR_SET macros, as they require the + // version to be set to correctly assign values + dst->version = src->version; + const char *tensorName = QNN_TENSOR_GET_NAME(src); + if (!tensorName) { + QNN_TENSOR_SET_NAME(dst, nullptr); + } else { + QNN_TENSOR_SET_NAME(dst, + pal::StringOp::strndup(tensorName, strlen(tensorName))); + } + QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src)); + QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src)); + QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src)); + QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src)); + Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT; + qParams.encodingDefinition = + QNN_TENSOR_GET_QUANT_PARAMS(src).encodingDefinition; + qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED; + if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding; + qParams.scaleOffsetEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(src).scaleOffsetEncoding; + } else if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding; + qParams.axisScaleOffsetEncoding.axis = + QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.axis; + qParams.axisScaleOffsetEncoding.numScaleOffsets = + QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets; + if (QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.numScaleOffsets > 0) { + qParams.axisScaleOffsetEncoding.scaleOffset = + (Qnn_ScaleOffset_t *)malloc(QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.numScaleOffsets * + sizeof(Qnn_ScaleOffset_t)); + if (qParams.axisScaleOffsetEncoding.scaleOffset) { + for (size_t idx = 0; idx < QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.numScaleOffsets; + idx++) { + qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale = + QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.scaleOffset[idx] + .scale; + qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset = + QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.scaleOffset[idx] + .offset; + } + } + } + } + QNN_TENSOR_SET_QUANT_PARAMS(dst, qParams); + QNN_TENSOR_SET_RANK(dst, QNN_TENSOR_GET_RANK(src)); + QNN_TENSOR_SET_DIMENSIONS(dst, nullptr); + if (QNN_TENSOR_GET_RANK(src) > 0) { + QNN_TENSOR_SET_DIMENSIONS( + dst, (uint32_t *)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t))); + if (QNN_TENSOR_GET_DIMENSIONS(dst)) { + pal::StringOp::memscpy(QNN_TENSOR_GET_DIMENSIONS(dst), + QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t), + QNN_TENSOR_GET_DIMENSIONS(src), + QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t)); + } + } + return true; +} + +bool sample_app::copyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc, + Qnn_Tensor_t *&tensorWrappers, + uint32_t tensorsCount) { + QNN_FUNCTION_ENTRY_LOG; + auto returnStatus = true; + tensorWrappers = (Qnn_Tensor_t *)calloc(tensorsCount, sizeof(Qnn_Tensor_t)); + if (nullptr == tensorWrappers) { + QNN_ERROR("Failed to 
allocate memory for tensorWrappers."); + return false; + } + if (returnStatus) { + for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) { + QNN_DEBUG("Extracting tensorInfo for tensor Idx: %d", tIdx); + tensorWrappers[tIdx] = QNN_TENSOR_INIT; + deepCopyQnnTensorInfo(&tensorWrappers[tIdx], &tensorsInfoSrc[tIdx]); + } + } + QNN_FUNCTION_EXIT_LOG; + return returnStatus; +} + +bool sample_app::copyGraphsInfoV1( + const QnnSystemContext_GraphInfoV1_t *graphInfoSrc, + qnn_wrapper_api::GraphInfo_t *graphInfoDst) { + graphInfoDst->graphName = nullptr; + if (graphInfoSrc->graphName) { + graphInfoDst->graphName = pal::StringOp::strndup( + graphInfoSrc->graphName, strlen(graphInfoSrc->graphName)); + } + graphInfoDst->inputTensors = nullptr; + graphInfoDst->numInputTensors = 0; + if (graphInfoSrc->graphInputs) { + if (!copyTensorsInfo(graphInfoSrc->graphInputs, graphInfoDst->inputTensors, + graphInfoSrc->numGraphInputs)) { + return false; + } + graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs; + } + graphInfoDst->outputTensors = nullptr; + graphInfoDst->numOutputTensors = 0; + if (graphInfoSrc->graphOutputs) { + if (!copyTensorsInfo(graphInfoSrc->graphOutputs, + graphInfoDst->outputTensors, + graphInfoSrc->numGraphOutputs)) { + return false; + } + graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs; + } + return true; +} + +bool sample_app::copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput, + const uint32_t numGraphs, + qnn_wrapper_api::GraphInfo_t **&graphsInfo) { + QNN_FUNCTION_ENTRY_LOG; + if (!graphsInput) { + QNN_ERROR("Received nullptr for graphsInput."); + return false; + } + auto returnStatus = true; + graphsInfo = (qnn_wrapper_api::GraphInfo_t **)calloc( + numGraphs, sizeof(qnn_wrapper_api::GraphInfo_t *)); + qnn_wrapper_api::GraphInfo_t *graphInfoArr = + (qnn_wrapper_api::GraphInfo_t *)calloc( + numGraphs, sizeof(qnn_wrapper_api::GraphInfo_t)); + if (nullptr == graphsInfo || nullptr == graphInfoArr) { + QNN_ERROR("Failure to allocate memory for *graphInfo"); + returnStatus = false; + } + if (true == returnStatus) { + for (size_t gIdx = 0; gIdx < numGraphs; gIdx++) { + QNN_DEBUG("Extracting graphsInfo for graph Idx: %d", gIdx); + if (graphsInput[gIdx].version == + QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) { + copyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, &graphInfoArr[gIdx]); + } + graphsInfo[gIdx] = graphInfoArr + gIdx; + } + } + if (true != returnStatus) { + QNN_ERROR("Received an ERROR during extractGraphsInfo. 
Freeing resources."); + if (graphsInfo) { + for (uint32_t gIdx = 0; gIdx < numGraphs; gIdx++) { + if (graphsInfo[gIdx]) { + if (nullptr != graphsInfo[gIdx]->graphName) { + free(graphsInfo[gIdx]->graphName); + graphsInfo[gIdx]->graphName = nullptr; + } + qnn_wrapper_api::freeQnnTensors(graphsInfo[gIdx]->inputTensors, + graphsInfo[gIdx]->numInputTensors); + qnn_wrapper_api::freeQnnTensors(graphsInfo[gIdx]->outputTensors, + graphsInfo[gIdx]->numOutputTensors); + } + } + free(*graphsInfo); + } + free(graphsInfo); + graphsInfo = nullptr; + } + QNN_FUNCTION_EXIT_LOG; + return true; +} + +bool sample_app::copyMetadataToGraphsInfo( + const QnnSystemContext_BinaryInfo_t *binaryInfo, + qnn_wrapper_api::GraphInfo_t **&graphsInfo, uint32_t &graphsCount) { + if (nullptr == binaryInfo) { + QNN_ERROR("binaryInfo is nullptr."); + return false; + } + graphsCount = 0; + if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) { + if (binaryInfo->contextBinaryInfoV1.graphs) { + if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV1.graphs, + binaryInfo->contextBinaryInfoV1.numGraphs, + graphsInfo)) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + graphsCount = binaryInfo->contextBinaryInfoV1.numGraphs; + return true; + } + } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { + if (binaryInfo->contextBinaryInfoV2.graphs) { + if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV2.graphs, + binaryInfo->contextBinaryInfoV2.numGraphs, + graphsInfo)) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + graphsCount = binaryInfo->contextBinaryInfoV2.numGraphs; + return true; + } + } + QNN_ERROR("Unrecognized system context binary info version."); + return false; +} + +QnnLog_Level_t sample_app::parseLogLevel(std::string logLevelString) { + QNN_FUNCTION_ENTRY_LOG; + std::transform(logLevelString.begin(), logLevelString.end(), + logLevelString.begin(), ::tolower); + QnnLog_Level_t parsedLogLevel = QNN_LOG_LEVEL_MAX; + if (logLevelString == "error") { + parsedLogLevel = QNN_LOG_LEVEL_ERROR; + } else if (logLevelString == "warn") { + parsedLogLevel = QNN_LOG_LEVEL_WARN; + } else if (logLevelString == "info") { + parsedLogLevel = QNN_LOG_LEVEL_INFO; + } else if (logLevelString == "verbose") { + parsedLogLevel = QNN_LOG_LEVEL_VERBOSE; + } else if (logLevelString == "debug") { + parsedLogLevel = QNN_LOG_LEVEL_DEBUG; + } + QNN_FUNCTION_EXIT_LOG; + return parsedLogLevel; +} diff --git a/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.hpp b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.hpp new file mode 100644 index 000000000..d576be213 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.hpp @@ -0,0 +1,70 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "QNN.hpp" + +namespace qnn { +namespace tools { +namespace sample_app { + +enum class ProfilingLevel { OFF, BASIC, DETAILED, INVALID }; + +using ReadInputListRetType_t = + std::tuple>, bool>; + +ReadInputListRetType_t readInputList(std::string inputFileListPath); + +using ReadInputListsRetType_t = + std::tuple>>, bool>; + +ReadInputListsRetType_t +readInputLists(std::vector inputFileListPath); + +ProfilingLevel parseProfilingLevel(std::string profilingLevelString); + +void parseInputFilePaths(std::vector &inputFilePaths, + std::vector &paths, + std::string separator); + +void split(std::vector &splitString, + const std::string &tokenizedString, const char separator); + +bool copyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo, + qnn_wrapper_api::GraphInfo_t **&graphsInfo, + uint32_t &graphsCount); + +bool copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput, + const uint32_t numGraphs, + qnn_wrapper_api::GraphInfo_t **&graphsInfo); + +bool copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc, + qnn_wrapper_api::GraphInfo_t *graphInfoDst); + +bool copyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc, + Qnn_Tensor_t *&tensorWrappers, uint32_t tensorsCount); + +bool deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src); + +QnnLog_Level_t parseLogLevel(std::string logLevelString); + +void inline exitWithMessage(std::string &&msg, int code) { + std::cerr << msg << std::endl; + std::exit(code); +} + +} // namespace sample_app +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.cpp b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.cpp new file mode 100644 index 000000000..3c909a8b1 --- /dev/null +++ b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.cpp @@ -0,0 +1,205 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include + +#include "QnnModelPal.hpp" +#include "QnnTypeMacros.hpp" +#include "QnnWrapperUtils.hpp" + +namespace qnn_wrapper_api { +size_t memscpy(void *dst, size_t dstSize, const void *src, size_t copySize) { + if (!dst || !src || !dstSize || !copySize) + return 0; + + size_t minSize = dstSize < copySize ? dstSize : copySize; + + memcpy(dst, src, minSize); + + return minSize; +} + +ModelError_t getQnnGraphConfigFromInfo( + const char *graphName, const GraphConfigInfo_t **graphsConfigInfo, + const uint32_t numGraphsConfigInfo, const QnnGraph_Config_t **&graphConfigs) { + if (!graphsConfigInfo || numGraphsConfigInfo == 0) { + PRINT_DEBUG( + "getQnnGraphConfigFromInfo() no custom configs passed for graph:%s.\n", + graphName); + return MODEL_NO_ERROR; + } + + size_t found = 0; + + for (uint32_t i = 0; i < numGraphsConfigInfo; i++) { + if (!graphsConfigInfo[i]) { + PRINT_ERROR("getQnnGraphConfigFromInfo() lookup error while trying to " + "query graphName:%s. 
" + "numGraphsConfigInfo > num of element in graphsConfigInfo\n", + graphName); + return MODEL_INVALID_ARGUMENT_ERROR; + } + if (strcmp(graphsConfigInfo[i]->graphName, graphName) == 0) { + graphConfigs = graphsConfigInfo[i]->graphConfigs; + found++; + } + } + + if (!found) { + PRINT_ERROR( + "getQnnGraphConfigFromInfo() unable to find graphName:%s in provided " + "graphsConfigInfo object.\n", + graphName); + return MODEL_INVALID_ARGUMENT_ERROR; + } else if (found > 1) { + PRINT_ERROR("getQnnGraphConfigFromInfo() duplicate GraphConfigInfo entries " + "found with " + "graphName:%s.\n", + graphName); + return MODEL_INVALID_ARGUMENT_ERROR; + } else { + return MODEL_NO_ERROR; + } +} + +ModelError_t deepCopyQnnTensors(Qnn_Tensor_t &src, Qnn_Tensor_t &dst) { + ModelError_t err; + VALIDATE_TENSOR_VERSION(src, err); + + dst.version = src.version; + QNN_TENSOR_SET_NAME(dst, + strnDup(QNN_TENSOR_GET_NAME(src), + std::string(QNN_TENSOR_GET_NAME(src)).size())); + if (QNN_TENSOR_GET_NAME(dst) == nullptr) { + return MODEL_TENSOR_ERROR; + } + QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src)); + QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src)); + QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src)); + QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src)); + QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSOR_GET_MEM_TYPE(src)); + + // Only metadata (i.e. non-static data) is copied from source to destination. + // The union still must be initialized so that the clientBuf/memHandle do not + // contain garbage data + if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_RAW) { + Qnn_ClientBuffer_t clientBuf = {nullptr, 0}; + QNN_TENSOR_SET_CLIENT_BUF(dst, clientBuf); + } else if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_MEMHANDLE) { + QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr); + } else { + return MODEL_TENSOR_ERROR; + } + + Qnn_QuantizeParams_t srcQParam = QNN_TENSOR_GET_QUANT_PARAMS(src); + Qnn_QuantizationEncoding_t encoding = srcQParam.quantizationEncoding; + if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + // need to allocate and copy memory for scaleOffset as it is a pointer array + Qnn_QuantizeParams_t srcQParamCpy = srcQParam; + Qnn_AxisScaleOffset_t &axisScaleOffset = + srcQParamCpy.axisScaleOffsetEncoding; + Qnn_ScaleOffset_t **scaleOffset = &axisScaleOffset.scaleOffset; + size_t scaleOffsetSize = + axisScaleOffset.numScaleOffsets * sizeof(Qnn_ScaleOffset_t); + *scaleOffset = (Qnn_ScaleOffset_t *)malloc(scaleOffsetSize); + memscpy(*scaleOffset, scaleOffsetSize, + srcQParam.axisScaleOffsetEncoding.scaleOffset, scaleOffsetSize); + QNN_TENSOR_SET_QUANT_PARAMS(dst, srcQParamCpy); + } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) { + // need to allocate and copy memory for scaleOffset as it is a pointer array + Qnn_QuantizeParams_t srcQParamCpy = srcQParam; + Qnn_BwAxisScaleOffset_t &bwAxisScaleOffset = + srcQParamCpy.bwAxisScaleOffsetEncoding; + size_t scaleSize = bwAxisScaleOffset.numElements * sizeof(float); + float **scales = &bwAxisScaleOffset.scales; + int32_t **offsets = &bwAxisScaleOffset.offsets; + *scales = (float *)malloc(scaleSize); + memscpy(*scales, scaleSize, srcQParam.bwAxisScaleOffsetEncoding.scales, + scaleSize); + + // Only copy offsets if present, nullptr implies all offsets are 0 + if (bwAxisScaleOffset.offsets != nullptr) { + size_t offsetSize = bwAxisScaleOffset.numElements * sizeof(int32_t); + *offsets = (int32_t *)malloc(offsetSize); + memscpy(*offsets, offsetSize, srcQParam.bwAxisScaleOffsetEncoding.offsets, + offsetSize); + } + 
QNN_TENSOR_SET_QUANT_PARAMS(dst, srcQParamCpy); + } else { + QNN_TENSOR_SET_QUANT_PARAMS(dst, srcQParam); + } + + // need to allocate and copy memory for all the pointer members + uint32_t rank = QNN_TENSOR_GET_RANK(src); + QNN_TENSOR_SET_RANK(dst, rank); + size_t dimSize = rank * sizeof(uint32_t); + uint32_t *dimensions = (uint32_t *)malloc(dimSize); + if (dimensions == nullptr) { + PRINT_ERROR("deepCopyQnnTensors() Allocation error while copying tensor %s", + QNN_TENSOR_GET_NAME(src)); + return MODEL_TENSOR_ERROR; + } + memscpy(dimensions, dimSize, QNN_TENSOR_GET_DIMENSIONS(src), dimSize); + QNN_TENSOR_SET_DIMENSIONS(dst, dimensions); + + return err; +} + +ModelError_t freeQnnTensor(Qnn_Tensor_t &tensor) { + ModelError_t err; + VALIDATE_TENSOR_VERSION(tensor, err); + + // free all pointer allocations in struct + free((void *)QNN_TENSOR_GET_NAME(tensor)); + free(QNN_TENSOR_GET_DIMENSIONS(tensor)); + + return MODEL_NO_ERROR; +} + +ModelError_t freeQnnTensors(Qnn_Tensor_t *&tensors, uint32_t numTensors) { + // free all pointer allocations in struct + for (size_t i = 0; i < numTensors; i++) { + freeQnnTensor(tensors[i]); + } + free(tensors); + + return MODEL_NO_ERROR; +} + +std::string getModelErrorName(ModelError_t modelError) { + switch (modelError) { + case MODEL_NO_ERROR: + return "MODEL_NO_ERROR"; + case MODEL_TENSOR_ERROR: + return "MODEL_TENSOR_ERROR"; + case MODEL_PARAMS_ERROR: + return "MODEL_PARAMS_ERROR"; + case MODEL_NODES_ERROR: + return "MODEL_NODES_ERROR"; + case MODEL_GRAPH_ERROR: + return "MODEL_GRAPH_ERROR"; + case MODEL_CONTEXT_ERROR: + return "MODEL_CONTEXT_ERROR"; + case MODEL_GENERATION_ERROR: + return "MODEL_GENERATION_ERROR"; + case MODEL_SETUP_ERROR: + return "MODEL_SETUP_ERROR"; + case MODEL_UNKNOWN_ERROR: + return "MODEL_UNKNOWN_ERROR"; + case MODEL_INVALID_ARGUMENT_ERROR: + return "MODEL_INVALID_ARGUMENT_ERROR"; + case MODEL_FILE_ERROR: + return "MODEL_FILE_ERROR"; + default: + return "INVALID_ERROR_CODE"; + } +} + +} // namespace qnn_wrapper_api diff --git a/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.hpp b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.hpp new file mode 100644 index 000000000..4327b23f5 --- /dev/null +++ b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.hpp @@ -0,0 +1,203 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "QNN/QnnContext.h" +#include "QNN/QnnGraph.h" +#include "QNN/QnnTensor.h" +#include "QNN/QnnTypes.h" +#include + +namespace qnn_wrapper_api { + +// macro utils + +// Enables FILE[LINE]: FMT for VALIDATE macro +#ifdef QNN_ENABLE_DEBUG + +#define PRINTF(fmt, ...) \ + do { \ + printf("%s[%d]: ", __FILE__, __LINE__); \ + printf((fmt), ##__VA_ARGS__); \ + } while (0) + +#else + +#define PRINTF(fmt, ...) \ + do { \ + printf((fmt), ##__VA_ARGS__); \ + } while (0) + +#endif + +#ifdef QNN_ENABLE_DEBUG +#define PRINT_DEBUG(fmt, ...) \ + do { \ + printf("[ DEBUG ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) +#else +#define PRINT_DEBUG(fmt, ...) +#endif + +// Enables ERROR tag for errors +#define PRINT_ERROR(fmt, ...) \ + do { \ + printf("[ ERROR ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) + +// Enables WARNING tag for errors +#define PRINT_WARNING(fmt, ...) 
\ + do { \ + printf("[ WARNING ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) + +// Enables INFO tag for errors +#define PRINT_INFO(fmt, ...) \ + do { \ + printf("[ INFO ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) + +#define STRINGFY(str) str +#define STRINGFYVALUE(str) STRINGFY(str) + +// Ensures ModelError_t returning functions return MODEL_NO_ERROR +// retStatus should be set to MODEL_NO_ERROR before passing to macro +#define VALIDATE(value, retStatus) \ + do { \ + retStatus = value; \ + if (retStatus != qnn_wrapper_api::MODEL_NO_ERROR) { \ + PRINT_ERROR("%s expected MODEL_NO_ERROR, got %s\n", #value, \ + getModelErrorName(retStatus).c_str()); \ + return retStatus; \ + } \ + } while (0) + +// macros for retrieving binary data +#define BINVARSTART(NAME) \ + ({ \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_start[]; \ + (void *)_binary_obj_binary_##NAME##_raw_start; \ + }) +#define BINVAREND(NAME) \ + ({ \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_end[]; \ + (void *)_binary_obj_binary_##NAME##_raw_end; \ + }) +#define BINLEN(NAME) \ + ({ \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_start[]; \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_end[]; \ + (uint32_t)((_binary_obj_binary_##NAME##_raw_end) - \ + (_binary_obj_binary_##NAME##_raw_start)); \ + }) + +typedef enum ModelError { + MODEL_NO_ERROR = 0, + MODEL_TENSOR_ERROR = 1, + MODEL_PARAMS_ERROR = 2, + MODEL_NODES_ERROR = 3, + MODEL_GRAPH_ERROR = 4, + MODEL_CONTEXT_ERROR = 5, + MODEL_GENERATION_ERROR = 6, + MODEL_SETUP_ERROR = 7, + MODEL_INVALID_ARGUMENT_ERROR = 8, + MODEL_FILE_ERROR = 9, + MODEL_MEMORY_ALLOCATE_ERROR = 10, + // Value selected to ensure 32 bits. + MODEL_UNKNOWN_ERROR = 0x7FFFFFFF +} ModelError_t; + +/** + * @brief Returns the error message associated with a given error code + * + * @param[in] modelError ModelError_t error code + * + * @return string message + */ +std::string getModelErrorName(ModelError_t modelError); + +typedef struct GraphInfo { + Qnn_GraphHandle_t graph; + char *graphName; + Qnn_Tensor_t *inputTensors; + uint32_t numInputTensors; + Qnn_Tensor_t *outputTensors; + uint32_t numOutputTensors; +} GraphInfo_t; +typedef GraphInfo_t *GraphInfoPtr_t; + +typedef struct GraphConfigInfo { + char *graphName; + const QnnGraph_Config_t **graphConfigs; +} GraphConfigInfo_t; + +/** + * @brief Helper function to get Qnn GraphConfig structure from provided + * GraphConfigInfo using graphName. + * + * @param[in] graphName the Qnn graphName to use for lookup + * + * @param[in] graphsConfigInfo array of GraphConfig_t objects + * + * @param[in] numGraphsConfigInfo the number of array elements in + * graphConfigInfo + * + * @param[out] graphConfigs the result of query of graphName from + * graphsConfigInfo if successful. + * + * @return Error code + * + */ +ModelError_t getQnnGraphConfigFromInfo( + const char *graphName, const GraphConfigInfo_t **graphsConfigInfo, + const uint32_t numGraphsConfigInfo, const QnnGraph_Config_t **&graphConfigs); + +/** + * @brief Deep Copies QnnTensor_t structs to a pointer array destination + * location. Note: The copy will be stored on the heap and as such requires + * caller to make appropriate free call(s) using function below. 
Note 2: + * deepCopy is only done for metadata + * + * @param[in] source tensor object to copy from + * + * @param[in] destination tensor object to copy to + * + * @return Error code + */ +ModelError_t deepCopyQnnTensors(Qnn_Tensor_t &source, + Qnn_Tensor_t &destination); + +/** + * @brief Frees all memory allocated tensor attributes. + * + * @param[in] tensor Qnn_Tensor_t object to free + * + * @return Error code + */ +ModelError_t freeQnnTensor(Qnn_Tensor_t &tensor); + +/** + * @brief Loops through and frees all memory allocated tensor attributes for + * each tensor object. + * + * @param[in] tensors array of tensor objects to free + * + * @param[in] numTensors length of the above tensors array + * + * @return Error code + */ +ModelError_t freeQnnTensors(Qnn_Tensor_t *&tensors, uint32_t numTensors); + +size_t memscpy(void *dst, size_t dstSize, const void *src, size_t copySize); + +} // namespace qnn_wrapper_api diff --git a/nntrainer/npu/qnn/meson.build b/nntrainer/npu/qnn/meson.build new file mode 100644 index 000000000..3f259ddd6 --- /dev/null +++ b/nntrainer/npu/qnn/meson.build @@ -0,0 +1 @@ +subdir('LLaMAPackage') diff --git a/nntrainer/npu/qnn/op/QNNLinear.cpp b/nntrainer/npu/qnn/op/QNNLinear.cpp new file mode 100644 index 000000000..e31612449 --- /dev/null +++ b/nntrainer/npu/qnn/op/QNNLinear.cpp @@ -0,0 +1,359 @@ +#include "QNNLinear.hpp" +#include "QnnTypes.h" +#include +#include + +namespace nntrainer { +QNNLinear::QNNLinear() { + // weight_.setBackend(bn); + // bias_.setBackend(bn); + + // weightScale_.setBackend(bn); + // biasScale_.setBackend(bn); + // outputScale_.setBackend(bn); + // inputScale_.setBackend(bn); +} + +// QNNLinear::QNNLinear(Backend *bn, string opName, int in_features, int +// out_features, bool bias) : +// QNNCommonOp(bn, opName), in_features_(in_features), +// out_features_(out_features), support_bias_(bias) { +// weight_.setBackend(bn); +// bias_.setBackend(bn); + +// weightScale_.setBackend(bn); +// biasScale_.setBackend(bn); +// outputScale_.setBackend(bn); +// inputScale_.setBackend(bn); +// } + +// ErrorCode QNNLinear::reshape(vector> inputs, +// vector> outputs) { +// assert(inputs.size() == 1); +// assert(outputs.size() == 1); +// // N | C | H | W +// // ----------------------------------------------- +// // 1 |out_channel | in_channel | 1 +// // |out_features| in_features | +// // ----------------------------------------------- +// // batch |in_channel | seq_len | 1 +// // |in_features | inputs[0]->sequence() | +// // ----------------------------------------------- +// // batch |out_channel | seq_len | 1 +// // |out_features| inputs[0]->sequence() | +// assert(inputs[0]->head() == 1); +// assert(in_features_ == inputs[0]->dimension()); +// outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), +// inputs[0]->sequence(), out_features_); return Op::reshape(inputs, +// outputs); +// } + +// ErrorCode QNNLinear::setUp(vector> inputs, +// vector> outputs) { +// outputs[0]->setDtype(MLLM_TYPE_I8); +// // add matmul param to qnn +// vector paramsMatmul = { +// {.paramType = QNN_PARAMTYPE_SCALAR, +// .name = "transpose_in0", +// .scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = +// 0}}}, +// {.paramType = QNN_PARAMTYPE_SCALAR, +// .name = "transpose_in1", +// .scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = +// 1}}}}; + +// uint32_t +// dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_dilation[] = {2}; +// uint32_t InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_dilation[] = {1, +// 1}; uint32_t +// 
dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount[] = +// {2, 2}; uint32_t +// InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount[] = {0, 0, 0, 0}; +// uint32_t dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride[] +// = {2}; uint32_t InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride[] = +// {1, 1}; + +// vector params_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D = +// { +// {.paramType = QNN_PARAMTYPE_TENSOR, +// .name = "stride", +// .tensorParam = +// (Qnn_Tensor_t){ +// .version = QNN_TENSOR_VERSION_1, +// .v1 = {.id = 0, +// .name = +// "InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride", +// .type = QNN_TENSOR_TYPE_STATIC, +// .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = QNN_DATATYPE_UINT_32, +// .quantizeParams = {QNN_DEFINITION_UNDEFINED, +// QNN_QUANTIZATION_ENCODING_UNDEFINED, +// {.scaleOffsetEncoding = {.scale = +// 0.0000000000000000f, +// .offset = +// 0}}}, +// .rank = 1, +// .dimensions = +// dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride, +// .memType = QNN_TENSORMEMTYPE_RAW, +// .clientBuf = +// {.data = (uint8_t +// *)InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride, +// .dataSize = 8}}}}, +// {.paramType = QNN_PARAMTYPE_TENSOR, +// .name = "pad_amount", +// .tensorParam = +// (Qnn_Tensor_t){ +// .version = QNN_TENSOR_VERSION_1, +// .v1 = {.id = 0, +// .name = +// "InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount", +// .type = QNN_TENSOR_TYPE_STATIC, +// .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = QNN_DATATYPE_UINT_32, +// .quantizeParams = {QNN_DEFINITION_UNDEFINED, +// QNN_QUANTIZATION_ENCODING_UNDEFINED, +// {.scaleOffsetEncoding = {.scale = +// 0.0000000000000000f, +// .offset = +// 0}}}, +// .rank = 2, +// .dimensions = +// dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount, +// .memType = QNN_TENSORMEMTYPE_RAW, +// .clientBuf = +// {.data = (uint8_t *) +// InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount, +// .dataSize = 16}}}}, + +// }; + +// // add weight tensor to qnn +// uint32_t dimensionsWeight[4] = {1, 1, +// static_cast(weight_.sequence()), +// static_cast(weight_.dimension())}; + +// auto qnnQuantDefined = QNN_DEFINITION_UNDEFINED; +// float weightScale = 0; + +// qnnQuantDefined = QNN_DEFINITION_DEFINED; +// weightScale = weightScale_.hostPtr()[0]; + +// qnnBackend_->modelAddTensor(weight_.name(), (Qnn_Tensor_t){ +// .version = +// QNN_TENSOR_VERSION_1, .v1 +// = { +// .id = 0, +// .name = +// weight_.name().c_str(), +// .type = +// QNN_TENSOR_TYPE_STATIC, +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_SFIXED_POINT_8, +// .quantizeParams = +// {qnnQuantDefined, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = +// {.scale +// = +// weightScale, +// .offset +// = +// 0}}}, +// .rank = 4, +// .dimensions = +// dimensionsWeight, +// .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = +// weight_.hostPtr(), +// .dataSize +// = +// (uint32_t)weight_.cntSize()}}}); +// // free weight host memory +// weight_.free(); + +// // dimensions of matmul output and bias +// uint32_t dimensionsOutput[4] = +// {static_cast(outputs[0]->batch()), +// static_cast(outputs[0]->sequence()), +// static_cast(outputs[0]->head()), +// static_cast(outputs[0]->dimension())}; + +// auto outName = outputs[0]->name(); + +// // if don't support bias, just dequantize and write to tensor with name +// of outputs[0] if (!support_bias_) { +// float outputScale = 0; +// outputScale = 
outputScale_.hostPtr()[0] / 127.0; +// outputScale = roundf(outputScale * 100000) / 100000; + +// vector matmulOut = {{QNN_TENSOR_VERSION_1, +// {.v1 = { +// .id = 0, +// .name = outName.c_str(), +// .type = +// getOutputTensorType(outputs[0]), +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_SFIXED_POINT_8, +// .quantizeParams = +// {QNN_DEFINITION_DEFINED, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = {.scale +// = +// outputScale, +// .offset = +// 0}}}, +// .rank = 4, +// .dimensions = +// dimensionsOutput, .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = +// nullptr, +// .dataSize = +// 0}}}}}; +// return graphAddNode(name() + ".linearint8", "Conv2d", +// {inputs[0]->name(), weight_.name()}, matmulOut, +// params_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D); +// } + +// // add bias tensor to qnn +// uint32_t dimensionsBias[1] = {(uint32_t)out_features_}; +// float biasScale = 0; + +// qnnQuantDefined = QNN_DEFINITION_DEFINED; +// biasScale = biasScale_.hostPtr()[0]; + +// qnnBackend_->modelAddTensor(bias_.name(), (Qnn_Tensor_t){ +// .version = +// QNN_TENSOR_VERSION_1, .v1 = +// { +// .id = 0, +// .name = +// bias_.name().c_str(), +// .type = +// QNN_TENSOR_TYPE_STATIC, +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_UFIXED_POINT_8, +// .quantizeParams = +// {qnnQuantDefined, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = +// {.scale +// = +// biasScale, +// .offset +// = +// -128}}}, +// .rank = 1, +// .dimensions = +// dimensionsBias, +// .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = +// bias_.hostPtr(), +// .dataSize +// = +// (uint32_t)bias_.cntSize()}}}); +// // free bias host memory +// bias_.free(); + +// float outputScale = 0; +// outputScale = outputScale_.hostPtr()[0] / 127.0; +// outputScale = roundf(outputScale * 100000) / 100000; + +// // final output +// vector biasOutput = {{QNN_TENSOR_VERSION_1, +// {.v1 = { +// .id = 0, +// .name = outName.c_str(), +// .type = +// getOutputTensorType(outputs[0]), +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_SFIXED_POINT_8, +// .quantizeParams = +// {QNN_DEFINITION_DEFINED, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = {.scale = +// outputScale, +// .offset = +// 0}}}, +// .rank = 4, +// .dimensions = dimensionsOutput, +// .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = nullptr, +// .dataSize = +// 0}}}}}; +// return graphAddNode(name() + ".linearint8", "Conv2d", {inputs[0]->name(), +// weight_.name(), bias_.name()}, biasOutput, +// params_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D); +// } + +// ErrorCode QNNLinear::load(AbstructLoader &loader) { +// weight_.setName(name() + ".weight"); +// weight_.reshape(1, 1, in_features_, out_features_); +// weight_.setDtype(MLLM_TYPE_I8); +// weight_.alloc(); +// loader.load(&weight_); + +// bias_.setName(name() + ".bias"); +// bias_.reshape(1, 1, 1, out_features_); +// bias_.setDtype(MLLM_TYPE_I8); +// bias_.alloc(); +// if (support_bias_) { +// loader.load(&bias_); +// // sign to unsign +// for (int i = 0; i < out_features_; i++) { +// int32_t val = bias_.dataAt(0, 0, 0, i); +// val += 128; +// bias_.setDataAt(0, 0, 0, i, (uint8_t)val); +// } +// } else { +// memset(bias_.hostPtr(), 0, bias_.cntSize()); +// } + +// weightScale_.setName(name() + ".weight.scale"); +// weightScale_.reshape(1, 1, 1, 1); +// 
weightScale_.setDtype(MLLM_TYPE_F32);
+// weightScale_.alloc();
+// loader.load(&weightScale_);
+
+// biasScale_.setName(name() + ".bias.scale");
+// biasScale_.reshape(1, 1, 1, 1);
+// biasScale_.setDtype(MLLM_TYPE_F32);
+// biasScale_.alloc();
+// loader.load(&biasScale_);
+
+// outputScale_.setName(name() + ".output_scale");
+// outputScale_.reshape(1, 1, 1, 1);
+// outputScale_.setDtype(MLLM_TYPE_F32);
+// outputScale_.alloc();
+// loader.load(&outputScale_);
+
+// inputScale_.setName(name() + ".input_scale");
+// inputScale_.reshape(1, 1, 1, 1);
+// inputScale_.setDtype(MLLM_TYPE_F32);
+// inputScale_.alloc();
+// loader.load(&inputScale_);
+
+// return Op::load(loader);
+// }
+
+// ErrorCode QNNLinear::free(vector> inputs,
+// vector> outputs) {
+// return Op::free(inputs, outputs);
+// }
+} // namespace nntrainer
diff --git a/nntrainer/npu/qnn/op/QNNLinear.hpp b/nntrainer/npu/qnn/op/QNNLinear.hpp
new file mode 100644
index 000000000..1542849f2
--- /dev/null
+++ b/nntrainer/npu/qnn/op/QNNLinear.hpp
@@ -0,0 +1,44 @@
+
+#ifndef NNTR_QNNLINEAR_H
+#define NNTR_QNNLINEAR_H
+
+namespace nntrainer {
+class QNNLinear {
+public:
+  QNNLinear();
+  virtual ~QNNLinear() = default;
+  // virtual ErrorCode reshape(vector> inputs,
+  // vector> outputs) override; virtual ErrorCode
+  // setUp(vector> inputs, vector>
+  // outputs) override; virtual ErrorCode load(AbstructLoader &loader) override;
+  // virtual ErrorCode free(vector> inputs,
+  // vector> outputs) override;
+
+private:
+  int in_features_;
+  int out_features_;
+  bool support_bias_;
+  // Tensor weight_;
+  // Tensor bias_;
+  // #ifdef SMOOTHQUANT
+  // Tensor weightScale_;
+  // Tensor biasScale_;
+  // #endif
+  // Tensor outputScale_;
+  // Tensor inputScale_;
+};
+
+// class QNNLinearINT8Creator : public QNNBackend::Creator {
+// public:
+//   virtual Op *create(OpParam op_param, Backend *bn, string name) const {
+//     int in_features = op_param["in_features"];
+//     int out_features = op_param["out_features"];
+//     int bias = op_param["bias"];
+//     return new QNNLinearINT8(bn, name, in_features, out_features,
+//     (bool)bias);
+//   }
+// };
+
+} // namespace nntrainer
+
+#endif
diff --git a/nntrainer/npu/qnn/tools/prepare_ops.sh b/nntrainer/npu/qnn/tools/prepare_ops.sh
new file mode 100755
index 000000000..fa30fcebd
--- /dev/null
+++ b/nntrainer/npu/qnn/tools/prepare_ops.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+if [ "x${HEXAGON_SDK_ROOT}" = "x" ]; then
+  echo "HEXAGON_SDK_ROOT is not set, we will set env using /local/mnt/workspace/Qualcomm/Hexagon_SDK/5.5.2.0/setup_sdk_env.source"
+  ln -s /local/mnt/workspace/Qualcomm/Hexagon_SDK/5.5.2.0/ HexagonSDK
+  source HexagonSDK/setup_sdk_env.source
+fi
+
+echo "Setting QNN_SDK_ROOT to /opt/qcom/aistack/qairt/2.28.2.241116/"
+ln -s /opt/qcom/aistack/qairt/2.28.2.241116/ qairt
+export QNN_SDK_ROOT=/opt/qcom/aistack/qairt/2.28.2.241116/
+source ${QNN_SDK_ROOT}/bin/envsetup.sh
+
+echo "QNN_SDK_ROOT=./qairt"
+echo "HEXAGON_SDK_ROOT=./HexagonSDK"
+
+echo "ANDROID_ROOT_DIR=${ANDROID_ROOT_DIR}"
+echo "ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT}"
+echo "QNX_BIN_DIR=${QNX_BIN_DIR}"
+echo "LV_TOOLS_DIR=${LV_TOOLS_DIR}"
+echo "LRH_TOOLS_DIR=${LRH_TOOLS_DIR}"
+
+echo "DEFAULT_HEXAGON_TOOLS_ROOT=${DEFAULT_HEXAGON_TOOLS_ROOT}"
+echo "DEFAULT_DSP_ARCH=${DEFAULT_DSP_ARCH}"
+echo "DEFAULT_BUILD=${DEFAULT_BUILD}"
+echo "DEFAULT_HLOS_ARCH=${DEFAULT_HLOS_ARCH}"
+echo "DEFAULT_TOOLS_VARIANT=${DEFAULT_TOOLS_VARIANT}"
+echo "DEFAULT_NO_QURT_INC=${DEFAULT_NO_QURT_INC}"
+echo "DEFAULT_TREE=${DEFAULT_TREE}"
+echo "CMAKE_ROOT_PATH=${CMAKE_ROOT_PATH}"
+echo "DEBUGGER_UTILS=${DEBUGGER_UTILS}"
+echo "HEXAGONSDK_TELEMATICS_ROOT=${HEXAGONSDK_TELEMATICS_ROOT}"
+
+echo "AISW_SDK_ROOT=${AISW_SDK_ROOT}"
+echo "PYTHONPATH=${PYTHONPATH}"
+echo "PATH=${PATH}"
+echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"
+echo "HEXAGON_TOOLS_DIR=${HEXAGON_TOOLS_DIR}"
+echo "SNPE_ROOT=${SNPE_ROOT}"
+
+cd LLaMAPackage
+
+make htp_v75 && make htp_aarch64
+
diff --git a/nntrainer/qnn_context.cpp b/nntrainer/qnn_context.cpp
new file mode 100644
index 000000000..b6c200792
--- /dev/null
+++ b/nntrainer/qnn_context.cpp
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Debadri Samaddar
+ *
+ * @file qnn_context.cpp
+ * @date 23 Feb 2024
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Debadri Samaddar
+ * @author Niket Agarwal
+ * @author Thummala Pallavi
+ * @bug No known bugs except for NYI items
+ * @brief This file contains app context related functions and classes that
+ * manages the global configuration of the current OpenCL environment. It also
+ * creates the OpenCL command queue and context.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace nntrainer {
+
+std::mutex cl_factory_mutex;
+
+std::once_flag global_cl_context_init_flag;
+
+static void add_default_object(ClContext &cc) {
+
+  if (FullyConnectedLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer,
+                       FullyConnectedLayerCl::type,
+                       ml::train::LayerType::LAYER_FC);
+  }
+
+  cc.registerFactory(nntrainer::createLayer,
+                     AdditionLayerCL::type,
+                     ml::train::LayerType::LAYER_ADDITION);
+
+  // @todo swiglulayercl also needs to be updated.
+  cc.registerFactory(nntrainer::createLayer, SwiGLULayerCl::type,
+                     ml::train::LayerType::LAYER_SWIGLU);
+
+  if (ReshapeLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer,
+                       ReshapeLayerCl::type,
+                       ml::train::LayerType::LAYER_RESHAPE);
+  }
+
+  // @todo rmsnormlayercl also needs to be updated.
+  cc.registerFactory(nntrainer::createLayer,
+                     RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM);
+
+  if (ConcatLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer,
+                       ConcatLayerCl::type, ml::train::LayerType::LAYER_CONCAT);
+  }
+
+  // @todo transposelayercl also needs to be updated.
+  cc.registerFactory(nntrainer::createLayer,
+                     TransposeLayerCl::type,
+                     ml::train::LayerType::LAYER_TRANSPOSE);
+}
+
+static void registerer(ClContext &cc) noexcept {
+  try {
+    cc.initBlasClKernels();
+    add_default_object(cc);
+  } catch (std::exception &e) {
+    ml_loge("cl_context: registering layers failed!!, reason: %s", e.what());
+  } catch (...)
{ + ml_loge("cl_context: registering layer failed due to unknown reason"); + } +}; + +ClContext &ClContext::Global() { + static ClContext instance; + + // initializing commandqueue and context + bool result = instance.clInit(); + + if (!result) { + ml_loge("cl_context: opencl command queue creation failed"); + } + + /// in g++ there is a bug that hangs up if caller throws, + /// so registerer is noexcept although it'd better not + /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70298 + std::call_once(global_cl_context_init_flag, registerer, std::ref(instance)); + return instance; +} + +template +const int ClContext::registerFactory(const FactoryType factory, + const std::string &key, + const int int_key) { + static_assert(isSupported::value, + "cl_context: given type is not supported for current context"); + + auto &index = std::get>(factory_map); + auto &str_map = std::get>(index); + auto &int_map = std::get(index); + + std::string assigned_key = key == "" ? factory({})->getType() : key; + + std::transform(assigned_key.begin(), assigned_key.end(), assigned_key.begin(), + [](unsigned char c) { return std::tolower(c); }); + + const std::lock_guard lock(cl_factory_mutex); + if (str_map.find(assigned_key) != str_map.end()) { + std::stringstream ss; + ss << "cl_context: cannot register factory with already taken key: " << key; + throw std::invalid_argument(ss.str().c_str()); + } + + if (int_key != -1 && int_map.find(int_key) != int_map.end()) { + std::stringstream ss; + ss << "cl_context: cannot register factory with already taken int key: " + << int_key; + throw std::invalid_argument(ss.str().c_str()); + } + + int assigned_int_key = int_key == -1 ? str_map.size() + 1 : int_key; + + str_map[assigned_key] = factory; + int_map[assigned_int_key] = assigned_key; + + ml_logd("cl_context: factory has registered with key: %s, int_key: %d", + assigned_key.c_str(), assigned_int_key); + + return assigned_int_key; +} + +void ClContext::initBlasClKernels() { + if (blas_kernels_initialized) { + ml_logi( + "ClContext: Default blas kernels already registered and initialized"); + return; + } + + registerClKernel(sgemv_cl_kernel_, "sgemv_cl"); + registerClKernel(sgemv_cl_noTrans_kernel_, "sgemv_cl_noTrans"); + registerClKernel(dot_cl_kernel_, "dot_cl"); + registerClKernel(sgemm_cl_noTrans_kernel_, "sgemm_cl_noTrans"); + registerClKernel(sgemm_cl_transA_kernel_, "sgemm_cl_transA"); + registerClKernel(sgemm_cl_transB_kernel_, "sgemm_cl_transB"); + registerClKernel(sgemm_cl_transAB_kernel_, "sgemm_cl_transAB"); + registerClKernel(addition_cl_kernel_, "addition_cl"); + registerClKernel(sscal_cl_kernel_, "sscal_cl"); + +#ifdef ENABLE_FP16 + registerClKernel(sgemv_cl_kernel_fp16_, "sgemv_cl_fp16"); + registerClKernel(sgemv_cl_noTrans_kernel_fp16_, "sgemv_cl_noTrans_fp16"); + registerClKernel(dot_cl_kernel_fp16_, "dot_cl_fp16"); + registerClKernel(sgemm_cl_noTrans_kernel_fp16_, "sgemm_cl_noTrans_fp16"); + registerClKernel(sgemm_cl_transA_kernel_fp16_, "sgemm_cl_transA_fp16"); + registerClKernel(sgemm_cl_transB_kernel_fp16_, "sgemm_cl_transB_fp16"); + registerClKernel(sgemm_cl_transAB_kernel_fp16_, "sgemm_cl_transAB_fp16"); + registerClKernel(addition_cl_kernel_fp16_, "addition_cl_fp16"); + registerClKernel(sscal_cl_kernel_fp16_, "sscal_cl_fp16"); +#endif + blas_kernels_initialized = true; +} + +void ClContext::initAttentionClKernels() { + if (attention_kernels_initialized) { + ml_logi("ClContext: Default attention kernels already registered and " + "initialized"); + return; + } + + 
registerClKernel(rotary_emb_cl_kernel_, "rotary_emb_cl"); + +#ifdef ENABLE_FP16 + registerClKernel(rotary_emb_cl_kernel_fp16_, "rotary_emb_cl_fp16"); +#endif + attention_kernels_initialized = true; +} + +const ClContext::SharedPtrClKernel +ClContext::registerClKernel(std::string kernel_string, + std::string kernel_name) { + // check if created before + if (ocl_kernel_map.find(kernel_name) != ocl_kernel_map.end()) { + ml_logi("Kernel already registered and initialized: %s", + kernel_name.c_str()); + return ocl_kernel_map[kernel_name]; + } + + // creating shared_ptr for kernel object + SharedPtrClKernel kernelPtr = std::make_shared(); + if (!clCreateKernel(kernel_string, kernel_name, kernelPtr)) { + ml_loge("Failed to register kernel %s", kernel_name.c_str()); + return nullptr; + } + // add to map + ocl_kernel_map.emplace(kernel_name, kernelPtr); + return ocl_kernel_map[kernel_name]; +} + +bool ClContext::clCreateKernel(std::string &kernel_string, + std::string &kernel_name, + const SharedPtrClKernel &kernel_ptr_) { + + ml_logi("Kernel initializing: %s", kernel_name.c_str()); + + bool result = false; + + do { + opencl::Program program; + + // reading binary + std::ifstream fs(opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name + + "_kernel.bin", + std::ios::binary | std::ios::in); + + if (fs.good()) { + fs.seekg(0, std::ios::end); + size_t binary_size = fs.tellg(); + fs.seekg(0, std::ios::beg); + + unsigned char chunk[binary_size]; + fs.read((char *)chunk, binary_size); + + result = program.CreateCLProgramWithBinary( + context_inst_.GetContext(), context_inst_.GetDeviceId(), binary_size, + chunk, + opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name + + "_kernel.bin", + ""); + } else { + result = + program.CreateCLProgram(context_inst_.GetContext(), + context_inst_.GetDeviceId(), kernel_string, ""); + } + + if (!result) { + break; + } + + result = kernel_ptr_->CreateKernelFromProgram(program, kernel_name); + if (!result) { + break; + } + + } while (false); + + return result; +} + +/** + * @copydoc const int ClContext::registerFactory + */ +template const int ClContext::registerFactory( + const FactoryType factory, const std::string &key, + const int int_key); + +} // namespace nntrainer diff --git a/nntrainer/qnn_context.h b/nntrainer/qnn_context.h new file mode 100644 index 000000000..025365546 --- /dev/null +++ b/nntrainer/qnn_context.h @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Copyright (C) 2024 Debadri Samaddar + * + * @file cl_context.h + * @date 23 Feb 2024 + * @see https://github.com/nnstreamer/nntrainer + * @author Debadri Samaddar + * @bug No known bugs except for NYI items + * @brief This file contains app context related functions and classes that + * manages the global configuration of the current OpenCL environment. It also + * creates the OpenCL command queue and context. 
+ */ + +#ifndef __CL_CONTEXT_H__ +#define __CL_CONTEXT_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +namespace nntrainer { + +extern std::mutex cl_factory_mutex; + +/** + * @class ClContext contains user-dependent configuration for OpenCL support + * @brief OpenCL support for app context + */ + +class ClContext { + +public: + using PropsType = std::vector; + + template using PtrType = std::unique_ptr; + + using SharedPtrClKernel = std::shared_ptr; + + template + using FactoryType = std::function(const PropsType &)>; + + template + using PtrFactoryType = PtrType (*)(const PropsType &); + + template + using StrIndexType = std::unordered_map>; + + /** integer to string key */ + using IntIndexType = std::unordered_map; + + /** string to kernel pointer map*/ + using OclKernelMap = std::unordered_map; + + /** + * This type contains tuple of + * 1) integer -> string index + * 2) string -> factory index + */ + template + using IndexType = std::tuple, IntIndexType>; + + template using FactoryMap = std::tuple...>; + + // getting static instance of commandqueue and opencl context + opencl::CommandQueueManager &command_queue_inst_ = + opencl::CommandQueueManager::GetInstance(); + + opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance(); + + /** + * @brief Default constructor + */ + ClContext() = default; + + /** + * + * @brief Get Global cl context. + * + * @return ClContext& + */ + static ClContext &Global(); + + /** + * @brief Factory register function, use this function to register custom + * object + * + * @tparam T object to create. Currently Layer is supported + * @param factory factory function that creates std::unique_ptr + * @param key key to access the factory, if key is empty, try to find key by + * calling factory({})->getType(); + * @param int_key key to access the factory by integer, if it is -1(default), + * the function automatically unsigned the key and return + * @return const int unique integer value to access the current factory + * @throw invalid argument when key and/or int_key is already taken + */ + template + const int registerFactory(const PtrFactoryType factory, + const std::string &key = "", + const int int_key = -1) { + FactoryType f = factory; + return registerFactory(f, key, int_key); + } + + /** + * @brief Factory register function, use this function to register custom + * object + * + * @tparam T object to create. 
Currently Layer is supported + * @param factory factory function that creates std::unique_ptr + * @param key key to access the factory, if key is empty, try to find key by + * calling factory({})->getType(); + * @param int_key key to access the factory by integer, if it is -1(default), + * the function automatically unsigned the key and return + * @return const int unique integer value to access the current factory + * @throw invalid argument when key and/or int_key is already taken + */ + template + const int registerFactory(const FactoryType factory, + const std::string &key = "", + const int int_key = -1); + + /** + * @brief Create an Object from the integer key + * + * @tparam T Type of Object, currently, Only Layer is supported + * @param int_key integer key + * @param props property + * @return PtrType unique pointer to the object + */ + template + PtrType createObject(const int int_key, + const PropsType &props = {}) const { + static_assert(isSupported::value, + "given type is not supported for current app context"); + auto &index = std::get>(factory_map); + auto &int_map = std::get(index); + + const auto &entry = int_map.find(int_key); + + if (entry == int_map.end()) { + std::stringstream ss; + ss << "Int Key is not found for the object. Key: " << int_key; + throw exception::not_supported(ss.str().c_str()); + } + + // entry is an object of int_map which is an unordered_map + return createObject(entry->second, props); + } + + /** + * @brief Create an Object from the string key + * + * @tparam T Type of object, currently, only Layer is supported + * @param key integer key + * @param props property + * @return PtrType unique pointer to the object + */ + template + PtrType createObject(const std::string &key, + const PropsType &props = {}) const { + auto &index = std::get>(factory_map); + auto &str_map = std::get>(index); + + std::string lower_key; + lower_key.resize(key.size()); + + std::transform(key.begin(), key.end(), lower_key.begin(), + [](unsigned char c) { return std::tolower(c); }); + + const auto &entry = str_map.find(lower_key); + + if (entry == str_map.end()) { + std::stringstream ss; + ss << "Key is not found for the object. 
Key: " << lower_key; + throw exception::not_supported(ss.str().c_str()); + } + + // entry -> object of str_map -> unordered_map> + return entry->second(props); + } + + /** + * @brief register or return already present OpenCl kernel pointer + * @param kernel_string kernel implementation string + * @param kernel_name kernel name + * @return std::shared_ptr + */ + const SharedPtrClKernel registerClKernel(std::string kernel_string, + std::string kernel_name); + + /** + * @brief Initialize and register all blas OpenCl kernels + */ + void initBlasClKernels(); + + /** + * @brief Initialize and register all attention OpenCl kernels + */ + void initAttentionClKernels(); + + /** + * @brief destructor to release opencl commandQueue + */ + ~ClContext() { + if (cl_initialized) { + command_queue_inst_.ReleaseCommandQueue(); + // getContext() is called by clCreateKernel + context_inst_.ReleaseContext(); + } + }; + +private: + // flag to check opencl commandqueue and context inititalization + bool cl_initialized = false; + + // flag to check default blas kernels registered or not + bool blas_kernels_initialized = false; + + // flag to check default attention kernels registered or not + bool attention_kernels_initialized = false; + + FactoryMap factory_map; + + template struct isSupportedHelper; + + // global map to store initialized opencl::Kernel + inline static OclKernelMap ocl_kernel_map; + + /** + * @brief supportHelper to check if given type is supported within cl context + */ + template + struct isSupportedHelper> { + static constexpr bool value = + (std::is_same_v, std::decay_t> || ...); + }; + + /** + * @brief supportHelper to check if given type is supported within cl context + */ + template + struct isSupported : isSupportedHelper {}; + + /** + * @brief Initialize opencl commandqueue and context + * @return true if OpenCL context and command queue creation is successful, + * false otherwise + */ + + bool clInit() { + // if commandqueue already created + if (cl_initialized) + return true; + + // getContext() called inside createCommandQueue which creates clContext + bool result = command_queue_inst_.CreateCommandQueue(); + cl_initialized = result; + return cl_initialized; + }; + + /** + * @brief create OpenCl kernel + * @param kernel_string reference of implementation string + * @param kernel_name reference of kernel_name + * @param kernel_ptr_ reference of shared_ptr of Kernel + * @return true if successful, false otherwise + */ + bool clCreateKernel(std::string &kernel_string, std::string &kernel_name, + const SharedPtrClKernel &kernel_ptr_); +}; + +/** + * @copydoc const int ClContext::registerFactory + */ +extern template const int ClContext::registerFactory( + const FactoryType factory, const std::string &key, + const int int_key); + +} // namespace nntrainer + +#endif /* __CL_CONTEXT_H__ */
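Note (not part of the patch): the factory map declared in qnn_context.h above follows nntrainer's AppContext pattern, where a layer type is registered once under a string key (and an optional integer key) and later instantiated by key through the global context. A minimal usage sketch follows; the header names and the "custom_fc_cl" key are assumptions for illustration, and template arguments (which the rendering of the diff above has stripped) are written out explicitly here.

// usage_sketch.cpp - illustrative only; exercises the ClContext factory API
// documented above. Header paths are assumptions.
#include <memory>
#include <qnn_context.h>
#include <fc_layer_cl.h>

void context_usage_example() {
  using namespace nntrainer;

  // Global() creates the OpenCL command queue/context once and runs the
  // default registerer exactly once via std::call_once.
  ClContext &cc = ClContext::Global();

  // Layers registered in add_default_object() can be created by their
  // lower-cased string key ...
  std::unique_ptr<Layer> fc =
    cc.createObject<Layer>(FullyConnectedLayerCl::type);

  // ... or a factory can be registered under a new, unique key; reusing an
  // already-taken string or integer key throws std::invalid_argument.
  const int ikey = cc.registerFactory<Layer>(
    createLayer<FullyConnectedLayerCl>, "custom_fc_cl");
  std::unique_ptr<Layer> fc2 = cc.createObject<Layer>(ikey);
}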
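Similarly, registerClKernel() in qnn_context.cpp is idempotent per kernel name: it returns the cached kernel if the name was registered before, otherwise it first tries a prebuilt <name>_kernel.bin under opencl::Program::DEFAULT_KERNEL_PATH and only then falls back to an online build of the supplied source string. A short sketch of registering a custom kernel (the kernel body and the "scale_inplace" name are made up for illustration):

// kernel_registration_sketch.cpp - illustrative only.
#include <string>
#include <qnn_context.h> // header path is an assumption

// Hypothetical kernel: scales a buffer in place.
static const std::string scale_inplace_src = R"(
__kernel void scale_inplace(__global float *x, const float alpha) {
  const int i = get_global_id(0);
  x[i] = alpha * x[i];
})";

bool register_scale_kernel() {
  auto &cc = nntrainer::ClContext::Global();

  // First call: tries DEFAULT_KERNEL_PATH/scale_inplace_kernel.bin, then
  // compiles scale_inplace_src as a fallback.
  auto kernel = cc.registerClKernel(scale_inplace_src, "scale_inplace");

  // Subsequent calls with the same name return the cached shared_ptr;
  // nullptr means both the binary and the source build failed.
  return kernel != nullptr;
}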
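Finally, for the QNNLinear path that is still commented out in nntrainer/npu/qnn/op/QNNLinear.cpp above, the recurring step is describing a static weight buffer to QNN as a Qnn_Tensor_t (v1) with per-tensor scale/offset quantization before handing it to the graph builder. The sketch below only condenses what the commented-out code already does; modelAddTensor, the QnnTypes.h include path, and the helper signature are assumptions carried over from that code and the QNN SDK headers.

// qnn_weight_tensor_sketch.cpp - condensed from the commented-out QNNLinear
// code above; illustrative only. `dims` and `data` must outlive the tensor.
#include <cstdint>
#include <QnnTypes.h> // QNN SDK types; exact include path depends on the SDK layout

Qnn_Tensor_t make_int8_weight_tensor(const char *name, int8_t *data,
                                     uint32_t nbytes, uint32_t *dims,
                                     float scale) {
  Qnn_Tensor_t t = QNN_TENSOR_INIT; // init macro from QnnTypes.h
  t.version = QNN_TENSOR_VERSION_1;
  t.v1.id = 0;
  t.v1.name = name;
  t.v1.type = QNN_TENSOR_TYPE_STATIC; // weights are baked into the graph
  t.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  t.v1.dataType = QNN_DATATYPE_SFIXED_POINT_8; // int8 weights
  t.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_DEFINED;
  t.v1.quantizeParams.quantizationEncoding =
    QNN_QUANTIZATION_ENCODING_SCALE_OFFSET;
  t.v1.quantizeParams.scaleOffsetEncoding.scale = scale;
  t.v1.quantizeParams.scaleOffsetEncoding.offset = 0;
  t.v1.rank = 4; // e.g. {1, 1, in_features, out_features} as in the code above
  t.v1.dimensions = dims;
  t.v1.memType = QNN_TENSORMEMTYPE_RAW;
  t.v1.clientBuf.data = data;
  t.v1.clientBuf.dataSize = nbytes;
  // The commented-out code then calls qnnBackend_->modelAddTensor(name, t);
  return t;
}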