diff --git a/meson_options.txt b/meson_options.txt index e904d9de3..968dc214f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -45,6 +45,7 @@ option('enable-avx', type: 'boolean', value: true) option('enable-opencl', type: 'boolean', value: false) option('enable-biqgemm', type: 'boolean', value: false) option('enable-benchmarks', type: 'boolean', value : false) +option('enable-qnn', type: 'boolean', value: true) # ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api ) # To inter-operate with nnstreamer and ML-API packages, you need to enable this. @@ -57,3 +58,4 @@ option('nnstreamer-subplugin-install-path', type: 'string', value: '/usr/lib/nns # application related options option('enable_encoder', type: 'boolean', value: false) + diff --git a/nntrainer/meson.build b/nntrainer/meson.build index ed15b8f2a..383e144be 100644 --- a/nntrainer/meson.build +++ b/nntrainer/meson.build @@ -50,6 +50,11 @@ if get_option('enable-opencl') nntrainer_elements += 'layers/cl_layers' endif +if get_option('enable-qnn') + message ('QNN build is enabled. Will work only if Qualcomm NPU is available.') + nntrainer_elements += 'npu' +endif + foreach elem : nntrainer_elements subdir(elem) nntrainer_inc += include_directories(elem) diff --git a/nntrainer/npu/meson.build b/nntrainer/npu/meson.build new file mode 100644 index 000000000..0773800f4 --- /dev/null +++ b/nntrainer/npu/meson.build @@ -0,0 +1 @@ +subdir('qnn') diff --git a/nntrainer/npu/qnn/PAL/include/PAL/Debug.hpp b/nntrainer/npu/qnn/PAL/include/PAL/Debug.hpp new file mode 100644 index 000000000..d03331c26 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/Debug.hpp @@ -0,0 +1,21 @@ +//============================================================================ +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================ + +#pragma once + +#define DEBUG_ON 0 + +#if DEBUG_ON +#define DEBUG_MSG(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } +#else +#define DEBUG_MSG(...) +#endif diff --git a/nntrainer/npu/qnn/PAL/include/PAL/Directory.hpp b/nntrainer/npu/qnn/PAL/include/PAL/Directory.hpp new file mode 100644 index 000000000..6ebef2288 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/Directory.hpp @@ -0,0 +1,81 @@ +//============================================================================== +// +// Copyright (c) 2008-2014, 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +//--------------------------------------------------------------------------- +/// @file +/// This file includes APIs for directory operations on supported platforms +//--------------------------------------------------------------------------- + +#pragma once + +#include + +#include "PAL/FileOp.hpp" + +namespace pal { +class Directory; +} + +class pal::Directory { +public: + using DirMode = pal::FileOp::FileMode; + //--------------------------------------------------------------------------- + /// @brief + /// Creates a directory in the file system. + /// @param path + /// Name of directory to create. + /// @param dirmode + /// Directory mode + /// @return + /// True if + /// 1. create a directory successfully + /// 2. 
or directory exist already + /// False otherwise + /// + /// For example: + /// + /// - Create a directory in default. + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// pal::Directory::Create(path, pal::Directory::DirMode::S_DEFAULT_); + /// pal::Directory::Create(path); + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// + /// - Create a directory with specific permission. + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// pal::Directory::Create(path, pal::Directory::DirMode::S_IRWXU_| + /// pal::Directory::DirMode::S_IRWXG_| + /// pal::Directory::DirMode::S_IRWXO_); + /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /// + /// @note For windows, dirmode is not used. + /// @note For linux, dirmode is used to set the permission of the folder. + //--------------------------------------------------------------------------- + static bool + create(const std::string &path, + pal::Directory::DirMode dirmode = pal::Directory::DirMode::S_DEFAULT_); + + //--------------------------------------------------------------------------- + /// @brief + /// Removes the entire directory whether it's empty or not. + /// @param path + /// Name of directory to delete. + /// @return + /// True if the directory was successfully deleted, false otherwise. + //--------------------------------------------------------------------------- + static bool remove(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief + /// Creates a directory and all parent directories required. + /// @param path + /// Path of directory to create. + /// @return + /// True if the directory was successfully created, false otherwise. + //--------------------------------------------------------------------------- + static bool makePath(const std::string &path); +}; diff --git a/nntrainer/npu/qnn/PAL/include/PAL/DynamicLoading.hpp b/nntrainer/npu/qnn/PAL/include/PAL/DynamicLoading.hpp new file mode 100644 index 000000000..1d1a13393 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/DynamicLoading.hpp @@ -0,0 +1,101 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
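// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// a minimal example of how the pal::Directory helpers declared above could be
// combined. The path "/tmp/qnn_cache" style argument is a placeholder.
#include <string>
#include "PAL/Directory.hpp"

static bool prepareCacheDir(const std::string &root) {
  // Build root and any missing parent directories first.
  if (!pal::Directory::makePath(root)) {
    return false;
  }
  // Create a subdirectory restricted to the current user.
  return pal::Directory::create(root + "/graphs",
                                pal::Directory::DirMode::S_IRWXU_);
}
// Cleanup of the whole tree, empty or not, would be:
//   pal::Directory::remove(root);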
+// +//============================================================================== + +//--------------------------------------------------------------------------- +/// @file +/// This file includes APIs for dynamic loading on supported platforms +//--------------------------------------------------------------------------- + +#pragma once + +#include + +namespace pal { +namespace dynamicloading { +// we only support subset of POSIX of dlopen/dlsym/dladdr/dlerror/dlclose +// except the following flags for dlopen, others should be done only +// when we really need them +// DL_NOW is MUST +// DL_LOCAL is enabled if not specified +enum { + DL_NOW = 0x0001, + DL_LOCAL = 0x0002, + DL_GLOBAL = 0x0004, +}; + +// specify this address to distingiush from NULL pointer +#define DL_DEFAULT (void *)(0x4) + +//--------------------------------------------------------------------------- +/// @brief +/// Loads the dynamic shared object +/// @param filename +/// If contains path separators, treat it as relative or absolute pathname +/// or search it for the rule of dynamic linker +/// @param flags +/// - DL_NOW: resolve undefined symbols before return. MUST be specified. +/// - DL_LOCAL: optional, but the default specified. Symbols defined in this +/// shared object are not made available to resolve references in +/// subsequently loaded shared objects +/// - DL_GLOBAL: optional, resolve symbol globally +/// @return +/// On success, a non-NULL handle for the loaded library. +/// On error, NULL +//--------------------------------------------------------------------------- +void *dlOpen(const char *filename, int flags); + +//--------------------------------------------------------------------------- +/// @brief +/// Obtain address of a symbol in a shared object or executable +/// @param handle +/// A handle of a dynamic loaded shared object returned by dlopen +/// @param symbol +/// A null-terminated symbol name +/// @return +/// On success, return the address associated with symbol +/// On error, NULL +//--------------------------------------------------------------------------- +void *dlSym(void *handle, const char *symbol); + +//--------------------------------------------------------------------------- +/// @brief +/// Translate the address of a symbol to the path of the belonging shared +/// object +/// @param addr +/// Address of symbol in a shared object +/// @param path +/// Full name of shared object that contains address, usually it is an +/// absolute path +/// @return +/// On success, return a non-zero value +/// On error, return 0 +//--------------------------------------------------------------------------- +int dlAddrToLibName(void *addr, std::string &name); + +//--------------------------------------------------------------------------- +/// @brief +/// Decrements the reference count on the dynamically loaded shared object +/// referred to by handle. If the reference count drops to 0, then the +/// object is unloaded. +/// @return +/// On success, 0; on error, a nonzero value +//--------------------------------------------------------------------------- +int dlClose(void *handle); + +//--------------------------------------------------------------------------- +/// @brief +/// Obtain error diagnostic for functions in the dl-family APIs. +/// @return +/// Returns a human-readable, null-terminated string describing the most +/// recent error that occurred from a call to one of the functions in the +/// dl-family APIs. 
+//--------------------------------------------------------------------------- +char *dlError(void); + +} // namespace dynamicloading +} // namespace pal diff --git a/nntrainer/npu/qnn/PAL/include/PAL/FileOp.hpp b/nntrainer/npu/qnn/PAL/include/PAL/FileOp.hpp new file mode 100644 index 000000000..1b25a1d57 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/FileOp.hpp @@ -0,0 +1,246 @@ +//============================================================================== +// +// Copyright (c) 2008-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +//------------------------------------------------------------------------------ +/// @file +/// This file includes APIs for file operations on the supported platforms +//------------------------------------------------------------------------------ + +#pragma once + +#include + +#include +#include + +namespace pal { +class FileOp; +} + +//------------------------------------------------------------------------------ +/// @brief +/// FileOp contains OS Specific file system functionality. +//------------------------------------------------------------------------------ +class pal::FileOp { +public: + // enum for symbolic constants mode, strictly follow linux usage + // windows or another OS user should transfer the usage + // ref : http://man7.org/linux/man-pages/man2/open.2.html + enum class FileMode : uint32_t { + S_DEFAULT_ = 0777, + S_IRWXU_ = 0700, + S_IRUSR_ = 0400, + S_IWUSR_ = 0200, + S_IXUSR_ = 0100, + S_IRWXG_ = 0070, + S_IRGRP_ = 0040, + S_IWGRP_ = 0020, + S_IXGRP_ = 0010, + S_IRWXO_ = 0007, + S_IROTH_ = 0004, + S_IWOTH_ = 0002, + S_IXOTH_ = 0001 + }; + + //--------------------------------------------------------------------------- + /// @brief + /// Copies a file from one location to another, overwrites if the + /// destination already exists. + /// @param source + /// File name of the source file. + /// @param target + /// File name of the target file. + /// @return + /// True on success, otherwise false. + //--------------------------------------------------------------------------- + static bool copyOverFile(const std::string &source, + const std::string &target); + + //--------------------------------------------------------------------------- + /// @brief + /// Checks whether the file exists or not. + /// @param fileName + /// File name of the source file, including its complete path. + /// @return + /// True on success, otherwise false. + //--------------------------------------------------------------------------- + static bool checkFileExists(const std::string &fileName); + + //--------------------------------------------------------------------------- + /// @brief + /// Renames an existing file. If the file with target name exists, this call + /// overwrites it with the file with source name. + /// @param source + /// Current File name. + /// @param target + /// New name of the file. + /// @param overwrite + /// Flag indicating to overwrite existing file with newName + /// @return + /// True if successful, otherwise false. + /// @warning + /// Does not work if source and target are on different filesystems. 
+ //--------------------------------------------------------------------------- + static bool move(const std::string &source, const std::string &target, + bool overwrite); + + //--------------------------------------------------------------------------- + /// @brief + /// Delete an existing file + /// @param fileName + /// File name of the file to be deleted. + /// @return + /// True if successful, otherwise false. + //--------------------------------------------------------------------------- + static bool deleteFile(const std::string &fileName); + + //--------------------------------------------------------------------------- + /// @brief + /// Check if path is a directory or not + /// @param path + /// Path to check + /// @return + /// True if successful, otherwise false. + //--------------------------------------------------------------------------- + static bool checkIsDir(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Data type representing parts of a filename + //--------------------------------------------------------------------------- + typedef struct { + //--------------------------------------------------------------------------- + /// @brief Name of the file without the extension (i.e., basename) + //--------------------------------------------------------------------------- + std::string basename; + + //--------------------------------------------------------------------------- + /// @brief Name of the file extension (i.e., .txt or .hlnd, .html) + //--------------------------------------------------------------------------- + std::string extension; + + //--------------------------------------------------------------------------- + /// @brief + /// Location of the file (i.e., /abc/xyz/foo.bar <-- /abc/xyz/). + /// If the file name has no location then the Directory points to + /// empty string + //--------------------------------------------------------------------------- + std::string directory; + } FilenamePartsType_t; + + //--------------------------------------------------------------------------- + /// @brief + /// Determines the components of a given filename, being the directory, + /// basename and extension. 
If the file has no location or extension, these + /// components remain empty + /// @param filename + /// Path of the file for which the components are to be determined + /// @param filenameParts + /// Will contain the file name components when this function returns + /// @return + /// True if successful, false otherwise + //--------------------------------------------------------------------------- + static bool getFileInfo(const std::string &filename, + FilenamePartsType_t &filenameParts); + + //--------------------------------------------------------------------------- + /// @brief + /// Typedef for a vector of FilenamePartsType_t + //--------------------------------------------------------------------------- + typedef std::vector FilenamePartsListType_t; + + //--------------------------------------------------------------------------- + /// @brief + /// Typedef for a vector of FilenamePartsType_t const iterator + //--------------------------------------------------------------------------- + typedef std::vector::const_iterator + FilenamePartsListTypeIter_t; + + //--------------------------------------------------------------------------- + /// @brief + /// Returns a vector of FilenamePartsType_t objects for a given directory + /// @param path + /// Path to scan for files + /// @return + /// True if successful, false otherwise + //--------------------------------------------------------------------------- + static bool getFileInfoList(const std::string &path, + FilenamePartsListType_t &filenamePartsList); + + //--------------------------------------------------------------------------- + /// @brief + /// Returns a vector of FilenamePartsType_t objects for a given directory + /// and the child directories inside. + /// @param path + /// Path to directory to scan for files for + /// @note if path is not a directory - the function will return false + /// @param filenamePartList + /// List to append to + /// @param ignoreDirs + /// If this flag is set to true, directories (and symbolic links to + /// directories) are not included in the list. Only actual files below the + /// specified directory path will be appended. + /// @return True if successful, false otherwise + /// @note Directories in list only populate Directory member variable of the + /// struct. + /// That is Basename and Extension will be empty strings. + /// @note Symbolic links to directories are not followed. This is to avoid + /// possible + /// infinite recursion. However the initial call to this method can have + /// path to be a symbolic link to a directory. If ignoreDirs is true, + /// symbolic links to directories are also ignored. + /// @note The order in which the files/directories are listed is platform + /// dependent. However files inside a directory always come before the + /// directory itself. 
+ //--------------------------------------------------------------------------- + static bool + getFileInfoListRecursive(const std::string &path, + FilenamePartsListType_t &filenamePartsList, + const bool ignoreDirs); + + //--------------------------------------------------------------------------- + /// @brief + /// Create an absolute path from the supplied path + /// @param path + /// Path should not contain trailing '/' or '\\' + /// @return + /// Return absolute path without trailing '/' or '\\' + //--------------------------------------------------------------------------- + static std::string getAbsolutePath(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Get the file name from a path + //--------------------------------------------------------------------------- + static std::string getFileName(const std::string &file); + + //--------------------------------------------------------------------------- + /// @brief Get the directory path to a file + //--------------------------------------------------------------------------- + static std::string getDirectory(const std::string &file); + + //--------------------------------------------------------------------------- + /// @brief Get the current working directory. + /// @returns The absolute CWD or empty string if the path could not be + /// retrieved (because it was too long or deleted for example). + //--------------------------------------------------------------------------- + static std::string getCurrentWorkingDirectory(); + + //--------------------------------------------------------------------------- + /// @brief Set the current working directory + //--------------------------------------------------------------------------- + static bool setCurrentWorkingDirectory(const std::string &workingDir); + + //--------------------------------------------------------------------------- + /// @brief Returns true if the file contains any extension or false. + //--------------------------------------------------------------------------- + static bool hasFileExtension(const std::string &file); + + //--------------------------------------------------------------------------- + /// @brief Returns full path of file, Directory/Basename(.Extension, if any) + //--------------------------------------------------------------------------- + static std::string partsToString(const FilenamePartsType_t &filenameParts); +}; diff --git a/nntrainer/npu/qnn/PAL/include/PAL/GetOpt.hpp b/nntrainer/npu/qnn/PAL/include/PAL/GetOpt.hpp new file mode 100644 index 000000000..c54ac1966 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/GetOpt.hpp @@ -0,0 +1,92 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
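// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// splitting a path with pal::FileOp::getFileInfo() and re-assembling it with
// partsToString(). The file name used below is only an example.
#include <iostream>
#include <string>
#include "PAL/FileOp.hpp"

static void describe(const std::string &file) {
  pal::FileOp::FilenamePartsType_t parts;
  if (pal::FileOp::getFileInfo(file, parts)) {
    // For "/opt/qnn/libQnnHtp.so": directory="/opt/qnn",
    // basename="libQnnHtp", extension="so".
    std::cout << "dir=" << parts.directory << " base=" << parts.basename
              << " ext=" << parts.extension << '\n';
    std::cout << "round-trip: " << pal::FileOp::partsToString(parts) << '\n';
  }
}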
+// +//============================================================================== + +//-------------------------------------------------------------------------------- +/// @file +/// This file includes APIs for the command line parsing on supported +/// platforms +//-------------------------------------------------------------------------------- + +#pragma once + +namespace pal { +// we implement a similar API for POSIX.2 +// so that some global var are necessary + +extern const char *g_optArg; +extern int g_optInd; + +enum { + no_argument = 0, + required_argument = 1, + optional_argument = 2, +}; + +//-------------------------------------------------------------------------------------------------- +/// @brief +/// This structure describes a single long option name for the sake of +/// getopt_long. The argument longopts must be an array of these structures, +/// one for each long option. Terminate the array with an element containing +/// all zeros. +//-------------------------------------------------------------------------------------------------- +struct Option { + //-------------------------------------------------------------------------------------------------- + /// @brief The name of the long option. + //-------------------------------------------------------------------------------------------------- + const char *name; + + //-------------------------------------------------------------------------------------------------- + /// @brief + /// If the option does not take an argument, no_argument (or 0). + /// If the option requires an argument, required_argument (or 1). + //-------------------------------------------------------------------------------------------------- + int hasArg; + + //-------------------------------------------------------------------------------------------------- + /// @brief + /// Specifies how results are returned for a long option. + /// If flag is NULL, then GetOptLongOnly() returns val. Otherwise, it + /// returns 0, and flag points to a variable which is set to val if the + /// option is found, but left unchanged if the option is not found. + //-------------------------------------------------------------------------------------------------- + int *flag; + + //-------------------------------------------------------------------------------------------------- + /// @brief + /// The value to return, or to load into the variable pointed to by flag. + /// The last element of the array has to be filled with zeros. + //-------------------------------------------------------------------------------------------------- + int val; +}; + +//-------------------------------------------------------------------------------------------------- +/// @brief +/// This parses command-line options as POSIX getopt_long_only() +/// but we don't support optstring and optonal_argument now +/// @param argc +/// Argument count +/// @param argv +/// Argument array +/// @param optstring +/// Legitimate option characters, short options, don't support now +/// @param longopts +/// A pointer to the first element of an array of struct option, +/// has_arg field in the struct option indicates 3 possibilities, +/// no_argument, required_argument or optional_argument. we don't +/// support optional_argument now +/// @param longindex +/// If longindex is not NULL, it points to a variable which is set +/// to the index of the long option relative to longopts +/// @return +/// -1 for parsing done, '?' 
for non-recognized arguments, 0 for +/// flag in longopts is not NULL and saved the val to it +//-------------------------------------------------------------------------------------------------- +int getOptLongOnly(int argc, const char *const argv[], const char *optstring, + const struct Option *longopts, int *longindex); + +} // namespace pal diff --git a/nntrainer/npu/qnn/PAL/include/PAL/Path.hpp b/nntrainer/npu/qnn/PAL/include/PAL/Path.hpp new file mode 100644 index 000000000..374aead6a --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/Path.hpp @@ -0,0 +1,50 @@ +//============================================================================== +// +// Copyright (c) 2008-2014, 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +//============================================================================== + +//------------------------------------------------------------------------------ +/// @file +/// The file includes APIs for path related operations on supported platforms +//------------------------------------------------------------------------------ + +#pragma once + +#include +#include + +namespace pal { +class Path; +} + +class pal::Path { +public: + //--------------------------------------------------------------------------- + /// @brief Returns path separator for the system + //--------------------------------------------------------------------------- + static char getSeparator(); + + //--------------------------------------------------------------------------- + /// @brief Concatenate s1 and s2 + //--------------------------------------------------------------------------- + static std::string combine(const std::string &s1, const std::string &s2); + + //--------------------------------------------------------------------------- + /// @brief Get the directory name + //--------------------------------------------------------------------------- + static std::string getDirectoryName(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Get absolute path + //--------------------------------------------------------------------------- + static std::string getAbsolute(const std::string &path); + + //--------------------------------------------------------------------------- + /// @brief Check if the input path is absolute path + //--------------------------------------------------------------------------- + static bool isAbsolute(const std::string &path); + +private: +}; diff --git a/nntrainer/npu/qnn/PAL/include/PAL/StringOp.hpp b/nntrainer/npu/qnn/PAL/include/PAL/StringOp.hpp new file mode 100644 index 000000000..f3da64319 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/include/PAL/StringOp.hpp @@ -0,0 +1,61 @@ +//============================================================================== +// +// Copyright (c) 2018-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
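// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// parsing "--model <path> --verbose" with pal::getOptLongOnly(). The option
// names and the OPT_* values are made up for this example.
#include <string>
#include "PAL/GetOpt.hpp"

static void parseArgs(int argc, const char *const argv[]) {
  enum { OPT_MODEL = 1, OPT_VERBOSE = 2 };
  const pal::Option longOpts[] = {
    {"model",   pal::required_argument, nullptr, OPT_MODEL},
    {"verbose", pal::no_argument,       nullptr, OPT_VERBOSE},
    {nullptr, 0, nullptr, 0}  // all-zero terminator is required
  };

  std::string modelPath;
  bool verbose = false;
  int longIndex = 0;
  int opt;
  while ((opt = pal::getOptLongOnly(argc, argv, "", longOpts, &longIndex)) != -1) {
    switch (opt) {
    case OPT_MODEL:   modelPath = pal::g_optArg; break;  // argument of --model
    case OPT_VERBOSE: verbose = true;            break;
    default:          /* '?' means an unrecognized token */ break;
    }
  }
  (void)modelPath; (void)verbose;
}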
+// +//============================================================================== + +//----------------------------------------------------------------------------- +/// @file +/// The file inludes APIs for string operations on supported platforms +//----------------------------------------------------------------------------- + +#pragma once + +#include + +namespace pal { +class StringOp; +} + +//------------------------------------------------------------------------------ +/// @brief +/// FileOp contains OS Specific file system functionality. +//------------------------------------------------------------------------------ +class pal::StringOp { +public: + //--------------------------------------------------------------------------- + /// @brief + /// Copy copy_size bytes from buffer src to buffer dst. Behaviour of the + /// function is undefined if src and dst overlap. + /// @param dst + /// Destination buffer + /// @param dst_size + /// Size of destination buffer + /// @param src + /// Source buffer + /// @param copy_size + /// Number of bytes to copy + /// @return + /// Number of bytes copied + //--------------------------------------------------------------------------- + static size_t memscpy(void *dst, size_t dstSize, const void *src, + size_t copySize); + + //--------------------------------------------------------------------------- + /// @brief + /// Returns a pointer to a null-terminated byte string, which contains + /// copies of at most size bytes from the string pointed to by str. If the + /// null terminator is not encountered in the first size bytes, it is added + /// to the duplicated string. + /// @param source + /// Source string + /// @param maxlen + /// Max number of bytes to copy from str + /// @return + /// A pointer to the newly allocated string, or a null pointer if an error + /// occurred. + //--------------------------------------------------------------------------- + static char *strndup(const char *source, size_t maxlen); +}; diff --git a/nntrainer/npu/qnn/PAL/src/common/GetOpt.cpp b/nntrainer/npu/qnn/PAL/src/common/GetOpt.cpp new file mode 100644 index 000000000..cb3f0176d --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/common/GetOpt.cpp @@ -0,0 +1,150 @@ +//============================================================================= +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================= + +#include + +#include + +#include "PAL/GetOpt.hpp" + +using namespace std; + +namespace pal { + +const char *g_optArg = nullptr; +int g_optInd = 1; + +static const struct Option * +findOpt(const string str, const struct Option *longopts, int *longindex) { + const struct Option *opt = nullptr; + int idx = 0; + size_t searchEnd = str.find_first_of("="); + + for (opt = longopts; opt->name && strlen(opt->name) > 0; opt++, idx++) { + if (str.substr(0, searchEnd) == opt->name) { + if (longindex) { + *longindex = idx; + } + break; + } + } + // if not found, opt would point to the last element of longopts + // whose name MUST be empty + return opt->name ? 
opt : nullptr; +} + +int getOptLongOnly(int argc, const char *const argv[], const char *, + const struct Option *longopts, int *longindex) { + const struct Option *opt; + int argLen = 0; + bool isShort = false; + const char *arg = ""; + + g_optArg = nullptr; + // no arg, means the end of command + if (g_optInd >= argc) { + return -1; + } + + arg = argv[g_optInd]; + + if (arg[0] != '-') { + g_optInd += 1; + return '?'; + } + + argLen = strlen(arg); + + if (argLen < 2) { + g_optInd += 1; + return '?'; + } + + if (!longopts) { + g_optInd += 1; + return '?'; + } + + // check short options with this form, -a arg + if (argLen == 2) { + isShort = true; + // check short options with this form, -a=arg + } else if (argLen > 3 && arg[2] == '=') { + isShort = true; + // check for long options, can be used for both forms + } else if (argLen > 2 && arg[1] != '=') { + if (arg[1] != '-') { + g_optInd += 1; + return '?'; + } + isShort = false; + } + + // start after -- to find the option + const char *const optStr = isShort ? &arg[1] : &arg[2]; + opt = findOpt(optStr, longopts, longindex); + if (!opt) { + g_optInd += 1; + return '?'; + } + + if (opt->hasArg == no_argument) { + g_optInd += 1; + + if (!opt->flag) { + return opt->val; + } else { + *(opt->flag) = opt->val; + return 0; + } + } + + if (opt->hasArg == required_argument) { + string optStr = argv[g_optInd]; + size_t assignIdx = optStr.find_first_of("="); + bool advance = (assignIdx == string::npos); + + // if it is --opt arg form, this will be true, + // so we need to advance one step to get arg + // otherwise, need to stop advance step & extract arg from argv[g_optInd] + if (advance) { + g_optInd += 1; + } + + if (g_optInd >= argc) { + return '?'; + } else { + // if advance, means it is the form --opt arg + // otherwise, the form, --opt=arg + if (advance) { + // since g_optInd is advanced, g_optArg can be assigned directly + g_optArg = argv[g_optInd]; + } else { + if (assignIdx == optStr.size()) { + return '?'; + } + // for not advanced form, + // g_optArg should point to the address right after "=" + g_optArg = &argv[g_optInd][assignIdx + 1]; + } + // OK, now we are ready to handle the next pair + g_optInd += 1; + + if (!opt->flag) { + return opt->val; + } else { + *(opt->flag) = opt->val; + return 0; + } + } + } + + return '?'; +} // end of getOptLongOnly + +} // namespace pal diff --git a/nntrainer/npu/qnn/PAL/src/common/StringOp.cpp b/nntrainer/npu/qnn/PAL/src/common/StringOp.cpp new file mode 100644 index 000000000..eb917681b --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/common/StringOp.cpp @@ -0,0 +1,48 @@ +//============================================================================== +// +// Copyright (c) 2018-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include + +#include "PAL/StringOp.hpp" + +//--------------------------------------------------------------------------- +// pal::StringOp::memscpy +//--------------------------------------------------------------------------- +size_t pal::StringOp::memscpy(void *dst, size_t dstSize, const void *src, + size_t copySize) { + if (!dst || !src || !dstSize || !copySize) + return 0; + + size_t minSize = dstSize < copySize ? 
dstSize : copySize; + + memcpy(dst, src, minSize); + + return minSize; +} + +//--------------------------------------------------------------------------- +// pal::StringOp::strndup +//--------------------------------------------------------------------------- +char *pal::StringOp::strndup(const char *source, size_t maxlen) { +#ifdef _WIN32 + size_t length = ::strnlen(source, maxlen); + + char *destination = (char *)malloc((length + 1) * sizeof(char)); + if (destination == nullptr) + return nullptr; + + // copy length bytes to destination and leave destination[length] to be + // null terminator + strncpy_s(destination, length + 1, source, length); + + return destination; +#else + return ::strndup(source, maxlen); +#endif +} diff --git a/nntrainer/npu/qnn/PAL/src/linux/Directory.cpp b/nntrainer/npu/qnn/PAL/src/linux/Directory.cpp new file mode 100644 index 000000000..5819e6ec1 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/Directory.cpp @@ -0,0 +1,155 @@ +//============================================================================== +// +// Copyright (c) 2008-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#ifndef __QNXNTO__ +#include +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +#ifdef __QNXNTO__ +static bool is_qnx_dir(const struct dirent *ep) { + struct dirent_extra *exp; + bool is_dir = false; + + for (exp = _DEXTRA_FIRST(ep); _DEXTRA_VALID(exp, ep); + exp = _DEXTRA_NEXT(exp)) { + if (exp->d_type == _DTYPE_STAT || exp->d_type == _DTYPE_LSTAT) { + struct stat *statbuff = &((dirent_extra_stat *)exp)->d_stat; + if (statbuff && S_ISDIR(statbuff->st_mode)) { + is_dir = true; + break; + } + } + } + return is_dir; +} +#endif + +// ------------------------------------------------------------------------------ +// pal::Directory::create +// ------------------------------------------------------------------------------ +bool pal::Directory::create(const std::string &path, + pal::Directory::DirMode dirmode) { + struct stat st; + int status = 0; + if (stat(path.c_str(), &st) != 0) { + // Directory does not exist + status = mkdir(path.c_str(), static_cast(dirmode)); + } else if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + status = -1; + } + return (status == 0); +} + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +bool pal::Directory::remove(const std::string &dirName) { + DIR *dir; + struct dirent *entry; + + dir = opendir(dirName.c_str()); + if (dir == nullptr) { + // If the directory doesn't exist then just return true. + if (errno == ENOENT) { + return true; + } + return false; + } + +#ifdef __QNXNTO__ + if (dircntl(dir, D_SETFLAG, D_FLAG_STAT) == -1) { + return false; + } +#endif + + // Recursively traverse the directory tree. 
+ while ((entry = readdir(dir)) != nullptr) { + if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) { + std::stringstream ss; + ss << dirName << Path::getSeparator() << entry->d_name; + std::string path = ss.str(); +#ifdef __QNXNTO__ + if (is_qnx_dir(entry)) +#else + if (entry->d_type == DT_DIR) +#endif + { + // It's a directory so we need to drill down into it and delete + // its contents. + if (!remove(path)) { + return false; + } + } else { + if (::remove(path.c_str())) { + return false; + } + } + } + } + + closedir(dir); + + if (::remove(dirName.c_str())) { + return false; + } + + return true; +} + +bool pal::Directory::makePath(const std::string &path) { + struct stat st; + bool rc = false; + + if (path == ".") { + rc = true; + } else if (stat(path.c_str(), &st) == 0) { + if (st.st_mode & S_IFDIR) { + rc = true; + } + } else { + size_t offset = path.find_last_of(Path::getSeparator()); + if (offset != std::string::npos) { + std::string newPath = path.substr(0, offset); + if (!makePath(newPath)) { + return false; + } + } + + // There is a possible race condition, where a file/directory can be + // created in between the stat() above, and the mkdir() call here. + // So, ignore the return code from the mkdir() call, and then re-check + // for existence of the directory after it. Ensure both that it exists + // and that it is a directory - just like above. + mkdir(path.c_str(), 0777); + + if ((stat(path.c_str(), &st) == 0) && (st.st_mode & S_IFDIR)) { + rc = true; + } + } + + return rc; +} diff --git a/nntrainer/npu/qnn/PAL/src/linux/DynamicLoading.cpp b/nntrainer/npu/qnn/PAL/src/linux/DynamicLoading.cpp new file mode 100644 index 000000000..4b2f00823 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/DynamicLoading.cpp @@ -0,0 +1,90 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
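// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// pal::StringOp::memscpy (declared in StringOp.hpp above) clamps the copy to
// the destination size, so truncation is reported via the return value
// instead of overflowing the buffer.
#include "PAL/StringOp.hpp"

static void memscpyDemo() {
  const char src[] = "0123456789";
  char dst[4];
  // Copies only sizeof(dst) == 4 bytes even though 11 were requested.
  size_t copied = pal::StringOp::memscpy(dst, sizeof(dst), src, sizeof(src));
  (void)copied;  // copied == 4 here
}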
+// +//============================================================================== + +#include +#include +#include + +//#include "Log.h" +#include "PAL/Debug.hpp" +#include "PAL/DynamicLoading.hpp" +const std::vector LIB_PREFIX = { + "/system/lib64/", "/odm/lib64/", "/vendor/lib64/", + "/data/local/tmp/mllm/qnn-lib/", "/system_ext/lib64/"}; +void *pal::dynamicloading::dlOpen(const char *filename, int flags) { + int realFlags = 0; + + if (flags & DL_NOW) { + realFlags |= RTLD_NOW; + } + + if (flags & DL_LOCAL) { + realFlags |= RTLD_LOCAL; + } + + if (flags & DL_GLOBAL) { + realFlags |= RTLD_GLOBAL; + } + + auto res = ::dlopen(filename, realFlags); + if (!res) { + for (auto prefix_ : LIB_PREFIX) { + std::string prefix = prefix_ + filename; + res = ::dlopen(prefix.c_str(), realFlags); + if (res) { + break; + } + // MLLM_LOG_ERROR("{} not found", prefix); + } + } + return res; +} + +void *pal::dynamicloading::dlSym(void *handle, const char *symbol) { + if (handle == DL_DEFAULT) { + handle = RTLD_DEFAULT; + } + + return ::dlsym(handle, symbol); +} + +int pal::dynamicloading::dlAddrToLibName(void *addr, std::string &name) { + // Clean the output buffer + name = std::string(); + + // If the address is empty, return zero as treating failure + if (!addr) { + DEBUG_MSG("Input address is nullptr."); + return 0; + } + + // Dl_info do not maintain the lifetime of its string members, + // it would be maintained by dlopen() and dlclose(), + // so we do not need to release it manually + Dl_info info; + int result = ::dladdr(addr, &info); + + // If dladdr() successes, set name to the library name + if (result) { + name = std::string(info.dli_fname); + } else { + DEBUG_MSG("Input address could not be matched to a shared object."); + } + + return result; +} + +int pal::dynamicloading::dlClose(void *handle) { + if (!handle) { + return 0; + } + + return ::dlclose(handle); +} + +char *pal::dynamicloading::dlError(void) { return ::dlerror(); } diff --git a/nntrainer/npu/qnn/PAL/src/linux/FileOp.cpp b/nntrainer/npu/qnn/PAL/src/linux/FileOp.cpp new file mode 100644 index 000000000..baebafbea --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/FileOp.cpp @@ -0,0 +1,362 @@ +//============================================================================== +// +// Copyright (c) 2008-2013,2015,2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
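// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// resolving a symbol from a backend library with the pal dynamic-loading
// wrappers defined above. Note that dlOpen() falls back to the LIB_PREFIX
// search paths when the bare name cannot be opened. The library name matches
// a typical QNN backend; the "BackendInit" symbol is a placeholder.
#include <string>
#include "PAL/DynamicLoading.hpp"

typedef int (*BackendInitFn_t)(void);

static bool loadBackend() {
  namespace dl = pal::dynamicloading;
  // DL_NOW is mandatory; DL_LOCAL keeps the symbols private to this handle.
  void *handle = dl::dlOpen("libQnnHtp.so", dl::DL_NOW | dl::DL_LOCAL);
  if (!handle) {
    // dl::dlError() reports the most recent dl-family failure
    return false;
  }
  auto init = reinterpret_cast<BackendInitFn_t>(dl::dlSym(handle, "BackendInit"));
  bool ok = false;
  if (init) {
    ok = (init() == 0);
    std::string libName;
    dl::dlAddrToLibName(reinterpret_cast<void *>(init), libName);  // which .so resolved it
  }
  dl::dlClose(handle);
  return ok;
}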
+// +//============================================================================== + +#include +#include +#include +#ifndef __QNXNTO__ +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "PAL/Debug.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +typedef struct stat Stat_t; + +//--------------------------------------------------------------------------- +// pal::FileOp::HasFileExtension +//--------------------------------------------------------------------------- +bool pal::FileOp::checkFileExists(const std::string &fileName) { + Stat_t sb; + + if (stat(fileName.c_str(), &sb) == -1) { + return false; + } else { + return true; + } +} + +//--------------------------------------------------------------------------- +// pal::FileOp::move +//--------------------------------------------------------------------------- +bool pal::FileOp::move(const std::string ¤tName, + const std::string &newName, bool overwrite) { + if (overwrite) { + remove(newName.c_str()); + } + return (rename(currentName.c_str(), newName.c_str()) == 0); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::deleteFile +//--------------------------------------------------------------------------- +bool pal::FileOp::deleteFile(const std::string &fileName) { + return (remove(fileName.c_str()) == 0); +} + +//------------------------------------------------------------------------------ +// pal::FileOp::checkIsDir +//------------------------------------------------------------------------------ +bool pal::FileOp::checkIsDir(const std::string &fileName) { + bool retVal = false; + Stat_t sb; + if (stat(fileName.c_str(), &sb) == 0) { + if (sb.st_mode & S_IFDIR) { + retVal = true; + } + } + return retVal; +} + +//------------------------------------------------------------------------------ +// pal::FileOp::getFileInfo +//------------------------------------------------------------------------------ +bool pal::FileOp::getFileInfo(const std::string &filename, + pal::FileOp::FilenamePartsType_t &filenameParts) { + std::string name; + + // Clear the result + filenameParts.basename.clear(); + filenameParts.extension.clear(); + filenameParts.directory.clear(); + + size_t lastPathSeparator = filename.find_last_of(Path::getSeparator()); + if (lastPathSeparator == std::string::npos) { + // No directory + name = filename; + } else { + // has a directory part + filenameParts.directory = filename.substr(0, lastPathSeparator); + name = filename.substr(lastPathSeparator + 1); + } + + size_t ext = name.find_last_of("."); + if (ext == std::string::npos) { + // no extension + filenameParts.basename = name; + } else { + // has extension + filenameParts.basename = name.substr(0, ext); + filenameParts.extension = name.substr(ext + 1); + } + + return true; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::copyOverFile +//--------------------------------------------------------------------------- +bool pal::FileOp::copyOverFile(const std::string &fromFile, + const std::string &toFile) { + bool rc = false; + int readFd; + int writeFd; + struct stat statBuf; + + // Open the input file. + readFd = ::open(fromFile.c_str(), O_RDONLY); + if (readFd == -1) { + close(readFd); + return false; + } + + // Stat the input file to obtain its size. 
*/ + if (fstat(readFd, &statBuf) != 0) { + close(readFd); + return false; + } + + // Open the output file for writing, with the same permissions as the input + writeFd = + ::open(toFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, statBuf.st_mode); + if (writeFd == -1) { + close(readFd); + return false; + } + + // Copy the file in a non-kernel specific way */ + char fileBuf[8192]; + ssize_t rBytes, wBytes; + while (true) { + rBytes = read(readFd, fileBuf, sizeof(fileBuf)); + + if (!rBytes) { + rc = true; + break; + } + + if (rBytes < 0) { + rc = false; + break; + } + + wBytes = write(writeFd, fileBuf, (size_t)rBytes); + + if (!wBytes) { + rc = true; + break; + } + + if (wBytes < 0) { + rc = false; + break; + } + } + + /* Close up. */ + close(readFd); + close(writeFd); + return rc; +} + +static bool getFileInfoListRecursiveImpl( + const std::string &path, + pal::FileOp::FilenamePartsListType_t &filenamePartsList, + const bool ignoreDirs, size_t maxDepth) { + struct dirent **namelist = nullptr; + int entryCount = 0; + + // Base case + if (maxDepth == 0) { + return true; + } + +#ifdef __ANDROID__ + // android dirent.h has the wrong signature for alphasort so it had to be + // disabled or fixed + entryCount = scandir(path.c_str(), &namelist, 0, 0); +#else + entryCount = scandir(path.c_str(), &namelist, 0, alphasort); +#endif + if (entryCount < 0) { + return false; + } else { + while (entryCount--) { + const std::string dName(namelist[entryCount]->d_name); + free(namelist[entryCount]); + + // skip current directory, prev directory and empty string + if (dName.empty() || dName == "." || dName == "..") { + continue; + } + + std::string curPath = path; + curPath += pal::Path::getSeparator(); + curPath += dName; + + // recurse if directory but avoid symbolic links to directories + if (pal::FileOp::checkIsDir(curPath)) { + Stat_t sb; + if (lstat(curPath.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) { + if (!getFileInfoListRecursiveImpl(curPath, filenamePartsList, + ignoreDirs, maxDepth - 1)) { + return false; + } + } + + if (ignoreDirs) { + continue; + } + + // Append training / to make this path look like a directory for + // getFileInfo() + if (curPath.back() != pal::Path::getSeparator()) { + curPath += pal::Path::getSeparator(); + } + } + + // add to vector + pal::FileOp::FilenamePartsType_t filenameParts; + if (pal::FileOp::getFileInfo(curPath, filenameParts)) { + filenamePartsList.push_back(filenameParts); + } + } + + free(namelist); + } + + return true; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getFileInfoList +//--------------------------------------------------------------------------- +bool pal::FileOp::getFileInfoList(const std::string &path, + FilenamePartsListType_t &filenamePartsList) { + return getFileInfoListRecursiveImpl(path, filenamePartsList, false, 1); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getFileInfoListRecursive +//--------------------------------------------------------------------------- +bool pal::FileOp::getFileInfoListRecursive( + const std::string &path, FilenamePartsListType_t &filenamePartsList, + const bool ignoreDirs) { + return getFileInfoListRecursiveImpl(path, filenamePartsList, ignoreDirs, + std::numeric_limits::max()); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getAbsolutePath +//--------------------------------------------------------------------------- +std::string 
pal::FileOp::getAbsolutePath(const std::string &path) { + // NOTE: This implementation is broken currently when a path with + // non-existant components is passed! NEO-19723 was created to address. + char absPath[PATH_MAX + 1] = {0}; + + if (realpath(path.c_str(), absPath) == NULL) { + DEBUG_MSG("GetAbsolute path fail! Error code : %d", errno); + return std::string(); + } + return std::string(absPath); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::setCWD +//--------------------------------------------------------------------------- +bool pal::FileOp::setCurrentWorkingDirectory(const std::string &workingDir) { + return chdir(workingDir.c_str()) == 0; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getDirectory +//--------------------------------------------------------------------------- +std::string pal::FileOp::getDirectory(const std::string &file) { + std::string rc = file; + size_t offset = file.find_last_of(Path::getSeparator()); + if (offset != std::string::npos) { + rc = file.substr(0, offset); + } + return rc; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getFileName +//--------------------------------------------------------------------------- +std::string pal::FileOp::getFileName(const std::string &file) { + std::string rc = file; + size_t offset = file.find_last_of(Path::getSeparator()); + if (offset != std::string::npos) { + rc = file.substr(offset + 1); // +1 to skip path separator + } + return rc; +} + +//--------------------------------------------------------------------------- +// pal::FileOp::hasFileExtension +//--------------------------------------------------------------------------- +bool pal::FileOp::hasFileExtension(const std::string &file) { + FilenamePartsType_t parts; + getFileInfo(file, parts); + + return !parts.extension.empty(); +} + +//--------------------------------------------------------------------------- +// pal::FileOp::getCWD +//--------------------------------------------------------------------------- +std::string pal::FileOp::getCurrentWorkingDirectory() { + char buffer[PATH_MAX + 1]; + buffer[0] = '\0'; + + // If there is any failure return empty string. It is technically possible + // to handle paths exceeding PATH_MAX on some flavors of *nix but platforms + // like Android (Bionic) do no provide such capability. For consistency we + // will not handle extra long path names. 
+ if (nullptr == getcwd(buffer, PATH_MAX)) { + return std::string(); + } else { + return std::string(buffer); + } +} + +//--------------------------------------------------------------------------- +// pal::FileOp::partsToString +//--------------------------------------------------------------------------- +std::string +pal::FileOp::partsToString(const FilenamePartsType_t &filenameParts) { + std::string path; + + if (!filenameParts.directory.empty()) { + path += filenameParts.directory; + path += Path::getSeparator(); + } + if (!filenameParts.basename.empty()) { + path += filenameParts.basename; + } + if (!filenameParts.extension.empty()) { + path += "."; + path += filenameParts.extension; + } + return path; +} diff --git a/nntrainer/npu/qnn/PAL/src/linux/Path.cpp b/nntrainer/npu/qnn/PAL/src/linux/Path.cpp new file mode 100644 index 000000000..bc40117d0 --- /dev/null +++ b/nntrainer/npu/qnn/PAL/src/linux/Path.cpp @@ -0,0 +1,48 @@ +//============================================================================== +// +// Copyright (c) 2008-2014, 2015, 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include + +#include +#ifndef PATH_MAX +#include +#endif + +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +char pal::Path::getSeparator() { return '/'; } + +std::string pal::Path::combine(const std::string &s1, const std::string &s2) { + std::stringstream ss; + ss << s1; + if (s1.size() > 0 && s1[s1.size() - 1] != getSeparator()) { + ss << getSeparator(); + } + ss << s2; + return ss.str(); +} + +std::string pal::Path::getDirectoryName(const std::string &path) { + std::string rc = path; + size_t index = path.find_last_of(pal::Path::getSeparator()); + if (index != std::string::npos) { + rc = path.substr(0, index); + } + return rc; +} + +std::string pal::Path::getAbsolute(const std::string &path) { + // Functionality was duplicated of function in FileOp + // Just call that function directly instead + return pal::FileOp::getAbsolutePath(path); +} + +bool pal::Path::isAbsolute(const std::string &path) { + return path.size() > 0 && path[0] == getSeparator(); +} diff --git a/nntrainer/npu/qnn/QNN.hpp b/nntrainer/npu/qnn/QNN.hpp new file mode 100644 index 000000000..3f61030b1 --- /dev/null +++ b/nntrainer/npu/qnn/QNN.hpp @@ -0,0 +1,37 @@ +//============================================================================== +// +// Copyright (c) 2020-2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
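// Editor's illustrative sketch (not part of the upstream Qualcomm sources):
// composing a library path with pal::Path, as implemented above. combine()
// inserts the separator only when needed, and isAbsolute() simply checks for
// a leading '/'. The directory and file names are placeholders.
#include <string>
#include "PAL/Path.hpp"

static std::string resolveLib(const std::string &dir, const std::string &lib) {
  std::string path = pal::Path::combine(dir, lib);  // e.g. "/vendor/lib64" + "libQnnHtp.so"
  if (!pal::Path::isAbsolute(path)) {
    path = pal::Path::getAbsolute(path);  // delegates to FileOp::getAbsolutePath()
  }
  return path;
}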
+// +//============================================================================== + +#pragma once + +#include "QnnInterface.h" +#include "System/QnnSystemInterface.h" +#include "WrapperUtils/QnnWrapperUtils.hpp" + +namespace qnn { +namespace tools { +namespace sample_app { + +// Graph Related Function Handle Types +typedef qnn_wrapper_api::ModelError_t (*ComposeGraphsFnHandleType_t)( + Qnn_BackendHandle_t, QNN_INTERFACE_VER_TYPE, Qnn_ContextHandle_t, + const qnn_wrapper_api::GraphConfigInfo_t **, const uint32_t, + qnn_wrapper_api::GraphInfo_t ***, uint32_t *, bool, QnnLog_Callback_t, + QnnLog_Level_t); +typedef qnn_wrapper_api::ModelError_t (*FreeGraphInfoFnHandleType_t)( + qnn_wrapper_api::GraphInfo_t ***, uint32_t); + +typedef struct QnnFunctionPointers { + ComposeGraphsFnHandleType_t composeGraphsFnHandle; + FreeGraphInfoFnHandleType_t freeGraphInfoFnHandle; + QNN_INTERFACE_VER_TYPE qnnInterface; + QNN_SYSTEM_INTERFACE_VER_TYPE qnnSystemInterface; +} QnnFunctionPointers; + +} // namespace sample_app +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/QnnTypeMacros.hpp b/nntrainer/npu/qnn/QnnTypeMacros.hpp new file mode 100644 index 000000000..bcd561ac3 --- /dev/null +++ b/nntrainer/npu/qnn/QnnTypeMacros.hpp @@ -0,0 +1,546 @@ +//============================================================================== +// +// Copyright (c) 2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include +#include +#include + +#include "QnnTypes.h" +#include "WrapperUtils/QnnWrapperUtils.hpp" + +namespace qnn_wrapper_api { + +/** + * @brief Verifies the tensor object passed is of supported Qnn_Tensor_t API + * version + * + * @param[in] tensor Qnn_Tensor_t object to validate + * + * @return Error code + */ +inline ModelError_t validateTensorVersion(Qnn_Tensor_t tensor) { + if (tensor.version != QNN_TENSOR_VERSION_1) { + PRINT_ERROR( + "validateTensorVersion() tensor %s, got unsupported version %d.", + tensor.v1.name, tensor.version); + return MODEL_TENSOR_ERROR; + } + return MODEL_NO_ERROR; +} + +/** + * @brief Verifies the tensor object passed is of supported Qnn_OpConfig_t API + * version + * + * @param[in] tensor Qnn_OpConfig_t object to validate + * + * @return Error code + */ +inline ModelError_t validateOpConfigVersion(Qnn_OpConfig_t opConfig) { + if (opConfig.version != QNN_OPCONFIG_VERSION_1) { + PRINT_ERROR("validateOpConfigVersion() op %s, got unsupported version %d.", + opConfig.v1.name, opConfig.version); + return MODEL_NODES_ERROR; + } + return MODEL_NO_ERROR; +} + +inline const char *getQnnOpConfigName(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.name; + } + return nullptr; +} + +inline const char *getQnnOpConfigName(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigName(*opConfig); +} + +inline const char *getQnnOpConfigPackageName(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.packageName; + } + return nullptr; +} + +inline const char *getQnnOpConfigPackageName(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigPackageName(*opConfig); +} + +inline const char *getQnnOpConfigTypeName(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.typeName; + } + return nullptr; +} + +inline const char 
*getQnnOpConfigTypeName(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigTypeName(*opConfig); +} + +inline uint32_t getQnnOpConfigNumParams(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfParams; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumParams(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigNumParams(*opConfig); +} + +inline const Qnn_Param_t *getQnnOpConfigParams(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.params; + } + return nullptr; +} + +inline const Qnn_Param_t *getQnnOpConfigParams(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigParams(*opConfig); +} + +inline uint32_t getQnnOpConfigNumInputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfInputs; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumInputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigNumInputs(*opConfig); +} + +inline const Qnn_Tensor_t * +getQnnOpConfigInputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.inputTensors; + } + return nullptr; +} + +inline const Qnn_Tensor_t * +getQnnOpConfigInputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigInputs(*opConfig); +} + +inline uint32_t getQnnOpConfigNumOutputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.numOfOutputs; + } + return 0u; +} + +inline uint32_t getQnnOpConfigNumOutputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigNumOutputs(*opConfig); +} + +inline const Qnn_Tensor_t * +getQnnOpConfigOutputs(const Qnn_OpConfig_t &opConfig) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + return opConfig.v1.outputTensors; + } + return nullptr; +} + +inline const Qnn_Tensor_t * +getQnnOpConfigOutputs(const Qnn_OpConfig_t *opConfig) { + return getQnnOpConfigOutputs(*opConfig); +} + +inline void setQnnOpConfigName(Qnn_OpConfig_t &opConfig, const char *name) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.name = name; + } +} + +inline void setQnnOpConfigName(Qnn_OpConfig_t *opConfig, const char *name) { + setQnnOpConfigName(*opConfig, name); +} + +inline void setQnnOpConfigPackageName(Qnn_OpConfig_t &opConfig, + const char *packageName) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.packageName = packageName; + } +} + +inline void setQnnOpConfigPackageName(Qnn_OpConfig_t *opConfig, + const char *packageName) { + setQnnOpConfigPackageName(*opConfig, packageName); +} + +inline void setQnnOpConfigTypeName(Qnn_OpConfig_t &opConfig, + const char *typeName) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.typeName = typeName; + } +} + +inline void setQnnOpConfigTypeName(Qnn_OpConfig_t *opConfig, + const char *typeName) { + setQnnOpConfigTypeName(*opConfig, typeName); +} + +inline void setQnnOpConfigParams(Qnn_OpConfig_t &opConfig, uint32_t numOfParams, + Qnn_Param_t *params) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfParams = numOfParams; + opConfig.v1.params = params; + } +} + +inline void setQnnOpConfigParams(Qnn_OpConfig_t *opConfig, uint32_t numOfParams, + Qnn_Param_t *params) { + setQnnOpConfigParams(*opConfig, numOfParams, params); +} + +inline void setQnnOpConfigInputs(Qnn_OpConfig_t &opConfig, uint32_t numOfInputs, + Qnn_Tensor_t *inputTensors) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { 
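+    // The input count and the tensor array are assigned together so the v1 layout stays self-consistent.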
+ opConfig.v1.numOfInputs = numOfInputs; + opConfig.v1.inputTensors = inputTensors; + } +} + +inline void setQnnOpConfigInputs(Qnn_OpConfig_t *opConfig, uint32_t numOfInputs, + Qnn_Tensor_t *inputTensors) { + setQnnOpConfigInputs(*opConfig, numOfInputs, inputTensors); +} + +inline void setQnnOpConfigOutputs(Qnn_OpConfig_t &opConfig, + uint32_t numOfOutputs, + Qnn_Tensor_t *outputTensors) { + if (opConfig.version == QNN_OPCONFIG_VERSION_1) { + opConfig.v1.numOfOutputs = numOfOutputs; + opConfig.v1.outputTensors = outputTensors; + } +} + +inline void setQnnOpConfigOutputs(Qnn_OpConfig_t *opConfig, + uint32_t numOfOutputs, + Qnn_Tensor_t *outputTensors) { + setQnnOpConfigOutputs(*opConfig, numOfOutputs, outputTensors); +} + +// inline Qnn_OpConfig_t + +inline uint32_t getQnnTensorId(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.id; + } + return 0u; +} + +inline uint32_t getQnnTensorId(const Qnn_Tensor_t *tensor) { + return getQnnTensorId(*tensor); +} + +inline const char *getQnnTensorName(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.name; + } + return 0u; +} + +inline const char *getQnnTensorName(const Qnn_Tensor_t *tensor) { + return getQnnTensorName(*tensor); +} + +inline Qnn_TensorType_t getQnnTensorType(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.type; + } + return QNN_TENSOR_TYPE_UNDEFINED; +} + +inline Qnn_TensorType_t getQnnTensorType(const Qnn_Tensor_t *tensor) { + return getQnnTensorType(*tensor); +} + +inline Qnn_TensorDataFormat_t +getQnnTensorDataFormat(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.dataFormat; + } + return QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER; +} + +inline Qnn_TensorDataFormat_t +getQnnTensorDataFormat(const Qnn_Tensor_t *tensor) { + return getQnnTensorDataFormat(*tensor); +} + +inline Qnn_DataType_t getQnnTensorDataType(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.dataType; + } + return QNN_DATATYPE_UNDEFINED; +} + +inline Qnn_DataType_t getQnnTensorDataType(const Qnn_Tensor_t *tensor) { + return getQnnTensorDataType(*tensor); +} + +inline Qnn_QuantizeParams_t +getQnnTensorQuantParams(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.quantizeParams; + } + return QNN_QUANTIZE_PARAMS_INIT; +} + +inline Qnn_QuantizeParams_t +getQnnTensorQuantParams(const Qnn_Tensor_t *tensor) { + return getQnnTensorQuantParams(*tensor); +} + +inline uint32_t getQnnTensorRank(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.rank; + } + return 0u; +} + +inline uint32_t getQnnTensorRank(const Qnn_Tensor_t *tensor) { + return getQnnTensorRank(*tensor); +} + +inline uint32_t *getQnnTensorDimensions(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.dimensions; + } + return nullptr; +} + +inline uint32_t *getQnnTensorDimensions(const Qnn_Tensor_t *tensor) { + return getQnnTensorDimensions(*tensor); +} + +inline Qnn_TensorMemType_t getQnnTensorMemType(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.memType; + } + return QNN_TENSORMEMTYPE_UNDEFINED; +} + +inline Qnn_TensorMemType_t getQnnTensorMemType(const Qnn_Tensor_t *tensor) { + return getQnnTensorMemType(*tensor); +} + +inline Qnn_ClientBuffer_t getQnnTensorClientBuf(const Qnn_Tensor_t &tensor) { 
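+  // Only the QNN_TENSOR_VERSION_1 layout is handled by these accessors; any other version falls back to an empty QNN_CLIENT_BUFFER_INIT.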
+ if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.clientBuf; + } + return QNN_CLIENT_BUFFER_INIT; +} + +inline Qnn_ClientBuffer_t getQnnTensorClientBuf(const Qnn_Tensor_t *tensor) { + return getQnnTensorClientBuf(*tensor); +} + +inline Qnn_MemHandle_t getQnnTensorMemHandle(const Qnn_Tensor_t &tensor) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + return tensor.v1.memHandle; + } + return nullptr; +} + +inline Qnn_MemHandle_t getQnnTensorMemHandle(const Qnn_Tensor_t *tensor) { + return getQnnTensorMemHandle(*tensor); +} + +inline void setQnnTensorId(Qnn_Tensor_t &tensor, uint32_t id) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.id = id; + } +} + +inline void setQnnTensorId(Qnn_Tensor_t *tensor, uint32_t id) { + setQnnTensorId(*tensor, id); +} + +inline void setQnnTensorName(Qnn_Tensor_t &tensor, const char *name) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.name = name; + } +} + +inline void setQnnTensorName(Qnn_Tensor_t *tensor, const char *name) { + setQnnTensorName(*tensor, name); +} + +inline void setQnnTensorType(Qnn_Tensor_t &tensor, Qnn_TensorType_t type) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.type = type; + } +} + +inline void setQnnTensorType(Qnn_Tensor_t *tensor, Qnn_TensorType_t type) { + setQnnTensorType(*tensor, type); +} + +inline void setQnnTensorDataFormat(Qnn_Tensor_t &tensor, + Qnn_TensorDataFormat_t format) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.dataFormat = format; + } +} + +inline void setQnnTensorDataFormat(Qnn_Tensor_t *tensor, + Qnn_TensorDataFormat_t format) { + setQnnTensorDataFormat(*tensor, format); +} + +inline void setQnnTensorDataType(Qnn_Tensor_t &tensor, + Qnn_DataType_t dataType) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.dataType = dataType; + } +} + +inline void setQnnTensorDataType(Qnn_Tensor_t *tensor, + Qnn_DataType_t dataType) { + setQnnTensorDataType(*tensor, dataType); +} + +inline void setQnnTensorQuantParams(Qnn_Tensor_t &tensor, + Qnn_QuantizeParams_t params) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.quantizeParams = params; + } +} + +inline void setQnnTensorQuantParams(Qnn_Tensor_t *tensor, + Qnn_QuantizeParams_t params) { + setQnnTensorQuantParams(*tensor, params); +} + +inline void setQnnTensorRank(Qnn_Tensor_t &tensor, uint32_t rank) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.rank = rank; + } +} + +inline void setQnnTensorRank(Qnn_Tensor_t *tensor, uint32_t rank) { + setQnnTensorRank(*tensor, rank); +} + +inline void setQnnTensorDimensions(Qnn_Tensor_t &tensor, uint32_t *dims) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.dimensions = dims; + } +} + +inline void setQnnTensorDimensions(Qnn_Tensor_t *tensor, uint32_t *dims) { + setQnnTensorDimensions(*tensor, dims); +} + +inline void setQnnTensorMemType(Qnn_Tensor_t &tensor, + Qnn_TensorMemType_t memType) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.memType = memType; + } +} + +inline void setQnnTensorMemType(Qnn_Tensor_t *tensor, + Qnn_TensorMemType_t memType) { + setQnnTensorMemType(*tensor, memType); +} + +inline void setQnnTensorClientBuf(Qnn_Tensor_t &tensor, + Qnn_ClientBuffer_t clientBuf) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.clientBuf = clientBuf; + } +} + +inline void setQnnTensorClientBuf(Qnn_Tensor_t *tensor, + Qnn_ClientBuffer_t clientBuf) { + setQnnTensorClientBuf(*tensor, clientBuf); +} + +inline void setQnnTensorMemHandle(Qnn_Tensor_t &tensor, + Qnn_MemHandle_t 
handle) { + if (tensor.version == QNN_TENSOR_VERSION_1) { + tensor.v1.memHandle = handle; + } +} + +inline void setQnnTensorMemHandle(Qnn_Tensor_t *tensor, + Qnn_MemHandle_t handle) { + setQnnTensorMemHandle(*tensor, handle); +} + +// Validation +#define VALIDATE_TENSOR_VERSION(tensor, err) \ + VALIDATE(validateTensorVersion(tensor), err) +#define VALIDATE_OP_CONFIG_VERSION(op, err) \ + VALIDATE(validateOpConfigVersion(op), err) + +// Accessors for QNN Op Config +#define QNN_OP_CFG_GET_NAME(opConfig) getQnnOpConfigName(opConfig) +#define QNN_OP_CFG_GET_PACKAGE_NAME(opConfig) \ + getQnnOpConfigPackageName(opConfig) +#define QNN_OP_CFG_GET_TYPE_NAME(opConfig) getQnnOpConfigTypeName(opConfig) +#define QNN_OP_CFG_GET_NUM_PARAMS(opConfig) getQnnOpConfigNumParams(opConfig) +#define QNN_OP_CFG_GET_PARAMS(opConfig) getQnnOpConfigParams(opConfig) +#define QNN_OP_CFG_GET_NUM_INPUTS(opConfig) getQnnOpConfigNumInputs(opConfig) +#define QNN_OP_CFG_GET_INPUTS(opConfig) getQnnOpConfigInputs(opConfig) +#define QNN_OP_CFG_GET_NUM_OUTPUTS(opConfig) getQnnOpConfigNumOutputs(opConfig) +#define QNN_OP_CFG_GET_OUTPUTS(opConfig) getQnnOpConfigOutputs(opConfig) + +// Modifiers for QNN Op Config +#define QNN_OP_CFG_SET_NAME(opConfig, value) setQnnOpConfigName(opConfig, value) +#define QNN_OP_CFG_SET_PACKAGE_NAME(opConfig, value) \ + setQnnOpConfigPackageName(opConfig, value) +#define QNN_OP_CFG_SET_TYPE_NAME(opConfig, value) \ + setQnnOpConfigTypeName(opConfig, value) +#define QNN_OP_CFG_SET_PARAMS(opConfig, numOfParams, params) \ + setQnnOpConfigParams(opConfig, numOfParams, params) +#define QNN_OP_CFG_SET_INPUTS(opConfig, numOfInputs, inputTensors) \ + setQnnOpConfigInputs(opConfig, numOfInputs, inputTensors) +#define QNN_OP_CFG_SET_OUTPUTS(opConfig, numOfOutputs, outputTensors) \ + setQnnOpConfigOutputs(opConfig, numOfOutputs, outputTensors) + +// Accessors for QNN Tensor +#define QNN_TENSOR_GET_ID(tensor) getQnnTensorId(tensor) +#define QNN_TENSOR_GET_NAME(tensor) getQnnTensorName(tensor) +#define QNN_TENSOR_GET_TYPE(tensor) getQnnTensorType(tensor) +#define QNN_TENSOR_GET_DATA_FORMAT(tensor) getQnnTensorDataFormat(tensor) +#define QNN_TENSOR_GET_DATA_TYPE(tensor) getQnnTensorDataType(tensor) +#define QNN_TENSOR_GET_QUANT_PARAMS(tensor) getQnnTensorQuantParams(tensor) +#define QNN_TENSOR_GET_RANK(tensor) getQnnTensorRank(tensor) +#define QNN_TENSOR_GET_DIMENSIONS(tensor) getQnnTensorDimensions(tensor) +#define QNN_TENSOR_GET_MEM_TYPE(tensor) getQnnTensorMemType(tensor) +#define QNN_TENSOR_GET_CLIENT_BUF(tensor) getQnnTensorClientBuf(tensor) +#define QNN_TENSOR_GET_MEM_HANDLE(tensor) getQnnTensorMemHandle(tensor) + +// Modifiers for QNN Tensor +#define QNN_TENSOR_SET_ID(tensor, value) setQnnTensorId(tensor, value) +#define QNN_TENSOR_SET_NAME(tensor, value) setQnnTensorName(tensor, value) +#define QNN_TENSOR_SET_TYPE(tensor, value) setQnnTensorType(tensor, value) +#define QNN_TENSOR_SET_DATA_FORMAT(tensor, value) \ + setQnnTensorDataFormat(tensor, value) +#define QNN_TENSOR_SET_DATA_TYPE(tensor, value) \ + setQnnTensorDataType(tensor, value) +#define QNN_TENSOR_SET_QUANT_PARAMS(tensor, value) \ + setQnnTensorQuantParams(tensor, value) +#define QNN_TENSOR_SET_RANK(tensor, value) setQnnTensorRank(tensor, value) +#define QNN_TENSOR_SET_DIMENSIONS(tensor, value) \ + setQnnTensorDimensions(tensor, value) +#define QNN_TENSOR_SET_MEM_TYPE(tensor, value) \ + setQnnTensorMemType(tensor, value) +#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) \ + setQnnTensorClientBuf(tensor, value) +#define 
QNN_TENSOR_SET_MEM_HANDLE(tensor, value) \ + setQnnTensorMemHandle(tensor, value) + +} // namespace qnn_wrapper_api diff --git a/nntrainer/npu/qnn/Utils/BuildId.hpp b/nntrainer/npu/qnn/Utils/BuildId.hpp new file mode 100644 index 000000000..9e6e29152 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/BuildId.hpp @@ -0,0 +1,19 @@ +//============================================================================== +// +// Copyright (c) 2020 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +namespace qnn { +namespace tools { + +inline std::string getBuildId() { + return std::string("v2.16.4.231110151339_60331"); +} + +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/DataUtil.cpp b/nntrainer/npu/qnn/Utils/DataUtil.cpp new file mode 100644 index 000000000..885520dc6 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DataUtil.cpp @@ -0,0 +1,417 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== +#include +#include +#include +#include +#include + +#include "DataUtil.hpp" +//#include "Log.h" +#include "Logger.hpp" +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" + +#define __fp16 _Float16 + +using namespace qnn; +using namespace qnn::tools; + +std::tuple +datautil::getDataTypeSizeInBytes(Qnn_DataType_t dataType) { + if (g_dataTypeToSize.find(dataType) == g_dataTypeToSize.end()) { + // MLLM_LOG_ERROR_LEGACY("Invalid qnn data type provided"); + return std::make_tuple(StatusCode::INVALID_DATA_TYPE, 0); + } + return std::make_tuple(StatusCode::SUCCESS, + g_dataTypeToSize.find(dataType)->second); +} + +size_t datautil::calculateElementCount(std::vector dims) { + if (dims.size() == 0) { + return 0; + } + return std::accumulate(dims.begin(), dims.end(), 1, + std::multiplies()); +} + +std::tuple +datautil::calculateLength(std::vector dims, Qnn_DataType_t dataType) { + if (dims.size() == 0) { + // MLLM_LOG_ERROR_LEGACY("dims.size() is zero"); + return std::make_tuple(StatusCode::INVALID_DIMENSIONS, 0); + } + StatusCode returnStatus{StatusCode::SUCCESS}; + size_t length{0}; + std::tie(returnStatus, length) = getDataTypeSizeInBytes(dataType); + if (StatusCode::SUCCESS != returnStatus) { + return std::make_tuple(returnStatus, 0); + } + length *= calculateElementCount(dims); + return std::make_tuple(StatusCode::SUCCESS, length); +} + +datautil::StatusCode datautil::readDataFromFile(std::string filePath, + std::vector dims, + Qnn_DataType_t dataType, + uint8_t *buffer) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + std::ifstream in(filePath, std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", filePath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + in.seekg(0, in.end); + const size_t length = in.tellg(); + in.seekg(0, in.beg); + StatusCode err{StatusCode::SUCCESS}; + size_t l{0}; + std::tie(err, l) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return err; + } + if (length != l) { + // MLLM_LOG_ERROR_LEGACY("Input file %s: file size in bytes (%d), should be + // equal to: %d", 
filePath.c_str(), length, l); + return StatusCode::DATA_SIZE_MISMATCH; + } + + if (!in.read(reinterpret_cast(buffer), length)) { + // MLLM_LOG_ERROR_LEGACY("Failed to read the contents of: %s", + // filePath.c_str()); + return StatusCode::DATA_READ_FAIL; + } + return StatusCode::SUCCESS; +} + +datautil::ReadBatchDataRetType_t datautil::readBatchDataAndUpdateQueue( + std::queue &filePaths, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return std::make_tuple(StatusCode::INVALID_BUFFER, 0, 0); + } + StatusCode err{StatusCode::SUCCESS}; + size_t l{0}; + std::tie(err, l) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return std::make_tuple(err, 0, 0); + } + size_t numInputsCopied = 0; + size_t numBatchSize = 0; + size_t totalLength = 0; + do { + if (filePaths.empty()) { + numBatchSize += (l - totalLength) / (totalLength / numBatchSize); + // pad the vector with zeros + memset(buffer + totalLength, 0, (l - totalLength) * sizeof(char)); + totalLength = l; + } else { + std::ifstream in(filePaths.front(), std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", + // filePaths.front().c_str()); + return std::make_tuple(StatusCode::FILE_OPEN_FAIL, numInputsCopied, + numBatchSize); + } + in.seekg(0, in.end); + const size_t length = in.tellg(); + in.seekg(0, in.beg); + if ((l % length) != 0 || length > l || length == 0) { + // MLLM_LOG_ERROR_LEGACY("Input file %s: file size in bytes (%d), should + // be multiples of: %d", + // filePaths.front().c_str(), + // length, + // l); + return std::make_tuple(StatusCode::DATA_SIZE_MISMATCH, numInputsCopied, + numBatchSize); + } + if (!in.read( + reinterpret_cast(buffer + (numInputsCopied * length)), + length)) { + // MLLM_LOG_ERROR_LEGACY("Failed to read the contents of: %s", + // filePaths.front().c_str()); + return std::make_tuple(StatusCode::DATA_READ_FAIL, numInputsCopied, + numBatchSize); + } + QNN_VERBOSE("Return from readDataFromFile()"); + totalLength += length; + numInputsCopied += 1; + numBatchSize += 1; + filePaths.pop(); + } + } while (totalLength < l); + return std::make_tuple(StatusCode::SUCCESS, numInputsCopied, numBatchSize); +} + +std::tuple +datautil::getFileSize(std::string filePath) { + std::ifstream in(filePath, std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", filePath.c_str()); + return std::make_tuple(StatusCode::FILE_OPEN_FAIL, 0); + } + in.seekg(0, in.end); + const size_t length = in.tellg(); + in.seekg(0, in.beg); + return std::make_tuple(StatusCode::SUCCESS, length); +} + +datautil::StatusCode datautil::readBinaryFromFile(std::string filePath, + uint8_t *buffer, + size_t bufferSize) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + std::ifstream in(filePath, std::ifstream::binary); + if (!in) { + // MLLM_LOG_ERROR_LEGACY("Failed to open input file: %s", filePath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + if (!in.read(reinterpret_cast(buffer), bufferSize)) { + // MLLM_LOG_ERROR_LEGACY("Failed to read the contents of: %s", + // filePath.c_str()); + return StatusCode::DATA_READ_FAIL; + } + return StatusCode::SUCCESS; +} + +datautil::StatusCode datautil::writeDataToFile(std::string fileDir, + std::string fileName, + std::vector dims, + Qnn_DataType_t dataType, + uint8_t *buffer) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is 
nullptr"); + return StatusCode::INVALID_BUFFER; + } + if (!pal::Directory::makePath(fileDir)) { + // MLLM_LOG_ERROR_LEGACY("Failed to create output directory: %s", + // fileDir.c_str()); + return StatusCode::DIRECTORY_CREATE_FAIL; + } + const std::string outputPath(fileDir + pal::Path::getSeparator() + fileName); + std::ofstream os(outputPath, std::ofstream::binary); + if (!os) { + // MLLM_LOG_ERROR_LEGACY("Failed to open output file for writing: %s", + // outputPath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + StatusCode err{StatusCode::SUCCESS}; + size_t length{0}; + std::tie(err, length) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return err; + } + for (size_t l = 0; l < length; l++) { + os.write(reinterpret_cast(&(*(buffer + l))), 1); + } + return StatusCode::SUCCESS; +} + +datautil::StatusCode +datautil::writeBatchDataToFile(std::vector fileDirs, + std::string fileName, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer, + const size_t batchSize) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + StatusCode err{StatusCode::SUCCESS}; + size_t length{0}; + std::tie(err, length) = datautil::calculateLength(dims, dataType); + if (StatusCode::SUCCESS != err) { + return err; + } + auto outputSize = (length / batchSize); + for (size_t batchIndex = 0; batchIndex < fileDirs.size(); batchIndex++) { + std::string fileDir = fileDirs[batchIndex]; + if (!pal::Directory::makePath(fileDir)) { + // MLLM_LOG_ERROR_LEGACY("Failed to create output directory: %s", + // fileDir.c_str()); + return StatusCode::DIRECTORY_CREATE_FAIL; + } + const std::string outputPath(fileDir + pal::Path::getSeparator() + + fileName); + std::ofstream os(outputPath, std::ofstream::binary); + if (!os) { + // MLLM_LOG_ERROR_LEGACY("Failed to open output file for writing: %s", + // outputPath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + for (size_t l = 0; l < outputSize; l++) { + size_t bufferIndex = l + (batchIndex * outputSize); + os.write(reinterpret_cast(&(*(buffer + bufferIndex))), 1); + } + } + return StatusCode::SUCCESS; +} + +datautil::StatusCode datautil::writeBinaryToFile(std::string fileDir, + std::string fileName, + uint8_t *buffer, + size_t bufferSize) { + if (nullptr == buffer) { + // MLLM_LOG_ERROR_LEGACY("buffer is nullptr"); + return StatusCode::INVALID_BUFFER; + } + if (!pal::Directory::makePath(fileDir)) { + // MLLM_LOG_ERROR_LEGACY("Failed to create output directory: %s", + // fileDir.c_str()); + return StatusCode::DIRECTORY_CREATE_FAIL; + } + const std::string outputPath(fileDir + pal::Path::getSeparator() + fileName); + std::ofstream os(outputPath, std::ofstream::binary); + if (!os) { + // MLLM_LOG_ERROR_LEGACY("Failed to open output file for writing: %s", + // outputPath.c_str()); + return StatusCode::FILE_OPEN_FAIL; + } + os.write(reinterpret_cast(buffer), bufferSize); + return StatusCode::SUCCESS; +} + +template +datautil::StatusCode datautil::floatToTfN(T_QuantType *out, float *in, + int32_t offset, float scale, + size_t numElements) { + static_assert(std::is_unsigned::value, + "floatToTfN supports unsigned only!"); + + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + + size_t dataTypeSizeInBytes = sizeof(T_QuantType); + size_t bitWidth = dataTypeSizeInBytes * g_bitsPerByte; + double trueBitWidthMax = pow(2, bitWidth) - 1; + double encodingMin = offset * scale; + double encodingMax = 
(trueBitWidthMax + offset) * scale; + double encodingRange = encodingMax - encodingMin; + + for (size_t i = 0; i < numElements; ++i) { + int quantizedValue = + round(trueBitWidthMax * (in[i] - encodingMin) / encodingRange); + if (quantizedValue < 0) + quantizedValue = 0; + else if (quantizedValue > (int)trueBitWidthMax) + quantizedValue = (int)trueBitWidthMax; + out[i] = static_cast(quantizedValue); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::floatToTfN(uint8_t *out, float *in, int32_t offset, + float scale, size_t numElements); + +template datautil::StatusCode +datautil::floatToTfN(uint16_t *out, float *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode datautil::tfNToFloat(float *out, T_QuantType *in, + int32_t offset, float scale, + size_t numElements) { + static_assert(std::is_unsigned::value, + "tfNToFloat supports unsigned only!"); + + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + for (size_t i = 0; i < numElements; i++) { + double quantizedValue = static_cast(in[i]); + double offsetDouble = static_cast(offset); + out[i] = static_cast((quantizedValue + offsetDouble) * scale); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::tfNToFloat(float *out, uint8_t *in, int32_t offset, + float scale, size_t numElements); + +template datautil::StatusCode +datautil::tfNToFloat(float *out, uint16_t *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode datautil::castToFloat(float *out, T_QuantType *in, + size_t numElements) { + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + for (size_t i = 0; i < numElements; i++) { + out[i] = static_cast(in[i]); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::castToFloat(float *out, uint8_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, uint16_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, uint32_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, int8_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, int16_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat(float *out, int32_t *in, size_t numElements); + +template datautil::StatusCode +datautil::castToFloat<__fp16>(float *out, __fp16 *in, size_t numElements); + +template +datautil::StatusCode datautil::castFromFloat(T_QuantType *out, float *in, + size_t numElements) { + if (nullptr == out || nullptr == in) { + // MLLM_LOG_ERROR_LEGACY("Received a nullptr"); + return StatusCode::INVALID_BUFFER; + } + for (size_t i = 0; i < numElements; i++) { + out[i] = static_cast(in[i]); + } + return StatusCode::SUCCESS; +} + +template datautil::StatusCode +datautil::castFromFloat(uint8_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(uint16_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(uint32_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(int8_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat(int16_t *out, float *in, size_t numElements); + +template datautil::StatusCode 
+datautil::castFromFloat(int32_t *out, float *in, size_t numElements); + +template datautil::StatusCode +datautil::castFromFloat<__fp16>(__fp16 *out, float *in, size_t numElements); diff --git a/nntrainer/npu/qnn/Utils/DataUtil.hpp b/nntrainer/npu/qnn/Utils/DataUtil.hpp new file mode 100644 index 000000000..31ed12064 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DataUtil.hpp @@ -0,0 +1,108 @@ +//============================================================================== +// +// Copyright (c) 2019-2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== +#pragma once + +#include +#include +#include + +#include "QnnTypes.h" + +namespace qnn { +namespace tools { +namespace datautil { +enum class StatusCode { + SUCCESS, + DATA_READ_FAIL, + DATA_WRITE_FAIL, + FILE_OPEN_FAIL, + DIRECTORY_CREATE_FAIL, + INVALID_DIMENSIONS, + INVALID_DATA_TYPE, + DATA_SIZE_MISMATCH, + INVALID_BUFFER, +}; + +const size_t g_bitsPerByte = 8; + +using ReadBatchDataRetType_t = std::tuple; + +std::tuple getDataTypeSizeInBytes(Qnn_DataType_t dataType); + +std::tuple calculateLength(std::vector dims, + Qnn_DataType_t dataType); + +size_t calculateElementCount(std::vector dims); + +std::tuple getFileSize(std::string filePath); + +StatusCode readDataFromFile(std::string filePath, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer); + +/* + * Read data in batches from Queue and try to matches the model input's + * batches. If the queue is empty while matching the batch size of model, + * pad the remaining buffer with zeros + * @param filePathsQueue image paths queue + * @param dims model input dimensions + * @param dataType to create input buffer from file + * @param buffer to fill the input image data + * + * @return ReadBatchDataRetType_t returns numFilesCopied and batchSize along + * with status + */ +ReadBatchDataRetType_t +readBatchDataAndUpdateQueue(std::queue &filePaths, + std::vector dims, Qnn_DataType_t dataType, + uint8_t *buffer); + +StatusCode readBinaryFromFile(std::string filePath, uint8_t *buffer, + size_t bufferSize); + +StatusCode writeDataToFile(std::string fileDir, std::string fileName, + std::vector dims, Qnn_DataType_t dataType, + uint8_t *buffer); + +StatusCode writeBatchDataToFile(std::vector fileDirs, + std::string fileName, std::vector dims, + Qnn_DataType_t dataType, uint8_t *buffer, + const size_t batchSize); + +StatusCode writeBinaryToFile(std::string fileDir, std::string fileName, + uint8_t *buffer, size_t bufferSize); + +template +datautil::StatusCode floatToTfN(T_QuantType *out, float *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode tfNToFloat(float *out, T_QuantType *in, int32_t offset, + float scale, size_t numElements); + +template +datautil::StatusCode castToFloat(float *out, T_QuantType *in, + size_t numElements); + +template +datautil::StatusCode castFromFloat(T_QuantType *out, float *in, + size_t numElements); + +const std::map g_dataTypeToSize = { + {QNN_DATATYPE_INT_8, 1}, {QNN_DATATYPE_INT_16, 2}, + {QNN_DATATYPE_INT_32, 4}, {QNN_DATATYPE_INT_64, 8}, + {QNN_DATATYPE_UINT_8, 1}, {QNN_DATATYPE_UINT_16, 2}, + {QNN_DATATYPE_UINT_32, 4}, {QNN_DATATYPE_UINT_64, 8}, + {QNN_DATATYPE_FLOAT_16, 2}, {QNN_DATATYPE_FLOAT_32, 4}, + {QNN_DATATYPE_FLOAT_64, 8}, {QNN_DATATYPE_SFIXED_POINT_8, 1}, + {QNN_DATATYPE_SFIXED_POINT_16, 2}, {QNN_DATATYPE_SFIXED_POINT_32, 4}, + {QNN_DATATYPE_UFIXED_POINT_8, 1}, 
{QNN_DATATYPE_UFIXED_POINT_16, 2}, + {QNN_DATATYPE_UFIXED_POINT_32, 4}, {QNN_DATATYPE_BOOL_8, 1}, +}; +} // namespace datautil +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/DynamicLoadUtil.cpp b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.cpp new file mode 100644 index 000000000..8dbab14c9 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.cpp @@ -0,0 +1,192 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include + +#include "DynamicLoadUtil.hpp" +//#include "Log.h" +#include "Logger.hpp" +#include "PAL/DynamicLoading.hpp" + +using namespace qnn; +using namespace qnn::tools; + +typedef Qnn_ErrorHandle_t (*QnnInterfaceGetProvidersFn_t)( + const QnnInterface_t ***providerList, uint32_t *numProviders); + +typedef Qnn_ErrorHandle_t (*QnnSystemInterfaceGetProvidersFn_t)( + const QnnSystemInterface_t ***providerList, uint32_t *numProviders); + +template +static inline T resolveSymbol(void *libHandle, const char *sym) { + T ptr = (T)pal::dynamicloading::dlSym(libHandle, sym); + if (ptr == nullptr) { + // MLLM_LOG_ERROR_LEGACY("Unable to access symbol [%s]. + // pal::dynamicloading::dlError(): %s", + // sym, + // pal::dynamicloading::dlError()); + } + return ptr; +} + +dynamicloadutil::StatusCode dynamicloadutil::getQnnFunctionPointers( + std::string backendPath, std::string modelPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers, void **backendHandleRtn, + bool loadModelLib, void **modelHandleRtn) { + void *libBackendHandle = pal::dynamicloading::dlOpen( + backendPath.c_str(), + pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_GLOBAL); + if (nullptr == libBackendHandle) { + // MLLM_LOG_ERROR_LEGACY("Unable to load backend. 
+ // pal::dynamicloading::dlError(): %s", + // pal::dynamicloading::dlError()); + return StatusCode::FAIL_LOAD_BACKEND; + } + if (nullptr != backendHandleRtn) { + *backendHandleRtn = libBackendHandle; + } + // Get QNN Interface + QnnInterfaceGetProvidersFn_t getInterfaceProviders{nullptr}; + getInterfaceProviders = resolveSymbol( + libBackendHandle, "QnnInterface_getProviders"); + if (nullptr == getInterfaceProviders) { + return StatusCode::FAIL_SYM_FUNCTION; + } + QnnInterface_t **interfaceProviders{nullptr}; + uint32_t numProviders{0}; + if (QNN_SUCCESS != + getInterfaceProviders((const QnnInterface_t ***)&interfaceProviders, + &numProviders)) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (nullptr == interfaceProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers: null interface + // providers received."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (0 == numProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers: 0 interface + // providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + bool foundValidInterface{false}; + for (size_t pIdx = 0; pIdx < numProviders; pIdx++) { + if (QNN_API_VERSION_MAJOR == + interfaceProviders[pIdx]->apiVersion.coreApiVersion.major && + QNN_API_VERSION_MINOR <= + interfaceProviders[pIdx]->apiVersion.coreApiVersion.minor) { + foundValidInterface = true; + qnnFunctionPointers->qnnInterface = + interfaceProviders[pIdx]->QNN_INTERFACE_VER_NAME; + break; + } + } + if (!foundValidInterface) { + // MLLM_LOG_ERROR_LEGACY("Unable to find a valid interface."); + libBackendHandle = nullptr; + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + + if (true == loadModelLib) { + QNN_INFO("Loading model shared library ([model].so)"); + void *libModelHandle = pal::dynamicloading::dlOpen( + modelPath.c_str(), + pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_LOCAL); + if (nullptr == libModelHandle) { + // MLLM_LOG_ERROR_LEGACY("Unable to load model. + // pal::dynamicloading::dlError(): %s", pal::dynamicloading::dlError()); + return StatusCode::FAIL_LOAD_MODEL; + } + if (nullptr != modelHandleRtn) { + *modelHandleRtn = libModelHandle; + } + + std::string modelPrepareFunc = "QnnModel_composeGraphs"; + qnnFunctionPointers->composeGraphsFnHandle = + resolveSymbol( + libModelHandle, modelPrepareFunc.c_str()); + if (nullptr == qnnFunctionPointers->composeGraphsFnHandle) { + return StatusCode::FAIL_SYM_FUNCTION; + } + + std::string modelFreeFunc = "QnnModel_freeGraphsInfo"; + qnnFunctionPointers->freeGraphInfoFnHandle = + resolveSymbol( + libModelHandle, modelFreeFunc.c_str()); + if (nullptr == qnnFunctionPointers->freeGraphInfoFnHandle) { + return StatusCode::FAIL_SYM_FUNCTION; + } + } else { + QNN_INFO("Model wasn't loaded from a shared library."); + } + return StatusCode::SUCCESS; +} + +dynamicloadutil::StatusCode dynamicloadutil::getQnnSystemFunctionPointers( + std::string systemLibraryPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers) { + QNN_FUNCTION_ENTRY_LOG; + if (!qnnFunctionPointers) { + // MLLM_LOG_ERROR_LEGACY("nullptr provided for qnnFunctionPointers"); + return StatusCode::FAILURE; + } + void *systemLibraryHandle = pal::dynamicloading::dlOpen( + systemLibraryPath.c_str(), + pal::dynamicloading::DL_NOW | pal::dynamicloading::DL_LOCAL); + if (nullptr == systemLibraryHandle) { + // MLLM_LOG_ERROR_LEGACY("Unable to load system library. 
+ // pal::dynamicloading::dlError(): %s", + // pal::dynamicloading::dlError()); + return StatusCode::FAIL_LOAD_SYSTEM_LIB; + } + QnnSystemInterfaceGetProvidersFn_t getSystemInterfaceProviders{nullptr}; + getSystemInterfaceProviders = + resolveSymbol( + systemLibraryHandle, "QnnSystemInterface_getProviders"); + if (nullptr == getSystemInterfaceProviders) { + return StatusCode::FAIL_SYM_FUNCTION; + } + QnnSystemInterface_t **systemInterfaceProviders{nullptr}; + uint32_t numProviders{0}; + if (QNN_SUCCESS != + getSystemInterfaceProviders( + (const QnnSystemInterface_t ***)&systemInterfaceProviders, + &numProviders)) { + // MLLM_LOG_ERROR_LEGACY("Failed to get system interface providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (nullptr == systemInterfaceProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get system interface providers: null + // interface providers received."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + if (0 == numProviders) { + // MLLM_LOG_ERROR_LEGACY("Failed to get interface providers: 0 interface + // providers."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + bool foundValidSystemInterface{false}; + for (size_t pIdx = 0; pIdx < numProviders; pIdx++) { + if (QNN_SYSTEM_API_VERSION_MAJOR == + systemInterfaceProviders[pIdx]->systemApiVersion.major && + QNN_SYSTEM_API_VERSION_MINOR <= + systemInterfaceProviders[pIdx]->systemApiVersion.minor) { + foundValidSystemInterface = true; + qnnFunctionPointers->qnnSystemInterface = + systemInterfaceProviders[pIdx]->QNN_SYSTEM_INTERFACE_VER_NAME; + break; + } + } + if (!foundValidSystemInterface) { + // MLLM_LOG_ERROR_LEGACY("Unable to find a valid system interface."); + return StatusCode::FAIL_GET_INTERFACE_PROVIDERS; + } + QNN_FUNCTION_EXIT_LOG; + return StatusCode::SUCCESS; +} diff --git a/nntrainer/npu/qnn/Utils/DynamicLoadUtil.hpp b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.hpp new file mode 100644 index 000000000..b8fd2493c --- /dev/null +++ b/nntrainer/npu/qnn/Utils/DynamicLoadUtil.hpp @@ -0,0 +1,36 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "QNN.hpp" + +namespace qnn { +namespace tools { +namespace dynamicloadutil { +enum class StatusCode { + SUCCESS, + FAILURE, + FAIL_LOAD_BACKEND, + FAIL_LOAD_MODEL, + FAIL_SYM_FUNCTION, + FAIL_GET_INTERFACE_PROVIDERS, + FAIL_LOAD_SYSTEM_LIB, +}; + +StatusCode +getQnnFunctionPointers(std::string backendPath, std::string modelPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers, + void **backendHandle, bool loadModelLib, + void **modelHandleRtn); +StatusCode getQnnSystemFunctionPointers( + std::string systemLibraryPath, + sample_app::QnnFunctionPointers *qnnFunctionPointers); +} // namespace dynamicloadutil +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/IOTensor.cpp b/nntrainer/npu/qnn/Utils/IOTensor.cpp new file mode 100644 index 000000000..49f346981 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/IOTensor.cpp @@ -0,0 +1,972 @@ +//============================================================================== +// +// Copyright (c) 2020-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#include +#include +#include +#include +#include +#include + +#include "DataUtil.hpp" +#include "IOTensor.hpp" +// #include "Log.h" +#include "Logger.hpp" +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" +#include "PAL/StringOp.hpp" +#include "QnnTypeMacros.hpp" +#include "QnnTypes.h" + +#define __fp16 _Float16 + +using namespace qnn; +using namespace qnn::tools; +using namespace qnn_wrapper_api; + +// Helper method to read data from files to a buffer. +iotensor::StatusCode iotensor::IOTensor::readDataAndAllocateBuffer( + std::queue &filePaths, std::vector dims, + Qnn_DataType_t dataType, uint8_t **bufferToCopy) { + StatusCode returnStatus = StatusCode::SUCCESS; + *bufferToCopy = nullptr; + returnStatus = allocateBuffer(bufferToCopy, dims, dataType); + if (StatusCode::SUCCESS == returnStatus) { + datautil::StatusCode status; + std::tie(status, m_numFilesPopulated, m_batchSize) = + datautil::readBatchDataAndUpdateQueue( + filePaths, dims, dataType, reinterpret_cast(*bufferToCopy)); + if (datautil::StatusCode::SUCCESS != status) { + QNN_DEBUG("Failure in datautil::readBatchDataAndUpdateQueue"); + returnStatus = StatusCode::FAILURE; + } + } + if (StatusCode::SUCCESS != returnStatus) { + if (nullptr != *bufferToCopy) { + free(*bufferToCopy); + *bufferToCopy = nullptr; + } + } + return returnStatus; +} + +// Helper method to copy a float buffer, quantize it, and copy +// it to a tensor (Qnn_Tensor_t) buffer. +iotensor::StatusCode +iotensor::IOTensor::copyFromFloatToNative(float *floatBuffer, + Qnn_Tensor_t *tensor) { + if (nullptr == floatBuffer || nullptr == tensor) { + // MLLM_LOG_ERROR_LEGACY("copyFromFloatToNative(): received a nullptr"); + return StatusCode::FAILURE; + } + + StatusCode returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_RANK(tensor)); + + switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) { + case QNN_DATATYPE_UFIXED_POINT_8: + datautil::floatToTfN( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + datautil::calculateElementCount(dims)); + break; + + case QNN_DATATYPE_UFIXED_POINT_16: + datautil::floatToTfN( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + datautil::calculateElementCount(dims)); + break; + + case QNN_DATATYPE_FLOAT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat<__fp16>( + static_cast<__fp16 *>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat<__fp16>"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure 
in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_BOOL_8: + if (datautil::StatusCode::SUCCESS != + datautil::castFromFloat( + static_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + floatBuffer, datautil::calculateElementCount(dims))) { + // MLLM_LOG_ERROR_LEGACY("failure in castFromFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + default: + // MLLM_LOG_ERROR_LEGACY("Datatype not supported yet!"); + returnStatus = StatusCode::FAILURE; + break; + } + return returnStatus; +} + +// Helper method to populate an input tensor in the graph during execution. +// It relies on reading data from files provided during app creation. +iotensor::StatusCode +iotensor::IOTensor::populateInputTensor(std::queue &filePaths, + Qnn_Tensor_t *input, + iotensor::InputDataType inputDataType) { + if (nullptr == input) { + // MLLM_LOG_ERROR_LEGACY("input is nullptr"); + return StatusCode::FAILURE; + } + + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(input), QNN_TENSOR_GET_RANK(input)); + + if (inputDataType == InputDataType::FLOAT && + QNN_TENSOR_GET_DATA_TYPE(input) != QNN_DATATYPE_FLOAT_32) { + uint8_t *fileToBuffer = nullptr; + returnStatus = readDataAndAllocateBuffer( + filePaths, dims, QNN_DATATYPE_FLOAT_32, &fileToBuffer); + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("readDataFromFileToBuffer successful"); + returnStatus = + copyFromFloatToNative(reinterpret_cast(fileToBuffer), input); + } + if (nullptr != fileToBuffer) { + free(fileToBuffer); + fileToBuffer = nullptr; + } + } else { + datautil::StatusCode status; + std::tie(status, m_numFilesPopulated, m_batchSize) = + datautil::readBatchDataAndUpdateQueue( + filePaths, dims, QNN_TENSOR_GET_DATA_TYPE(input), + static_cast(QNN_TENSOR_GET_CLIENT_BUF(input).data)); + if (datautil::StatusCode::SUCCESS != status) { + QNN_DEBUG("Failure in datautil::readBatchDataAndUpdateQueue"); + returnStatus = StatusCode::FAILURE; + } + } + return returnStatus; +} + +// Helper method to populate all input tensors during execution. 
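+// The per-input file queues must line up one-to-one with
+// graphInfo.numInputTensors. A minimal usage sketch (illustrative only;
+// names such as "ioTensor", "inputs" and "graphInfo" are assumptions, not
+// part of this change):
+//
+//   std::vector<std::queue<std::string>> filePathsQueue(graphInfo.numInputTensors);
+//   // push one raw-data file path per batch into each per-input queue ...
+//   if (iotensor::StatusCode::SUCCESS !=
+//       ioTensor.populateInputTensors(0, filePathsQueue, inputs, graphInfo,
+//                                     iotensor::InputDataType::FLOAT)) {
+//     // handle the error
+//   }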
+iotensor::StatusCode iotensor::IOTensor::populateInputTensors( + uint32_t graphIdx, std::vector> &filePathsQueue, + Qnn_Tensor_t *inputs, qnn_wrapper_api::GraphInfo_t graphInfo, + iotensor::InputDataType inputDataType) { + QNN_DEBUG("populateInputTensors() graphIndx %d", graphIdx); + if (nullptr == inputs) { + // MLLM_LOG_ERROR_LEGACY("inputs is nullptr"); + return StatusCode::FAILURE; + } + auto inputCount = graphInfo.numInputTensors; + if (filePathsQueue.size() != inputCount) { + // MLLM_LOG_ERROR_LEGACY( + // "Incorrect amount of Input files for graphIdx: %d. Expected: %d, " + // "received: %d", + // graphIdx, + // inputCount, + // filePathsQueue.size()); + return StatusCode::FAILURE; + } + + for (size_t inputIdx = 0; inputIdx < inputCount; inputIdx++) { + if (StatusCode::SUCCESS != populateInputTensor(filePathsQueue[inputIdx], + &(inputs[inputIdx]), + inputDataType)) { + QNN_DEBUG("populateInputTensor() failure for input: %d", inputIdx); + return StatusCode::FAILURE; + } + } + return StatusCode::SUCCESS; +} + +// Helper method to populate an input tensor in the graph during execution. +// It relies on reading data from buffer provided during executeGraph() call. +iotensor::StatusCode +iotensor::IOTensor::populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, + iotensor::InputDataType inputDataType) { + if (nullptr == input) { + // MLLM_LOG_ERROR_LEGACY("input is nullptr"); + return StatusCode::FAILURE; + } + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(input), QNN_TENSOR_GET_RANK(input)); + if (inputDataType == InputDataType::FLOAT && + QNN_TENSOR_GET_DATA_TYPE(input) != QNN_DATATYPE_FLOAT_32) { + QNN_DEBUG("Received FLOAT input, but model needs non-float input"); + if (StatusCode::SUCCESS != + copyFromFloatToNative(reinterpret_cast(buffer), input)) { + QNN_DEBUG("copyFromFloatToNative failure"); + return StatusCode::FAILURE; + } + } else { + size_t length; + datautil::StatusCode returnStatus; + std::tie(returnStatus, length) = + datautil::calculateLength(dims, QNN_TENSOR_GET_DATA_TYPE(input)); + if (datautil::StatusCode::SUCCESS != returnStatus) { + return StatusCode::FAILURE; + } + pal::StringOp::memscpy( + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(input).data), + length, buffer, length); + } + return StatusCode::SUCCESS; +} + +// Helper method to populate all input tensors. +iotensor::StatusCode iotensor::IOTensor::populateInputTensors( + uint32_t graphIdx, std::vector inputBuffers, Qnn_Tensor_t *inputs, + qnn_wrapper_api::GraphInfo_t graphInfo, + iotensor::InputDataType inputDataType) { + if (nullptr == inputs) { + // MLLM_LOG_ERROR_LEGACY("inputs is nullptr"); + return StatusCode::FAILURE; + } + auto inputCount = graphInfo.numInputTensors; + if (inputBuffers.size() != inputCount) { + // MLLM_LOG_ERROR_LEGACY("Incorrect amount of Input Buffers for graphIdx: + // %d. Expected: %d, received: %d", + // graphIdx, + // inputCount, + // inputBuffers.size()); + return StatusCode::FAILURE; + } + for (size_t inputIdx = 0; inputIdx < inputCount; inputIdx++) { + if (StatusCode::SUCCESS != populateInputTensor(inputBuffers[inputIdx], + &(inputs[inputIdx]), + inputDataType)) { + QNN_DEBUG("populateInputTensor() failure for input: %d", inputIdx); + return StatusCode::FAILURE; + } + } + return StatusCode::SUCCESS; +} + +// Setup details for Qnn_Tensor_t for execution +// based on information in Qnn_TensorWrapper_t provided by model.so. 
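+// Roughly: a tensorCount-sized Qnn_Tensor_t array is calloc'd, each
+// wrapper's metadata is deep-copied via sample_app::deepCopyQnnTensorInfo,
+// the memory type is set to QNN_TENSORMEMTYPE_RAW, and a client buffer is
+// allocated with its size taken from datautil::calculateLength(dims, dataType).
+// On failure the partially built array is released with tearDownTensors().
+// Callers normally reach this through setupInputAndOutputTensors() below
+// rather than invoking it directly.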
+iotensor::StatusCode +iotensor::IOTensor::setupTensors(Qnn_Tensor_t **tensors, uint32_t tensorCount, + Qnn_Tensor_t *tensorWrappers) { + if (nullptr == tensorWrappers) { + // MLLM_LOG_ERROR_LEGACY("tensorWrappers is nullptr"); + return StatusCode::FAILURE; + } + if (0 == tensorCount) { + QNN_INFO("tensor count is 0. Nothing to setup."); + return StatusCode::SUCCESS; + } + auto returnStatus = StatusCode::SUCCESS; + *tensors = (Qnn_Tensor_t *)calloc(1, tensorCount * sizeof(Qnn_Tensor_t)); + if (nullptr == *tensors) { + // MLLM_LOG_ERROR_LEGACY("mem alloc failed for *tensors"); + returnStatus = StatusCode::FAILURE; + return returnStatus; + } + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + Qnn_Tensor_t wrapperTensor = tensorWrappers[tensorIdx]; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(wrapperTensor), + QNN_TENSOR_GET_RANK(wrapperTensor)); + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("allocateBuffer successful"); + (*tensors)[tensorIdx] = QNN_TENSOR_INIT; + returnStatus = (sample_app::deepCopyQnnTensorInfo( + ((*tensors) + tensorIdx), &wrapperTensor) == true + ? StatusCode::SUCCESS + : StatusCode::FAILURE); + } + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("deepCopyQnnTensorInfo successful"); + QNN_TENSOR_SET_MEM_TYPE(((*tensors) + tensorIdx), QNN_TENSORMEMTYPE_RAW); + } + Qnn_ClientBuffer_t clientBuffer = QNN_CLIENT_BUFFER_INIT; + returnStatus = + allocateBuffer(reinterpret_cast(&clientBuffer.data), dims, + QNN_TENSOR_GET_DATA_TYPE((*tensors) + tensorIdx)); + datautil::StatusCode datautilStatus{datautil::StatusCode::SUCCESS}; + size_t length{0}; + std::tie(datautilStatus, length) = datautil::calculateLength( + dims, QNN_TENSOR_GET_DATA_TYPE((*tensors) + tensorIdx)); + if (datautilStatus != datautil::StatusCode::SUCCESS) { + returnStatus = StatusCode::FAILURE; + } + clientBuffer.dataSize = length; + QNN_TENSOR_SET_CLIENT_BUF(((*tensors) + tensorIdx), clientBuffer); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("Failure in setupTensors, cleaning up + // resources"); + if (nullptr != (QNN_TENSOR_GET_CLIENT_BUF((*tensors) + tensorIdx)).data) { + free(QNN_TENSOR_GET_CLIENT_BUF((*tensors) + tensorIdx).data); + } + tearDownTensors(*tensors, tensorIdx); + *tensors = nullptr; + returnStatus = StatusCode::FAILURE; + // MLLM_LOG_ERROR_LEGACY("Failure in setupTensors, done cleaning up + // resources"); + return returnStatus; + } + } + return returnStatus; +} + +iotensor::StatusCode iotensor::IOTensor::setupTensorsNoCopy( + Qnn_Tensor_t **tensors, uint32_t tensorCount, Qnn_Tensor_t *tensorWrappers) { + if (nullptr == tensorWrappers) { + // MLLM_LOG_ERROR_LEGACY("tensorWrappers is nullptr"); + return StatusCode::FAILURE; + } + if (0 == tensorCount) { + QNN_INFO("tensor count is 0. 
Nothing to setup."); + return StatusCode::SUCCESS; + } + auto returnStatus = StatusCode::SUCCESS; + *tensors = (Qnn_Tensor_t *)calloc(1, tensorCount * sizeof(Qnn_Tensor_t)); + if (nullptr == *tensors) { + // MLLM_LOG_ERROR_LEGACY("mem alloc failed for *tensors"); + returnStatus = StatusCode::FAILURE; + return returnStatus; + } + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + Qnn_Tensor_t wrapperTensor = tensorWrappers[tensorIdx]; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(wrapperTensor), + QNN_TENSOR_GET_RANK(wrapperTensor)); + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("allocateBuffer successful"); + (*tensors)[tensorIdx] = QNN_TENSOR_INIT; + returnStatus = (sample_app::deepCopyQnnTensorInfo( + ((*tensors) + tensorIdx), &wrapperTensor) == true + ? StatusCode::SUCCESS + : StatusCode::FAILURE); + } + if (StatusCode::SUCCESS == returnStatus) { + QNN_DEBUG("deepCopyQnnTensorInfo successful"); + QNN_TENSOR_SET_MEM_TYPE(((*tensors) + tensorIdx), + QNN_TENSORMEMTYPE_MEMHANDLE); + } + } + return returnStatus; +} + +// Setup details for all input and output tensors for graph execution. +iotensor::StatusCode iotensor::IOTensor::setupInputAndOutputTensors( + Qnn_Tensor_t **inputs, Qnn_Tensor_t **outputs, + qnn_wrapper_api::GraphInfo_t graphInfo) { + auto returnStatus = StatusCode::SUCCESS; +#ifdef QNN_ARM + if (StatusCode::SUCCESS != setupTensorsNoCopy(inputs, + graphInfo.numInputTensors, + (graphInfo.inputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up input tensors"); + returnStatus = StatusCode::FAILURE; + } + if (StatusCode::SUCCESS != setupTensorsNoCopy(outputs, + graphInfo.numOutputTensors, + (graphInfo.outputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up output tensors"); + returnStatus = StatusCode::FAILURE; + } +#else + if (StatusCode::SUCCESS != setupTensors(inputs, graphInfo.numInputTensors, + (graphInfo.inputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up input tensors"); + returnStatus = StatusCode::FAILURE; + } + if (StatusCode::SUCCESS != setupTensors(outputs, graphInfo.numOutputTensors, + (graphInfo.outputTensors))) { + // MLLM_LOG_ERROR_LEGACY("Failure in setting up output tensors"); + returnStatus = StatusCode::FAILURE; + } +#endif + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("Failure in setupInputAndOutputTensors, cleaning up + // resources"); + if (nullptr != *inputs) { + QNN_DEBUG("cleaning up input tensors"); + tearDownTensors(*inputs, graphInfo.numInputTensors); + *inputs = nullptr; + } + if (nullptr != *outputs) { + QNN_DEBUG("cleaning up output tensors"); + tearDownTensors(*outputs, graphInfo.numOutputTensors); + *outputs = nullptr; + } + // MLLM_LOG_ERROR_LEGACY("Failure in setupInputAndOutputTensors, done + // cleaning up resources"); + } + return returnStatus; +} + +// Clean up all tensors related data after execution. 
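+// Frees each tensor's dimensions array and clientBuf.data before releasing
+// the Qnn_Tensor_t array itself, mirroring the allocations made in
+// setupTensors(). A hedged pairing sketch (variable names are assumptions,
+// not part of this change):
+//
+//   ioTensor.setupInputAndOutputTensors(&inputs, &outputs, graphInfo);
+//   // ... execute the graph on the populated inputs ...
+//   ioTensor.tearDownInputAndOutputTensors(inputs, outputs,
+//                                          graphInfo.numInputTensors,
+//                                          graphInfo.numOutputTensors);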
+iotensor::StatusCode iotensor::IOTensor::tearDownTensors(Qnn_Tensor_t *tensors, + uint32_t tensorCount) { + for (size_t tensorIdx = 0; tensorIdx < tensorCount; tensorIdx++) { + QNN_DEBUG("freeing resources for tensor: %d", tensorIdx); + if (nullptr != QNN_TENSOR_GET_DIMENSIONS(tensors[tensorIdx])) { + QNN_DEBUG("freeing dimensions"); + free(QNN_TENSOR_GET_DIMENSIONS(tensors[tensorIdx])); + } + if (nullptr != QNN_TENSOR_GET_CLIENT_BUF(tensors[tensorIdx]).data) { + QNN_DEBUG("freeing clientBuf.data"); + free(QNN_TENSOR_GET_CLIENT_BUF(tensors[tensorIdx]).data); + } + } + free(tensors); + return StatusCode::SUCCESS; +} + +// Clean up all input and output tensors after execution. +iotensor::StatusCode iotensor::IOTensor::tearDownInputAndOutputTensors( + Qnn_Tensor_t *inputs, Qnn_Tensor_t *outputs, size_t numInputTensors, + size_t numOutputTensors) { + if (nullptr != inputs) { + QNN_INFO("cleaning up resources for input tensors"); + tearDownTensors(inputs, numInputTensors); + inputs = nullptr; + } + if (nullptr != outputs) { + QNN_INFO("cleaning up resources for output tensors"); + tearDownTensors(outputs, numOutputTensors); + outputs = nullptr; + } + return StatusCode::SUCCESS; +} + +// Helper method to allocate a buffer. +iotensor::StatusCode +iotensor::IOTensor::allocateBuffer(uint8_t **buffer, std::vector dims, + Qnn_DataType_t dataType) { + size_t elementCount = datautil::calculateElementCount(dims); + auto returnStatus = StatusCode::SUCCESS; + switch (dataType) { + case QNN_DATATYPE_FLOAT_32: + QNN_DEBUG("allocating float buffer"); + returnStatus = + allocateBuffer(reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_FLOAT_16: + QNN_DEBUG("allocating fp16 buffer"); + returnStatus = + allocateBuffer<__fp16>(reinterpret_cast<__fp16 **>(buffer), elementCount); + break; + + case QNN_DATATYPE_UINT_8: + case QNN_DATATYPE_UFIXED_POINT_8: + QNN_DEBUG("allocating uint8_t buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + case QNN_DATATYPE_UINT_16: + case QNN_DATATYPE_UFIXED_POINT_16: + QNN_DEBUG("allocating uint16_t buffer"); + returnStatus = allocateBuffer( + reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_UINT_32: + QNN_DEBUG("allocating uint32_t buffer"); + returnStatus = allocateBuffer( + reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_INT_8: + QNN_DEBUG("allocating int8_t buffer"); + returnStatus = + allocateBuffer(reinterpret_cast(buffer), elementCount); + break; + + case QNN_DATATYPE_INT_16: + QNN_DEBUG("allocating int16_t buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + case QNN_DATATYPE_INT_32: + QNN_DEBUG("allocating int32_t buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + case QNN_DATATYPE_BOOL_8: + QNN_DEBUG("allocating bool buffer"); + returnStatus = allocateBuffer(reinterpret_cast(buffer), + elementCount); + break; + + default: + // MLLM_LOG_ERROR_LEGACY("Datatype not supported yet!"); + returnStatus = StatusCode::FAILURE; + break; + } + return returnStatus; +} + +// Helper method to allocate a buffer. 
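The datatype switch above and the templated allocateBuffer that follows both come down to "element count times element width". A standalone sketch of that arithmetic, for reference; elementCountOf is a hypothetical name, and the real datautil helpers (calculateElementCount/calculateLength) also propagate a status code, which is omitted here.

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical helper: product of all dimensions, treating an empty shape as
// a scalar with one element.
static size_t elementCountOf(const std::vector<size_t> &dims) {
  return std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1),
                         std::multiplies<size_t>());
}

// Example: a {1, 32, 128} tensor of QNN_DATATYPE_FLOAT_32 holds 4096 elements,
// so the QNN_DATATYPE_FLOAT_32 case above ends up asking malloc for
// 4096 * sizeof(float) = 16384 bytes.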
+template +iotensor::StatusCode iotensor::IOTensor::allocateBuffer(T **buffer, + size_t &elementCount) { + QNN_DEBUG("ElementCount: %d, sizeof(T): %d, total size: %d", elementCount, + sizeof(T), elementCount * sizeof(T)); + *buffer = (T *)malloc(elementCount * sizeof(T)); + if (nullptr == *buffer) { + // MLLM_LOG_ERROR_LEGACY("mem alloc failed for *buffer"); + return StatusCode::FAILURE; + } + return StatusCode::SUCCESS; +} + +// Convert data to float or de-quantization. This is used when +// user requests for float output and the model produces +// non-float output. +iotensor::StatusCode iotensor::IOTensor::convertToFloat(float **out, + Qnn_Tensor_t *tensor) { + if (nullptr == tensor) { + // MLLM_LOG_ERROR_LEGACY("tensors is nullptr"); + return StatusCode::FAILURE; + } + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_RANK(tensor)); + auto returnStatus = StatusCode::SUCCESS; + size_t elementCount = datautil::calculateElementCount(dims); + returnStatus = allocateBuffer(out, elementCount); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("failure in allocateBuffer"); + return returnStatus; + } + switch (QNN_TENSOR_GET_DATA_TYPE(tensor)) { + case QNN_DATATYPE_UFIXED_POINT_8: + if (datautil::StatusCode::SUCCESS != + datautil::tfNToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in tfNToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UFIXED_POINT_16: + if (datautil::StatusCode::SUCCESS != + datautil::tfNToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.offset, + QNN_TENSOR_GET_QUANT_PARAMS(tensor).scaleOffsetEncoding.scale, + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in tfNToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_FLOAT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat<__fp16>( + *out, + reinterpret_cast<__fp16 *>(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat<__fp16>"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_16: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_UINT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_8: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_16: + 
if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_INT_32: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + case QNN_DATATYPE_BOOL_8: + if (datautil::StatusCode::SUCCESS != + datautil::castToFloat( + *out, + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(tensor).data), + elementCount)) { + // MLLM_LOG_ERROR_LEGACY("failure in castToFloat"); + returnStatus = StatusCode::FAILURE; + } + break; + + default: + // MLLM_LOG_ERROR_LEGACY("Datatype not supported yet!"); + returnStatus = StatusCode::FAILURE; + break; + } + if (StatusCode::SUCCESS != returnStatus) { + QNN_DEBUG("freeing *out"); + if (*out != nullptr) { + free(*out); + *out = nullptr; + } + } + return returnStatus; +} + +// Helper method to convert Output tensors to float and write them +// out to files. +iotensor::StatusCode iotensor::IOTensor::convertAndWriteOutputTensorInFloat( + Qnn_Tensor_t *output, std::vector outputPaths, + std::string fileName) { + if (nullptr == output) { + // MLLM_LOG_ERROR_LEGACY("output is nullptr"); + return StatusCode::FAILURE; + } + + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(output), + QNN_TENSOR_GET_RANK(output)); + float *floatBuffer = nullptr; + returnStatus = convertToFloat(&floatBuffer, output); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("failure in convertToFloat"); + return StatusCode::FAILURE; + } + uint8_t *bufferToWrite = reinterpret_cast(floatBuffer); + if (datautil::StatusCode::SUCCESS != + datautil::writeBatchDataToFile(outputPaths, fileName, dims, + QNN_DATATYPE_FLOAT_32, bufferToWrite, + m_batchSize)) { + // MLLM_LOG_ERROR_LEGACY("failure in writeBatchDataToFile"); + returnStatus = StatusCode::FAILURE; + } + if (nullptr != floatBuffer) { + QNN_DEBUG("freeing floatBuffer"); + free(floatBuffer); + floatBuffer = nullptr; + } + return returnStatus; +} + +// Helper method to write out output. There is no de-quantization here. +// Just write output as is to files. +iotensor::StatusCode +iotensor::IOTensor::writeOutputTensor(Qnn_Tensor_t *output, + std::vector outputPaths, + std::string fileName) { + if (nullptr == output) { + // MLLM_LOG_ERROR_LEGACY("output is nullptr"); + return StatusCode::FAILURE; + } + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(output), + QNN_TENSOR_GET_RANK(output)); + uint8_t *bufferToWrite = + reinterpret_cast(QNN_TENSOR_GET_CLIENT_BUF(output).data); + if (datautil::StatusCode::SUCCESS != + datautil::writeBatchDataToFile(outputPaths, fileName, dims, + QNN_TENSOR_GET_DATA_TYPE(output), + bufferToWrite, m_batchSize)) { + // MLLM_LOG_ERROR_LEGACY("failure in writeBatchDataToFile"); + returnStatus = StatusCode::FAILURE; + } + return returnStatus; +} + +// Helper method to write out output. There is no de-quantization here. +// Just write output as is to files. 
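Note that, unlike the file-writing overload just described, the buffer-writing overload that follows converts the tensor to float through convertToFloat() before copying into the caller's buffer, so quantized outputs are de-quantized first. A hedged, standalone sketch of the 8-bit unsigned fixed-point case handled by datautil::tfNToFloat, assuming the usual QNN scale/offset convention; the real helper may differ in rounding and error handling.

#include <cstddef>
#include <cstdint>

// Hypothetical sketch: affine de-quantization, float ~= scale * (q + offset).
static void dequantizeTfN8(float *out, const uint8_t *in, int32_t offset,
                           double scale, size_t elementCount) {
  for (size_t i = 0; i < elementCount; ++i) {
    out[i] = static_cast<float>(scale * (static_cast<double>(in[i]) + offset));
  }
}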
+iotensor::StatusCode +iotensor::IOTensor::writeOutputTensor(Qnn_Tensor_t *output, + uint8_t *output_buffer) { + if (nullptr == output) { + // MLLM_LOG_ERROR_LEGACY("output is nullptr"); + return StatusCode::FAILURE; + } + auto returnStatus = StatusCode::SUCCESS; + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(output), + QNN_TENSOR_GET_RANK(output)); + float *floatBuffer = nullptr; + returnStatus = convertToFloat(&floatBuffer, output); + if (StatusCode::SUCCESS != returnStatus) { + // MLLM_LOG_ERROR_LEGACY("failure in convertToFloat"); + return StatusCode::FAILURE; + } + uint8_t *bufferToWrite = reinterpret_cast(floatBuffer); + + datautil::StatusCode err{datautil::StatusCode::SUCCESS}; + size_t length{0}; + std::tie(err, length) = + datautil::calculateLength(dims, QNN_DATATYPE_FLOAT_32); + if (datautil::StatusCode::SUCCESS != err) { + return StatusCode::FAILURE; + } + + memcpy(output_buffer, bufferToWrite, length); + + return returnStatus; +} + +// Write out all output tensors to files. If output_data_type is float, +// then all outputs will be raw floats regardless of what the model outputs. +// If the output_data_type is native, then output is written as produced by the +// model. Also, for native option, a json with quantization parameters is +// written out. If output_data_type is float_and_native, both above are done. If +// the output in the graph is float, then output_data_type has no effect. +iotensor::StatusCode iotensor::IOTensor::writeOutputTensors( + uint32_t graphIdx, size_t startIdx, char *graphName, Qnn_Tensor_t *outputs, + uint32_t numOutputs, iotensor::OutputDataType outputDatatype, + uint32_t graphsCount, std::string outputPath) { + if (nullptr == outputs) { + // MLLM_LOG_ERROR_LEGACY("Received nullptr"); + return StatusCode::FAILURE; + } + if (graphsCount > 1) { + if (nullptr != graphName && strlen(graphName) > 0) { + outputPath += (pal::Path::getSeparator() + std::string(graphName)); + } else { + outputPath += (pal::Path::getSeparator() + std::string("Graph_") + + std::to_string(graphIdx)); + } + } + auto returnStatus = StatusCode::SUCCESS; + std::vector outputPaths; + for (size_t idx = 0; idx < m_numFilesPopulated; idx++) { + std::string output = + outputPath + (pal::Path::getSeparator() + std::string("Result_") + + std::to_string(startIdx + idx)); + outputPaths.push_back(output); + } + for (size_t outputIdx = 0; outputIdx < numOutputs; outputIdx++) { + QNN_DEBUG("Writing output for outputIdx: %d", outputIdx); + std::string outputFilePrefix; + if (nullptr != QNN_TENSOR_GET_NAME(outputs[outputIdx]) && + strlen(QNN_TENSOR_GET_NAME(outputs[outputIdx])) > 0) { + outputFilePrefix = std::string(QNN_TENSOR_GET_NAME(outputs[outputIdx])); + } else { + outputFilePrefix = std::string("Output_") + std::to_string(outputIdx); + } + auto outputFile = outputFilePrefix + std::string(".raw"); + auto outputFileNative = outputFilePrefix + std::string("_native.raw"); + if (QNN_TENSOR_GET_DATA_TYPE(outputs[outputIdx]) == QNN_DATATYPE_FLOAT_32) { + QNN_DEBUG("Writing in output->dataType == QNN_DATATYPE_FLOAT_32"); + returnStatus = + writeOutputTensor(&(outputs[outputIdx]), outputPaths, outputFile); + } else if (outputDatatype == OutputDataType::FLOAT_ONLY) { + QNN_DEBUG("Writing in output->dataType == OutputDataType::FLOAT_ONLY"); + returnStatus = convertAndWriteOutputTensorInFloat( + &(outputs[outputIdx]), outputPaths, outputFile); + } else if (outputDatatype == OutputDataType::NATIVE_ONLY) { + QNN_DEBUG("Writing in output->dataType == OutputDataType::NATIVE_ONLY"); + 
returnStatus = + writeOutputTensor(&(outputs[outputIdx]), outputPaths, outputFileNative); + } else if (outputDatatype == OutputDataType::FLOAT_AND_NATIVE) { + QNN_DEBUG( + "Writing in output->dataType == OutputDataType::FLOAT_AND_NATIVE"); + returnStatus = convertAndWriteOutputTensorInFloat( + &(outputs[outputIdx]), outputPaths, outputFile); + if (StatusCode::SUCCESS == returnStatus) { + returnStatus = writeOutputTensor(&(outputs[outputIdx]), outputPaths, + outputFileNative); + } + } + } + return returnStatus; +} + +// Helper method to allocate a buffer and copy data to it. +iotensor::StatusCode +iotensor::IOTensor::allocateAndCopyBuffer(uint8_t **buffer, + Qnn_Tensor_t *tensor) { + if (nullptr == tensor) { + return StatusCode::FAILURE; + } + std::vector dims; + fillDims(dims, QNN_TENSOR_GET_DIMENSIONS(tensor), + QNN_TENSOR_GET_RANK(tensor)); + datautil::StatusCode datautilStatus; + size_t length; + std::tie(datautilStatus, length) = + datautil::calculateLength(dims, QNN_TENSOR_GET_DATA_TYPE(tensor)); + if (datautilStatus != datautil::StatusCode::SUCCESS) { + return StatusCode::FAILURE; + } + if (StatusCode::SUCCESS != + allocateBuffer(buffer, dims, QNN_TENSOR_GET_DATA_TYPE(tensor))) { + // MLLM_LOG_ERROR_LEGACY("failure in allocateBuffer"); + return StatusCode::FAILURE; + } + pal::StringOp::memscpy(*buffer, length * sizeof(uint8_t), + QNN_TENSOR_GET_CLIENT_BUF(tensor).data, + length * sizeof(uint8_t)); + return StatusCode::SUCCESS; +} + +iotensor::StatusCode iotensor::IOTensor::fillDims(std::vector &dims, + uint32_t *inDimensions, + uint32_t rank) { + if (nullptr == inDimensions) { + // MLLM_LOG_ERROR_LEGACY("input dimensions is nullptr"); + return StatusCode::FAILURE; + } + for (size_t r = 0; r < rank; r++) { + dims.push_back(inDimensions[r]); + } + return StatusCode::SUCCESS; +} + +iotensor::OutputDataType +iotensor::parseOutputDataType(std::string dataTypeString) { + std::transform(dataTypeString.begin(), dataTypeString.end(), + dataTypeString.begin(), ::tolower); + OutputDataType parsedDataType = OutputDataType::INVALID; + if (dataTypeString == "float_only") { + parsedDataType = OutputDataType::FLOAT_ONLY; + } else if (dataTypeString == "native_only") { + parsedDataType = OutputDataType::NATIVE_ONLY; + } else if (dataTypeString == "float_and_native") { + parsedDataType = OutputDataType::FLOAT_AND_NATIVE; + } + return parsedDataType; +} + +iotensor::InputDataType +iotensor::parseInputDataType(std::string dataTypeString) { + std::transform(dataTypeString.begin(), dataTypeString.end(), + dataTypeString.begin(), ::tolower); + InputDataType parsedDataType = InputDataType::INVALID; + if (dataTypeString == "float") { + parsedDataType = InputDataType::FLOAT; + } else if (dataTypeString == "native") { + parsedDataType = InputDataType::NATIVE; + } + return parsedDataType; +} diff --git a/nntrainer/npu/qnn/Utils/IOTensor.hpp b/nntrainer/npu/qnn/Utils/IOTensor.hpp new file mode 100644 index 000000000..7d07f28ea --- /dev/null +++ b/nntrainer/npu/qnn/Utils/IOTensor.hpp @@ -0,0 +1,122 @@ +//============================================================================== +// +// Copyright (c) 2020, 2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#pragma once + +#include +#include + +#include "QnnBackend.h" +#include "QnnCommon.h" +#include "QnnContext.h" +#include "QnnGraph.h" +#include "QnnProperty.h" +#include "QnnSampleAppUtils.hpp" +#include "QnnTensor.h" +#include "QnnTypes.h" +#include "WrapperUtils/QnnWrapperUtils.hpp" + +namespace qnn { +namespace tools { +namespace iotensor { + +enum class StatusCode { SUCCESS, FAILURE }; +enum class OutputDataType { + FLOAT_ONLY, + NATIVE_ONLY, + FLOAT_AND_NATIVE, + INVALID +}; +enum class InputDataType { FLOAT, NATIVE, INVALID }; + +OutputDataType parseOutputDataType(std::string dataTypeString); +InputDataType parseInputDataType(std::string dataTypeString); + +class IOTensor { +public: + IOTensor() : m_batchSize(1), m_numFilesPopulated(0) {} + + StatusCode setupInputAndOutputTensors(Qnn_Tensor_t **inputs, + Qnn_Tensor_t **outputs, + qnn_wrapper_api::GraphInfo_t graphInfo); + + StatusCode writeOutputTensors(uint32_t graphIdx, size_t startIdx, + char *graphName, Qnn_Tensor_t *outputs, + uint32_t numOutputs, + OutputDataType outputDatatype, + uint32_t graphsCount, std::string outputPath); + + StatusCode populateInputTensors( + uint32_t graphIdx, std::vector> &filePathsQueue, + Qnn_Tensor_t *inputs, qnn_wrapper_api::GraphInfo_t graphInfo, + iotensor::InputDataType inputDataType); + + StatusCode populateInputTensors(uint32_t graphIdx, + std::vector inputBuffers, + Qnn_Tensor_t *inputs, + qnn_wrapper_api::GraphInfo_t graphInfo, + InputDataType inputDataType); + + StatusCode tearDownInputAndOutputTensors(Qnn_Tensor_t *inputs, + Qnn_Tensor_t *outputs, + size_t numInputTensors, + size_t numOutputTensors); + + StatusCode writeOutputTensor(Qnn_Tensor_t *output, uint8_t *output_buffer); + +private: + size_t m_batchSize; + size_t m_numFilesPopulated; + + StatusCode populateInputTensor(std::queue &filePaths, + Qnn_Tensor_t *input, + InputDataType inputDataType); + + StatusCode populateInputTensor(uint8_t *buffer, Qnn_Tensor_t *input, + InputDataType inputDataType); + + StatusCode readDataAndAllocateBuffer(std::queue &filePaths, + std::vector dims, + Qnn_DataType_t dataType, + uint8_t **bufferToCopy); + + template + StatusCode allocateBuffer(T **buffer, size_t &elementCount); + + StatusCode convertToFloat(float **out, Qnn_Tensor_t *output); + + StatusCode + convertAndWriteOutputTensorInFloat(Qnn_Tensor_t *output, + std::vector outputPaths, + std::string fileName); + + StatusCode writeOutputTensor(Qnn_Tensor_t *output, + std::vector outputPaths, + std::string fileName); + + StatusCode allocateAndCopyBuffer(uint8_t **buffer, Qnn_Tensor_t *tensor); + + StatusCode tearDownTensors(Qnn_Tensor_t *tensors, uint32_t tensorCount); + + StatusCode allocateBuffer(uint8_t **buffer, std::vector dims, + Qnn_DataType_t dataType); + + StatusCode copyFromFloatToNative(float *floatBuffer, Qnn_Tensor_t *tensor); + + StatusCode setupTensors(Qnn_Tensor_t **tensors, uint32_t tensorCount, + Qnn_Tensor_t *tensorsInfo); + // just set the tensor info, no buffer allocation + // used when enable qnn shared buffer for input and output + StatusCode setupTensorsNoCopy(Qnn_Tensor_t **tensors, uint32_t tensorCount, + Qnn_Tensor_t *tensorsInfo); + + StatusCode fillDims(std::vector &dims, uint32_t *inDimensions, + uint32_t rank); +}; +} // namespace iotensor +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.cpp b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.cpp new file mode 100644 index 000000000..76c035e04 
--- /dev/null +++ b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.cpp @@ -0,0 +1,358 @@ +//============================================================================== +// +// Copyright (c) 2019-2023 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include +#include +#include +#include +#include + +#include "Log/Logger.hpp" +#include "PAL/Directory.hpp" +#include "PAL/FileOp.hpp" +#include "PAL/Path.hpp" +#include "PAL/StringOp.hpp" +#include "QnnTypeMacros.hpp" +#include "Utils/QnnSampleAppUtils.hpp" + +using namespace qnn; +using namespace qnn::tools; +using namespace qnn_wrapper_api; + +void sample_app::split(std::vector &splitString, + const std::string &tokenizedString, + const char separator) { + splitString.clear(); + std::istringstream tokenizedStringStream(tokenizedString); + while (!tokenizedStringStream.eof()) { + std::string value; + getline(tokenizedStringStream, value, separator); + if (!value.empty()) { + splitString.push_back(value); + } + } +} + +void sample_app::parseInputFilePaths(std::vector &inputFilePaths, + std::vector &paths, + std::string separator) { + for (auto &inputInfo : inputFilePaths) { + auto position = inputInfo.find(separator); + if (position != std::string::npos) { + auto path = inputInfo.substr(position + separator.size()); + paths.push_back(path); + } else { + paths.push_back(inputInfo); + } + } +} + +sample_app::ReadInputListsRetType_t +sample_app::readInputLists(std::vector inputFileListPaths) { + std::vector>> filePathsLists; + for (auto const &path : inputFileListPaths) { + bool readSuccess; + std::vector> filePathList; + std::tie(filePathList, readSuccess) = readInputList(path); + if (!readSuccess) { + filePathsLists.clear(); + return std::make_tuple(filePathsLists, false); + } + filePathsLists.push_back(filePathList); + } + return std::make_tuple(filePathsLists, true); +} + +sample_app::ReadInputListRetType_t +sample_app::readInputList(const std::string inputFileListPath) { + std::queue lines; + std::ifstream fileListStream(inputFileListPath); + if (!fileListStream) { + QNN_ERROR("Failed to open input file: %s", inputFileListPath.c_str()); + std::vector> result; + return std::make_tuple(result, false); + } + std::string fileLine; + while (std::getline(fileListStream, fileLine)) { + if (fileLine.empty()) + continue; + lines.push(fileLine); + } + if (!lines.empty() && lines.front().compare(0, 1, "#") == 0) { + lines.pop(); + } + std::string separator = ":="; + std::vector> filePathsList; + while (!lines.empty()) { + std::vector paths{}; + std::vector inputFilePaths; + split(inputFilePaths, lines.front(), ' '); + parseInputFilePaths(inputFilePaths, paths, separator); + // TODO: multi input support + filePathsList.reserve(paths.size()); + for (size_t idx = 0; idx < paths.size(); idx++) { + if (idx >= filePathsList.size()) { + filePathsList.push_back(std::queue()); + } + filePathsList.back().push(paths[idx]); + } + lines.pop(); + } + return std::make_tuple(filePathsList, true); +} + +sample_app::ProfilingLevel +sample_app::parseProfilingLevel(std::string profilingLevelString) { + std::transform(profilingLevelString.begin(), profilingLevelString.end(), + profilingLevelString.begin(), ::tolower); + ProfilingLevel parsedProfilingLevel = ProfilingLevel::INVALID; + if (profilingLevelString == "off") { + parsedProfilingLevel = ProfilingLevel::OFF; + } else if (profilingLevelString == 
"basic") { + parsedProfilingLevel = ProfilingLevel::BASIC; + } else if (profilingLevelString == "detailed") { + parsedProfilingLevel = ProfilingLevel::DETAILED; + } + return parsedProfilingLevel; +} + +bool sample_app::deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, + const Qnn_Tensor_t *src) { + if (nullptr == dst || nullptr == src) { + QNN_ERROR("Received nullptr"); + return false; + } + // set tensor.version before using QNN_TENSOR_SET macros, as they require the + // version to be set to correctly assign values + dst->version = src->version; + const char *tensorName = QNN_TENSOR_GET_NAME(src); + if (!tensorName) { + QNN_TENSOR_SET_NAME(dst, nullptr); + } else { + QNN_TENSOR_SET_NAME(dst, + pal::StringOp::strndup(tensorName, strlen(tensorName))); + } + QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src)); + QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src)); + QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src)); + QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src)); + Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT; + qParams.encodingDefinition = + QNN_TENSOR_GET_QUANT_PARAMS(src).encodingDefinition; + qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED; + if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding; + qParams.scaleOffsetEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(src).scaleOffsetEncoding; + } else if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding == + QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + qParams.quantizationEncoding = + QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding; + qParams.axisScaleOffsetEncoding.axis = + QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.axis; + qParams.axisScaleOffsetEncoding.numScaleOffsets = + QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets; + if (QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.numScaleOffsets > 0) { + qParams.axisScaleOffsetEncoding.scaleOffset = + (Qnn_ScaleOffset_t *)malloc(QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.numScaleOffsets * + sizeof(Qnn_ScaleOffset_t)); + if (qParams.axisScaleOffsetEncoding.scaleOffset) { + for (size_t idx = 0; idx < QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.numScaleOffsets; + idx++) { + qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale = + QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.scaleOffset[idx] + .scale; + qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset = + QNN_TENSOR_GET_QUANT_PARAMS(src) + .axisScaleOffsetEncoding.scaleOffset[idx] + .offset; + } + } + } + } + QNN_TENSOR_SET_QUANT_PARAMS(dst, qParams); + QNN_TENSOR_SET_RANK(dst, QNN_TENSOR_GET_RANK(src)); + QNN_TENSOR_SET_DIMENSIONS(dst, nullptr); + if (QNN_TENSOR_GET_RANK(src) > 0) { + QNN_TENSOR_SET_DIMENSIONS( + dst, (uint32_t *)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t))); + if (QNN_TENSOR_GET_DIMENSIONS(dst)) { + pal::StringOp::memscpy(QNN_TENSOR_GET_DIMENSIONS(dst), + QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t), + QNN_TENSOR_GET_DIMENSIONS(src), + QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t)); + } + } + return true; +} + +bool sample_app::copyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc, + Qnn_Tensor_t *&tensorWrappers, + uint32_t tensorsCount) { + QNN_FUNCTION_ENTRY_LOG; + auto returnStatus = true; + tensorWrappers = (Qnn_Tensor_t *)calloc(tensorsCount, sizeof(Qnn_Tensor_t)); + if (nullptr == tensorWrappers) { + QNN_ERROR("Failed to 
allocate memory for tensorWrappers."); + return false; + } + if (returnStatus) { + for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) { + QNN_DEBUG("Extracting tensorInfo for tensor Idx: %d", tIdx); + tensorWrappers[tIdx] = QNN_TENSOR_INIT; + deepCopyQnnTensorInfo(&tensorWrappers[tIdx], &tensorsInfoSrc[tIdx]); + } + } + QNN_FUNCTION_EXIT_LOG; + return returnStatus; +} + +bool sample_app::copyGraphsInfoV1( + const QnnSystemContext_GraphInfoV1_t *graphInfoSrc, + qnn_wrapper_api::GraphInfo_t *graphInfoDst) { + graphInfoDst->graphName = nullptr; + if (graphInfoSrc->graphName) { + graphInfoDst->graphName = pal::StringOp::strndup( + graphInfoSrc->graphName, strlen(graphInfoSrc->graphName)); + } + graphInfoDst->inputTensors = nullptr; + graphInfoDst->numInputTensors = 0; + if (graphInfoSrc->graphInputs) { + if (!copyTensorsInfo(graphInfoSrc->graphInputs, graphInfoDst->inputTensors, + graphInfoSrc->numGraphInputs)) { + return false; + } + graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs; + } + graphInfoDst->outputTensors = nullptr; + graphInfoDst->numOutputTensors = 0; + if (graphInfoSrc->graphOutputs) { + if (!copyTensorsInfo(graphInfoSrc->graphOutputs, + graphInfoDst->outputTensors, + graphInfoSrc->numGraphOutputs)) { + return false; + } + graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs; + } + return true; +} + +bool sample_app::copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput, + const uint32_t numGraphs, + qnn_wrapper_api::GraphInfo_t **&graphsInfo) { + QNN_FUNCTION_ENTRY_LOG; + if (!graphsInput) { + QNN_ERROR("Received nullptr for graphsInput."); + return false; + } + auto returnStatus = true; + graphsInfo = (qnn_wrapper_api::GraphInfo_t **)calloc( + numGraphs, sizeof(qnn_wrapper_api::GraphInfo_t *)); + qnn_wrapper_api::GraphInfo_t *graphInfoArr = + (qnn_wrapper_api::GraphInfo_t *)calloc( + numGraphs, sizeof(qnn_wrapper_api::GraphInfo_t)); + if (nullptr == graphsInfo || nullptr == graphInfoArr) { + QNN_ERROR("Failure to allocate memory for *graphInfo"); + returnStatus = false; + } + if (true == returnStatus) { + for (size_t gIdx = 0; gIdx < numGraphs; gIdx++) { + QNN_DEBUG("Extracting graphsInfo for graph Idx: %d", gIdx); + if (graphsInput[gIdx].version == + QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) { + copyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, &graphInfoArr[gIdx]); + } + graphsInfo[gIdx] = graphInfoArr + gIdx; + } + } + if (true != returnStatus) { + QNN_ERROR("Received an ERROR during extractGraphsInfo. 
Freeing resources."); + if (graphsInfo) { + for (uint32_t gIdx = 0; gIdx < numGraphs; gIdx++) { + if (graphsInfo[gIdx]) { + if (nullptr != graphsInfo[gIdx]->graphName) { + free(graphsInfo[gIdx]->graphName); + graphsInfo[gIdx]->graphName = nullptr; + } + qnn_wrapper_api::freeQnnTensors(graphsInfo[gIdx]->inputTensors, + graphsInfo[gIdx]->numInputTensors); + qnn_wrapper_api::freeQnnTensors(graphsInfo[gIdx]->outputTensors, + graphsInfo[gIdx]->numOutputTensors); + } + } + free(*graphsInfo); + } + free(graphsInfo); + graphsInfo = nullptr; + } + QNN_FUNCTION_EXIT_LOG; + return true; +} + +bool sample_app::copyMetadataToGraphsInfo( + const QnnSystemContext_BinaryInfo_t *binaryInfo, + qnn_wrapper_api::GraphInfo_t **&graphsInfo, uint32_t &graphsCount) { + if (nullptr == binaryInfo) { + QNN_ERROR("binaryInfo is nullptr."); + return false; + } + graphsCount = 0; + if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) { + if (binaryInfo->contextBinaryInfoV1.graphs) { + if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV1.graphs, + binaryInfo->contextBinaryInfoV1.numGraphs, + graphsInfo)) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + graphsCount = binaryInfo->contextBinaryInfoV1.numGraphs; + return true; + } + } else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) { + if (binaryInfo->contextBinaryInfoV2.graphs) { + if (!copyGraphsInfo(binaryInfo->contextBinaryInfoV2.graphs, + binaryInfo->contextBinaryInfoV2.numGraphs, + graphsInfo)) { + QNN_ERROR("Failed while copying graphs Info."); + return false; + } + graphsCount = binaryInfo->contextBinaryInfoV2.numGraphs; + return true; + } + } + QNN_ERROR("Unrecognized system context binary info version."); + return false; +} + +QnnLog_Level_t sample_app::parseLogLevel(std::string logLevelString) { + QNN_FUNCTION_ENTRY_LOG; + std::transform(logLevelString.begin(), logLevelString.end(), + logLevelString.begin(), ::tolower); + QnnLog_Level_t parsedLogLevel = QNN_LOG_LEVEL_MAX; + if (logLevelString == "error") { + parsedLogLevel = QNN_LOG_LEVEL_ERROR; + } else if (logLevelString == "warn") { + parsedLogLevel = QNN_LOG_LEVEL_WARN; + } else if (logLevelString == "info") { + parsedLogLevel = QNN_LOG_LEVEL_INFO; + } else if (logLevelString == "verbose") { + parsedLogLevel = QNN_LOG_LEVEL_VERBOSE; + } else if (logLevelString == "debug") { + parsedLogLevel = QNN_LOG_LEVEL_DEBUG; + } + QNN_FUNCTION_EXIT_LOG; + return parsedLogLevel; +} diff --git a/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.hpp b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.hpp new file mode 100644 index 000000000..d576be213 --- /dev/null +++ b/nntrainer/npu/qnn/Utils/QnnSampleAppUtils.hpp @@ -0,0 +1,70 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. 
+// +//============================================================================== +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "QNN.hpp" + +namespace qnn { +namespace tools { +namespace sample_app { + +enum class ProfilingLevel { OFF, BASIC, DETAILED, INVALID }; + +using ReadInputListRetType_t = + std::tuple>, bool>; + +ReadInputListRetType_t readInputList(std::string inputFileListPath); + +using ReadInputListsRetType_t = + std::tuple>>, bool>; + +ReadInputListsRetType_t +readInputLists(std::vector inputFileListPath); + +ProfilingLevel parseProfilingLevel(std::string profilingLevelString); + +void parseInputFilePaths(std::vector &inputFilePaths, + std::vector &paths, + std::string separator); + +void split(std::vector &splitString, + const std::string &tokenizedString, const char separator); + +bool copyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo, + qnn_wrapper_api::GraphInfo_t **&graphsInfo, + uint32_t &graphsCount); + +bool copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput, + const uint32_t numGraphs, + qnn_wrapper_api::GraphInfo_t **&graphsInfo); + +bool copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc, + qnn_wrapper_api::GraphInfo_t *graphInfoDst); + +bool copyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc, + Qnn_Tensor_t *&tensorWrappers, uint32_t tensorsCount); + +bool deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src); + +QnnLog_Level_t parseLogLevel(std::string logLevelString); + +void inline exitWithMessage(std::string &&msg, int code) { + std::cerr << msg << std::endl; + std::exit(code); +} + +} // namespace sample_app +} // namespace tools +} // namespace qnn diff --git a/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.cpp b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.cpp new file mode 100644 index 000000000..3c909a8b1 --- /dev/null +++ b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.cpp @@ -0,0 +1,205 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#include +#include +#include + +#include "QnnModelPal.hpp" +#include "QnnTypeMacros.hpp" +#include "QnnWrapperUtils.hpp" + +namespace qnn_wrapper_api { +size_t memscpy(void *dst, size_t dstSize, const void *src, size_t copySize) { + if (!dst || !src || !dstSize || !copySize) + return 0; + + size_t minSize = dstSize < copySize ? dstSize : copySize; + + memcpy(dst, src, minSize); + + return minSize; +} + +ModelError_t getQnnGraphConfigFromInfo( + const char *graphName, const GraphConfigInfo_t **graphsConfigInfo, + const uint32_t numGraphsConfigInfo, const QnnGraph_Config_t **&graphConfigs) { + if (!graphsConfigInfo || numGraphsConfigInfo == 0) { + PRINT_DEBUG( + "getQnnGraphConfigFromInfo() no custom configs passed for graph:%s.\n", + graphName); + return MODEL_NO_ERROR; + } + + size_t found = 0; + + for (uint32_t i = 0; i < numGraphsConfigInfo; i++) { + if (!graphsConfigInfo[i]) { + PRINT_ERROR("getQnnGraphConfigFromInfo() lookup error while trying to " + "query graphName:%s. 
" + "numGraphsConfigInfo > num of element in graphsConfigInfo\n", + graphName); + return MODEL_INVALID_ARGUMENT_ERROR; + } + if (strcmp(graphsConfigInfo[i]->graphName, graphName) == 0) { + graphConfigs = graphsConfigInfo[i]->graphConfigs; + found++; + } + } + + if (!found) { + PRINT_ERROR( + "getQnnGraphConfigFromInfo() unable to find graphName:%s in provided " + "graphsConfigInfo object.\n", + graphName); + return MODEL_INVALID_ARGUMENT_ERROR; + } else if (found > 1) { + PRINT_ERROR("getQnnGraphConfigFromInfo() duplicate GraphConfigInfo entries " + "found with " + "graphName:%s.\n", + graphName); + return MODEL_INVALID_ARGUMENT_ERROR; + } else { + return MODEL_NO_ERROR; + } +} + +ModelError_t deepCopyQnnTensors(Qnn_Tensor_t &src, Qnn_Tensor_t &dst) { + ModelError_t err; + VALIDATE_TENSOR_VERSION(src, err); + + dst.version = src.version; + QNN_TENSOR_SET_NAME(dst, + strnDup(QNN_TENSOR_GET_NAME(src), + std::string(QNN_TENSOR_GET_NAME(src)).size())); + if (QNN_TENSOR_GET_NAME(dst) == nullptr) { + return MODEL_TENSOR_ERROR; + } + QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src)); + QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src)); + QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src)); + QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src)); + QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSOR_GET_MEM_TYPE(src)); + + // Only metadata (i.e. non-static data) is copied from source to destination. + // The union still must be initialized so that the clientBuf/memHandle do not + // contain garbage data + if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_RAW) { + Qnn_ClientBuffer_t clientBuf = {nullptr, 0}; + QNN_TENSOR_SET_CLIENT_BUF(dst, clientBuf); + } else if (QNN_TENSOR_GET_MEM_TYPE(src) == QNN_TENSORMEMTYPE_MEMHANDLE) { + QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr); + } else { + return MODEL_TENSOR_ERROR; + } + + Qnn_QuantizeParams_t srcQParam = QNN_TENSOR_GET_QUANT_PARAMS(src); + Qnn_QuantizationEncoding_t encoding = srcQParam.quantizationEncoding; + if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { + // need to allocate and copy memory for scaleOffset as it is a pointer array + Qnn_QuantizeParams_t srcQParamCpy = srcQParam; + Qnn_AxisScaleOffset_t &axisScaleOffset = + srcQParamCpy.axisScaleOffsetEncoding; + Qnn_ScaleOffset_t **scaleOffset = &axisScaleOffset.scaleOffset; + size_t scaleOffsetSize = + axisScaleOffset.numScaleOffsets * sizeof(Qnn_ScaleOffset_t); + *scaleOffset = (Qnn_ScaleOffset_t *)malloc(scaleOffsetSize); + memscpy(*scaleOffset, scaleOffsetSize, + srcQParam.axisScaleOffsetEncoding.scaleOffset, scaleOffsetSize); + QNN_TENSOR_SET_QUANT_PARAMS(dst, srcQParamCpy); + } else if (encoding == QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET) { + // need to allocate and copy memory for scaleOffset as it is a pointer array + Qnn_QuantizeParams_t srcQParamCpy = srcQParam; + Qnn_BwAxisScaleOffset_t &bwAxisScaleOffset = + srcQParamCpy.bwAxisScaleOffsetEncoding; + size_t scaleSize = bwAxisScaleOffset.numElements * sizeof(float); + float **scales = &bwAxisScaleOffset.scales; + int32_t **offsets = &bwAxisScaleOffset.offsets; + *scales = (float *)malloc(scaleSize); + memscpy(*scales, scaleSize, srcQParam.bwAxisScaleOffsetEncoding.scales, + scaleSize); + + // Only copy offsets if present, nullptr implies all offsets are 0 + if (bwAxisScaleOffset.offsets != nullptr) { + size_t offsetSize = bwAxisScaleOffset.numElements * sizeof(int32_t); + *offsets = (int32_t *)malloc(offsetSize); + memscpy(*offsets, offsetSize, srcQParam.bwAxisScaleOffsetEncoding.offsets, + offsetSize); + } + 
QNN_TENSOR_SET_QUANT_PARAMS(dst, srcQParamCpy); + } else { + QNN_TENSOR_SET_QUANT_PARAMS(dst, srcQParam); + } + + // need to allocate and copy memory for all the pointer members + uint32_t rank = QNN_TENSOR_GET_RANK(src); + QNN_TENSOR_SET_RANK(dst, rank); + size_t dimSize = rank * sizeof(uint32_t); + uint32_t *dimensions = (uint32_t *)malloc(dimSize); + if (dimensions == nullptr) { + PRINT_ERROR("deepCopyQnnTensors() Allocation error while copying tensor %s", + QNN_TENSOR_GET_NAME(src)); + return MODEL_TENSOR_ERROR; + } + memscpy(dimensions, dimSize, QNN_TENSOR_GET_DIMENSIONS(src), dimSize); + QNN_TENSOR_SET_DIMENSIONS(dst, dimensions); + + return err; +} + +ModelError_t freeQnnTensor(Qnn_Tensor_t &tensor) { + ModelError_t err; + VALIDATE_TENSOR_VERSION(tensor, err); + + // free all pointer allocations in struct + free((void *)QNN_TENSOR_GET_NAME(tensor)); + free(QNN_TENSOR_GET_DIMENSIONS(tensor)); + + return MODEL_NO_ERROR; +} + +ModelError_t freeQnnTensors(Qnn_Tensor_t *&tensors, uint32_t numTensors) { + // free all pointer allocations in struct + for (size_t i = 0; i < numTensors; i++) { + freeQnnTensor(tensors[i]); + } + free(tensors); + + return MODEL_NO_ERROR; +} + +std::string getModelErrorName(ModelError_t modelError) { + switch (modelError) { + case MODEL_NO_ERROR: + return "MODEL_NO_ERROR"; + case MODEL_TENSOR_ERROR: + return "MODEL_TENSOR_ERROR"; + case MODEL_PARAMS_ERROR: + return "MODEL_PARAMS_ERROR"; + case MODEL_NODES_ERROR: + return "MODEL_NODES_ERROR"; + case MODEL_GRAPH_ERROR: + return "MODEL_GRAPH_ERROR"; + case MODEL_CONTEXT_ERROR: + return "MODEL_CONTEXT_ERROR"; + case MODEL_GENERATION_ERROR: + return "MODEL_GENERATION_ERROR"; + case MODEL_SETUP_ERROR: + return "MODEL_SETUP_ERROR"; + case MODEL_UNKNOWN_ERROR: + return "MODEL_UNKNOWN_ERROR"; + case MODEL_INVALID_ARGUMENT_ERROR: + return "MODEL_INVALID_ARGUMENT_ERROR"; + case MODEL_FILE_ERROR: + return "MODEL_FILE_ERROR"; + default: + return "INVALID_ERROR_CODE"; + } +} + +} // namespace qnn_wrapper_api diff --git a/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.hpp b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.hpp new file mode 100644 index 000000000..4327b23f5 --- /dev/null +++ b/nntrainer/npu/qnn/WrapperUtils/QnnWrapperUtils.hpp @@ -0,0 +1,203 @@ +//============================================================================== +// +// Copyright (c) 2019-2022 Qualcomm Technologies, Inc. +// All Rights Reserved. +// Confidential and Proprietary - Qualcomm Technologies, Inc. +// +//============================================================================== + +#pragma once + +#include "QNN/QnnContext.h" +#include "QNN/QnnGraph.h" +#include "QNN/QnnTensor.h" +#include "QNN/QnnTypes.h" +#include + +namespace qnn_wrapper_api { + +// macro utils + +// Enables FILE[LINE]: FMT for VALIDATE macro +#ifdef QNN_ENABLE_DEBUG + +#define PRINTF(fmt, ...) \ + do { \ + printf("%s[%d]: ", __FILE__, __LINE__); \ + printf((fmt), ##__VA_ARGS__); \ + } while (0) + +#else + +#define PRINTF(fmt, ...) \ + do { \ + printf((fmt), ##__VA_ARGS__); \ + } while (0) + +#endif + +#ifdef QNN_ENABLE_DEBUG +#define PRINT_DEBUG(fmt, ...) \ + do { \ + printf("[ DEBUG ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) +#else +#define PRINT_DEBUG(fmt, ...) +#endif + +// Enables ERROR tag for errors +#define PRINT_ERROR(fmt, ...) \ + do { \ + printf("[ ERROR ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) + +// Enables WARNING tag for errors +#define PRINT_WARNING(fmt, ...) 
\ + do { \ + printf("[ WARNING ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) + +// Enables INFO tag for errors +#define PRINT_INFO(fmt, ...) \ + do { \ + printf("[ INFO ] "); \ + PRINTF((fmt), ##__VA_ARGS__); \ + } while (0) + +#define STRINGFY(str) str +#define STRINGFYVALUE(str) STRINGFY(str) + +// Ensures ModelError_t returning functions return MODEL_NO_ERROR +// retStatus should be set to MODEL_NO_ERROR before passing to macro +#define VALIDATE(value, retStatus) \ + do { \ + retStatus = value; \ + if (retStatus != qnn_wrapper_api::MODEL_NO_ERROR) { \ + PRINT_ERROR("%s expected MODEL_NO_ERROR, got %s\n", #value, \ + getModelErrorName(retStatus).c_str()); \ + return retStatus; \ + } \ + } while (0) + +// macros for retrieving binary data +#define BINVARSTART(NAME) \ + ({ \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_start[]; \ + (void *)_binary_obj_binary_##NAME##_raw_start; \ + }) +#define BINVAREND(NAME) \ + ({ \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_end[]; \ + (void *)_binary_obj_binary_##NAME##_raw_end; \ + }) +#define BINLEN(NAME) \ + ({ \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_start[]; \ + extern const uint8_t _binary_obj_binary_##NAME##_raw_end[]; \ + (uint32_t)((_binary_obj_binary_##NAME##_raw_end) - \ + (_binary_obj_binary_##NAME##_raw_start)); \ + }) + +typedef enum ModelError { + MODEL_NO_ERROR = 0, + MODEL_TENSOR_ERROR = 1, + MODEL_PARAMS_ERROR = 2, + MODEL_NODES_ERROR = 3, + MODEL_GRAPH_ERROR = 4, + MODEL_CONTEXT_ERROR = 5, + MODEL_GENERATION_ERROR = 6, + MODEL_SETUP_ERROR = 7, + MODEL_INVALID_ARGUMENT_ERROR = 8, + MODEL_FILE_ERROR = 9, + MODEL_MEMORY_ALLOCATE_ERROR = 10, + // Value selected to ensure 32 bits. + MODEL_UNKNOWN_ERROR = 0x7FFFFFFF +} ModelError_t; + +/** + * @brief Returns the error message associated with a given error code + * + * @param[in] modelError ModelError_t error code + * + * @return string message + */ +std::string getModelErrorName(ModelError_t modelError); + +typedef struct GraphInfo { + Qnn_GraphHandle_t graph; + char *graphName; + Qnn_Tensor_t *inputTensors; + uint32_t numInputTensors; + Qnn_Tensor_t *outputTensors; + uint32_t numOutputTensors; +} GraphInfo_t; +typedef GraphInfo_t *GraphInfoPtr_t; + +typedef struct GraphConfigInfo { + char *graphName; + const QnnGraph_Config_t **graphConfigs; +} GraphConfigInfo_t; + +/** + * @brief Helper function to get Qnn GraphConfig structure from provided + * GraphConfigInfo using graphName. + * + * @param[in] graphName the Qnn graphName to use for lookup + * + * @param[in] graphsConfigInfo array of GraphConfig_t objects + * + * @param[in] numGraphsConfigInfo the number of array elements in + * graphConfigInfo + * + * @param[out] graphConfigs the result of query of graphName from + * graphsConfigInfo if successful. + * + * @return Error code + * + */ +ModelError_t getQnnGraphConfigFromInfo( + const char *graphName, const GraphConfigInfo_t **graphsConfigInfo, + const uint32_t numGraphsConfigInfo, const QnnGraph_Config_t **&graphConfigs); + +/** + * @brief Deep Copies QnnTensor_t structs to a pointer array destination + * location. Note: The copy will be stored on the heap and as such requires + * caller to make appropriate free call(s) using function below. 
Note 2: + * deepCopy is only done for metadata + * + * @param[in] source tensor object to copy from + * + * @param[in] destination tensor object to copy to + * + * @return Error code + */ +ModelError_t deepCopyQnnTensors(Qnn_Tensor_t &source, + Qnn_Tensor_t &destination); + +/** + * @brief Frees all memory allocated tensor attributes. + * + * @param[in] tensor Qnn_Tensor_t object to free + * + * @return Error code + */ +ModelError_t freeQnnTensor(Qnn_Tensor_t &tensor); + +/** + * @brief Loops through and frees all memory allocated tensor attributes for + * each tensor object. + * + * @param[in] tensors array of tensor objects to free + * + * @param[in] numTensors length of the above tensors array + * + * @return Error code + */ +ModelError_t freeQnnTensors(Qnn_Tensor_t *&tensors, uint32_t numTensors); + +size_t memscpy(void *dst, size_t dstSize, const void *src, size_t copySize); + +} // namespace qnn_wrapper_api diff --git a/nntrainer/npu/qnn/meson.build b/nntrainer/npu/qnn/meson.build new file mode 100644 index 000000000..3f259ddd6 --- /dev/null +++ b/nntrainer/npu/qnn/meson.build @@ -0,0 +1 @@ +subdir('LLaMAPackage') diff --git a/nntrainer/npu/qnn/op/QNNLinear.cpp b/nntrainer/npu/qnn/op/QNNLinear.cpp new file mode 100644 index 000000000..e31612449 --- /dev/null +++ b/nntrainer/npu/qnn/op/QNNLinear.cpp @@ -0,0 +1,359 @@ +#include "QNNLinear.hpp" +#include "QnnTypes.h" +#include +#include + +namespace nntrainer { +QNNLinear::QNNLinear() { + // weight_.setBackend(bn); + // bias_.setBackend(bn); + + // weightScale_.setBackend(bn); + // biasScale_.setBackend(bn); + // outputScale_.setBackend(bn); + // inputScale_.setBackend(bn); +} + +// QNNLinear::QNNLinear(Backend *bn, string opName, int in_features, int +// out_features, bool bias) : +// QNNCommonOp(bn, opName), in_features_(in_features), +// out_features_(out_features), support_bias_(bias) { +// weight_.setBackend(bn); +// bias_.setBackend(bn); + +// weightScale_.setBackend(bn); +// biasScale_.setBackend(bn); +// outputScale_.setBackend(bn); +// inputScale_.setBackend(bn); +// } + +// ErrorCode QNNLinear::reshape(vector> inputs, +// vector> outputs) { +// assert(inputs.size() == 1); +// assert(outputs.size() == 1); +// // N | C | H | W +// // ----------------------------------------------- +// // 1 |out_channel | in_channel | 1 +// // |out_features| in_features | +// // ----------------------------------------------- +// // batch |in_channel | seq_len | 1 +// // |in_features | inputs[0]->sequence() | +// // ----------------------------------------------- +// // batch |out_channel | seq_len | 1 +// // |out_features| inputs[0]->sequence() | +// assert(inputs[0]->head() == 1); +// assert(in_features_ == inputs[0]->dimension()); +// outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), +// inputs[0]->sequence(), out_features_); return Op::reshape(inputs, +// outputs); +// } + +// ErrorCode QNNLinear::setUp(vector> inputs, +// vector> outputs) { +// outputs[0]->setDtype(MLLM_TYPE_I8); +// // add matmul param to qnn +// vector paramsMatmul = { +// {.paramType = QNN_PARAMTYPE_SCALAR, +// .name = "transpose_in0", +// .scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = +// 0}}}, +// {.paramType = QNN_PARAMTYPE_SCALAR, +// .name = "transpose_in1", +// .scalarParam = (Qnn_Scalar_t){QNN_DATATYPE_BOOL_8, {.bool8Value = +// 1}}}}; + +// uint32_t +// dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_dilation[] = {2}; +// uint32_t InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_dilation[] = {1, +// 1}; uint32_t +// 
dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount[] = +// {2, 2}; uint32_t +// InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount[] = {0, 0, 0, 0}; +// uint32_t dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride[] +// = {2}; uint32_t InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride[] = +// {1, 1}; + +// vector params_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D = +// { +// {.paramType = QNN_PARAMTYPE_TENSOR, +// .name = "stride", +// .tensorParam = +// (Qnn_Tensor_t){ +// .version = QNN_TENSOR_VERSION_1, +// .v1 = {.id = 0, +// .name = +// "InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride", +// .type = QNN_TENSOR_TYPE_STATIC, +// .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = QNN_DATATYPE_UINT_32, +// .quantizeParams = {QNN_DEFINITION_UNDEFINED, +// QNN_QUANTIZATION_ENCODING_UNDEFINED, +// {.scaleOffsetEncoding = {.scale = +// 0.0000000000000000f, +// .offset = +// 0}}}, +// .rank = 1, +// .dimensions = +// dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride, +// .memType = QNN_TENSORMEMTYPE_RAW, +// .clientBuf = +// {.data = (uint8_t +// *)InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_stride, +// .dataSize = 8}}}}, +// {.paramType = QNN_PARAMTYPE_TENSOR, +// .name = "pad_amount", +// .tensorParam = +// (Qnn_Tensor_t){ +// .version = QNN_TENSOR_VERSION_1, +// .v1 = {.id = 0, +// .name = +// "InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount", +// .type = QNN_TENSOR_TYPE_STATIC, +// .dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = QNN_DATATYPE_UINT_32, +// .quantizeParams = {QNN_DEFINITION_UNDEFINED, +// QNN_QUANTIZATION_ENCODING_UNDEFINED, +// {.scaleOffsetEncoding = {.scale = +// 0.0000000000000000f, +// .offset = +// 0}}}, +// .rank = 2, +// .dimensions = +// dimensions_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount, +// .memType = QNN_TENSORMEMTYPE_RAW, +// .clientBuf = +// {.data = (uint8_t *) +// InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D_pad_amount, +// .dataSize = 16}}}}, + +// }; + +// // add weight tensor to qnn +// uint32_t dimensionsWeight[4] = {1, 1, +// static_cast(weight_.sequence()), +// static_cast(weight_.dimension())}; + +// auto qnnQuantDefined = QNN_DEFINITION_UNDEFINED; +// float weightScale = 0; + +// qnnQuantDefined = QNN_DEFINITION_DEFINED; +// weightScale = weightScale_.hostPtr()[0]; + +// qnnBackend_->modelAddTensor(weight_.name(), (Qnn_Tensor_t){ +// .version = +// QNN_TENSOR_VERSION_1, .v1 +// = { +// .id = 0, +// .name = +// weight_.name().c_str(), +// .type = +// QNN_TENSOR_TYPE_STATIC, +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_SFIXED_POINT_8, +// .quantizeParams = +// {qnnQuantDefined, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = +// {.scale +// = +// weightScale, +// .offset +// = +// 0}}}, +// .rank = 4, +// .dimensions = +// dimensionsWeight, +// .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = +// weight_.hostPtr(), +// .dataSize +// = +// (uint32_t)weight_.cntSize()}}}); +// // free weight host memory +// weight_.free(); + +// // dimensions of matmul output and bias +// uint32_t dimensionsOutput[4] = +// {static_cast(outputs[0]->batch()), +// static_cast(outputs[0]->sequence()), +// static_cast(outputs[0]->head()), +// static_cast(outputs[0]->dimension())}; + +// auto outName = outputs[0]->name(); + +// // if don't support bias, just dequantize and write to tensor with name +// of outputs[0] if (!support_bias_) { +// float outputScale = 0; +// outputScale = 
outputScale_.hostPtr()[0] / 127.0; +// outputScale = roundf(outputScale * 100000) / 100000; + +// vector matmulOut = {{QNN_TENSOR_VERSION_1, +// {.v1 = { +// .id = 0, +// .name = outName.c_str(), +// .type = +// getOutputTensorType(outputs[0]), +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_SFIXED_POINT_8, +// .quantizeParams = +// {QNN_DEFINITION_DEFINED, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = {.scale +// = +// outputScale, +// .offset = +// 0}}}, +// .rank = 4, +// .dimensions = +// dimensionsOutput, .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = +// nullptr, +// .dataSize = +// 0}}}}}; +// return graphAddNode(name() + ".linearint8", "Conv2d", +// {inputs[0]->name(), weight_.name()}, matmulOut, +// params_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D); +// } + +// // add bias tensor to qnn +// uint32_t dimensionsBias[1] = {(uint32_t)out_features_}; +// float biasScale = 0; + +// qnnQuantDefined = QNN_DEFINITION_DEFINED; +// biasScale = biasScale_.hostPtr()[0]; + +// qnnBackend_->modelAddTensor(bias_.name(), (Qnn_Tensor_t){ +// .version = +// QNN_TENSOR_VERSION_1, .v1 = +// { +// .id = 0, +// .name = +// bias_.name().c_str(), +// .type = +// QNN_TENSOR_TYPE_STATIC, +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_UFIXED_POINT_8, +// .quantizeParams = +// {qnnQuantDefined, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = +// {.scale +// = +// biasScale, +// .offset +// = +// -128}}}, +// .rank = 1, +// .dimensions = +// dimensionsBias, +// .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = +// bias_.hostPtr(), +// .dataSize +// = +// (uint32_t)bias_.cntSize()}}}); +// // free bias host memory +// bias_.free(); + +// float outputScale = 0; +// outputScale = outputScale_.hostPtr()[0] / 127.0; +// outputScale = roundf(outputScale * 100000) / 100000; + +// // final output +// vector biasOutput = {{QNN_TENSOR_VERSION_1, +// {.v1 = { +// .id = 0, +// .name = outName.c_str(), +// .type = +// getOutputTensorType(outputs[0]), +// .dataFormat = +// QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER, +// .dataType = +// QNN_DATATYPE_SFIXED_POINT_8, +// .quantizeParams = +// {QNN_DEFINITION_DEFINED, +// QNN_QUANTIZATION_ENCODING_SCALE_OFFSET, +// {.scaleOffsetEncoding +// = {.scale = +// outputScale, +// .offset = +// 0}}}, +// .rank = 4, +// .dimensions = dimensionsOutput, +// .memType = +// QNN_TENSORMEMTYPE_RAW, +// .clientBuf = {.data = nullptr, +// .dataSize = +// 0}}}}}; +// return graphAddNode(name() + ".linearint8", "Conv2d", {inputs[0]->name(), +// weight_.name(), bias_.name()}, biasOutput, +// params_InceptionV3_InceptionV3_Conv2d_1a_3x3_Conv2D); +// } + +// ErrorCode QNNLinear::load(AbstructLoader &loader) { +// weight_.setName(name() + ".weight"); +// weight_.reshape(1, 1, in_features_, out_features_); +// weight_.setDtype(MLLM_TYPE_I8); +// weight_.alloc(); +// loader.load(&weight_); + +// bias_.setName(name() + ".bias"); +// bias_.reshape(1, 1, 1, out_features_); +// bias_.setDtype(MLLM_TYPE_I8); +// bias_.alloc(); +// if (support_bias_) { +// loader.load(&bias_); +// // sign to unsign +// for (int i = 0; i < out_features_; i++) { +// int32_t val = bias_.dataAt(0, 0, 0, i); +// val += 128; +// bias_.setDataAt(0, 0, 0, i, (uint8_t)val); +// } +// } else { +// memset(bias_.hostPtr(), 0, bias_.cntSize()); +// } + +// weightScale_.setName(name() + ".weight.scale"); +// weightScale_.reshape(1, 1, 1, 1); +// 
weightScale_.setDtype(MLLM_TYPE_F32);
+// weightScale_.alloc();
+// loader.load(&weightScale_);
+
+// biasScale_.setName(name() + ".bias.scale");
+// biasScale_.reshape(1, 1, 1, 1);
+// biasScale_.setDtype(MLLM_TYPE_F32);
+// biasScale_.alloc();
+// loader.load(&biasScale_);
+
+// outputScale_.setName(name() + ".output_scale");
+// outputScale_.reshape(1, 1, 1, 1);
+// outputScale_.setDtype(MLLM_TYPE_F32);
+// outputScale_.alloc();
+// loader.load(&outputScale_);
+
+// inputScale_.setName(name() + ".input_scale");
+// inputScale_.reshape(1, 1, 1, 1);
+// inputScale_.setDtype(MLLM_TYPE_F32);
+// inputScale_.alloc();
+// loader.load(&inputScale_);
+
+// return Op::load(loader);
+// }
+
+// ErrorCode QNNLinear::free(vector> inputs,
+// vector> outputs) {
+// return Op::free(inputs, outputs);
+// }
+} // namespace nntrainer
diff --git a/nntrainer/npu/qnn/op/QNNLinear.hpp b/nntrainer/npu/qnn/op/QNNLinear.hpp
new file mode 100644
index 000000000..1542849f2
--- /dev/null
+++ b/nntrainer/npu/qnn/op/QNNLinear.hpp
@@ -0,0 +1,44 @@
+
+#ifndef NNTR_QNNLINEAR_H
+#define NNTR_QNNLINEAR_H
+
+namespace nntrainer {
+class QNNLinear {
+public:
+  QNNLinear();
+  virtual ~QNNLinear() = default;
+  // virtual ErrorCode reshape(vector> inputs,
+  // vector> outputs) override; virtual ErrorCode
+  // setUp(vector> inputs, vector>
+  // outputs) override; virtual ErrorCode load(AbstructLoader &loader) override;
+  // virtual ErrorCode free(vector> inputs,
+  // vector> outputs) override;
+
+private:
+  int in_features_;
+  int out_features_;
+  bool support_bias_;
+  // Tensor weight_;
+  // Tensor bias_;
+  // #ifdef SMOOTHQUANT
+  // Tensor weightScale_;
+  // Tensor biasScale_;
+  // #endif
+  // Tensor outputScale_;
+  // Tensor inputScale_;
+};
+
+// class QNNLinearINT8Creator : public QNNBackend::Creator {
+// public:
+//   virtual Op *create(OpParam op_param, Backend *bn, string name) const {
+//     int in_features = op_param["in_features"];
+//     int out_features = op_param["out_features"];
+//     int bias = op_param["bias"];
+//     return new QNNLinearINT8(bn, name, in_features, out_features,
+//     (bool)bias);
+//   }
+// };
+
+} // namespace nntrainer
+
+#endif
diff --git a/nntrainer/npu/qnn/tools/prepare_ops.sh b/nntrainer/npu/qnn/tools/prepare_ops.sh
new file mode 100755
index 000000000..fa30fcebd
--- /dev/null
+++ b/nntrainer/npu/qnn/tools/prepare_ops.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+if [ "x${HEXAGON_SDK_ROOT}" = "x" ]; then
+  echo "HEXAGON_SDK_ROOT is not set, we will set env using /local/mnt/workspace/Qualcomm/Hexagon_SDK/5.5.2.0/setup_sdk_env.source"
+  ln -s /local/mnt/workspace/Qualcomm/Hexagon_SDK/5.5.2.0/ HexagonSDK
+  source HexagonSDK/setup_sdk_env.source
+fi
+
+echo "Setting QNN_SDK_ROOT to /opt/qcom/aistack/qairt/2.28.2.241116/"
+ln -s /opt/qcom/aistack/qairt/2.28.2.241116/ qairt
+export QNN_SDK_ROOT=/opt/qcom/aistack/qairt/2.28.2.241116/
+source ${QNN_SDK_ROOT}/bin/envsetup.sh
+
+echo "QNN_SDK_ROOT=./qairt"
+echo "HEXAGON_SDK_ROOT=./HexagonSDK"
+
+echo "ANDROID_ROOT_DIR=${ANDROID_ROOT_DIR}"
+echo "ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT}"
+echo "QNX_BIN_DIR=${QNX_BIN_DIR}"
+echo "LV_TOOLS_DIR=${LV_TOOLS_DIR}"
+echo "LRH_TOOLS_DIR=${LRH_TOOLS_DIR}"
+
+echo "DEFAULT_HEXAGON_TOOLS_ROOT=${DEFAULT_HEXAGON_TOOLS_ROOT}"
+echo "DEFAULT_DSP_ARCH=${DEFAULT_DSP_ARCH}"
+echo "DEFAULT_BUILD=${DEFAULT_BUILD}"
+echo "DEFAULT_HLOS_ARCH=${DEFAULT_HLOS_ARCH}"
+echo "DEFAULT_TOOLS_VARIANT=${DEFAULT_TOOLS_VARIANT}"
+echo "DEFAULT_NO_QURT_INC=${DEFAULT_NO_QURT_INC}"
+echo "DEFAULT_TREE=${DEFAULT_TREE}"
+echo "CMAKE_ROOT_PATH=${CMAKE_ROOT_PATH}"
+echo "DEBUGGER_UTILS=${DEBUGGER_UTILS}"
+echo "HEXAGONSDK_TELEMATICS_ROOT=${HEXAGONSDK_TELEMATICS_ROOT}"
+
+echo "AISW_SDK_ROOT=${AISW_SDK_ROOT}"
+echo "PYTHONPATH=${PYTHONPATH}"
+echo "PATH=${PATH}"
+echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"
+echo "HEXAGON_TOOLS_DIR=${HEXAGON_TOOLS_DIR}"
+echo "SNPE_ROOT=${SNPE_ROOT}"
+
+cd LLaMAPackage
+
+make htp_v75 && make htp_aarch64
+
diff --git a/nntrainer/qnn_context.cpp b/nntrainer/qnn_context.cpp
new file mode 100644
index 000000000..b6c200792
--- /dev/null
+++ b/nntrainer/qnn_context.cpp
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Debadri Samaddar
+ *
+ * @file qnn_context.cpp
+ * @date 23 Feb 2024
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Debadri Samaddar
+ * @author Niket Agarwal
+ * @author Thummala Pallavi
+ * @bug No known bugs except for NYI items
+ * @brief This file contains app context related functions and classes that
+ * manages the global configuration of the current OpenCL environment. It also
+ * creates the OpenCL command queue and context.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace nntrainer {
+
+std::mutex cl_factory_mutex;
+
+std::once_flag global_cl_context_init_flag;
+
+static void add_default_object(ClContext &cc) {
+
+  if (FullyConnectedLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer,
+                       FullyConnectedLayerCl::type,
+                       ml::train::LayerType::LAYER_FC);
+  }
+
+  cc.registerFactory(nntrainer::createLayer,
+                     AdditionLayerCL::type,
+                     ml::train::LayerType::LAYER_ADDITION);
+
+  // @todo swiglulayercl also needs to be updated.
+  cc.registerFactory(nntrainer::createLayer, SwiGLULayerCl::type,
+                     ml::train::LayerType::LAYER_SWIGLU);
+
+  if (ReshapeLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer,
+                       ReshapeLayerCl::type,
+                       ml::train::LayerType::LAYER_RESHAPE);
+  }
+
+  // @todo rmsnormlayercl also needs to be updated.
+  cc.registerFactory(nntrainer::createLayer,
+                     RMSNormLayerCl::type, ml::train::LayerType::LAYER_RMSNORM);
+
+  if (ConcatLayerCl::registerClKernels()) {
+    cc.registerFactory(nntrainer::createLayer,
+                       ConcatLayerCl::type, ml::train::LayerType::LAYER_CONCAT);
+  }
+
+  // @todo transposelayercl also needs to be updated.
+  cc.registerFactory(nntrainer::createLayer,
+                     TransposeLayerCl::type,
+                     ml::train::LayerType::LAYER_TRANSPOSE);
+}
+
+static void registerer(ClContext &cc) noexcept {
+  try {
+    cc.initBlasClKernels();
+    add_default_object(cc);
+  } catch (std::exception &e) {
+    ml_loge("cl_context: registering layers failed!!, reason: %s", e.what());
+  } catch (...)
{ + ml_loge("cl_context: registering layer failed due to unknown reason"); + } +}; + +ClContext &ClContext::Global() { + static ClContext instance; + + // initializing commandqueue and context + bool result = instance.clInit(); + + if (!result) { + ml_loge("cl_context: opencl command queue creation failed"); + } + + /// in g++ there is a bug that hangs up if caller throws, + /// so registerer is noexcept although it'd better not + /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70298 + std::call_once(global_cl_context_init_flag, registerer, std::ref(instance)); + return instance; +} + +template +const int ClContext::registerFactory(const FactoryType factory, + const std::string &key, + const int int_key) { + static_assert(isSupported::value, + "cl_context: given type is not supported for current context"); + + auto &index = std::get>(factory_map); + auto &str_map = std::get>(index); + auto &int_map = std::get(index); + + std::string assigned_key = key == "" ? factory({})->getType() : key; + + std::transform(assigned_key.begin(), assigned_key.end(), assigned_key.begin(), + [](unsigned char c) { return std::tolower(c); }); + + const std::lock_guard lock(cl_factory_mutex); + if (str_map.find(assigned_key) != str_map.end()) { + std::stringstream ss; + ss << "cl_context: cannot register factory with already taken key: " << key; + throw std::invalid_argument(ss.str().c_str()); + } + + if (int_key != -1 && int_map.find(int_key) != int_map.end()) { + std::stringstream ss; + ss << "cl_context: cannot register factory with already taken int key: " + << int_key; + throw std::invalid_argument(ss.str().c_str()); + } + + int assigned_int_key = int_key == -1 ? str_map.size() + 1 : int_key; + + str_map[assigned_key] = factory; + int_map[assigned_int_key] = assigned_key; + + ml_logd("cl_context: factory has registered with key: %s, int_key: %d", + assigned_key.c_str(), assigned_int_key); + + return assigned_int_key; +} + +void ClContext::initBlasClKernels() { + if (blas_kernels_initialized) { + ml_logi( + "ClContext: Default blas kernels already registered and initialized"); + return; + } + + registerClKernel(sgemv_cl_kernel_, "sgemv_cl"); + registerClKernel(sgemv_cl_noTrans_kernel_, "sgemv_cl_noTrans"); + registerClKernel(dot_cl_kernel_, "dot_cl"); + registerClKernel(sgemm_cl_noTrans_kernel_, "sgemm_cl_noTrans"); + registerClKernel(sgemm_cl_transA_kernel_, "sgemm_cl_transA"); + registerClKernel(sgemm_cl_transB_kernel_, "sgemm_cl_transB"); + registerClKernel(sgemm_cl_transAB_kernel_, "sgemm_cl_transAB"); + registerClKernel(addition_cl_kernel_, "addition_cl"); + registerClKernel(sscal_cl_kernel_, "sscal_cl"); + +#ifdef ENABLE_FP16 + registerClKernel(sgemv_cl_kernel_fp16_, "sgemv_cl_fp16"); + registerClKernel(sgemv_cl_noTrans_kernel_fp16_, "sgemv_cl_noTrans_fp16"); + registerClKernel(dot_cl_kernel_fp16_, "dot_cl_fp16"); + registerClKernel(sgemm_cl_noTrans_kernel_fp16_, "sgemm_cl_noTrans_fp16"); + registerClKernel(sgemm_cl_transA_kernel_fp16_, "sgemm_cl_transA_fp16"); + registerClKernel(sgemm_cl_transB_kernel_fp16_, "sgemm_cl_transB_fp16"); + registerClKernel(sgemm_cl_transAB_kernel_fp16_, "sgemm_cl_transAB_fp16"); + registerClKernel(addition_cl_kernel_fp16_, "addition_cl_fp16"); + registerClKernel(sscal_cl_kernel_fp16_, "sscal_cl_fp16"); +#endif + blas_kernels_initialized = true; +} + +void ClContext::initAttentionClKernels() { + if (attention_kernels_initialized) { + ml_logi("ClContext: Default attention kernels already registered and " + "initialized"); + return; + } + + 
registerClKernel(rotary_emb_cl_kernel_, "rotary_emb_cl"); + +#ifdef ENABLE_FP16 + registerClKernel(rotary_emb_cl_kernel_fp16_, "rotary_emb_cl_fp16"); +#endif + attention_kernels_initialized = true; +} + +const ClContext::SharedPtrClKernel +ClContext::registerClKernel(std::string kernel_string, + std::string kernel_name) { + // check if created before + if (ocl_kernel_map.find(kernel_name) != ocl_kernel_map.end()) { + ml_logi("Kernel already registered and initialized: %s", + kernel_name.c_str()); + return ocl_kernel_map[kernel_name]; + } + + // creating shared_ptr for kernel object + SharedPtrClKernel kernelPtr = std::make_shared(); + if (!clCreateKernel(kernel_string, kernel_name, kernelPtr)) { + ml_loge("Failed to register kernel %s", kernel_name.c_str()); + return nullptr; + } + // add to map + ocl_kernel_map.emplace(kernel_name, kernelPtr); + return ocl_kernel_map[kernel_name]; +} + +bool ClContext::clCreateKernel(std::string &kernel_string, + std::string &kernel_name, + const SharedPtrClKernel &kernel_ptr_) { + + ml_logi("Kernel initializing: %s", kernel_name.c_str()); + + bool result = false; + + do { + opencl::Program program; + + // reading binary + std::ifstream fs(opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name + + "_kernel.bin", + std::ios::binary | std::ios::in); + + if (fs.good()) { + fs.seekg(0, std::ios::end); + size_t binary_size = fs.tellg(); + fs.seekg(0, std::ios::beg); + + unsigned char chunk[binary_size]; + fs.read((char *)chunk, binary_size); + + result = program.CreateCLProgramWithBinary( + context_inst_.GetContext(), context_inst_.GetDeviceId(), binary_size, + chunk, + opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name + + "_kernel.bin", + ""); + } else { + result = + program.CreateCLProgram(context_inst_.GetContext(), + context_inst_.GetDeviceId(), kernel_string, ""); + } + + if (!result) { + break; + } + + result = kernel_ptr_->CreateKernelFromProgram(program, kernel_name); + if (!result) { + break; + } + + } while (false); + + return result; +} + +/** + * @copydoc const int ClContext::registerFactory + */ +template const int ClContext::registerFactory( + const FactoryType factory, const std::string &key, + const int int_key); + +} // namespace nntrainer diff --git a/nntrainer/qnn_context.h b/nntrainer/qnn_context.h new file mode 100644 index 000000000..025365546 --- /dev/null +++ b/nntrainer/qnn_context.h @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Copyright (C) 2024 Debadri Samaddar + * + * @file cl_context.h + * @date 23 Feb 2024 + * @see https://github.com/nnstreamer/nntrainer + * @author Debadri Samaddar + * @bug No known bugs except for NYI items + * @brief This file contains app context related functions and classes that + * manages the global configuration of the current OpenCL environment. It also + * creates the OpenCL command queue and context. 
+ */ + +#ifndef __CL_CONTEXT_H__ +#define __CL_CONTEXT_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +namespace nntrainer { + +extern std::mutex cl_factory_mutex; + +/** + * @class ClContext contains user-dependent configuration for OpenCL support + * @brief OpenCL support for app context + */ + +class ClContext { + +public: + using PropsType = std::vector; + + template using PtrType = std::unique_ptr; + + using SharedPtrClKernel = std::shared_ptr; + + template + using FactoryType = std::function(const PropsType &)>; + + template + using PtrFactoryType = PtrType (*)(const PropsType &); + + template + using StrIndexType = std::unordered_map>; + + /** integer to string key */ + using IntIndexType = std::unordered_map; + + /** string to kernel pointer map*/ + using OclKernelMap = std::unordered_map; + + /** + * This type contains tuple of + * 1) integer -> string index + * 2) string -> factory index + */ + template + using IndexType = std::tuple, IntIndexType>; + + template using FactoryMap = std::tuple...>; + + // getting static instance of commandqueue and opencl context + opencl::CommandQueueManager &command_queue_inst_ = + opencl::CommandQueueManager::GetInstance(); + + opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance(); + + /** + * @brief Default constructor + */ + ClContext() = default; + + /** + * + * @brief Get Global cl context. + * + * @return ClContext& + */ + static ClContext &Global(); + + /** + * @brief Factory register function, use this function to register custom + * object + * + * @tparam T object to create. Currently Layer is supported + * @param factory factory function that creates std::unique_ptr + * @param key key to access the factory, if key is empty, try to find key by + * calling factory({})->getType(); + * @param int_key key to access the factory by integer, if it is -1(default), + * the function automatically unsigned the key and return + * @return const int unique integer value to access the current factory + * @throw invalid argument when key and/or int_key is already taken + */ + template + const int registerFactory(const PtrFactoryType factory, + const std::string &key = "", + const int int_key = -1) { + FactoryType f = factory; + return registerFactory(f, key, int_key); + } + + /** + * @brief Factory register function, use this function to register custom + * object + * + * @tparam T object to create. 
Currently Layer is supported + * @param factory factory function that creates std::unique_ptr + * @param key key to access the factory, if key is empty, try to find key by + * calling factory({})->getType(); + * @param int_key key to access the factory by integer, if it is -1(default), + * the function automatically unsigned the key and return + * @return const int unique integer value to access the current factory + * @throw invalid argument when key and/or int_key is already taken + */ + template + const int registerFactory(const FactoryType factory, + const std::string &key = "", + const int int_key = -1); + + /** + * @brief Create an Object from the integer key + * + * @tparam T Type of Object, currently, Only Layer is supported + * @param int_key integer key + * @param props property + * @return PtrType unique pointer to the object + */ + template + PtrType createObject(const int int_key, + const PropsType &props = {}) const { + static_assert(isSupported::value, + "given type is not supported for current app context"); + auto &index = std::get>(factory_map); + auto &int_map = std::get(index); + + const auto &entry = int_map.find(int_key); + + if (entry == int_map.end()) { + std::stringstream ss; + ss << "Int Key is not found for the object. Key: " << int_key; + throw exception::not_supported(ss.str().c_str()); + } + + // entry is an object of int_map which is an unordered_map + return createObject(entry->second, props); + } + + /** + * @brief Create an Object from the string key + * + * @tparam T Type of object, currently, only Layer is supported + * @param key integer key + * @param props property + * @return PtrType unique pointer to the object + */ + template + PtrType createObject(const std::string &key, + const PropsType &props = {}) const { + auto &index = std::get>(factory_map); + auto &str_map = std::get>(index); + + std::string lower_key; + lower_key.resize(key.size()); + + std::transform(key.begin(), key.end(), lower_key.begin(), + [](unsigned char c) { return std::tolower(c); }); + + const auto &entry = str_map.find(lower_key); + + if (entry == str_map.end()) { + std::stringstream ss; + ss << "Key is not found for the object. 
Key: " << lower_key; + throw exception::not_supported(ss.str().c_str()); + } + + // entry -> object of str_map -> unordered_map> + return entry->second(props); + } + + /** + * @brief register or return already present OpenCl kernel pointer + * @param kernel_string kernel implementation string + * @param kernel_name kernel name + * @return std::shared_ptr + */ + const SharedPtrClKernel registerClKernel(std::string kernel_string, + std::string kernel_name); + + /** + * @brief Initialize and register all blas OpenCl kernels + */ + void initBlasClKernels(); + + /** + * @brief Initialize and register all attention OpenCl kernels + */ + void initAttentionClKernels(); + + /** + * @brief destructor to release opencl commandQueue + */ + ~ClContext() { + if (cl_initialized) { + command_queue_inst_.ReleaseCommandQueue(); + // getContext() is called by clCreateKernel + context_inst_.ReleaseContext(); + } + }; + +private: + // flag to check opencl commandqueue and context inititalization + bool cl_initialized = false; + + // flag to check default blas kernels registered or not + bool blas_kernels_initialized = false; + + // flag to check default attention kernels registered or not + bool attention_kernels_initialized = false; + + FactoryMap factory_map; + + template struct isSupportedHelper; + + // global map to store initialized opencl::Kernel + inline static OclKernelMap ocl_kernel_map; + + /** + * @brief supportHelper to check if given type is supported within cl context + */ + template + struct isSupportedHelper> { + static constexpr bool value = + (std::is_same_v, std::decay_t> || ...); + }; + + /** + * @brief supportHelper to check if given type is supported within cl context + */ + template + struct isSupported : isSupportedHelper {}; + + /** + * @brief Initialize opencl commandqueue and context + * @return true if OpenCL context and command queue creation is successful, + * false otherwise + */ + + bool clInit() { + // if commandqueue already created + if (cl_initialized) + return true; + + // getContext() called inside createCommandQueue which creates clContext + bool result = command_queue_inst_.CreateCommandQueue(); + cl_initialized = result; + return cl_initialized; + }; + + /** + * @brief create OpenCl kernel + * @param kernel_string reference of implementation string + * @param kernel_name reference of kernel_name + * @param kernel_ptr_ reference of shared_ptr of Kernel + * @return true if successful, false otherwise + */ + bool clCreateKernel(std::string &kernel_string, std::string &kernel_name, + const SharedPtrClKernel &kernel_ptr_); +}; + +/** + * @copydoc const int ClContext::registerFactory + */ +extern template const int ClContext::registerFactory( + const FactoryType factory, const std::string &key, + const int int_key); + +} // namespace nntrainer + +#endif /* __CL_CONTEXT_H__ */
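Note (not part of the patch): the factory map declared in qnn_context.h above follows nntrainer's AppContext pattern, where a layer type is registered once under a string key (and an optional integer key) and later instantiated by key through the global context. A minimal usage sketch follows; the header names and the "custom_fc_cl" key are assumptions for illustration, and template arguments (which the rendering of the diff above has stripped) are written out explicitly here.

// usage_sketch.cpp - illustrative only; exercises the ClContext factory API
// documented above. Header paths are assumptions.
#include <memory>
#include <qnn_context.h>
#include <fc_layer_cl.h>

void context_usage_example() {
  using namespace nntrainer;

  // Global() creates the OpenCL command queue/context once and runs the
  // default registerer exactly once via std::call_once.
  ClContext &cc = ClContext::Global();

  // Layers registered in add_default_object() can be created by their
  // lower-cased string key ...
  std::unique_ptr<Layer> fc =
    cc.createObject<Layer>(FullyConnectedLayerCl::type);

  // ... or a factory can be registered under a new, unique key; reusing an
  // already-taken string or integer key throws std::invalid_argument.
  const int ikey = cc.registerFactory<Layer>(
    createLayer<FullyConnectedLayerCl>, "custom_fc_cl");
  std::unique_ptr<Layer> fc2 = cc.createObject<Layer>(ikey);
}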
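Similarly, registerClKernel() in qnn_context.cpp is idempotent per kernel name: it returns the cached kernel if the name was registered before, otherwise it first tries a prebuilt <name>_kernel.bin under opencl::Program::DEFAULT_KERNEL_PATH and only then falls back to an online build of the supplied source string. A short sketch of registering a custom kernel (the kernel body and the "scale_inplace" name are made up for illustration):

// kernel_registration_sketch.cpp - illustrative only.
#include <string>
#include <qnn_context.h> // header path is an assumption

// Hypothetical kernel: scales a buffer in place.
static const std::string scale_inplace_src = R"(
__kernel void scale_inplace(__global float *x, const float alpha) {
  const int i = get_global_id(0);
  x[i] = alpha * x[i];
})";

bool register_scale_kernel() {
  auto &cc = nntrainer::ClContext::Global();

  // First call: tries DEFAULT_KERNEL_PATH/scale_inplace_kernel.bin, then
  // compiles scale_inplace_src as a fallback.
  auto kernel = cc.registerClKernel(scale_inplace_src, "scale_inplace");

  // Subsequent calls with the same name return the cached shared_ptr;
  // nullptr means both the binary and the source build failed.
  return kernel != nullptr;
}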
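Finally, for the QNNLinear path that is still commented out in nntrainer/npu/qnn/op/QNNLinear.cpp above, the recurring step is describing a static weight buffer to QNN as a Qnn_Tensor_t (v1) with per-tensor scale/offset quantization before handing it to the graph builder. The sketch below only condenses what the commented-out code already does; modelAddTensor, the QnnTypes.h include path, and the helper signature are assumptions carried over from that code and the QNN SDK headers.

// qnn_weight_tensor_sketch.cpp - condensed from the commented-out QNNLinear
// code above; illustrative only. `dims` and `data` must outlive the tensor.
#include <cstdint>
#include <QnnTypes.h> // QNN SDK types; exact include path depends on the SDK layout

Qnn_Tensor_t make_int8_weight_tensor(const char *name, int8_t *data,
                                     uint32_t nbytes, uint32_t *dims,
                                     float scale) {
  Qnn_Tensor_t t = QNN_TENSOR_INIT; // init macro from QnnTypes.h
  t.version = QNN_TENSOR_VERSION_1;
  t.v1.id = 0;
  t.v1.name = name;
  t.v1.type = QNN_TENSOR_TYPE_STATIC; // weights are baked into the graph
  t.v1.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER;
  t.v1.dataType = QNN_DATATYPE_SFIXED_POINT_8; // int8 weights
  t.v1.quantizeParams.encodingDefinition = QNN_DEFINITION_DEFINED;
  t.v1.quantizeParams.quantizationEncoding =
    QNN_QUANTIZATION_ENCODING_SCALE_OFFSET;
  t.v1.quantizeParams.scaleOffsetEncoding.scale = scale;
  t.v1.quantizeParams.scaleOffsetEncoding.offset = 0;
  t.v1.rank = 4; // e.g. {1, 1, in_features, out_features} as in the code above
  t.v1.dimensions = dims;
  t.v1.memType = QNN_TENSORMEMTYPE_RAW;
  t.v1.clientBuf.data = data;
  t.v1.clientBuf.dataSize = nbytes;
  // The commented-out code then calls qnnBackend_->modelAddTensor(name, t);
  return t;
}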