From 53d3d1458eb639e7965ae4a1e19f254f8ff450e7 Mon Sep 17 00:00:00 2001 From: "Alex G. Lopez" Date: Mon, 22 Jul 2024 10:13:48 +0200 Subject: [PATCH] Naming review and build simplification (#28) * more generic names in line with original two way paper * more succinct cmake presets * add simpler install docs * simplify naming and two way type choice * stack copy is better than mem references * update docs to be more specific to new build types --- CMakePresets.json | 49 ++++++++-------- INSTALL.md | 97 +++++++++++++------------------- Makefile | 7 ++- str_view/str_view.c | 132 +++++++++++++++++--------------------------- 4 files changed, 120 insertions(+), 165 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index a44e416..c750a45 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -7,59 +7,60 @@ }, "configurePresets": [ { - "name": "gcc-deb", + "name": "default-deb", "binaryDir": "${sourceDir}/build", "installDir": "${sourceDir}/install", + "hidden": true, "cacheVariables": { "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", "CMAKE_C_STANDARD": "11", - "CMAKE_C_COMPILER": "gcc", "CMAKE_BUILD_TYPE": "Debug", "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/deb", - "CMAKE_C_FLAGS": - "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" + "CMAKE_C_FLAGS": "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option" } }, { - "name": "gcc-rel", + "name": "default-rel", "binaryDir": "${sourceDir}/build", "installDir": "${sourceDir}/install", "cacheVariables": { "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", "CMAKE_C_STANDARD": "11", - "CMAKE_C_COMPILER": "gcc", "CMAKE_BUILD_TYPE": "Release", "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/rel", - "CMAKE_C_FLAGS": - "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare 
-Wno-pointer-bool-conversion" + "CMAKE_C_FLAGS": "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option" + } + }, + { + "name": "gcc-deb", + "inherits": "default-deb", + "cacheVariables": { + "CMAKE_C_COMPILER": "gcc", + "CMAKE_C_FLAGS": "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" + } + }, + { + "name": "gcc-rel", + "inherits": "default-rel", + "cacheVariables": { + "CMAKE_C_COMPILER": "gcc", + "CMAKE_C_FLAGS": "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" } }, { "name": "clang-deb", - "binaryDir": "${sourceDir}/build", - "installDir": "${sourceDir}/install", + "inherits": "default-deb", "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_C_STANDARD": "11", "CMAKE_C_COMPILER": "clang", - "CMAKE_BUILD_TYPE": "Debug", - "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/deb", - "CMAKE_C_FLAGS": - "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-pointer-bool-conversion" + "CMAKE_C_FLAGS": "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-pointer-bool-conversion" } }, { "name": "clang-rel", - "binaryDir": "${sourceDir}/build", - "installDir": "${sourceDir}/install", + "inherits": "default-rel", "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_C_STANDARD": "11", "CMAKE_C_COMPILER": "clang", - "CMAKE_BUILD_TYPE": "Release", - "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/rel", - "CMAKE_C_FLAGS": - "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-pointer-bool-conversion" + "CMAKE_C_FLAGS": "-Wall -Wextra -Wfloat-equal -Wtype-limits 
-Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-pointer-bool-conversion" } } ] diff --git a/INSTALL.md b/INSTALL.md index dc06ee6..ad486ce 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -2,29 +2,19 @@ ## Quick Start -Use the provided defaults, build the library, install the library, include the library. +1. Use the provided defaults +2. Build the library +3. Install the library +4. Include the library. -### Build the library - -Use GCC to compile the library. - -```zsh -make gcc-rel [OPTIONAL INSTALL PATH] -``` - -Use Clang to compile the library. +To complete steps 1-3 with one command try the following if your system supports `make`. ```zsh -make clang-rel [OPTIONAL INSTALL PATH] +make str_view [OPTIONAL/INSTALL/PATH] ``` -### Install the library +This will use CMake and your default compiler to build and install the library in release mode. By default, this library does not touch your system paths and it is installed in the `install/` directory of this folder. This is best for testing the library out while pointing `cmake` to the install location. Then, deleting the `install/` folder deletes any trace of this library from your system. -By default, this library does not touch your system paths and it is installed in the `install/` directory of this folder. This is best for testing the library out while pointing `cmake` to the install location. Then, deleting the `install/` folder deletes any trace of this library from your system. - -```zsh -make install -``` Then, in your `CMakeLists.txt`: ```cmake @@ -40,8 +30,7 @@ find_package(str_view) specify that this library shall be installed to a location CMake recognizes by default. For example, my preferred location is as follows: ```zsh -make gcc-rel ~/.local -make install +make str_view ~/.local ``` Then the installation looks like this. @@ -83,6 +72,34 @@ The C code. ## Alternative Builds +You may wish to use a different compiler and toolchain than what your system default specifies. 
Review the `CMakePresets.json` file for different compilers. + +```zsh +make gcc-rel [OPTIONAL/INSTALL/PATH] +make install +``` + +Use Clang to compile the library. + +```zsh +make clang-rel [OPTIONAL/INSTALL/PATH] +make install +``` + +## Without Make + +If your system does not support Makefiles or the `make` command, here are the cmake commands one can run that will allow another generator such as `Ninja` to complete building and installation. + +```zsh +# Configure the project cmake files. +# Replace this preset with your own if you'd like. +cmake --preset=clang-rel -DCMAKE_INSTALL_PREFIX=[DESIRED/INSTALL/LOCATION] +cmake --build build +cmake --build build --target install +``` + +## User Presets + If you do not like the default presets, create a `CMakeUserPresets.json` in this folder and place your preferred configuration in that file. Here is my preferred configuration to get you started. I like to use a newer gcc version than the default presets specify. ```json @@ -108,8 +125,7 @@ If you do not like the default presets, create a `CMakeUserPresets.json` in this "CMAKE_C_COMPILER": "gcc-12", "CMAKE_BUILD_TYPE": "Debug", "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/deb", - "CMAKE_C_FLAGS": - "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" + "CMAKE_C_FLAGS": "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" } }, { @@ -126,44 +142,7 @@ If you do not like the default presets, create a `CMakeUserPresets.json` in this "CMAKE_C_COMPILER": "gcc-12", "CMAKE_BUILD_TYPE": "Release", "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/rel", - "CMAKE_C_FLAGS": - "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" - } - }, - { - 
"name": "cdeb", - "displayName": "Ninja clang Debug", - "description": "Generated by Ninja with clang base debug preset.", - "generator": "Ninja", - "inherits": [ - "clang-deb" - ], - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_C_STANDARD": "11", - "CMAKE_C_COMPILER": "clang", - "CMAKE_BUILD_TYPE": "Debug", - "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/deb", - "CMAKE_C_FLAGS": - "-g3 -Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-pointer-bool-conversion" - } - }, - { - "name": "crel", - "displayName": "Ninja clang Release", - "description": "Generated by Ninja with clang base release preset.", - "generator": "Ninja", - "inherits": [ - "clang-rel" - ], - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_C_STANDARD": "11", - "CMAKE_C_COMPILER": "clang", - "CMAKE_BUILD_TYPE": "Release", - "CMAKE_RUNTIME_OUTPUT_DIRECTORY": "${sourceDir}/build/rel", - "CMAKE_C_FLAGS": - "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-pointer-bool-conversion" + "CMAKE_C_FLAGS": "-Wall -Wextra -Wfloat-equal -Wtype-limits -Wpointer-arith -Wshadow -Winit-self -fno-diagnostics-show-option -Wno-nonnull-compare -Wno-pointer-bool-conversion" } } ] diff --git a/Makefile b/Makefile index ab7e6c7..6034510 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: default install build gcc-rel gcc-deb clang-rel clang-deb tests samples gcc-all-deb gcc-all-rel clang-all-deb clang-all-rel test-deb test-rel clean +.PHONY: default install build gcc-rel gcc-deb clang-rel clang-deb tests samples gcc-all-deb gcc-all-rel clang-all-deb clang-all-rel str_view test-deb test-rel clean MAKE := $(MAKE) MAKEFLAGS += --no-print-directory @@ -16,6 +16,11 @@ default: build build: cmake --build $(BUILD_DIR) $(JOBS) +str_view: + cmake --preset=default-rel -DCMAKE_INSTALL_PREFIX=$(PREFIX) + $(MAKE) build + cmake --build $(BUILD_DIR) 
--target install $(JOBS) + install: cmake --build $(BUILD_DIR) --target install $(JOBS) diff --git a/str_view/str_view.c b/str_view/str_view.c index 8cd7060..c37d3ed 100644 --- a/str_view/str_view.c +++ b/str_view/str_view.c @@ -17,7 +17,7 @@ struct sv_factorization { /* Position in the needle at which (local period = period). */ - ssize_t start_critical_pos; + ssize_t critical_pos; /* A distance in the needle such that two letters always coincide. */ ssize_t period_dist; }; @@ -35,25 +35,22 @@ static size_t sv_before_rfind(str_view, str_view); static size_t sv_min(size_t, size_t); static sv_threeway_cmp sv_char_cmp(char, char); static ssize_t sv_ssizet_max(ssize_t, ssize_t); -static size_t sv_two_way_memoization(ssize_t hay_sz, char const[static hay_sz], - ssize_t needle_sz, - char const[static needle_sz], ssize_t, - ssize_t); -static size_t sv_two_way_normal(ssize_t hay_sz, char const[static hay_sz], - ssize_t needle_sz, char const[static needle_sz], - ssize_t, ssize_t); -static size_t sv_rtwo_way_memoization(ssize_t hay_sz, char const[static hay_sz], - ssize_t needle_sz, - char const[static needle_sz], ssize_t, - ssize_t); -static size_t sv_rtwo_way_normal(ssize_t hay_sz, char const[static hay_sz], - ssize_t needle_sz, - char const[static needle_sz], ssize_t, - ssize_t); -static size_t sv_two_way(ssize_t hay_sz, char const[static hay_sz], - ssize_t needle_sz, char const[static needle_sz]); -static size_t sv_rtwo_way(ssize_t hay_sz, char const[static hay_sz], +static size_t sv_pos_memo(ssize_t hay_sz, char const[static hay_sz], + ssize_t needle_sz, char const[static needle_sz], + ssize_t, ssize_t); +static size_t sv_pos_normal(ssize_t hay_sz, char const[static hay_sz], + ssize_t needle_sz, char const[static needle_sz], + ssize_t, ssize_t); +static size_t sv_rpos_memo(ssize_t hay_sz, char const[static hay_sz], + ssize_t needle_sz, char const[static needle_sz], + ssize_t, ssize_t); +static size_t sv_rpos_normal(ssize_t hay_sz, char const[static hay_sz], + ssize_t 
needle_sz, char const[static needle_sz], + ssize_t, ssize_t); +static size_t sv_tw_match(ssize_t hay_sz, char const[static hay_sz], ssize_t needle_sz, char const[static needle_sz]); +static size_t sv_tw_rmatch(ssize_t hay_sz, char const[static hay_sz], + ssize_t needle_sz, char const[static needle_sz]); static struct sv_factorization sv_maximal_suffix(ssize_t needle_sz, char const[static needle_sz]); static struct sv_factorization @@ -891,7 +888,7 @@ sv_strnstrn(ssize_t const hay_sz, char const hay[static hay_sz], return sv_fourbyte_strnstrn(hay_sz, (unsigned char *)hay, 4, (unsigned char *)needle); } - return sv_two_way(hay_sz, hay, needle_sz, needle); + return sv_tw_match(hay_sz, hay, needle_sz, needle); } /* For now reverse logic for backwards searches has been separated into @@ -926,7 +923,7 @@ sv_rstrnstrn(ssize_t const hay_sz, char const hay[static hay_sz], return sv_rfourbyte_strnstrn(hay_sz, (unsigned char *)hay, 4, (unsigned char *)needle); } - return sv_rtwo_way(hay_sz, hay, needle_sz, needle); + return sv_tw_rmatch(hay_sz, hay, needle_sz, needle); } /*============== Post-Precomputation Two-Way Search =================*/ @@ -957,44 +954,29 @@ sv_rstrnstrn(ssize_t const hay_sz, char const hay[static hay_sz], an entire string. Returns the position at which needle begins if found and the size of the hay stack if not found. */ static inline size_t -sv_two_way(ssize_t const hay_sz, char const hay[static hay_sz], - ssize_t const needle_sz, char const needle[static needle_sz]) -{ - /* ssize_t is used throughout. Is this the best choice? The two-way - algo relies on negative numbers. This fits with size_t capabilities - but does not feel right. Plain old signed may be better. */ - ssize_t critical_pos = 0; - ssize_t period_dist = 0; +sv_tw_match(ssize_t const hay_sz, char const hay[static hay_sz], + ssize_t const needle_sz, char const needle[static needle_sz]) +{ /* Preprocessing to get critical position and period distance. 
*/ struct sv_factorization const s = sv_maximal_suffix(needle_sz, needle); struct sv_factorization const r = sv_maximal_suffix_rev(needle_sz, needle); - if (s.start_critical_pos > r.start_critical_pos) - { - critical_pos = s.start_critical_pos; - period_dist = s.period_dist; - } - else - { - critical_pos = r.start_critical_pos; - period_dist = r.period_dist; - } + struct sv_factorization const w = (s.critical_pos > r.critical_pos) ? s : r; /* Determine if memoization is available due to found border/overlap. */ - if (!memcmp(needle, needle + period_dist, critical_pos + 1)) + if (!memcmp(needle, needle + w.period_dist, w.critical_pos + 1)) { - return sv_two_way_memoization(hay_sz, hay, needle_sz, needle, - period_dist, critical_pos); + return sv_pos_memo(hay_sz, hay, needle_sz, needle, w.period_dist, + w.critical_pos); } - return sv_two_way_normal(hay_sz, hay, needle_sz, needle, period_dist, - critical_pos); + return sv_pos_normal(hay_sz, hay, needle_sz, needle, w.period_dist, + w.critical_pos); } /* Two Way string matching algorithm adapted from ESMAJ http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 */ static size_t -sv_two_way_memoization(ssize_t const hay_sz, char const hay[static hay_sz], - ssize_t const needle_sz, - char const needle[static needle_sz], - ssize_t const period_dist, ssize_t const critical_pos) +sv_pos_memo(ssize_t const hay_sz, char const hay[static hay_sz], + ssize_t const needle_sz, char const needle[static needle_sz], + ssize_t const period_dist, ssize_t const critical_pos) { ssize_t lpos = 0; ssize_t rpos = 0; @@ -1034,9 +1016,9 @@ sv_two_way_memoization(ssize_t const hay_sz, char const hay[static hay_sz], /* Two Way string matching algorithm adapted from ESMAJ http://igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 */ static size_t -sv_two_way_normal(ssize_t const hay_sz, char const hay[static hay_sz], - ssize_t const needle_sz, char const needle[static needle_sz], - ssize_t period_dist, ssize_t const critical_pos) 
+sv_pos_normal(ssize_t const hay_sz, char const hay[static hay_sz], + ssize_t const needle_sz, char const needle[static needle_sz], + ssize_t period_dist, ssize_t const critical_pos) { period_dist = sv_ssizet_max(critical_pos + 1, needle_sz - critical_pos - 1) + 1; @@ -1109,7 +1091,7 @@ sv_maximal_suffix(ssize_t const needle_sz, char const needle[static needle_sz]) break; } } - return (struct sv_factorization){.start_critical_pos = suff_pos, + return (struct sv_factorization){.critical_pos = suff_pos, .period_dist = period}; } @@ -1153,7 +1135,7 @@ sv_maximal_suffix_rev(ssize_t const needle_sz, break; } } - return (struct sv_factorization){.start_critical_pos = suff_pos, + return (struct sv_factorization){.critical_pos = suff_pos, .period_dist = period}; } @@ -1196,38 +1178,26 @@ sv_maximal_suffix_rev(ssize_t const needle_sz, /* Searches a string from right to left with a two-way algorithm. Returns the position of the start of the strig if found and string size if not. */ static inline size_t -sv_rtwo_way(ssize_t const hay_sz, char const hay[static hay_sz], - ssize_t const needle_sz, char const needle[static needle_sz]) +sv_tw_rmatch(ssize_t const hay_sz, char const hay[static hay_sz], + ssize_t const needle_sz, char const needle[static needle_sz]) { - ssize_t critical_pos = 0; - ssize_t period_dist = 0; struct sv_factorization const s = sv_rmaximal_suffix(needle_sz, needle); struct sv_factorization const r = sv_rmaximal_suffix_rev(needle_sz, needle); - if (s.start_critical_pos > r.start_critical_pos) - { - critical_pos = s.start_critical_pos; - period_dist = s.period_dist; - } - else - { - critical_pos = r.start_critical_pos; - period_dist = r.period_dist; - } + struct sv_factorization const w = (s.critical_pos > r.critical_pos) ? 
s : r; if (!sv_rmemcmp(needle + needle_sz - 1, - needle + needle_sz - period_dist - 1, critical_pos + 1)) + needle + needle_sz - w.period_dist - 1, w.critical_pos + 1)) { - return sv_rtwo_way_memoization(hay_sz, hay, needle_sz, needle, - period_dist, critical_pos); + return sv_rpos_memo(hay_sz, hay, needle_sz, needle, w.period_dist, + w.critical_pos); } - return sv_rtwo_way_normal(hay_sz, hay, needle_sz, needle, period_dist, - critical_pos); + return sv_rpos_normal(hay_sz, hay, needle_sz, needle, w.period_dist, + w.critical_pos); } static size_t -sv_rtwo_way_memoization(ssize_t const hay_sz, char const hay[static hay_sz], - ssize_t const needle_sz, - char const needle[static needle_sz], - ssize_t const period_dist, ssize_t const critical_pos) +sv_rpos_memo(ssize_t const hay_sz, char const hay[static hay_sz], + ssize_t const needle_sz, char const needle[static needle_sz], + ssize_t const period_dist, ssize_t const critical_pos) { ssize_t lpos = 0; ssize_t rpos = 0; @@ -1268,9 +1238,9 @@ sv_rtwo_way_memoization(ssize_t const hay_sz, char const hay[static hay_sz], } static size_t -sv_rtwo_way_normal(ssize_t const hay_sz, char const hay[static hay_sz], - ssize_t const needle_sz, char const needle[static needle_sz], - ssize_t period_dist, ssize_t const critical_pos) +sv_rpos_normal(ssize_t const hay_sz, char const hay[static hay_sz], + ssize_t const needle_sz, char const needle[static needle_sz], + ssize_t period_dist, ssize_t const critical_pos) { period_dist = sv_ssizet_max(critical_pos + 1, needle_sz - critical_pos - 1) + 1; @@ -1346,7 +1316,7 @@ sv_rmaximal_suffix(ssize_t const needle_sz, char const needle[static needle_sz]) break; } } - return (struct sv_factorization){.start_critical_pos = suff_pos, + return (struct sv_factorization){.critical_pos = suff_pos, .period_dist = period}; } @@ -1388,7 +1358,7 @@ sv_rmaximal_suffix_rev(ssize_t const needle_sz, break; } } - return (struct sv_factorization){.start_critical_pos = suff_pos, + return (struct 
sv_factorization){.critical_pos = suff_pos, .period_dist = period}; }