diff --git a/include/proj/metadata.hpp b/include/proj/metadata.hpp index 42b3925490..c894ccc1ac 100644 --- a/include/proj/metadata.hpp +++ b/include/proj/metadata.hpp @@ -397,11 +397,15 @@ class PROJ_GCC_DLL Identifier : public util::BaseObject, PROJ_DLL static bool isEquivalentName(const char *a, const char *b) noexcept; + PROJ_DLL static bool + isEquivalentName(const char *a, const char *b, + bool biggerDifferencesAllowed) noexcept; PROJ_PRIVATE : //! @cond Doxygen_Suppress PROJ_INTERNAL static std::string - canonicalizeName(const std::string &str); + canonicalizeName(const std::string &str, + bool biggerDifferencesAllowed = true); PROJ_INTERNAL void _exportToWKT(io::WKTFormatter *formatter) const override; // throw(io::FormattingException) diff --git a/scripts/reference_exported_symbols.txt b/scripts/reference_exported_symbols.txt index 36ff952f86..901ac4525b 100644 --- a/scripts/reference_exported_symbols.txt +++ b/scripts/reference_exported_symbols.txt @@ -523,6 +523,7 @@ osgeo::proj::metadata::Identifier::description() const osgeo::proj::metadata::Identifier::~Identifier() osgeo::proj::metadata::Identifier::Identifier(osgeo::proj::metadata::Identifier const&) osgeo::proj::metadata::Identifier::isEquivalentName(char const*, char const*) +osgeo::proj::metadata::Identifier::isEquivalentName(char const*, char const*, bool) osgeo::proj::metadata::Identifier::uri() const osgeo::proj::metadata::Identifier::version() const osgeo::proj::metadata::PositionalAccuracy::create(std::string const&) diff --git a/src/apps/projinfo.cpp b/src/apps/projinfo.cpp index 17f213dc33..debaea6352 100644 --- a/src/apps/projinfo.cpp +++ b/src/apps/projinfo.cpp @@ -370,6 +370,7 @@ static BaseObjectNNPtr buildObject( limitResultCount); if (res.size() == 1) { obj = res.front().as_nullable(); + break; } else { for (const auto &l_obj : res) { if (Identifier::isEquivalentName( diff --git a/src/iso19111/factory.cpp b/src/iso19111/factory.cpp index 56dd92f1b1..6bf33982d5 100644 --- 
a/src/iso19111/factory.cpp +++ b/src/iso19111/factory.cpp @@ -9171,7 +9171,8 @@ AuthorityFactory::createObjectsFromNameEx( auto sqlRes = d->run(sql, params); bool isFirst = true; bool firstIsDeprecated = false; - bool foundExactMatch = false; + size_t countExactMatch = 0; + size_t countExactMatchOnAlias = 0; std::size_t hashCodeFirstMatch = 0; for (const auto &row : sqlRes) { const auto &name = row[3]; @@ -9262,9 +9263,12 @@ AuthorityFactory::createObjectsFromNameEx( throw std::runtime_error("Unsupported table_name"); }; const auto obj = getObject(table_name, code); - if (metadata::Identifier::canonicalizeName(obj->nameStr()) == - canonicalizedSearchedName) { - foundExactMatch = true; + if (metadata::Identifier::isEquivalentName( + obj->nameStr().c_str(), searchedName.c_str(), false)) { + countExactMatch++; + } else if (metadata::Identifier::isEquivalentName( + name.c_str(), searchedName.c_str(), false)) { + countExactMatchOnAlias++; } const auto objPtr = obj.get(); @@ -9280,14 +9284,21 @@ AuthorityFactory::createObjectsFromNameEx( } } - // If we found a name that is an exact match, and all objects have the - // same type, and we are not in approximate mode, only keep the - // object(s) with the exact name match. - if (foundExactMatch && hashCodeFirstMatch != 0 && !approximateMatch) { + // If we found at least one object that is an exact match, and all objects + // have the same type, and we are not in approximate mode, only keep the + // objects with the exact name match. 
+ if ((countExactMatch + countExactMatchOnAlias) >= 1 && + hashCodeFirstMatch != 0 && !approximateMatch) { std::list resTmp; + bool biggerDifferencesAllowed = (countExactMatch == 0); for (const auto &pair : res) { - if (metadata::Identifier::canonicalizeName( - pair.first->nameStr()) == canonicalizedSearchedName) { + if (metadata::Identifier::isEquivalentName( + pair.first->nameStr().c_str(), searchedName.c_str(), + biggerDifferencesAllowed) || + (countExactMatch == 0 && + metadata::Identifier::isEquivalentName( + pair.second.c_str(), searchedName.c_str(), + biggerDifferencesAllowed))) { resTmp.emplace_back(pair); } } @@ -9298,6 +9309,7 @@ AuthorityFactory::createObjectsFromNameEx( auto sortLambda = [](const PairObjectName &a, const PairObjectName &b) { const auto &aName = a.first->nameStr(); const auto &bName = b.first->nameStr(); + if (aName.size() < bName.size()) { return true; } diff --git a/src/iso19111/io.cpp b/src/iso19111/io.cpp index dfd6d78573..cc55ef7ba3 100644 --- a/src/iso19111/io.cpp +++ b/src/iso19111/io.cpp @@ -7966,20 +7966,23 @@ static BaseObjectNNPtr createFromUserInput(const std::string &text, // If there's exactly only one object whose name is equivalent // to the user input, return it. 
- IdentifiedObjectPtr identifiedObj; - for (const auto &obj : res) { - if (Identifier::isEquivalentName(obj->nameStr().c_str(), - objectName.c_str())) { - if (identifiedObj == nullptr) { - identifiedObj = obj.as_nullable(); - } else { - identifiedObj = nullptr; - break; + for (int pass = 0; pass <= 1; ++pass) { + IdentifiedObjectPtr identifiedObj; + for (const auto &obj : res) { + if (Identifier::isEquivalentName( + obj->nameStr().c_str(), objectName.c_str(), + /* biggerDifferencesAllowed = */ pass == 1)) { + if (identifiedObj == nullptr) { + identifiedObj = obj.as_nullable(); + } else { + identifiedObj = nullptr; + break; + } } } - } - if (identifiedObj) { - return identifiedObj; + if (identifiedObj) { + return identifiedObj; + } } std::string msg("several objects matching this name: "); diff --git a/src/iso19111/metadata.cpp b/src/iso19111/metadata.cpp index 6c0a420690..a0fde0ee35 100644 --- a/src/iso19111/metadata.cpp +++ b/src/iso19111/metadata.cpp @@ -1214,6 +1214,14 @@ static bool isIgnoredChar(char ch) { // --------------------------------------------------------------------------- +//! @cond Doxygen_Suppress +static char lower(char ch) { + return ch >= 'A' && ch <= 'Z' ? ch - 'A' + 'a' : ch; +} +//! @endcond + +// --------------------------------------------------------------------------- + //! @cond Doxygen_Suppress static const struct utf8_to_lower { const char *utf8; @@ -1249,21 +1257,94 @@ static const struct utf8_to_lower *get_ascii_replacement(const char *c_str) { // --------------------------------------------------------------------------- //! @cond Doxygen_Suppress -std::string Identifier::canonicalizeName(const std::string &str) { + +/** Checks if c_str starts with needle, ignoring ASCII case. 
+ * + * e.g matchesLowerCase("JavaScript", "java") returns true + */ +static bool matchesLowerCase(const char *c_str, const char *needle) { + size_t i = 0; + for (; c_str[i] && needle[i]; ++i) { + if (lower(c_str[i]) != lower(needle[i])) { + return false; + } + } + return needle[i] == 0; +} +//! @endcond + +// --------------------------------------------------------------------------- + +//! @cond Doxygen_Suppress + +static inline bool isdigit(char ch) { return ch >= '0' && ch <= '9'; } +//! @endcond + +// --------------------------------------------------------------------------- + +//! @cond Doxygen_Suppress +std::string Identifier::canonicalizeName(const std::string &str, + bool biggerDifferencesAllowed) { std::string res; const char *c_str = str.c_str(); for (size_t i = 0; c_str[i] != 0; ++i) { - const auto ch = c_str[i]; + const auto ch = lower(c_str[i]); if (ch == ' ' && c_str[i + 1] == '+' && c_str[i + 2] == ' ') { i += 2; continue; } - if (ch == '1' && !res.empty() && - !(res.back() >= '0' && res.back() <= '9') && c_str[i + 1] == '9' && - c_str[i + 2] >= '0' && c_str[i + 2] <= '9') { + + // Canonicalize "19dd" (where d is a digit) as "dd" + if (ch == '1' && !res.empty() && !isdigit(res.back()) && + c_str[i + 1] == '9' && isdigit(c_str[i + 2]) && + isdigit(c_str[i + 3])) { ++i; continue; } + + if (biggerDifferencesAllowed) { + + const auto skipSubstring = [](char l_ch, const char *l_str, + size_t &idx, const char *substr) { + if (l_ch == substr[0] && idx > 0 && + isIgnoredChar(l_str[idx - 1]) && + matchesLowerCase(l_str + idx, substr)) { + idx += strlen(substr) - 1; + return true; + } + return false; + }; + + // Skip "zone" or "height" if preceding character is a space + if (skipSubstring(ch, c_str, i, "zone") || + skipSubstring(ch, c_str, i, "height")) { + continue; + } + + // Replace a substring by its first character if preceding character + // is a space or a digit + const auto replaceByFirstChar = [](char l_ch, const char *l_str, + size_t &idx, const 
char *substr, + std::string &l_res) { + if (l_ch == substr[0] && idx > 0 && + (isIgnoredChar(l_str[idx - 1]) || + isdigit(l_str[idx - 1])) && + matchesLowerCase(l_str + idx, substr)) { + l_res.push_back(l_ch); + idx += strlen(substr) - 1; + return true; + } + return false; + }; + + // Replace "north" or "south" by its first character if preceding + // character is a space or a digit + if (replaceByFirstChar(ch, c_str, i, "north", res) || + replaceByFirstChar(ch, c_str, i, "south", res)) { + continue; + } + } + if (static_cast(ch) > 127) { const auto *replacement = get_ascii_replacement(c_str + i); if (replacement) { @@ -1273,7 +1354,7 @@ std::string Identifier::canonicalizeName(const std::string &str) { } } if (!isIgnoredChar(ch)) { - res.push_back(static_cast(::tolower(ch))); + res.push_back(ch); } } return res; @@ -1286,15 +1367,22 @@ std::string Identifier::canonicalizeName(const std::string &str) { * * Two names are equivalent by removing any space, underscore, dash, slash, * { or } character from them, and comparing in a case insensitive way. + * + * @param a first string + * @param b second string + * @param biggerDifferencesAllowed if true, "height" and "zone" words are + * ignored, and "north" is shortened as "n" and "south" as "s". 
+ * @since 9.6 */ -bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { +bool Identifier::isEquivalentName(const char *a, const char *b, + bool biggerDifferencesAllowed) noexcept { size_t i = 0; size_t j = 0; char lastValidA = 0; char lastValidB = 0; while (a[i] != 0 || b[j] != 0) { - char aCh = a[i]; - char bCh = b[j]; + char aCh = lower(a[i]); + char bCh = lower(b[j]); if (aCh == ' ' && a[i + 1] == '+' && a[i + 2] == ' ' && a[i + 3] != 0) { i += 3; continue; @@ -1311,18 +1399,69 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { ++j; continue; } - if (aCh == '1' && !(lastValidA >= '0' && lastValidA <= '9') && - a[i + 1] == '9' && a[i + 2] >= '0' && a[i + 2] <= '9') { + + // Canonicalize "19dd" (where d is a digit) as "dd" + if (aCh == '1' && !isdigit(lastValidA) && a[i + 1] == '9' && + isdigit(a[i + 2]) && isdigit(a[i + 3])) { i += 2; lastValidA = '9'; continue; } - if (bCh == '1' && !(lastValidB >= '0' && lastValidB <= '9') && - b[j + 1] == '9' && b[j + 2] >= '0' && b[j + 2] <= '9') { + if (bCh == '1' && !isdigit(lastValidB) && b[j + 1] == '9' && + isdigit(b[j + 2]) && isdigit(b[j + 3])) { j += 2; lastValidB = '9'; continue; } + + if (biggerDifferencesAllowed) { + // Skip a substring if preceding character is a space + const auto skipSubString = [](char ch, const char *str, size_t &idx, + const char *substr) { + if (ch == substr[0] && idx > 0 && isIgnoredChar(str[idx - 1]) && + matchesLowerCase(str + idx, substr)) { + idx += strlen(substr); + return true; + } + return false; + }; + + bool skip = false; + if (skipSubString(aCh, a, i, "zone")) + skip = true; + if (skipSubString(bCh, b, j, "zone")) + skip = true; + if (skip) + continue; + + if (skipSubString(aCh, a, i, "height")) + skip = true; + if (skipSubString(bCh, b, j, "height")) + skip = true; + if (skip) + continue; + + // Replace a substring by its first character if preceding character + // is a space or a digit + const auto replaceByFirstChar = [](char ch, 
const char *str, + size_t &idx, + const char *substr) { + if (ch == substr[0] && idx > 0 && + (isIgnoredChar(str[idx - 1]) || isdigit(str[idx - 1])) && + matchesLowerCase(str + idx, substr)) { + idx += strlen(substr) - 1; + return true; + } + return false; + }; + + if (!replaceByFirstChar(aCh, a, i, "north")) + replaceByFirstChar(aCh, a, i, "south"); + + if (!replaceByFirstChar(bCh, b, j, "north")) + replaceByFirstChar(bCh, b, j, "south"); + } + if (static_cast(aCh) > 127) { const auto *replacement = get_ascii_replacement(a + i); if (replacement) { @@ -1337,8 +1476,7 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { j += strlen(replacement->utf8) - 1; } } - if ((aCh == 0 && bCh != 0) || (aCh != 0 && bCh == 0) || - ::tolower(aCh) != ::tolower(bCh)) { + if (aCh != bCh) { return false; } lastValidA = aCh; @@ -1353,6 +1491,17 @@ bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { // --------------------------------------------------------------------------- +/** \brief Returns whether two names are considered equivalent. + * + * Two names are equivalent by removing any space, underscore, dash, slash, + * { or } character from them, and comparing in a case insensitive way. + */ +bool Identifier::isEquivalentName(const char *a, const char *b) noexcept { + return isEquivalentName(a, b, /* biggerDifferencesAllowed = */ true); +} + +// --------------------------------------------------------------------------- + //! 
@cond Doxygen_Suppress struct PositionalAccuracy::Private { std::string value_{}; diff --git a/test/unit/test_io.cpp b/test/unit/test_io.cpp index 9cf4bfcea1..083d3ac33d 100644 --- a/test/unit/test_io.cpp +++ b/test/unit/test_io.cpp @@ -13689,7 +13689,24 @@ TEST(io, createFromUserInput) { auto obj = createFromUserInput("NGF IGN69 height", dbContext); auto crs = nn_dynamic_pointer_cast(obj); EXPECT_TRUE(crs != nullptr); - EXPECT_EQ(crs->nameStr(), "NGF-IGN69 height"); + EXPECT_EQ(crs->nameStr(), "NGF-IGN69 height"); // EPSG:5720 + } + + { + // Approximate match of a vertical CRS + auto obj = createFromUserInput("NGF IGN1969", dbContext); + auto crs = nn_dynamic_pointer_cast(obj); + EXPECT_TRUE(crs != nullptr); + EXPECT_EQ(crs->nameStr(), "NGF-IGN 1969"); // IGNF69:IGN69 + } + + { + // Approximate match of a vertical CRS + auto obj = createFromUserInput("NGF IGN69", dbContext); + auto crs = nn_dynamic_pointer_cast(obj); + EXPECT_TRUE(crs != nullptr); + // Questionable if we shouldn't match EPSG:5720 instead + EXPECT_EQ(crs->nameStr(), "NGF-IGN 1969"); // IGNF69:IGN69 } { @@ -13700,6 +13717,14 @@ TEST(io, createFromUserInput) { EXPECT_EQ(crs->nameStr(), "WGS 84 + EGM96 height"); } + { + // Approximate match + auto obj = createFromUserInput("WGS 84 + EGM96", dbContext); + auto crs = nn_dynamic_pointer_cast(obj); + ASSERT_TRUE(crs != nullptr); + EXPECT_EQ(crs->nameStr(), "WGS 84 + EGM96 height"); + } + { // Approximate match on each piece of the compound CRS auto obj = createFromUserInput("WGS84 + NAVD88 height", dbContext); @@ -13783,6 +13808,18 @@ TEST(io, createFromUserInput) { ASSERT_TRUE(coordinateMetadata != nullptr); EXPECT_EQ(coordinateMetadata->coordinateEpochAsDecimalYear(), 2025.1); } + + { + // Approximate match using "north" instead of N and lacking + // "zone" + auto obj = createFromUserInput("WGS 84 UTM 31 north", dbContext); + auto crs = nn_dynamic_pointer_cast(obj); + ASSERT_TRUE(crs != nullptr); + EXPECT_EQ(crs->nameStr(), "WGS 84 / UTM zone 
31N"); + } + + // Should not match WGS84 or IGM85 + EXPECT_THROW(createFromUserInput("WGS 85", dbContext), ParsingException); } // ---------------------------------------------------------------------------