Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 9.5] Speed-up dramatically proj.db build time. #4284

Merged
merged 1 commit into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion data/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ set(ALL_SQL_IN "${CMAKE_CURRENT_BINARY_DIR}/all.sql.in")
# Location of the generated database; it is the OUTPUT of the custom command
# declared below.
set(PROJ_DB "${CMAKE_CURRENT_BINARY_DIR}/proj.db")
# presumably defines SQL_FILES, which the custom command lists in DEPENDS --
# confirm against sql_filelist.cmake
include(sql_filelist.cmake)

# Expected MD5 of the concatenated SQL sources. It is forwarded to
# generate_proj_db.cmake, which compares it with the checksum it computes for
# all.sql.in. On a mismatch that script rebuilds the database with extra
# validation checks and then stops with a FATAL_ERROR that prints the value
# to put here, so this line has to be updated whenever the SQL content changes.
set(PROJ_DB_SQL_EXPECTED_MD5 "9e3a1467df1fcfccd029ecfcc676803d")

add_custom_command(
OUTPUT ${PROJ_DB}
COMMAND ${CMAKE_COMMAND} -E remove -f ${PROJ_DB}
COMMAND ${CMAKE_COMMAND} "-DALL_SQL_IN=${ALL_SQL_IN}" "-DEXE_SQLITE3=${EXE_SQLITE3}" "-DPROJ_DB=${PROJ_DB}" "-DPROJ_VERSION=${PROJ_VERSION}" "-DPROJ_DB_CACHE_DIR=${PROJ_DB_CACHE_DIR}"
COMMAND ${CMAKE_COMMAND} "-DALL_SQL_IN=${ALL_SQL_IN}" "-DEXE_SQLITE3=${EXE_SQLITE3}" "-DPROJ_DB=${PROJ_DB}" "-DPROJ_VERSION=${PROJ_VERSION}" "-DPROJ_DB_CACHE_DIR=${PROJ_DB_CACHE_DIR}" "-DPROJ_DB_SQL_EXPECTED_MD5=${PROJ_DB_SQL_EXPECTED_MD5}"
-P "${CMAKE_CURRENT_SOURCE_DIR}/generate_proj_db.cmake"
COMMAND ${CMAKE_COMMAND} -E copy ${PROJ_DB} ${CMAKE_CURRENT_BINARY_DIR}/for_tests
DEPENDS ${SQL_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/generate_proj_db.cmake"
Expand Down
52 changes: 42 additions & 10 deletions data/generate_proj_db.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,48 @@ function(cat IN_FILE OUT_FILE)
file(APPEND ${OUT_FILE} "${CONTENTS}")
endfunction()

file(WRITE "${ALL_SQL_IN}" "")
include(sql_filelist.cmake)
foreach(SQL_FILE ${SQL_FILES})
cat(${SQL_FILE} "${ALL_SQL_IN}")
endforeach()

# Do ${PROJ_VERSION} substitution
file(READ ${ALL_SQL_IN} CONTENTS)
string(REPLACE "\${PROJ_VERSION}" "${PROJ_VERSION}" CONTENTS_MOD "${CONTENTS}")
file(WRITE "${ALL_SQL_IN}" "${CONTENTS_MOD}")
# Generates an all.sql.in file from all the individual .sql files, taking
# into account whether extra validation checks must be done before inserting
# data records.
#
# Arguments:
#   ALL_SQL_IN_FILENAME  Path of the concatenated SQL file to (re)create.
#   EXTRA_VALIDATION     Value copied into PROJ_DB_EXTRA_VALIDATION before
#                        sql_filelist.cmake is included (presumably read there
#                        to select which .sql files are listed -- confirm).
#   OUT_MD5              Name of the variable, in the caller's scope, that
#                        receives the MD5 of the concatenated content.
function(generate_all_sql_in ALL_SQL_IN_FILENAME EXTRA_VALIDATION OUT_MD5)
    set(PROJ_DB_EXTRA_VALIDATION ${EXTRA_VALIDATION})

    # Start from an empty file, then append every file listed in SQL_FILES.
    file(WRITE "${ALL_SQL_IN_FILENAME}" "")
    include(sql_filelist.cmake)
    foreach(SQL_FILE ${SQL_FILES})
        cat("${SQL_FILE}" "${ALL_SQL_IN_FILENAME}")
    endforeach()

    # Compute the MD5 before PROJ_VERSION substitution to avoid updating the
    # expected MD5 if we just bump the PROJ_VERSION. The checksum is taken on
    # a copy written with UNIX newlines (NEWLINE_STYLE UNIX).
    # NOTE(review): configure_file() without @ONLY also expands ${VAR}
    # references, so ${PROJ_VERSION} may already be replaced in this copy --
    # confirm the checksum really is independent of PROJ_VERSION. Changing
    # this would alter the value expected by PROJ_DB_SQL_EXPECTED_MD5.
    set(ALL_SQL_IN_TMP_FILENAME "${ALL_SQL_IN_FILENAME}.tmp")
    configure_file("${ALL_SQL_IN_FILENAME}" "${ALL_SQL_IN_TMP_FILENAME}" NEWLINE_STYLE UNIX)
    file(MD5 "${ALL_SQL_IN_TMP_FILENAME}" OUT_MD5_LOCAL)
    # The normalized copy is only needed for the checksum: do not leave it
    # behind in the build directory.
    file(REMOVE "${ALL_SQL_IN_TMP_FILENAME}")
    set(${OUT_MD5} "${OUT_MD5_LOCAL}" PARENT_SCOPE)

    # Do ${PROJ_VERSION} substitution
    file(READ "${ALL_SQL_IN_FILENAME}" CONTENTS)
    string(REPLACE "\${PROJ_VERSION}" "${PROJ_VERSION}" CONTENTS_MOD "${CONTENTS}")
    file(WRITE "${ALL_SQL_IN_FILENAME}" "${CONTENTS_MOD}")
endfunction()

# Build the regular all.sql.in (extra validation OFF) and retrieve the MD5
# computed by generate_all_sql_in() in PROJ_DB_SQL_MD5.
generate_all_sql_in("${ALL_SQL_IN}" OFF PROJ_DB_SQL_MD5)

# When the computed MD5 differs from PROJ_DB_SQL_EXPECTED_MD5, the SQL is
# rebuilt with extra validation enabled. Both branches of the inner if() end
# in FATAL_ERROR, so on a mismatch the script never continues past this block.
if (NOT "${PROJ_DB_SQL_MD5}" STREQUAL "${PROJ_DB_SQL_EXPECTED_MD5}")
message(WARNING "all.sql.in content has changed. Running extra validation checks when building proj.db...")

# Second concatenation, this time with extra validation ON, written to a
# separate file next to the regular all.sql.in.
set(ALL_SQL_IN_EXTRA_VALIDATION "${ALL_SQL_IN}.extra_validation")
generate_all_sql_in("${ALL_SQL_IN_EXTRA_VALIDATION}" ON PROJ_DB_SQL_EXTRA_VALIDATION_MD5)

# Feed that SQL to sqlite3 to create a separate database file; any copy left
# over from a previous run is removed first.
set(PROJ_DB_EXTRA_VALIDATION_FILENAME "${PROJ_DB}.extra_validation")
file(REMOVE "${PROJ_DB_EXTRA_VALIDATION_FILENAME}")
execute_process(COMMAND "${EXE_SQLITE3}" "${PROJ_DB_EXTRA_VALIDATION_FILENAME}"
INPUT_FILE "${ALL_SQL_IN_EXTRA_VALIDATION}"
RESULT_VARIABLE STATUS)
# RESULT_VARIABLE receives the exit code of sqlite3, or an error string if
# the process could not be run; anything other than 0 is treated as failure.
if(STATUS AND NOT STATUS EQUAL 0)
message(FATAL_ERROR "Build of proj.db from ${ALL_SQL_IN_EXTRA_VALIDATION} failed")
else()
# Extra validation passed: still stop, so that the new MD5 gets recorded in
# data/CMakeLists.txt before a normal build is allowed to proceed.
message(FATAL_ERROR "Update 'set(PROJ_DB_SQL_EXPECTED_MD5 ...)' line in data/CMakeLists.txt with ${PROJ_DB_SQL_MD5} value")
endif()
endif()

if(IS_DIRECTORY ${PROJ_DB_CACHE_DIR})
set(USE_PROJ_DB_CACHE_DIR TRUE)
Expand Down
3 changes: 3 additions & 0 deletions data/sql/analyze_vacuum.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ANALYZE;

VACUUM;
178 changes: 0 additions & 178 deletions data/sql/commit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,181 +11,3 @@ CREATE INDEX concatenated_operation_idx ON concatenated_operation(source_crs_aut

-- We don't need to select by auth_name, code so nullify them to save space
UPDATE usage SET auth_name = NULL, code = NULL;

-- Final consistency checks
CREATE TABLE dummy(foo);
CREATE TRIGGER final_checks
BEFORE INSERT ON dummy
FOR EACH ROW BEGIN

-- check that view definitions have no error
SELECT RAISE(ABORT, 'corrupt definition of coordinate_operation_view')
WHERE (SELECT 1 FROM coordinate_operation_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of crs_view')
WHERE (SELECT 1 FROM crs_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of object_view')
WHERE (SELECT 1 FROM object_view LIMIT 1) = 0;
SELECT RAISE(ABORT, 'corrupt definition of authority_list')
WHERE (SELECT 1 FROM authority_list LIMIT 1) = 0;

-- check that the auth_name of all objects in object_view is recorded in builtin_authorities
SELECT RAISE(ABORT, 'One or several authorities referenced in object_view are missing in builtin_authorities')
WHERE EXISTS (
SELECT DISTINCT o.auth_name FROM object_view o WHERE NOT EXISTS (
SELECT 1 FROM builtin_authorities b WHERE o.auth_name = b.auth_name)
);

-- check that a usage is registered for most objects where this is needed
SELECT RAISE(ABORT, 'One or several objects lack a corresponding record in the usage table')
WHERE EXISTS (
SELECT * FROM object_view o WHERE NOT EXISTS (
SELECT 1 FROM usage u WHERE
o.table_name = u.object_table_name AND
o.auth_name = u.object_auth_name AND
o.code = u.object_code)
AND o.table_name NOT IN ('unit_of_measure', 'axis',
'celestial_body', 'ellipsoid', 'prime_meridian', 'extent')
-- the IGNF registry lacks extent for the following objects
AND NOT (o.auth_name = 'IGNF' AND o.table_name IN ('geodetic_datum', 'vertical_datum', 'conversion'))
);

SELECT RAISE(ABORT, 'Geodetic datum ensemble defined, but no ensemble member')
WHERE EXISTS (
SELECT * FROM geodetic_datum d WHERE ensemble_accuracy IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM geodetic_datum_ensemble_member WHERE
d.auth_name = ensemble_auth_name AND d.code = ensemble_code)
);

SELECT RAISE(ABORT, 'Vertical datum ensemble defined, but no ensemble member')
WHERE EXISTS (
SELECT * FROM vertical_datum d WHERE ensemble_accuracy IS NOT NULL
AND NOT EXISTS (SELECT 1 FROM vertical_datum_ensemble_member WHERE
d.auth_name = ensemble_auth_name AND d.code = ensemble_code)
);

SELECT RAISE(ABORT, 'PROJ defines an alias that exists in EPSG')
WHERE EXISTS (
SELECT * FROM (
SELECT count(*) AS count, table_name, auth_name, code, alt_name FROM alias_name
WHERE source in ('EPSG', 'PROJ')
AND NOT (source = 'PROJ' AND alt_name IN ('GGRS87', 'NAD27', 'NAD83'))
GROUP BY table_name, auth_name, code, alt_name) x WHERE count > 1
);

-- test to check that our custom grid transformation overrides are really needed
SELECT RAISE(ABORT, 'PROJ grid_transformation defined whereas EPSG has one')
WHERE EXISTS (SELECT 1 FROM grid_transformation g1
JOIN grid_transformation g2
ON g1.source_crs_auth_name = g2.source_crs_auth_name
AND g1.source_crs_code = g2.source_crs_code
AND g1.target_crs_auth_name = g2.target_crs_auth_name
AND g1.target_crs_code = g2.target_crs_code
WHERE g1.auth_name = 'PROJ' AND g1.code NOT LIKE '%_RESTRICTED_TO_VERTCRS%' AND g2.auth_name = 'EPSG' AND g2.deprecated = 0 AND (
(g1.interpolation_crs_auth_name IS NULL AND g2.interpolation_crs_auth_name IS NULL) OR
(g1.interpolation_crs_auth_name IS NOT NULL AND g2.interpolation_crs_auth_name IS NOT NULL AND
g1.interpolation_crs_auth_name = g2.interpolation_crs_auth_name AND
g1.interpolation_crs_code = g2.interpolation_crs_code)))
OR EXISTS (SELECT 1 FROM grid_transformation g1
JOIN grid_transformation g2
ON g1.source_crs_auth_name = g2.target_crs_auth_name
AND g1.source_crs_code = g2.target_crs_code
AND g1.target_crs_auth_name = g1.source_crs_auth_name
AND g1.target_crs_code = g1.source_crs_code
WHERE g1.auth_name = 'PROJ' AND g1.code NOT LIKE '%_RESTRICTED_TO_VERTCRS%' AND g2.auth_name = 'EPSG' AND g2.deprecated = 0);

SELECT RAISE(ABORT, 'Arg! there is now a EPSG:102100 object. Hack in createFromUserInput() will no longer work')
WHERE EXISTS(SELECT 1 FROM crs_view WHERE auth_name = 'EPSG' AND code = '102100');

-- check coordinate_operation_view "foreign keys"
SELECT RAISE(ABORT, 'One coordinate_operation has a broken source_crs link')
WHERE EXISTS (SELECT * FROM coordinate_operation_view cov WHERE
cov.source_crs_auth_name || cov.source_crs_code NOT IN
(SELECT auth_name || code FROM crs_view));
SELECT RAISE(ABORT, 'One coordinate_operation has a broken target_crs link')
WHERE EXISTS (SELECT * FROM coordinate_operation_view cov WHERE
cov.target_crs_auth_name || cov.target_crs_code NOT IN
(SELECT auth_name || code FROM crs_view));

-- check that transformations intersect the area of use of their source/target CRS
-- EPSG, ESRI and IGNF have cases where this does not hold.
SELECT RAISE(ABORT, 'The area of use of at least one coordinate_operation does not intersect the one of its source CRS')
WHERE EXISTS (SELECT * FROM coordinate_operation_view v, crs_view c, usage vu, extent ve, usage cu, extent ce WHERE
v.deprecated = 0 AND
(v.table_name = 'grid_transformation' OR v.auth_name NOT IN ('EPSG', 'ESRI', 'IGNF')) AND
v.source_crs_auth_name = c.auth_name AND
v.source_crs_code = c.code AND
vu.object_table_name = v.table_name AND
vu.object_auth_name = v.auth_name AND
vu.object_code = v.code AND
vu.extent_auth_name = ve.auth_name AND
vu.extent_code = ve.code AND
cu.object_table_name = c.table_name AND
cu.object_auth_name = c.auth_name AND
cu.object_code = c.code AND
cu.extent_auth_name = ce.auth_name AND
cu.extent_code = ce.code AND
NOT ((ce.south_lat < ve.north_lat AND ve.south_lat < ce.north_lat) OR
(ce.west_lon < ce.east_lon AND ve.west_lon < ve.east_lon AND
NOT (ce.west_lon < ve.east_lon AND ve.west_lon < ce.east_lon))) );
SELECT RAISE(ABORT, 'The area of use of at least one coordinate_operation does not intersect the one of its target CRS')
WHERE EXISTS (SELECT * FROM coordinate_operation_view v, crs_view c, usage vu, extent ve, usage cu, extent ce WHERE
v.deprecated = 0 AND
((v.table_name = 'grid_transformation' AND NOT (v.auth_name = 'IGNF' AND v.code = 'TSG1185'))
OR v.auth_name NOT IN ('EPSG', 'ESRI', 'IGNF')) AND
v.target_crs_auth_name = c.auth_name AND
v.target_crs_code = c.code AND
vu.object_table_name = v.table_name AND
vu.object_auth_name = v.auth_name AND
vu.object_code = v.code AND
vu.extent_auth_name = ve.auth_name AND
vu.extent_code = ve.code AND
cu.object_table_name = c.table_name AND
cu.object_auth_name = c.auth_name AND
cu.object_code = c.code AND
cu.extent_auth_name = ce.auth_name AND
cu.extent_code = ce.code AND
NOT ((ce.south_lat < ve.north_lat AND ve.south_lat < ce.north_lat) OR
(ce.west_lon < ce.east_lon AND ve.west_lon < ve.east_lon AND
NOT (ce.west_lon < ve.east_lon AND ve.west_lon < ce.east_lon))) );

-- check geoid_model table
SELECT RAISE(ABORT, 'missing GEOID99 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID99');
SELECT RAISE(ABORT, 'missing GEOID03 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID03');
SELECT RAISE(ABORT, 'missing GEOID06 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID06');
SELECT RAISE(ABORT, 'missing GEOID09 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID09');
SELECT RAISE(ABORT, 'missing GEOID12A in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID12A');
SELECT RAISE(ABORT, 'missing GEOID12B in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID12B');
SELECT RAISE(ABORT, 'missing GEOID18 in geoid_model')
WHERE NOT EXISTS(SELECT 1 FROM geoid_model WHERE name = 'GEOID18');

-- check presence of au_ga_AUSGeoid98.tif
SELECT RAISE(ABORT, 'missing au_ga_AUSGeoid98.tif')
WHERE NOT EXISTS(SELECT 1 FROM grid_alternatives WHERE proj_grid_name = 'au_ga_AUSGeoid98.tif');

-- check PROJ.VERSION value
SELECT RAISE(ABORT, 'Value of PROJ.VERSION entry of metadata tables not substituted by actual value')
WHERE (SELECT 1 FROM metadata WHERE key = 'PROJ.VERSION' AND value LIKE '$%');

-- Only available in sqlite >= 3.16. May be activated as soon as support for ubuntu 16 is dropped
-- check all foreign key contraints have an 'ON DELETE CASCADE'
-- SELECT RAISE(ABORT, 'FK constraint with missing "ON DELETE CASCADE"')
-- WHERE EXISTS (SELECT 1 FROM
-- pragma_foreign_key_list(name),
-- (SELECT name from sqlite_master WHERE type='table')
-- WHERE upper(on_delete) != 'CASCADE');


END;
INSERT INTO dummy DEFAULT VALUES;
DROP TRIGGER final_checks;
DROP TABLE dummy;

ANALYZE;

VACUUM;
9 changes: 9 additions & 0 deletions data/sql/conversion_triggers_hand_written.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

CREATE TRIGGER conversion_method_check_insert_trigger_orthographic
INSTEAD OF INSERT ON conversion
BEGIN

SELECT RAISE(ABORT, 'insert on conversion violates constraint: bad parameters for Orthographic')
WHERE NEW.deprecated != 1 AND NEW.method_auth_name = 'EPSG' AND NEW.method_code = '9840' AND (NEW.method_name != 'Orthographic' OR NEW.param1_auth_name != 'EPSG' OR NEW.param1_code != '8801' OR NEW.param1_name != 'Latitude of natural origin' OR NEW.param1_value IS NULL OR NEW.param1_uom_auth_name IS NULL OR NEW.param1_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param1_uom_auth_name AND code = NEW.param1_uom_code) != 'angle' OR NEW.param2_auth_name != 'EPSG' OR NEW.param2_code != '8802' OR NEW.param2_name != 'Longitude of natural origin' OR NEW.param2_value IS NULL OR NEW.param2_uom_auth_name IS NULL OR NEW.param2_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param2_uom_auth_name AND code = NEW.param2_uom_code) != 'angle' OR NEW.param3_auth_name != 'EPSG' OR NEW.param3_code != '8806' OR NEW.param3_name != 'False easting' OR NEW.param3_value IS NULL OR NEW.param3_uom_auth_name IS NULL OR NEW.param3_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param3_uom_auth_name AND code = NEW.param3_uom_code) != 'length' OR NEW.param4_auth_name != 'EPSG' OR NEW.param4_code != '8807' OR NEW.param4_name != 'False northing' OR NEW.param4_value IS NULL OR NEW.param4_uom_auth_name IS NULL OR NEW.param4_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param4_uom_auth_name AND code = NEW.param4_uom_code) != 'length' OR NEW.param5_auth_name IS NOT NULL OR NEW.param5_code IS NOT NULL OR NEW.param5_name IS NOT NULL OR NEW.param5_value IS NOT NULL OR NEW.param5_uom_auth_name IS NOT NULL OR NEW.param5_uom_code IS NOT NULL OR NEW.param6_auth_name IS NOT NULL OR NEW.param6_code IS NOT NULL OR NEW.param6_name IS NOT NULL OR NEW.param6_value IS NOT NULL OR NEW.param6_uom_auth_name IS NOT NULL OR NEW.param6_uom_code IS NOT NULL OR NEW.param7_auth_name IS NOT NULL OR NEW.param7_code IS NOT NULL OR NEW.param7_name IS NOT NULL OR NEW.param7_value IS NOT NULL OR 
NEW.param7_uom_auth_name IS NOT NULL OR NEW.param7_uom_code IS NOT NULL);

END;
9 changes: 0 additions & 9 deletions data/sql/customizations_early.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,3 @@ INSERT INTO "scope" VALUES('PROJ','SCOPE_UNKNOWN','Not known.',0);
INSERT INTO celestial_body VALUES('PROJ', 'EARTH', 'Earth', 6378137.0);

INSERT INTO versioned_auth_name_mapping VALUES ('IAU_2015', 'IAU', '2015', 1);

CREATE TRIGGER conversion_method_check_insert_trigger_orthographic
INSTEAD OF INSERT ON conversion
BEGIN

SELECT RAISE(ABORT, 'insert on conversion violates constraint: bad parameters for Orthographic')
WHERE NEW.deprecated != 1 AND NEW.method_auth_name = 'EPSG' AND NEW.method_code = '9840' AND (NEW.method_name != 'Orthographic' OR NEW.param1_auth_name != 'EPSG' OR NEW.param1_code != '8801' OR NEW.param1_name != 'Latitude of natural origin' OR NEW.param1_value IS NULL OR NEW.param1_uom_auth_name IS NULL OR NEW.param1_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param1_uom_auth_name AND code = NEW.param1_uom_code) != 'angle' OR NEW.param2_auth_name != 'EPSG' OR NEW.param2_code != '8802' OR NEW.param2_name != 'Longitude of natural origin' OR NEW.param2_value IS NULL OR NEW.param2_uom_auth_name IS NULL OR NEW.param2_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param2_uom_auth_name AND code = NEW.param2_uom_code) != 'angle' OR NEW.param3_auth_name != 'EPSG' OR NEW.param3_code != '8806' OR NEW.param3_name != 'False easting' OR NEW.param3_value IS NULL OR NEW.param3_uom_auth_name IS NULL OR NEW.param3_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param3_uom_auth_name AND code = NEW.param3_uom_code) != 'length' OR NEW.param4_auth_name != 'EPSG' OR NEW.param4_code != '8807' OR NEW.param4_name != 'False northing' OR NEW.param4_value IS NULL OR NEW.param4_uom_auth_name IS NULL OR NEW.param4_uom_code IS NULL OR (SELECT type FROM unit_of_measure WHERE auth_name = NEW.param4_uom_auth_name AND code = NEW.param4_uom_code) != 'length' OR NEW.param5_auth_name IS NOT NULL OR NEW.param5_code IS NOT NULL OR NEW.param5_name IS NOT NULL OR NEW.param5_value IS NOT NULL OR NEW.param5_uom_auth_name IS NOT NULL OR NEW.param5_uom_code IS NOT NULL OR NEW.param6_auth_name IS NOT NULL OR NEW.param6_code IS NOT NULL OR NEW.param6_name IS NOT NULL OR NEW.param6_value IS NOT NULL OR NEW.param6_uom_auth_name IS NOT NULL OR NEW.param6_uom_code IS NOT NULL OR NEW.param7_auth_name IS NOT NULL OR NEW.param7_code IS NOT NULL OR NEW.param7_name IS NOT NULL OR NEW.param7_value IS NOT NULL OR 
NEW.param7_uom_auth_name IS NOT NULL OR NEW.param7_uom_code IS NOT NULL);

END;
Loading
Loading