diff --git a/.github/workflows/ci_env.yml b/.github/workflows/ci_env.yml index d143012..1743625 100644 --- a/.github/workflows/ci_env.yml +++ b/.github/workflows/ci_env.yml @@ -27,6 +27,19 @@ jobs: shell: bash -el {0} run: | pip install -r requirements.txt + pip install . + - name: Workaround for poppler lib issue + shell: bash -el {0} + run: | + conda list + pip install poppler-utils + conda install 'poppler=>22.10.0' + - name: Check installation + shell: bash -el {0} + run: | + conda list + gdal-config --version + python -c "from osgeo import gdal; print(gdal.__version__)" - name: Test with pytest shell: bash -el {0} run: | diff --git a/README.md b/README.md index 70601d4..c791919 100644 --- a/README.md +++ b/README.md @@ -410,7 +410,7 @@ defining the step numbers to skip when writing PCRaster maps. Same as old grib2p grib2pcraster). It can be average or accumulation. -  halfweightsIf set to True and type is "average", it will avaluate the average by using half weights for the first and the last step +  halfweightsIf set to true and type is "average", it will avaluate the average by using half weights for the first and the last step  forceZeroArrayOptional. In case of “accumulation”, and only @@ -669,6 +669,20 @@ Attributes p, leafsize and eps for the kd tree algorithm are default in scipy li | eps | 0 | | leafsize | 10 | +#### ADW +It's the Angular Distance Weighted (ADW) algorithm with scipy.kd_tree, using 4 neighbours. +If @adw_broadcasting is set to true, computations will run in full broadcasting mode but requires more memory + +```json +{ +"Interpolation": { + "@latMap": "/dataset/maps/europe5km/lat.map", + "@lonMap": "/dataset/maps/europe5km/long.map", + "@mode": "adw", + "@adw_broadcasting": false} +} +``` + #### bilinear It's the bilinear interpolation algorithm applyied on regular and irregular grids. 
On irregular grids, it tries to get the best quatrilateral around each target point, but at the same time tries to use the best stable and grid-like shape from starting points. To do so, evaluates interpolation looking at point on similar latitude, thus on projected grib files may show some irregular results. @@ -734,7 +748,7 @@ The JSON configuration in the execution file will look like: { "Aggregation": { "@type": "average", - "@halfweights": False} + "@halfweights": false} } ``` @@ -752,7 +766,7 @@ Temperatures are often extracted as averages on 24 hours or 6 hours. Here's a ty "Aggregation": { "@step": 24, "@type": "average", -"@halfweights": False +"@halfweights": false }, "OutMaps": { "@cloneMap": "/dataset/maps/europe/dem.map", @@ -798,7 +812,7 @@ This is needed because we performed 24 hours average over 6 hourly steps. The to evaluate the average, the following steps are executed: -- when "halfweights" is False, the results of the function is the sum of all the values from "start_step-aggregation_step+1" to end_step, taking for each step the value corresponding to the next available value in the grib file. E.g: +- when "halfweights" is false, the results of the function is the sum of all the values from "start_step-aggregation_step+1" to end_step, taking for each step the value corresponding to the next available value in the grib file. E.g: INPUT: start_step=24, end_step=, aggregation_step=24 GRIB File: contains data starting from step 0 to 48 every 6 hours: 0,6,12,18,24,30,.... @@ -807,7 +821,7 @@ GRIB File: contains data starting from step 0 to 48 every 6 hours: 0,6,12,18,24, Day 2: same as Day 1 starting from (24+24)-24+1=25... -- when "halfweights" is True, the results of the function is the sum of all the values from "start_step-aggregation_step" to end_step, taking for each step the value corresponding to the next available value in the grib file but using half of the weights for the first and the last step in each aggregation_step cicle. 
E.g: +- when "halfweights" is true, the results of the function is the sum of all the values from "start_step-aggregation_step" to end_step, taking for each step the value corresponding to the next available value in the grib file but using half of the weights for the first and the last step in each aggregation_step cycle. E.g: INPUT: start_step=24, end_step=, aggregation_step=24 GRIB File: contains data starting from step 0 to 72 every 6 hours: 0,6,12,18,24,30,36,.... @@ -816,7 +830,7 @@ GRIB File: contains data starting from step 0 to 72 every 6 hours: 0,6,12,18,24, Day 2: same as Day 1 starting from (24+24)-24=24: the step 24 will have a weight of 3, while steps 30,36 and 42 will be counted 6 times, and finally the step 48 will have a weight of 3. -- if start_step is zero or is not specified, the aggregation will start from 1 +- if start_step is zero or is not specified, the aggregation will start from 0 ### Accumulation For precipitation values, accumulation over 6 or 24 hours is often performed. Here's an example of configuration and execution output in DEBUG mode. 
diff --git a/configuration/global/intertables.json b/configuration/global/intertables.json index e164b50..2fb617d 100644 --- a/configuration/global/intertables.json +++ b/configuration/global/intertables.json @@ -21,6 +21,17 @@ 680 ] }, + "I-15.75_16.125_511_415_212065_rotated_ll_-1700000_2700000_5000_-5000_34.97_71.07_-1700000_2700000_5000_-5000_-24.35_51.35scipy_adw": { + "filename": "tbl_pf10tp_550800_scipy_adw.npy", + "method": "adw", + "source_shape": [ + 212065 + ], + "target_shape": [ + 810, + 680 + ] + }, "I-15.75_16.125_511_415_212065_rotated_ll_-1700000_2700000_5000_-5000_34.97_71.07_-1700000_2700000_5000_-5000_-24.35_51.35scipy_nearest": { "filename": "tbl_pf10slhf_550800_scipy_nearest.npy", "method": "nearest", diff --git a/requirements.txt b/requirements.txt index bf2f40e..e1cbf81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ numpy>=1.18.2 scipy>=1.4.1 # GDAL numexpr>=2.7.1 -# eccodes-python # replacing old GRIB API +importlib-metadata<5.0.0 # dask packages for parallel interpolation dask[bag] diff --git a/src/pyg2p/VERSION b/src/pyg2p/VERSION index 9b7a431..448ada3 100644 --- a/src/pyg2p/VERSION +++ b/src/pyg2p/VERSION @@ -1 +1 @@ -3.2.4 \ No newline at end of file +3.2.5 \ No newline at end of file diff --git a/src/pyg2p/main/api.py b/src/pyg2p/main/api.py index 70b9341..5d4b3f8 100644 --- a/src/pyg2p/main/api.py +++ b/src/pyg2p/main/api.py @@ -168,6 +168,7 @@ def _define_exec_params(self): self._vars['outMaps.clone'] = self.api_conf['OutMaps']['cloneMap'] interpolation_conf = self.api_conf['OutMaps']['Interpolation'] self._vars['interpolation.mode'] = interpolation_conf.get('mode', self.default_values['interpolation.mode']) + self._vars['interpolation.use_broadcasting'] = interpolation_conf.get('use_broadcasting', False) self._vars['interpolation.rotated_target'] = interpolation_conf.get('rotated_target', False) if not self._vars['interpolation.dir'] and self.api_conf.get('intertableDir'): 
self._vars['interpolation.dirs']['user'] = self.api_conf['intertableDir'] diff --git a/src/pyg2p/main/context.py b/src/pyg2p/main/context.py index 02dbcf1..90416b3 100644 --- a/src/pyg2p/main/context.py +++ b/src/pyg2p/main/context.py @@ -11,7 +11,7 @@ class Context: - allowed_interp_methods = ('grib_nearest', 'grib_invdist', 'nearest', 'invdist', 'bilinear', 'triangulation', 'bilinear_delaunay') + allowed_interp_methods = ('grib_nearest', 'grib_invdist', 'nearest', 'invdist', 'adw', 'cdd', 'bilinear', 'triangulation', 'bilinear_delaunay') default_values = {'interpolation.mode': 'grib_nearest', 'outMaps.unitTime': '24'} def __getitem__(self, param): @@ -360,6 +360,7 @@ def _define_exec_params(self): self._vars['outMaps.clone'] = exec_conf['OutMaps']['@cloneMap'] interpolation_conf = exec_conf['OutMaps']['Interpolation'] self._vars['interpolation.mode'] = interpolation_conf.get('@mode', self.default_values['interpolation.mode']) + self._vars['interpolation.adw_broadcasting'] = interpolation_conf.get('@adw_broadcasting', False) self._vars['interpolation.rotated_target'] = interpolation_conf.get('@rotated_target', False) if not self._vars['interpolation.dir'] and interpolation_conf.get('@intertableDir'): # get from JSON @@ -412,6 +413,7 @@ def _define_exec_params(self): if exec_conf.get('Aggregation'): self._vars['aggregation.step'] = exec_conf['Aggregation'].get('@step') self._vars['aggregation.type'] = exec_conf['Aggregation'].get('@type') + self._vars['aggregation.halfweights'] = bool(exec_conf['Aggregation'].get('@halfweights')) self._vars['execution.doAggregation'] = bool(self._vars.get('aggregation.step')) \ and bool(self._vars.get('aggregation.type')) diff --git a/src/pyg2p/main/interpolation/__init__.py b/src/pyg2p/main/interpolation/__init__.py index 0f24089..5759d80 100644 --- a/src/pyg2p/main/interpolation/__init__.py +++ b/src/pyg2p/main/interpolation/__init__.py @@ -9,7 +9,7 @@ from . 
import grib_interpolation_lib from .latlong import LatLong -from .scipy_interpolation_lib import ScipyInterpolation, DEBUG_BILINEAR_INTERPOLATION, \ +from .scipy_interpolation_lib import ScipyInterpolation, DEBUG_BILINEAR_INTERPOLATION, DEBUG_ADW_INTERPOLATION, \ DEBUG_MIN_LAT, DEBUG_MIN_LON, DEBUG_MAX_LAT, DEBUG_MAX_LON from ...exceptions import ApplicationException, NO_INTERTABLE_CREATED @@ -20,9 +20,9 @@ class Interpolator(Loggable): _LOADED_INTERTABLES = {} _prefix = 'I' - scipy_modes_nnear = {'nearest': 1, 'invdist': 4, 'bilinear': 4, 'triangulation': 3, 'bilinear_delaunay': 4} + scipy_modes_nnear = {'nearest': 1, 'invdist': 4, 'adw': 4, 'cdd': 4, 'bilinear': 4, 'triangulation': 3, 'bilinear_delaunay': 4} suffixes = {'grib_nearest': 'grib_nearest', 'grib_invdist': 'grib_invdist', - 'nearest': 'scipy_nearest', 'invdist': 'scipy_invdist', + 'nearest': 'scipy_nearest', 'invdist': 'scipy_invdist', 'adw': 'scipy_adw', 'cdd': 'scipy_cdd', 'bilinear': 'scipy_bilinear', 'triangulation': 'scipy_triangulation', 'bilinear_delaunay': 'scipy_bilinear_delaunay'} _format_intertable = 'tbl{prognum}_{source_file}_{target_size}_{suffix}.npy.gz'.format @@ -31,6 +31,7 @@ def __init__(self, exec_ctx, mv_input): self._mv_grib = mv_input self.interpolate_with_grib = exec_ctx.is_with_grib_interpolation self._mode = exec_ctx.get('interpolation.mode') + self._adw_broadcasting = exec_ctx.get('interpolation.adw_broadcasting') self._source_filename = pyg2p.util.files.filename(exec_ctx.get('input.file')) self._suffix = self.suffixes[self._mode] self._intertable_dirs = exec_ctx.get('interpolation.dirs') @@ -127,7 +128,7 @@ def _read_intertable(self, tbl_fullpath): coeffs = intertable['coeffs'] return indexes[0], indexes[1], indexes[2], indexes[3], indexes[4], indexes[5], coeffs[0], coeffs[1], coeffs[2], coeffs[3] else: - # self._mode in ('invdist', 'nearest', 'bilinear', 'triangulation', 'bilinear_delaunay'): + # self._mode in ('invdist', 'adw', 'cdd', 'nearest', 'bilinear', 
'triangulation', 'bilinear_delaunay'): # return indexes and weighted distances (only used with nnear > 1) indexes = intertable['indexes'] coeffs = intertable['coeffs'] @@ -177,11 +178,33 @@ def interpolate_scipy(self, latgrib, longrib, v, grid_id, grid_details=None): else: intertable_id, intertable_name = self._intertable_filename(grid_id) + if DEBUG_ADW_INTERPOLATION: + # to debug create a limited controlled set of input values + # + # np.random.seed(0) + # latgrib = np.random.uniform(low=3, high=11, size=10) + # longrib = np.random.uniform(low=46, high=50, size=10) + # v = np.random.uniform(low=100, high=200, size=10) + # latgrib = np.array([ 7.39050803, 8.72151493, 7.82210701, 7.35906546, 6.38923839, + # 8.1671529, 6.50069769, 10.13418401, 10.70930208, 6.06753215]) + # longrib = np.array([49.16690015, 48.11557968, 48.27217824, 49.70238655, 46.28414423, + # 46.3485172, 46.08087359, 49.33047938, 49.112627, 49.48004859]) + # v = np.array([197.86183422, 179.91585642, 146.14793623, 178.05291763, 111.82744259, + # 163.99210213, 114.33532874, 194.4668917, 152.18483218, 141.466194 ]) + # latgrib = np.array([ 7.39050803, 8.72151493, 7.82210701, 7.35906546]) + # longrib = np.array([49.16690015, 48.11557968, 48.27217824, 49.70238655]) + # v = np.array([100, 133, 166, 200 ]) + latgrib = np.array([ 8, 8, 8, 8]) + longrib = np.array([45, 48.5, 49, 50]) + v = np.array([200, 100, 100, 100 ]) + intertable_id, intertable_name = 'DEBUG_ADW','DEBUG_ADW.npy' + nnear = self.scipy_modes_nnear[self._mode] - if pyg2p.util.files.exists(intertable_name) or pyg2p.util.files.exists(intertable_name + '.gz'): - indexes, weights = self._read_intertable(intertable_name) - result = self._interpolate_scipy_append_mv(v, weights, indexes, nnear) + if (not DEBUG_ADW_INTERPOLATION) and \ + (pyg2p.util.files.exists(intertable_name) or pyg2p.util.files.exists(intertable_name + '.gz')): + indexes, weights = self._read_intertable(intertable_name) + result = self._interpolate_scipy_append_mv(v, weights, 
indexes, nnear) elif self.create_if_missing: self.intertables_config.check_write() @@ -192,7 +215,7 @@ def interpolate_scipy(self, latgrib, longrib, v, grid_id, grid_details=None): self._log('\nInterpolating table not found\n Id: {}\nWill create file: {}'.format(intertable_id, intertable_name), 'WARN') scipy_interpolation = ScipyInterpolation(longrib, latgrib, grid_details, v.ravel(), nnear, self.mv_out, self._mv_grib, target_is_rotated=self._rotated_target_grid, - parallel=self.parallel, mode=self._mode) + parallel=self.parallel, mode=self._mode, use_broadcasting=self._adw_broadcasting) _, weights, indexes = scipy_interpolation.interpolate(lonefas, latefas) result = self._interpolate_scipy_append_mv(v, weights, indexes, nnear) diff --git a/src/pyg2p/main/interpolation/scipy_interpolation_lib.py b/src/pyg2p/main/interpolation/scipy_interpolation_lib.py index 5c91710..46671af 100644 --- a/src/pyg2p/main/interpolation/scipy_interpolation_lib.py +++ b/src/pyg2p/main/interpolation/scipy_interpolation_lib.py @@ -19,6 +19,7 @@ DEBUG_BILINEAR_INTERPOLATION = False +DEBUG_ADW_INTERPOLATION = False # DEBUG_MIN_LAT = 88 # DEBUG_MIN_LON = -180 # DEBUG_MAX_LAT = 90 @@ -31,10 +32,14 @@ # DEBUG_MIN_LON = -24 # DEBUG_MAX_LAT = 70 # DEBUG_MAX_LON = -22 -DEBUG_MIN_LAT = -10 -DEBUG_MIN_LON = -100 -DEBUG_MAX_LAT = 25 -DEBUG_MAX_LON = -50 +# DEBUG_MIN_LAT = -10 +# DEBUG_MIN_LON = -100 +# DEBUG_MAX_LAT = 25 +# DEBUG_MAX_LON = -50 +DEBUG_MIN_LAT = 7 +DEBUG_MIN_LON = 45 +DEBUG_MAX_LAT = 9 +DEBUG_MAX_LON = 50 #DEBUG_NN = 15410182 @@ -283,7 +288,7 @@ class ScipyInterpolation(object): def __init__(self, longrib, latgrib, grid_details, source_values, nnear, mv_target, mv_source, target_is_rotated=False, parallel=False, - mode='nearest'): + mode='nearest', use_broadcasting = False): stdout.write('Start scipy interpolation: {}\n'.format(now_string())) self.geodetic_info = grid_details self.source_grid_is_rotated = 'rotated' in grid_details.get('gridType') @@ -294,6 +299,7 @@ def __init__(self, 
longrib, latgrib, grid_details, source_values, nnear, self.latgrib = latgrib self.nnear = nnear self.mode = mode + self.use_broadcasting = use_broadcasting self._mv_target = mv_target self._mv_source = mv_source self.z = source_values @@ -337,22 +343,23 @@ def interpolate(self, target_lons, target_lats): else: if self.mode == 'invdist': # return results, distances, indexes - result, weights, indexes = self._build_weights_invdist(distances, indexes, self.nnear) - else: - if self.mode == 'bilinear' and self.nnear == 4: # bilinear interpolation only supported with nnear = 4 - # BILINEAR INTERPOLATION - result, weights, indexes = self._build_weights_bilinear(distances, indexes, efas_locations, self.nnear) - else: - if self.mode == 'triangulation': - # linear barycentric interpolation on Delaunay triangulation - result, weights, indexes = self._build_weights_triangulation(use_bilinear=False) - else: - if self.mode == 'bilinear_delaunay': - # bilinear interpolation on Delaunay triangulation - result, weights, indexes = self._build_weights_triangulation(use_bilinear=True) - else: - raise ApplicationException.get_exc(INVALID_INTERPOL_METHOD, - f"interpolation method not supported (mode = {self.mode}, nnear = {self.nnear})") + result, weights, indexes = self._build_weights_invdist(distances, indexes, self.nnear) + elif self.mode == 'adw': + result, weights, indexes = self._build_weights_invdist(distances, indexes, self.nnear, adw_type='Shepard', use_broadcasting=self.use_broadcasting) + elif self.mode == 'cdd': + result, weights, indexes = self._build_weights_invdist(distances, indexes, self.nnear, adw_type='CDD', use_broadcasting=self.use_broadcasting) + elif self.mode == 'bilinear' and self.nnear == 4: # bilinear interpolation only supported with nnear = 4 + # BILINEAR INTERPOLATION + result, weights, indexes = self._build_weights_bilinear(distances, indexes, efas_locations, self.nnear) + elif self.mode == 'triangulation': + # linear barycentric interpolation on Delaunay 
triangulation + result, weights, indexes = self._build_weights_triangulation(use_bilinear=False) + elif self.mode == 'bilinear_delaunay': + # bilinear interpolation on Delaunay triangulation + result, weights, indexes = self._build_weights_triangulation(use_bilinear=True) + else: + raise ApplicationException.get_exc(INVALID_INTERPOL_METHOD, + f"interpolation method not supported (mode = {self.mode}, nnear = {self.nnear})") stdout.write('End scipy interpolation: {}\n'.format(now_string())) @@ -419,8 +426,58 @@ def _build_nn(self, distances, indexes): stdout.flush() return result, idxs - # Invdist Optimized version (using broadcast) - def _build_weights_invdist(self, distances, indexes, nnear): + # # Invdist Optimized version (using broadcast) + # def _build_weights_invdist(self, distances, indexes, nnear): + # z = self.z + # result = mask_it(np.empty((len(distances),) + np.shape(z[0])), self._mv_target, 1) + # weights = empty((len(distances),) + (nnear,)) + # idxs = empty((len(indexes),) + (nnear,), fill_value=z.size, dtype=int) + # num_cells = result.size + # back_char, _ = progress_step_and_backchar(num_cells) + + # stdout.write('Skipping neighbors at distance > {}\n'.format(self.min_upper_bound)) + # stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 0, 5, 0, 0)) + # stdout.flush() + + # dist_leq_1e_10 = distances[:, 0] <= 1e-10 + # dist_leq_min_upper_bound = np.logical_and(~dist_leq_1e_10, distances[:, 0] <= self.min_upper_bound) + + # # distances <= 1e-10 : take exactly the point, weight = 1 + # onlyfirst_array = np.zeros(nnear) + # onlyfirst_array[0] = 1 + # weights[dist_leq_1e_10] = onlyfirst_array + # idxs[dist_leq_1e_10] = indexes[dist_leq_1e_10] + # result[dist_leq_1e_10] = z[indexes[dist_leq_1e_10][:, 0]] + + # w = np.zeros_like(distances) + # w[dist_leq_min_upper_bound] = 1. 
/ distances[dist_leq_min_upper_bound] ** 2 + # stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 1, 5, 100/5)) + # stdout.flush() + # sums = np.sum(w[dist_leq_min_upper_bound], axis=1, keepdims=True) + # stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 2, 5, 2*100/5)) + # stdout.flush() + # weights[dist_leq_min_upper_bound] = w[dist_leq_min_upper_bound] / sums + # stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 3, 5, 3*100/5)) + # stdout.flush() + # wz = np.einsum('ij,ij->i', weights, z[indexes]) + # stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 4, 5, 4*100/5)) + # stdout.flush() + # idxs[dist_leq_min_upper_bound] = indexes[dist_leq_min_upper_bound] + # result[dist_leq_min_upper_bound] = wz[dist_leq_min_upper_bound] + # stdout.write('{}Building coeffs: {}/{} (100%)\n'.format(back_char, 5, 5)) + # stdout.flush() + # return result, weights, idxs + + # Inverse distance weights (IDW) interpolation, with optional Angular distance weighting (ADW) factor + # if adw_type == None -> simple IDW + # if adw_type == Shepard -> Shepard 1968 original formulation + def _build_weights_invdist(self, distances, indexes, nnear, adw_type = None, use_broadcasting = False): + if DEBUG_ADW_INTERPOLATION: + self.min_upper_bound = 1000000000 + if DEBUG_BILINEAR_INTERPOLATION: + n_debug=1940 + else: + n_debug=11805340 z = self.z result = mask_it(np.empty((len(distances),) + np.shape(z[0])), self._mv_target, 1) weights = empty((len(distances),) + (nnear,)) @@ -436,7 +493,9 @@ def _build_weights_invdist(self, distances, indexes, nnear): dist_leq_min_upper_bound = np.logical_and(~dist_leq_1e_10, distances[:, 0] <= self.min_upper_bound) # distances <= 1e-10 : take exactly the point, weight = 1 - weights[dist_leq_1e_10] = np.array([1., 0., 0., 0.]) + onlyfirst_array = np.zeros(nnear) + onlyfirst_array[0] = 1 + weights[dist_leq_1e_10] = onlyfirst_array idxs[dist_leq_1e_10] = indexes[dist_leq_1e_10] 
result[dist_leq_1e_10] = z[indexes[dist_leq_1e_10][:, 0]] @@ -444,19 +503,150 @@ def _build_weights_invdist(self, distances, indexes, nnear): w[dist_leq_min_upper_bound] = 1. / distances[dist_leq_min_upper_bound] ** 2 stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 1, 5, 100/5)) stdout.flush() + + if (adw_type=='Shepard'): + # initialize weights + weight_directional = np.zeros_like(distances) + + # get "s" vector as the inverse distance with power = 1 (in "w" we have the invdist power 2) + # actually s(d) should be + # 1/d when 0 < d < r'/3 + # (27/4r')*(d/r'-1) when r'/3 < d < r' + # 0 when r' < d + # The orginal method consider a range of 4 to 10 data points e removes distant point using the rule: + # pi*r= 7(N/A) where N id the number of points and A is the area of the largest poligon enclosed by data points + # r'(C^n) = di(n+1) where di is the Point having the minimum distance major than the first n Points + # so that + # r' = r' C^4 = di(5) when n(C) <= 4 + # r' = r when 4 < n(C) <= 10 + # r' = r' C^10 = di(11) when 10 < n(C) + # + # TODO: check if the implementation needs to be updated accordingly + # here we take only the inverse of the distance in "s" + s = np.zeros_like(distances) + s[dist_leq_min_upper_bound] = 1. / distances[dist_leq_min_upper_bound] + + self.lat_inALL = self.target_latsOR.ravel() + self.lon_inALL = self.target_lonsOR.ravel() + + # start_time = time.time() + if not use_broadcasting: + for i in range(nnear): + xj_diff = self.lat_inALL[:, np.newaxis] - self.latgrib[indexes] + yj_diff = self.lon_inALL[:, np.newaxis] - self.longrib[indexes] + + Dj = np.sqrt(xj_diff**2 + yj_diff**2) + # TODO: check here: we could use "distances", but we are actually evaluating the cos funcion on lat and lon values, that + # approximates the real angle... 
to use "distances" we need to operate in 3d version of the points + # in theory it should be OK to use the solution above, otherwise we can change it to + # Di = distances[i] + # Dj = distances + # and formula cos_theta = [(x-xi)*(x-xj) + (y-yi)*(y-yj) + (z-zi)*(z-zj)] / di*dj + + # cos_theta = [(x-xi)*(x-xj) + (y-yi)*(y-yj)] / di*dj. + #cos_theta = (xi_diff[:,np.newaxis] * xj_diff + yi_diff[:,np.newaxis] * yj_diff) / (Di[:,np.newaxis] * Dj) + # cos_theta = (xj_diff[:,i,np.newaxis] * xj_diff + yj_diff[:,i,np.newaxis] * yj_diff) / (Dj[:,i,np.newaxis] * Dj) + # numerator = np.sum((1 - cos_theta) * s, axis=1) + # denominator = np.sum(s, axis=1) + + cos_theta = (xj_diff[:,i,np.newaxis] * xj_diff + yj_diff[:,i,np.newaxis] * yj_diff) / (Dj[:,i,np.newaxis] * Dj) + # skip when i==j, so that directional weight of i is evaluated on all points j where j!=i + # TODO: tip: since cos_theta = 1 for i==j, to speed up we can skip this passage and apply i!=j only on denominator + cos_theta = cos_theta[:,np.concatenate([np.arange(i), np.arange(i+1, cos_theta.shape[1])])] + sj = s[:,np.concatenate([np.arange(i), np.arange(i+1, s.shape[1])])] + numerator = np.sum((1 - cos_theta) * sj, axis=1) + denominator = np.sum(sj, axis=1) + + weight_directional[dist_leq_min_upper_bound, i] = numerator[dist_leq_min_upper_bound] / denominator[dist_leq_min_upper_bound] + + # DEBUG: test the point at n_debug 11805340=lat 8.025 lon 47.0249999 + if DEBUG_ADW_INTERPOLATION: + print("i: {}".format(i)) + print("cos_theta: {}".format(cos_theta[n_debug])) + print("s: {}".format(s[n_debug])) + print("numerator: {}".format(numerator[n_debug])) + print("denominator: {}".format(denominator[n_debug])) + else: + # All in broadcasting: + # this algorithm uses huge amount of memory and deas not speed up much on standard Virtual Machine + # TODO: check if it is worth to use it on HPC + # Compute xi and yi for all elements + xi = self.latgrib[indexes] + yi = self.longrib[indexes] + # Compute xi_diff and yi_diff for 
all elements + xi_diff = self.lat_inALL[:, np.newaxis] - xi + yi_diff = self.lon_inALL[:, np.newaxis] - yi + # Compute Di for all elements + Di = np.sqrt(xi_diff**2 + yi_diff**2) + # Compute cos_theta for all elements + cos_theta = (xi_diff[:, :, np.newaxis] * xi_diff[:, np.newaxis, :] + yi_diff[:, :, np.newaxis] * yi_diff[:, np.newaxis, :]) / (Di[:, :, np.newaxis] * Di[:, np.newaxis, :]) + # skip when i==j, so that directional weight of i is evaluated on all points j where j!=i + # TODO: tip: since cos_theta = 1 for i==j, to speed up we can skip this passage and apply i!=j only on denominator + # Delete the diagonal elements from cos_theta + # Reshape cos_theta to (n, nnear, nnear-1) + n = cos_theta.shape[0] + m = cos_theta.shape[1] + strided = np.lib.stride_tricks.as_strided + s0,s1,s2 = cos_theta[:].strides + cos_theta = strided(cos_theta.ravel()[1:], shape=(n,m-1,m), strides=(s0,s1+s2,s2)).reshape(n,m,-1) + sj = np.tile(s[:, np.newaxis, :], (1, 4, 1)) + s0,s1,s2 = sj[:].strides + sj = strided(sj.ravel()[1:], shape=(n,m-1,m), strides=(s0,s1+s2,s2)).reshape(n,m,-1) + + numerator = np.sum((1 - cos_theta) * sj, axis=2) + denominator = np.sum(sj, axis=2) + # Compute weight_directional for all elements + weight_directional[dist_leq_min_upper_bound, :] = numerator[dist_leq_min_upper_bound, :] / denominator[dist_leq_min_upper_bound, :] + + # end_time = time.time() + # elapsed_time = end_time - start_time + # print(f"Elapsed time for computation: {elapsed_time:.6f} seconds") + + # update weights with directional ones + if DEBUG_ADW_INTERPOLATION: + print("weight_directional: {}".format(weight_directional[n_debug])) + print("w (before adding angular weights): {}".format(w[n_debug])) + w = np.multiply(w,1+weight_directional) + if DEBUG_ADW_INTERPOLATION: + print("w (after adding angular weights): {}".format(w[n_debug])) + elif (adw_type=='CDD'): + raise ApplicationException.get_exc(INVALID_INTERPOL_METHOD, + f"interpolation method not implemented yet (mode = {self.mode}, 
nnear = {self.nnear}, adw_type = {adw_type})") + elif (adw_type!=None): + raise ApplicationException.get_exc(INVALID_INTERPOL_METHOD, + f"interpolation method not supported (mode = {self.mode}, nnear = {self.nnear}, adw_type = {adw_type})") + + #normalize weights sums = np.sum(w[dist_leq_min_upper_bound], axis=1, keepdims=True) stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 2, 5, 2*100/5)) stdout.flush() weights[dist_leq_min_upper_bound] = w[dist_leq_min_upper_bound] / sums + if DEBUG_ADW_INTERPOLATION: + dist_smalltest = distances[:, 0] <= 10000 + onlyfirst_array_test = np.zeros(nnear) + onlyfirst_array_test[0] = 1000 + weights[dist_smalltest]=onlyfirst_array_test + print("weights (after normalization): {}".format(weights[n_debug])) stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 3, 5, 3*100/5)) stdout.flush() + wz = np.einsum('ij,ij->i', weights, z[indexes]) stdout.write('{}Building coeffs: {}/{} ({:.2f}%)\n'.format(back_char, 4, 5, 4*100/5)) stdout.flush() idxs[dist_leq_min_upper_bound] = indexes[dist_leq_min_upper_bound] result[dist_leq_min_upper_bound] = wz[dist_leq_min_upper_bound] + if DEBUG_ADW_INTERPOLATION: + print("result: {}".format(result[n_debug])) + print("lat: {}".format(self.lat_inALL[n_debug])) + print("lon: {}".format(self.lon_inALL[n_debug])) + print("idxs: {}".format(idxs[n_debug])) + print("distances: {}".format(distances[n_debug])) + print("latgrib: {}".format(self.latgrib[idxs[n_debug]])) + print("longrib: {}".format(self.longrib[idxs[n_debug]])) + print("value: {}".format(self.z[idxs[n_debug]])) stdout.write('{}Building coeffs: {}/{} (100%)\n'.format(back_char, 5, 5)) stdout.flush() + return result, weights, idxs # take additional points from the KDTree close to the current point and replace the wrong ones with a new ones @@ -742,7 +932,7 @@ def _build_weights_triangulation(self, use_bilinear = False): longrib_max = normalized_longrib.max() longrib_min = normalized_longrib.min() - # evaluate an 
approx_grib_resolution by using 10 times the first longidure values + # evaluate an approx_grib_resolution by using 10 times the first longitude values # to check if the whole globe is covered approx_grib_resolution = abs(normalized_longrib[0]-normalized_longrib[1])*1.5 is_global_map = (360-(longrib_max-longrib_min)) 0 and not self._second_t_res: - if self._aggregation_halfweights: - iter_start = self._start - self._aggregation_step - iter_end = self._end - self._aggregation_step + 1 - else: - iter_start = self._start - self._aggregation_step + 1 - elif self._second_t_res: + if self._start == 0 or self._second_t_res: iter_start = self._start + iter_end = self._end - self._aggregation_step + 2 else: - iter_start = 0 + iter_start = self._start - self._aggregation_step + iter_end = self._end - self._aggregation_step + 1 for iter_ in range(iter_start, iter_end, self._aggregation_step): - if self._aggregation_halfweights == False and self._start == 0: - iter_from = iter_ + 1 + if self._aggregation_halfweights or self._start == 0: + iter_from = iter_ iter_to = iter_ + self._aggregation_step + 1 else: - iter_from = iter_ - iter_to = iter_ + self._aggregation_step - if self._aggregation_halfweights: - iter_to += 1 + iter_from = iter_ + 1 + iter_to = iter_ + self._aggregation_step + 1 temp_sum = np.zeros(shape_iter) v_ord_keys = list(v_ord.keys()) diff --git a/src/pyg2p/main/readers/netcdf.py b/src/pyg2p/main/readers/netcdf.py index 7190458..2d4ef8a 100644 --- a/src/pyg2p/main/readers/netcdf.py +++ b/src/pyg2p/main/readers/netcdf.py @@ -14,8 +14,9 @@ def __init__(self, nc_map): self._log(f'Reading {nc_map}') nc_map = nc_map.as_posix() if isinstance(nc_map, Path) else nc_map self._dataset = Dataset(nc_map) - self._var_name = list(self._dataset.variables.items())[-1][0] # get the last variable name - self._rows, self._cols = self._dataset.variables[self._var_name].shape # just use shape to know rows and cols... 
+ self.var_name = self.find_main_var(nc_map) + + self._rows, self._cols = self._dataset.variables[self.var_name].shape # just use shape to know rows and cols... if ('lon' in list(self._dataset.variables)): self.label_lat = 'lat' self.label_lon = 'lon' @@ -26,10 +27,7 @@ def __init__(self, nc_map): self._origX = self._dataset.variables[self.label_lon][:].min() self._origY = self._dataset.variables[self.label_lat][:].min() self._pxlW = self._dataset.variables[self.label_lon][1]-self._dataset.variables[self.label_lon][0] - self._pxlH = self._dataset.variables[self.label_lat][1]-self._dataset.variables[self.label_lat][0] - - self.var_name = self.find_main_var(nc_map) - + self._pxlH = self._dataset.variables[self.label_lat][1]-self._dataset.variables[self.label_lat][0] self.lat_min = self._dataset.variables[self.label_lat][:].min() self.lon_min = self._dataset.variables[self.label_lon][:].min() self.lat_max = self._dataset.variables[self.label_lat][:].max() @@ -52,8 +50,8 @@ def values(self): return data def get_lat_lon_values(self): - lats = np.reshape(self._dataset.variables[self.label_lat][:],(-1,1))*np.ones(self._dataset.variables[self._var_name].shape) - lons = self._dataset.variables[self.label_lon][:]*np.ones(self._dataset.variables[self._var_name].shape) + lats = np.reshape(self._dataset.variables[self.label_lat][:],(-1,1))*np.ones(self._dataset.variables[self.var_name].shape) + lons = self._dataset.variables[self.label_lon][:]*np.ones(self._dataset.variables[self.var_name].shape) return lats.data, lons.data def get_lat_values(self): diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py index 5d990c8..67009b2 100644 --- a/tests/test_interpolation.py +++ b/tests/test_interpolation.py @@ -45,6 +45,27 @@ def test_interpolation_create_scipy_invdist(self): assert shape_target == values_resampled.shape os.unlink('tests/data/tbl_pf10tp_550800_scipy_invdist.npy.gz') + @pytest.mark.slow + def test_interpolation_create_scipy_adw(self): + d = 
deepcopy(config_dict) + d['interpolation.create'] = True + d['interpolation.parallel'] = True + d['interpolation.mode'] = 'adw' + d['interpolation.adw_broadcasting'] = True + file = d['input.file'] + reader = GRIBReader(file) + messages = reader.select_messages(shortName='2t') + grid_id = messages.grid_id + missing = messages.missing_value + ctx = MockedExecutionContext(d, False) + interpolator = Interpolator(ctx, missing) + values_in = messages.values_first_or_single_res[messages.first_step_range] + lats, lons = messages.latlons + values_resampled = interpolator.interpolate_scipy(lats, lons, values_in, grid_id, messages.grid_details) + shape_target = PCRasterReader(d['interpolation.latMap']).values.shape + assert shape_target == values_resampled.shape + os.unlink('tests/data/tbl_pf10tp_550800_scipy_adw.npy.gz') + @pytest.mark.slow def test_interpolation_create_scipy_bilinear(self): d = deepcopy(config_dict)