# -*- coding: utf-8 -*-
"""
Contains objects for interfacing ``gridwxcomp`` with gdal command line tools
used for spatial interpolation of scattered point data.
"""
import numpy as np
import os
import pandas as pd
from pathlib import Path
import subprocess
import xml.dom.minidom
import xml.etree.cElementTree
from .spatial import zonal_stats, calc_pt_error, make_points_file
from .plot import station_bar_plot
from .util import read_config, reproject_crs_for_bounds
[docs]class InterpGdal(object):
"""
Usage of gdal command line tools within ``gridwxcomp``, currently
utilizes the `GDAL grid <https://www.gdal.org/gdal_grid.html>`__
command line tool.
Arguments:
summary_csv_path (str): path to [var]_summary_comp CSV file created
by :func:`gridwxcomp.calc_bias_ratios.calc_bias_ratios` containing
point bias ratios and statistics, station coordinates, etc.
config_path (str): path to the input configuration file with control
parameters such as spatial coordinate reference system, resolution,
and the bounds for spatial interpolation.
Attributes:
summary_csv_path (:obj:`pathlib.Path`): absolute path object to
input ``summary_csv_path``.
config_path (:obj:`pathlib.Path`): absolute path to configuration
input file.
config_dict (dict): dictionary of items read from the configuration
file.
layers (list): list of layers in ``summary_csv_path`` to interpolate
e.g. when using :meth:`InterpGdal.gdal_grid` with ``layer="all"``
defaults to :attr:`InterpGdal.default_layers`.
grid_bounds (tuple or None): default None. Extent for interpolation
raster in order (min long, max long, min lat, max lat).
interp_meth (str): default 'invdist'. gdal_grid interpolation method.
interped_rasters (list): empty list that is appended with
:obj:`pathlib.Path` objects to interpolated rasters after using
:meth:`InterpGdal.gdal_grid`.
params (dict or None): default None. After :meth:`InterpGdal.gdal_grid`
``params`` is updated with the last used interpolation parameters
in the form of a dictionary with parameter names as keys.
Example:
The :class:`InterpGdal` class is useful for experimenting with multiple
interpolation algorithms provided by gdal which are optimized and
often sensitive to multiple parameters. We can use the object
to loop over a range of parameter combinations to test how algorithms
perform, we might pick a single layer to test, in this case the
growing season bias ratios between station and gridded reference
evapotranspiration (etr_mm). Below is a routine to conduct a
basic sensitivity analysis of the power and smooth parameters of the
inverse distance to a power interpolation method
>>> from gridwxcomp.interpgdal import InterpGdal
>>> import os
>>> # create instance variable from summary csv
>>> summary_file = 'PATH/TO/etr_mm_summary_comp.csv'
>>> config_file = 'PATH/TO/gridwxcomp_config.ini'
>>> # create a InterpGdal instance
>>> test = InterpGdal(summary_file)
>>> layer = 'grow_mean' # could be a list
>>> # run inverse distance interpolation with different params
>>> for p in range(1,10):
>>> for s in [0, 0.5, 1, 1.5, 2]:
>>> # build output directory based on parameter sets
>>> out_dir = os.path.join('spatial', 'invdist',
>>> 'power={}_smooth={}'.format(p, s))
>>> params = {'power': p, 'smooth': s}
>>> test.gdal_grid(out_dir=out_dir, layer=layer, params=params)
Note, we did not assign the interpolation method 'invdist' because it
is the default. To use another interpolation method we would
assign the ``interp_meth`` kwarg to :meth:`InterpGdal.gdal_grid`.
Similarly, we could experiment with other parameters which all can be
found in the class attribute :attr:`InterpGdal.default_params`. The
instance variable :attr:`InterpGdal.params` can also be used to save
metadata on parameters used for each run.
"""
# gdal_grid interpolation methods
interp_methods = ('average',
'invdist',
'invdistnn',
'linear',
'nearest')
"""
interp_methods (tuple): gdal_grid interpolation algorithms.
"""
default_layers = ('Jan',
'Feb',
'Mar',
'Apr',
'May',
'Jun',
'Jul',
'Aug',
'Sep',
'Oct',
'Nov',
'Dec',
'grow',
'annual',
'summer')
"""
default_layers (tuple): Layers to interpolate created by
:func:`gridwxcomp.calc_bias_ratios.calc_bias_ratios` and then
:func:`gridwxcomp.spatial.make_points_file`, e.g. "Jan" in the shapefile
which is "Jan_mean" found in ``summary_csv_path``.
"""
# interp params, method key, param dic as values
default_params = {
'invdist': {
'power': 2,
'smoothing': 0,
'radius1': 0,
'radius2': 0,
'angle': 0,
'max_points': 0,
'min_points': 0,
'nodata': -999
},
'invdistnn': {
'power': 2,
'smoothing': 0,
'radius': 10,
'max_points': 12,
'min_points': 0,
'nodata': -999
},
'average': {
'radius1': 0,
'radius2': 0,
'angle': 0,
'min_points': 0,
'nodata': -999
},
'linear': {
'radius': -1,
'nodata': -999
},
'nearest': {
'radius1': 0,
'radius2': 0,
'angle': 0,
'nodata': -999
}
}
"""
default_params (dict): Dictionary with default parameters for each
interpolation algorithm, slightly modified from GDAL defaults.
Keys are interpolation method names, keys are dictionaries with
parameter names keys and corresponding values.
"""
def __init__(self, summary_csv_path, config_path):
if not Path(summary_csv_path).is_file():
raise FileNotFoundError(
'Summary CSV file: {} not found!'.format(
Path(summary_csv_path).absolute())
)
if not Path(config_path).is_file():
raise FileNotFoundError(
'Configuration file: {} not found!'.format(
Path(config_path).absolute())
)
self.summary_csv_path = Path(summary_csv_path).absolute()
self.config_path = Path(config_path).absolute()
self.config_dict = read_config(config_path)
self.layers = list(self.default_layers) # mutable instance attr.
self.grid_bounds = None
self.interp_meth = 'invdist'
self.interped_rasters = [] # appended with Path objects
self.params = None # to hold last used interp. parameters as dict
[docs] def gdal_grid(self, layer='all', out_dir='',
interp_meth='invdist', params=None, scale_factor=1, z_stats=True,
res_plot=True, grid_id_name='GRID_ID', options=None):
"""
Run gdal_grid command line tool to interpolate point ratios.
For further information on theinterpolation algorithms including
their function, parameters, and options see
`gdal_grid <https://www.gdal.org/gdal_grid.html>`__.
Arguments:
layer (str, list): default 'all'. Name of summary file column
to interpolate, e.g. 'Jan_mean', or list of names. If 'all'
use all variables in mutable instance attribute "layers".
out_dir (str): default ''. Output directory to save rasters and
zonal stats CSV, always appended to the root dir of the
input summary CSV parent path that contains point ratios.
interp_meth (str): default 'invdist'. gdal interpolation algorithm
params (dict, str, or None): default None. Parameters for
interpolation algorithm. See examples for format rules.
bounds (tuple): default None. Tuple of bounding coordinates
in the following order (min long, max long, min lat, max lat)
which need to be in decimal degrees or meters.
scale_factor (float, int): default 1. Scaling factor to apply to
original grid resolution to create resampling resolution. If
scale_factor = 0.1, the interpolation resolution will be
one tenth of the grid resolution listed in the config file.
z_stats (bool): default True. Calculate zonal means of
interpolated surface to gridded cells in fishnet and save to a
CSV file. The CSV file will be saved to the same directory as
the interpolated raster file(s).
res_plot (bool): default True. Make bar plot for residual (error)
between interpolated and station value for ``layer``.
options (str or None): default None. Extra command line options for
gdal_grid spatial interpolation.
Returns:
None
Examples:
The default interpolation algorithm 'invdist' or inverse distance
weighting to a power to interpolate bias ratios in a summary CSV
file that was first created by :mod:`gridwxcomp.calc_bias_ratios`.
The default option will interpolate all layers in
:obj:`InterpGdal.default_layers` and calculate zonal statistics for
all layers. The interpolation resolution and projection are
specified by the user in the configuration file which was used to
create the :obj:`gridwxcomp.InterpGdal` object, however if they are
not specified there, the fallback used is Lambert Conformal Conic
projected coordinate reference system and 1000 m resolution. This
example limits the interpolation to two layers, growing season and
annual mean bias ratios,
>>> from gridwxcomp.interpgdal import InterpGdal
>>> summary_file = 'PATH/TO/[var]_summary_comp.csv'
>>> out_dir = 'default_params'
>>> layers = ['grow_mean', 'annual_mean']
>>>
>>> # create a InterpGdal instance
>>> test = InterpGdal(summary_file)
>>> # run inverse distance interpolation
>>> test.gdal_grid(out_dir=out_dir, layer=layers)
Note, zonal statistics to gridded cells and interpolated residual
plots are computed by default. A gridded fishnet must have been
previously created using :func:`gridwxcomp.spatial.make_grid`.
After running the code above the following files will be created
in the 'default_params' directory which will be built in the same
location as the input summary CSV::
default_params/
├── annual_mean.tiff
├── annual_mean_ESRI_102004.tiff
├── etr_mm_summary_comp_all_yrs.csv
├── etr_mm_summary_pts_wgs84.cpg
├── etr_mm_summary_pts_wgs84.dbf
├── etr_mm_summary_pts_wgs84.prj
├── etr_mm_summary_pts_wgs84.shp
├── etr_mm_summary_pts_wgs84.shx
├── etr_mm_summary_pts_ESRI_102004.cpg
├── etr_mm_summary_pts_ESRI_102004.dbf
├── etr_mm_summary_pts_ESRI_102004.prj
├── etr_mm_summary_pts_ESRI_102004.shp
├── etr_mm_summary_pts_ESRI_102004.shx
├── zonal_stats.csv
├── grow_mean.tiff
├── grow_mean_ESRI_102004.tiff
└── residual_plots/
├── annual_res.html
└── grow_res.html
GeoTiff interpolated raster files are now created for the select
layers. The file "zonal_stats.csv" contains grid_id as an index and
each layer zonal mean as columns. For example,
========== ================== ==================
grid_id grow_mean annual_mean
========== ================== ==================
511747 0.9650671287940088 0.9078723876243023
510361 0.9658465063428492 0.9097255715561022
508975 0.9667075970344162 0.9117676407214926
========== ================== ==================
There are several :class:`InterpGdal` instance attributes that
may be useful, for example to see the parameters that were used
for the last call to :meth:`InterpGdal.gdal_grid`
>>> test.params
{'power': 2,
'smoothing': 0,
'radius1': 0,
'radius2': 0,
'angle': 0,
'max_points': 0,
'min_points': 0,
'nodata': -999}
Or to find the paths to the interpolated raster files that
have been created by the instance (all), the "interped_rasters"
instance attribute is a list of all :obj:`pathlib.Path` objects
of absolute paths of raster files. To get them as strings,
>>> list(map(str, test.interped_rasters))
['PATH/TO/grow_mean.tiff',
'PATH/TO/annual_mean.tiff']
Similary, the raster extent that was used and will be used again for
any subsequent calls of :meth:`InterpGdal.gdal_grid` can be
retrieved by
>>> test.grid_bounds
(-111.74583329966664,
-108.74583330033335,
38.21250000003333,
40.462499999966674)
Raises:
KeyError: if interp_meth is not a valid gdal_grid interpolation
algorithm name.
"""
cwd = os.getcwd()
# out_dir as subdir from dir with summary CSV
out_dir = (Path(self.summary_csv_path).parent/Path(out_dir)).resolve()
if not out_dir.is_dir():
print('{}\nDoes not exist, creating directory'.format(str(out_dir)))
out_dir.mkdir(parents=True, exist_ok=True)
source_file = Path(self.summary_csv_path).name
source = source_file.replace('.csv', '_tmp')
if interp_meth not in InterpGdal.interp_methods:
err_msg = '{} not a valid interpolation method'.format(interp_meth)
raise KeyError(err_msg)
self.interp_meth = interp_meth
# look up default parameters for interpolation method
if not params:
params = InterpGdal.default_params.get(self.interp_meth)
# avoid zero NA values for zonal_stats, set default NA rep to -999
if not params.get('nodata'):
params['nodata'] = -999
# update instance parameters for later reference
self.params = params
# parameters to command line input str :name=value:name=value ...
param_str = ':'+':'.join(
'{}={}'.format(key, val) for (key, val) in params.items())
# get boundary and projection info
reproj_bounds = reproject_crs_for_bounds(
self.config_dict.get('input_bounds'),
self.config_dict.get('interpolation_resolution'),
self.config_dict.get('input_data_projection'),
self.config_dict.get('interpolation_projection'),
self.config_dict.get('interpolation_resolution_decimals'))
xmin = reproj_bounds['xmin']
xmax = reproj_bounds['xmax']
ymin = reproj_bounds['ymin']
ymax = reproj_bounds['ymax']
grid_res = self.config_dict['interpolation_resolution']
# create the points shapefile and construct path to the correct one in LCC projection
make_points_file(
self.summary_csv_path,
self.config_path,
grid_id_name=grid_id_name)
path_root = os.path.split(self.summary_csv_path)[0]
file_name = os.path.split(self.summary_csv_path)[1]
var_name = file_name.split('_summ')[0]
spatial_dir = os.path.join(path_root, 'spatial')
crs_proj = self.config_dict.get('interpolation_projection')
reproj_name = crs_proj.replace(':', '_')
reproj_shapefile_path = os.path.join(
spatial_dir, '{v}_summary_pts_{c}.shp'.format(
v=var_name, c=reproj_name))
# to parse options, like --config GDAL_NUM_THREADS update here
if not options:
options = ''
def _run_gdal_grid(layer):
"""reuse if running multiple layers"""
existing_layers = pd.read_csv(self.summary_csv_path).columns
if layer in self.default_layers and f'{layer}_mean' not in existing_layers:
print('\nError: {} does not exist in input CSV:\n {}'.format(
f'{layer}_mean', self.summary_csv_path), '\nSkipping interpolation.')
return
elif layer not in self.default_layers and layer not in existing_layers:
print('\nError: {} does not exist in input CSV:\n {}'.format(
f'{layer}', self.summary_csv_path), '\nSkipping interpolation.')
return
# move to out_dir to run gdal command
os.chdir(out_dir)
reproj_tiff_file = '{}_{}.tiff'.format(
layer, reproj_name)
# always in WGS84 to match grid
resampled_tiff_file = '{}.tiff'.format(layer)
# add raster path to instance if not already there (overwritten)
out_file = out_dir.joinpath(reproj_tiff_file)
# print message to console/logging about interpolation
grid_var = Path(self.summary_csv_path).name.split('_summ')[0]
_interp_msg(
grid_var, layer, self.interp_meth,
(scale_factor * grid_res), out_file)
# build command line arguments
cmd = (f'gdal_grid -l {var_name}_summary_pts_{reproj_name} '
f'-zfield {layer} -a {interp_meth}{param_str} '
f'-txe {xmin} {xmax} -tye {ymax} {ymin} '
f'-tr {grid_res * scale_factor} {grid_res * scale_factor} '
f'-ot Float32 -of GTiff {reproj_shapefile_path} '
f'{reproj_tiff_file}')
# run gdal_grid with arguments, x-platform
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = p.communicate()
if err:
print(err)
else:
if out_file not in self.interped_rasters:
self.interped_rasters.append(out_file)
p.stdout.close()
p.stderr.close()
# Resample using gdal warp, to the grid extent using bilinear method
# always in WGS 84 CRS
grid_extent = self.config_dict.get('input_bounds')
# resample resolution in decimal degrees
resample_res = self.config_dict.get('output_data_resolution')
cmd = (f'gdalwarp -overwrite -r bilinear '
f'-te {grid_extent["xmin"]} {grid_extent["ymin"]} '
f'{grid_extent["xmax"]} {grid_extent["ymax"]} '
f'-t_srs EPSG:4326 -tr {resample_res} {resample_res} '
f'{reproj_tiff_file} {resampled_tiff_file}')
p = subprocess.Popen(
cmd, shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = p.communicate()
if err:
print(err)
p.stdout.close()
p.stderr.close()
# Change directory back to CWD
os.chdir(cwd)
# calculate interpolated values and error at stations
# only calc residuals for mean bias ratios, i.e. not std dev, etc.
if layer in InterpGdal.default_layers:
calc_pt_error(
self.summary_csv_path, self.config_path, out_dir, layer,
grid_var, grid_id_name=grid_id_name)
else:
# delete tmp summary csv used in interpgdal _make_pt_vrt method
# normally deleted in calc_pt_error
tmp_csv = str(self.summary_csv_path).replace('.csv', '_tmp.csv')
if Path(tmp_csv).resolve().is_file():
Path(tmp_csv).resolve().unlink()
# zonal means extracted to gridded dataset cell index
if z_stats:
zonal_stats(
self.summary_csv_path,
out_dir.joinpath(resampled_tiff_file),
grid_id_name=grid_id_name)
# residual (error) bar plot, only for mean bias ratios
if res_plot and layer in InterpGdal.default_layers:
y_label = 'residual (interpolated minus station value)'
title = 'layer: {} algorithm: {} (gdal_grid) res: {:.0f} (m).'.\
format(layer, self.interp_meth, grid_res)
res_plot_dir = Path(out_dir)/'residual_plots'
subtitle = 'parameters: {}'.format(params)
# residual name has _res suffix
layer_name = f"{layer}_res"
source_file = Path(out_dir)/Path(self.summary_csv_path).name
station_bar_plot(
source_file, layer_name, out_dir=res_plot_dir,
y_label=y_label, title=title, subtitle=subtitle)
# run interpolation and zonal statistics depending on layer kwarg
if layer == 'all': # potential for multiprocessing
for item in self.layers:
_run_gdal_grid(item)
# run single field
elif isinstance(layer, str):
_run_gdal_grid(layer)
# run select list or tuple of layers
elif isinstance(layer, (list, tuple)):
for item in layer:
_run_gdal_grid(item)
def _prettify(elem):
"""
Return an indented, multiline XML string for a XML element tree.
"""
rough_string = xml.etree.cElementTree.tostring(elem, 'utf-8', method='xml')
reparsed = xml.dom.minidom.parseString(rough_string)
return reparsed.toprettyxml(indent=' ')
def _interp_msg(grid_var, layer, function, res, out_file):
print(
'\nInterpolating {g} point bias ratios for: {t}\n'.format(g=grid_var, t=layer),
'Using the "{}" method\n'.format(function),
'Resolution (pixel size) of output raster: {} meters'.format(res)
)
print('GeoTIFF raster will be saved to: \n', os.path.abspath(out_file))