Source code for w4h.mapping

"""The Mapping module contains the functions used for geospatial analysis throughout the package.
This includes some input/output as well as functions to make manipulatin of geospatial data more simple
"""

import datetime
import inspect
import json
import pathlib
import os
import warnings

import rioxarray as rxr
import xarray as xr
import geopandas as gpd
import pandas as pd
import numpy as np
import pyproj
import shapely
from owslib.wcs import WebCoverageService
from shapely.geometry import Point
from urllib.request import urlopen
from rasterio import MemoryFile

import w4h

from w4h import logger_function, verbose_print

lidarURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS'


# Align and coregister rasters

[docs]
def align_rasters(grids_unaligned=None, model_grid=None,
                  no_data_val_grid=0, verbose=False, log=False):
    """Reprojects two rasters and aligns their pixels

    Parameters
    ----------
    grids_unaligned : list or xarray.DataArray
        Contains a list of grids or one unaligned grid
    model_grid : xarray.DataArray
        Contains model grid
    no_data_val_grid : int, default=0
        Sets value of no data pixels
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    alignedGrids : list or xarray.DataArray
        Contains aligned grids
    """
    
    if grids_unaligned is None:
        grids_unaligned = [w4h.get_resources()['surf_elev'], w4h.get_resources()['bedrock_elev']]
    if model_grid is None:
        model_grid = w4h.get_resources()['model_grid']

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(align_rasters, locals(), exclude_params=['grids_unaligned', 'model_grid'])
    if isinstance(grids_unaligned, (tuple, list)):
        alignedGrids = []
        for g in grids_unaligned:

            alignedGrid = g.rio.reproject_match(model_grid)

            try:
                no_data_val_grid = alignedGrid.attrs['_FillValue'] #Extract from dataset itself
            except:
                pass
            alignedGrid = alignedGrid.where(alignedGrid != no_data_val_grid)  #Replace no data values with NaNs
            alignedGrids.append(alignedGrid)
    else:
        alignedGrid = grids_unaligned.rio.reproject_match(model_grid)

        try:
            noDataVal = alignedGrid.attrs['_FillValue'] #Extract from dataset itself
        except:
            pass

        alignedGrids = alignedGrid.where(alignedGrid != noDataVal, other=np.nan)  #Replace no data values with NaNs
    
    return alignedGrids


# Convert coords in columns to geometry in geopandas dataframe

[docs]
def coords2geometry(df_no_geometry, xcol='LONGITUDE', ycol='LATITUDE', zcol='ELEV_FT', input_coords_crs='EPSG:4269', output_crs='EPSG:5070', use_z=False, wkt_col='WKT', geometry_source='coords', verbose=False, log=False):
    """Adds geometry to points with xy coordinates in the specified coordinate reference system.

    Parameters
    ----------
    df_no_geometry : pandas.Dataframe
        a Pandas dataframe containing points
    xcol : str, default='LONGITUDE'
        Name of column holding x coordinate data in df_no_geometry
    ycol : str, default='LATITUDE'
        Name of column holding y coordinate data in df_no_geometry
    zcol : str, default='ELEV_FT'
        Name of column holding z coordinate data in df_no_geometry
    input_coords_crs : str, default='EPSG:4269'
        Name of crs used for geometry
    use_z : bool, default=False
        Whether to use z column in calculation
    geometry_source : str {'coords', 'wkt', 'geometry'}
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gdf : geopandas.GeoDataFrame
        Geopandas dataframe with points and their geometry values

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(coords2geometry, locals(), exclude_params=['df_no_geometry'])

    if isinstance(df_no_geometry, gpd.GeoDataFrame):
        gdf = df_no_geometry
    else:
        wktList = ['wkt', 'well known text', 'wellknowntext', 'well_known_text', 'w']
        coords_list = ['coords', 'coordinates', 'xycoords', 'xy', 'xyz', 'xyzcoords', 'xycoordinates', 'xyzcoordinates', 'c']
        geometryList = ['geometry', 'geom', 'geo', 'g']
        if geometry_source.lower() in wktList:
            from shapely import wkt
            df_no_geometry['geometry'] = df_no_geometry[wkt_col].apply(wkt.loads)
            df_no_geometry.drop(wkt_col, axis=1, inplace=True) #Drop WKT column
            geometry = df_no_geometry['geometry']
        elif geometry_source.lower() in coords_list:#coords = pd.concat([y, x], axis=1)
            x = np.stack(df_no_geometry[xcol])
            y = np.stack(df_no_geometry[ycol])
            if use_z:
                z = np.stack(df_no_geometry[zcol])
                geometry = gpd.points_from_xy(x, y, z=z, crs=input_coords_crs)
            else:
                geometry = gpd.points_from_xy(x, y, crs=input_coords_crs)
        elif geometry_source.lower() in geometryList:
            geometry = df_no_geometry['geometry']
        else:
            warnings.warn(message=f"""The parameter geometry_source={geometry_source} is not recognized.
                        Should be one of 'coords' (if x, y (and/or z) columns with coordintes used), 
                        'wkt' (if column with wkt string used), or 
                        'geometry' (if column with shapely geometry objects used, as with a GeoDataFrame)""")
            
        gdf = gpd.GeoDataFrame(df_no_geometry, geometry=geometry, crs=input_coords_crs).to_crs(output_crs)
    return gdf



# Clip a geodataframe to a study area

[docs]
def clip_gdf2study_area(study_area, gdf, log=False, verbose=False):
    """Clips dataframe to only include things within study area.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Inputs study area polygon
    gdf : geopandas.GeoDataFrame
        Inputs point data
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gdfClip : geopandas.GeoDataFrame
        Contains only points within the study area
    
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(clip_gdf2study_area, locals(), exclude_params=['study_area', 'gdf'])

    if study_area is None:
        return gdf
    else:
        studyArea_proj = study_area.to_crs(gdf.crs).copy()
        gdfClip = gpd.clip(gdf, studyArea_proj) #Easier to project just study area to ensure data fit
        gdfClip = gdfClip.reset_index(drop=True) #Reset index
    
    return gdfClip



# Get drift and layer thickness, given a surface and bedrock grid

[docs]
def get_drift_thick(surface_elev=None, bedrock_elev=None, layers=9, plot=False, verbose=False, log=False):
    """Finds the distance from surface_elev to bedrock_elev and then divides by number of layers to get layer thickness.

    Parameters
    ----------
    surface_elev : rioxarray.DataArray
        array holding surface elevation
    bedrock_elev : rioxarray.DataArray
        array holding bedrock elevation
    layers : int, default=9
        number of layers needed to calculate thickness for
    plot : bool, default=False
        tells function to either plot the data or not

    Returns
    -------
    driftThick : rioxarray.DataArray
        Contains data array containing depth to bedrock at each point
    layerThick : rioxarray.DataArray
        Contains data array with layer thickness at each point

    """
    
    if surface_elev is None:
        surface_elev = w4h.get_resources()['surf_elev']
    if bedrock_elev is None:
        bedrock_elev = w4h.get_resources()['bedrock_elev']
    
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(get_drift_thick, locals(), exclude_params=['surface_elev', 'bedrock_elev'])
    xr.set_options(keep_attrs=True)

    driftThick = surface_elev - bedrock_elev
    driftThick = driftThick.clip(0,max=5000,keep_attrs=True)
    if plot:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        maxThick = np.nanmax(driftThick)
        if maxThick > 550:
            maxThick = 550
        dtPlot = driftThick.plot(vmin=0, vmax=maxThick, ax=ax)
        ax.set_title("Drift Thickness")
    try:
        noDataVal = driftThick.attrs['_FillValue'] #Extract from dataset itself
    except:
        noDataVal = 100001
    
    driftThick = driftThick.where(driftThick <100000, other=np.nan)  #Replace no data values with NaNs
    driftThick = driftThick.where(driftThick >-100000, other=np.nan)  #Replace no data values with NaNs

    layerThick = driftThick/layers
    
    xr.set_options(keep_attrs='default')

    return driftThick, layerThick



# Clip a grid to a study area

[docs]
def grid2study_area(study_area, grid, output_crs='EPSG:5070',verbose=False, log=False):
    """Clips grid to study area.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        inputs study area polygon
    grid : xarray.DataArray
        inputs grid array
    output_crs : str, default='EPSG:5070'
        inputs the coordinate reference system for the study area
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    grid : xarray.DataArray
        returns xarray containing grid clipped only to area within study area

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(grid2study_area, locals(), exclude_params=['study_area', 'grid'])
    
    if output_crs=='':
        output_crs=study_area.crs

    #Get input CRS's
    grid_crs = pyproj.CRS.from_wkt(grid.rio.crs.to_wkt())
    study_area_crs = study_area.crs
    
    # Reproject if needed
    if output_crs != grid_crs:
        grid = grid.rio.reproject(output_crs)
    grid_crs = pyproj.CRS.from_wkt(grid.rio.crs.to_wkt())

    if grid_crs != study_area_crs:
        study_area = study_area.to_crs(grid_crs)   

    # We'll just clip to outer bounds of study area
    saExtent = study_area.total_bounds

    if grid['y'][-1].values - grid['y'][0].values > 0:
        miny=saExtent[1]
        maxy=saExtent[3]
    else:
        miny=saExtent[3]
        maxy=saExtent[1]        
        
    if grid['x'][-1].values - grid['x'][0].values > 0:
        minx=saExtent[0]
        maxx=saExtent[2]
    else:
        minx=saExtent[2]
        maxx=saExtent[0]
    
    # "Clip" it
    grid = grid.sel(x=slice(minx, maxx), y=slice(miny, maxy))
    if 'band' in grid.dims:
        grid = grid.isel(band=0)

    return grid



# Read a grid from a file in using rioxarray

[docs]
def read_grid(grid_path=None, grid_type='model', no_data_val_grid=0, use_service=False, study_area=None,  grid_crs=None, output_crs='EPSG:5070', verbose=False, log=False, **kwargs):
    """Reads in grid

    Parameters
    ----------
    grid_path : str or pathlib.Path, default=None
        Path to a grid file
    grid_type : str, default='model'
        Sets what type of grid to load in
    no_data_val_grid : int, default=0
        Sets the no data value of the grid
    use_service : str, default=False
        Sets which service the function uses
    study_area : geopandas.GeoDataFrame, default=None
        Dataframe containing study area polygon
    grid_crs : str, default=None
        Sets crs to use if clipping to study area
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    gridIN : xarray.DataArray
        Returns grid
    
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_grid, locals(), exclude_params=['study_area'])

    # Get study area and study_areaa_crs situated first
    if study_area is not None:
        if not isinstance(study_area, gpd.GeoDataFrame):
            if isinstance(study_area, (str, pathlib.Path)) or pathlib.Path(study_area).exists():
                if verbose:
                    print(f"\tThe study_area parameter is not a geopandas.GeoDataframe object. Attempting to read now.")
                    
                study_area_kwargs = {k: v for k, v in kwargs.items() if k in inspect.signature(read_study_area).parameters.keys()}
                study_area = read_study_area(study_area=study_area, output_crs=output_crs, verbose=verbose, log=log, **study_area_kwargs)
            else:
                raise RuntimeError(f'\tstudy_area={study_area} was specified, but this is not a geopandas.GeoDataFrame and cannot be read by w4h.read_study_area()')
        
        study_area_crs = study_area.crs
    
        if study_area_crs is None:
            study_area_crs = study_area.crs
        study_area = study_area.to_crs(output_crs)
        study_area_crs = study_area.crs

    if isinstance(grid_path, (xr.DataArray, xr.Dataset)):
        gridIN = grid_path

        if study_area is not None:
            if grid_crs is None:
                try:
                    grid_crs = pyproj.CRS.from_wkt(gridIN.rio.crs.to_wkt())
                except Exception:
                    if verbose:
                        print(f"\t CRS could not be extracted from grid itself. Assuming sample CRS:\n\t {w4h.get_resources()['ISWS_CRS']}")
                    iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                    gridIN.rio.write_crs(iswsCRS)
            elif grid_crs.lower() == 'isws':
                iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                gridIN.rio.write_crs(iswsCRS)
            else:
                try:
                    gridIN.rio.write_crs(grid_crs)
                except Exception as e:
                    raise RuntimeError(e)

            gridIN = gridIN.rio.reproject(output_crs)
            gridIN = grid2study_area(study_area=study_area, grid=gridIN, output_crs=output_crs,)
        else:
            gridIN = gridIN.rio.reproject(output_crs)
            if 'band' in gridIN.dims:
                gridIN = gridIN.sel(band=1)
    elif grid_type == 'model':
        if 'read_grid' in list(kwargs.keys()):
            rgrid = kwargs['read_grid']
        else:
            rgrid = True
        try:
            gridIN = read_model_grid(model_grid_path=grid_path, study_area=study_area,  no_data_val_grid=0, read_grid=rgrid, grid_crs=grid_crs, output_crs=output_crs, verbose=verbose)
        except Exception as e:
            print("ERROR reading in model grid")
            print(e)
    else:
        if str(use_service).lower() == 'wcs':
            gridIN = read_wcs(study_area, wcs_url=lidarURL, **kwargs)
        elif str(use_service).lower() == 'wms':
            gridIN = read_wms(study_area, wcs_url=lidarURL, **kwargs)
        elif use_service is True:
            #Deafults to wms
            gridIN = read_wms(study_area, wcs_url=lidarURL, **kwargs)
        else:
            gridIN = rxr.open_rasterio(grid_path)

        if study_area is not None:
            if grid_crs is None:
                try:
                    grid_crs = pyproj.CRS.from_wkt(gridIN.rio.crs.to_wkt())
                except Exception:
                    if verbose:
                        print(f"\t CRS could not be extracted from grid itself. Assuming sample CRS:\n\t {w4h.get_resources()['ISWS_CRS']}")
                    iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                    gridIN.rio.write_crs(iswsCRS)
            elif grid_crs.lower() == 'isws':
                iswsCRS = pyproj.CRS.from_json_dict(w4h.read_dict(w4h.get_resources()['ISWS_CRS']))
                gridIN.rio.write_crs(iswsCRS)
                
            gridIN = gridIN.rio.reproject(output_crs)
            gridIN = grid2study_area(study_area=study_area, grid=gridIN, output_crs=output_crs,)
        else:
            gridIN = gridIN.rio.reproject(output_crs)
            if 'band' in gridIN.dims:
                gridIN = gridIN.sel(band=1)

        try:
            no_data_val_grid = gridIN.attrs['_FillValue'] #Extract from dataset itself
        except:
            pass
                
        gridIN = gridIN.where(gridIN != no_data_val_grid, other=np.nan)  #Replace no data values with NaNs
    
    # Remove extra "band" dimension if only a single band
    if 'band' in gridIN.sizes.keys() and gridIN.sizes['band'] == 1:
        gridIN = gridIN.isel(band=0)

    if 'band' in gridIN.coords and gridIN.coords['band'].shape == ():
        gridIN = gridIN.drop_vars('band')
    
    return gridIN



# Read the model grid into (rio)xarray

[docs]
def read_model_grid(model_grid_path, study_area=None, no_data_val_grid=0, read_grid=True, node_byspace=True, grid_crs=None, output_crs='EPSG:5070', verbose=False, log=False):
    """Reads in model grid to xarray data array

    Parameters
    ----------
    grid_path : str
        Path to model grid file
    study_area : geopandas.GeoDataFrame, default=None
        Dataframe containing study area polygon
    no_data_val_grid : int, default=0
        value assigned to areas with no data
    readGrid : bool, default=True
        Whether function to either read grid or create grid
    node_byspace : bool, default=False
        Denotes how to create grid
    output_crs : str, default='EPSG:5070'
        Inputs study area crs
    grid_crs : str, default=None
        Inputs grid crs
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    modelGrid : xarray.DataArray
        Data array containing model grid
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_model_grid, locals(), exclude_params='study_area')
    
    if read_grid and model_grid_path is not None:
        modelGridIN = rxr.open_rasterio(model_grid_path)

        if grid_crs is None:
            try:
                grid_crs=modelGridIN.spatial_ref.crs_wkt
            except Exception:
                iswsCRSPath = w4h.get_resources()['ISWS_CRS']
                iswsCRS = w4h.read_dict(iswsCRSPath, keytype=None)
                grid_crs = iswsCRS              
                modelGridIN.rio.write_crs(grid_crs)
        elif isinstance(grid_crs, str) and grid_crs.lower()=='isws':
            iswsCRSPath = w4h.get_resources()['ISWS_CRS']
            iswsCRS = w4h.read_dict(iswsCRSPath, keytype=None)            
            grid_crs = iswsCRS              
            modelGridIN.rio.write_crs(grid_crs)
        elif isinstance(grid_crs, pathlib.PurePath) or (isinstance(grid_crs, str) and pathlib.Path(grid_crs).exists()):
            iswsCRSPath = w4h.get_resources()['ISWS_CRS']
            grid_crs = w4h.read_dict(iswsCRSPath, keytype=None)            
            modelGridIN.rio.write_crs(grid_crs)
        else:
            warnings.warn(f'CRS Specification for grid is {grid_crs}, but this cannot be written to the grid')
        
        modelGridIN = modelGridIN.rio.reproject(output_crs) 
           
        if study_area is not None:                
            study_area = study_area.to_crs(output_crs)
            modelGrid = grid2study_area(study_area=study_area, grid=modelGridIN, output_crs=output_crs)
        else:
            modelGrid = modelGridIN

        try:
            noDataVal = float(modelGrid.attrs['_FillValue']) #Extract from dataset itsel
        except:
            noDataVal = no_data_val_grid

        modelGrid = modelGrid.where(modelGrid != noDataVal, other=np.nan)   #Replace no data values with NaNs
        modelGrid = modelGrid.where(modelGrid == np.nan, other=1) #Replace all other values with 1
        modelGrid.rio.reproject(grid_crs, inplace=True)     
    elif model_grid_path is None and study_area is None:
        if verbose:
            print("ERROR: Either model_grid_path or study_area must be defined.")
    else:
        spatRefDict = w4h.read_dict(iswsCRSPath, keytype=None)            

        saExtent = study_area.total_bounds

        startX = saExtent[0] #Starting X Coordinate
        startY = saExtent[1] #starting Y Coordinate
        
        endX = saExtent[2]
        endY = saExtent[3]
        
        if node_byspace:
            xSpacing = 625 #X Node spacing 
            ySpacing = xSpacing #Y Node spacing  
            
            x = np.arange(startX, endX, xSpacing)
            y = np.arange(startY, endY, ySpacing)
        else:
            xNodes = 100 #Number of X Nodes
            yNodes = 100 #Number of Y Nodes

            x = np.linspace(startX, endX, num=xNodes)
            y = np.linspace(startY, endY, num=yNodes)        
        
        xx, yy = np.meshgrid(x, y)
        zz = np.ones_like(xx).transpose()

        yIn = np.flipud(y)

        coords = {'x':x,'y':yIn, 'spatial_ref':0}
        dims = {'x':x,'y':yIn}
        
        modelGrid = xr.DataArray(data=zz,coords=coords,attrs={'_FillValue':no_data_val_grid}, dims=dims)
        modelGrid.spatial_ref.attrs['spatial_ref'] = {}
        if grid_crs is None or grid_crs=='isws' or grid_crs=='ISWS':
            for k in spatRefDict:
                modelGrid.spatial_ref.attrs[k] = spatRefDict[k]
    
    # Remove extra "band" dimension if only a single band
    if 'band' in modelGrid.sizes.keys() and modelGrid.sizes['band'] == 1:
        modelGrid = modelGrid.isel(band=0)
    
    return modelGrid



# Read study area shapefile (or other file) into geopandas

[docs]
def read_study_area(study_area=None, study_area_crs=None, output_crs='EPSG:5070', buffer=None, return_original=False, log=False, verbose=False, **read_file_kwargs):
    """Read study area geospatial file into geopandas

    Parameters
    ----------
    study_area : str, pathlib.Path, geopandas.GeoDataFrame, or shapely.Geometry
        Filepath to any geospatial file readable by geopandas. 
        Polygon is best, but may work with other types if extent is correct.
        If shapely.Geometry, the crs should also be specified using a valid input to gpd.GeoDataFrame(crs=<crs>).
    study_area_crs : str, tuple, dict, optional
        Not needed unless CRS must be read in manually (e.g, with a shapely.Geometry). CRS designation readable by geopandas/pyproj.
    output_crs : str, tuple, dict, optional
        CRS to transform study_area to before returning. CRS designation should be readable by geopandas/pyproj. By default, 'EPSG:5070'.
    buffer : None or numeric, default=None
        If None, no buffer created. If a numeric value is given (float or int, for example), a buffer will be created at that distance in the unit of the study_area_crs.
    return_original : bool, default=False
        Whether to return the (reprojected) study area as well as the (reprojected) buffered study area. Study area is only used for clipping data, so usually return_original=False is sufficient.
    log : bool, default = False
        Whether to log results to log file, by default False
    verbose : bool, default=False
        Whether to print status and results to terminal

    Returns
    -------
    studyAreaIN : geopandas dataframe
        Geopandas dataframe with polygon geometry.
    """
    
    if study_area is None:
        if verbose:
            print("\tNo study_area specified, using the study area in the included sample data.")
        study_area = w4h.get_resources()['study_area']

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_study_area, locals())
    
    if isinstance(study_area, (gpd.GeoDataFrame, gpd.GeoSeries)):
        studyAreaIN = study_area
    elif isinstance(study_area, shapely.Geometry):
        studyAreaIN = gpd.GeoDataFrame(index=[0], crs=study_area_crs, geometry=[study_area])
    else:
        studyAreaIN = gpd.read_file(study_area, **read_file_kwargs)
    studyAreaIN.to_crs(output_crs, inplace=True)

    # Create a buffered study area for clipping
    if buffer is not None:
        if return_original:
            studyAreaNoBuffer = studyAreaIN
        studyAreaIN = studyAreaIN.buffer(distance=buffer)
        
        if verbose:
            print('\tBuffer applied.')
    
    if verbose:
        print("\tStudy area read.")

    if return_original:
        return studyAreaIN, studyAreaNoBuffer
    return studyAreaIN



# Read wcsservice into rioxarray

[docs]
def read_wcs(study_area, wcs_url=lidarURL, res_x=30, res_y=30, verbose=False, log=False, **kwargs):
    """Reads a WebCoverageService from a url and returns a rioxarray dataset containing it.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Dataframe containing study area polygon
    wcs_url : str, default=lidarURL
    Represents the url for the WCS
    res_x : int, default=30
        Sets resolution for x axis
    res_y : int, default=30
        Sets resolution for y axis
    log : bool, default = False
        Whether to log results to log file, by default False
    **kwargs

    Returns
    -------
    wcsData_rxr : xarray.DataArray
        A xarray dataarray holding the image from the WebCoverageService
    """
    #Drawn largely from: https://git.wur.nl/isric/soilgrids/soilgrids.notebooks/-/blob/master/01-WCS-basics.ipynb
    
    #30m DEM
    #wcs_url = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_DEM_30M/ImageServer/WCSServer?request=GetCapabilities&service=WCS'
    #lidar url:
    #lidarURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/IL_Statewide_Lidar_DEM_WGS/ImageServer/WCSServer?request=GetCapabilities&service=WCS'

    #studyAreaPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\ESL_StudyArea_5mi.shp"
    #study_area = gpd.read_file(studyAreaPath)
    if study_area is None:
        print('ERROR: study_area must be specified to use read_wcs (currently set to {})'.format(study_area))
        return

    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if verbose:
        verbose_print(read_wcs, locals(), exclude_params=['study_area'])      

    if 'wcs_url' in kwargs:
        wcs_url = kwargs['wcs_url']
    if 'res_x' in kwargs:
        res_x = kwargs['res_x']
    if 'res_y' in kwargs:
        res_y = kwargs['res_y']
    
    width_in = ''
    height_in= ''

    #Create coverage object
    my_wcs = WebCoverageService(wcs_url, version='1.0.0') 
    #names = [k for k in my_wcs.contents.keys()]
    #print(names)
    dataID = 'IL_Statewide_Lidar_DEM'
    data = my_wcs.contents[dataID]
    dBBox = data.boundingboxes #Is this an error?
    
    study_area = study_area.to_crs(data.boundingboxes[0]['nativeSrs'])
    saBBox = study_area.total_bounds
    
    #In case study area bounding box goes outside data bounding box, use data bounding box values
    newBBox = []
    for i,c in enumerate(dBBox[0]['bbox']):
        if i == 0 or i==2:
            if saBBox[i] < c:
                newBBox.append(saBBox[i])
            else:
                newBBox.append(c)
        else:
            if saBBox[i] > c:
                newBBox.append(saBBox[i])
            else:
                newBBox.append(c)

    #Recalculate resolution if it is too fine to read in
    #Start by getting the area of the study area bounding box
    saWidth = saBBox[2]-saBBox[0]
    saHeight = saBBox[3]-saBBox[1]
    saBBoxAreaM = saWidth*saHeight
    saBBoxAreaKM = saBBoxAreaM/(1000*1000) #Area in km^2

    if saBBoxAreaM/(res_x*res_y) > (4100*15000)*0.457194: #What I think might be the max download size?
        print("Resolution inputs overriden, file request too large.")
        res_x=str(round(saWidth/2500, 2))

        width_in  = str(int(saWidth/float(res_x )))
        height_in = str(int(saHeight/float(res_x)))
        
        res_y=str(round(saHeight/height_in, 2))

        print('New resolution is: '+res_x+'m_x X '+res_y+'m_y' )
        print('Dataset size: '+width_in+' pixels_x X '+height_in+' pixels_y')

    bBox = tuple(newBBox)
    bBox_str = str(tuple(newBBox)[1:-1]).replace(' ','')
    dataCRS = 'EPSG:3857'

    #Format WCS request using owslib
    response = my_wcs.getCoverage(
        identifier=my_wcs[dataID].id, 
        crs=dataCRS,#'urn:ogc:def:crs:EPSG::26716',
        bbox=bBox,
        resx=res_x, 
        resy=res_y,
        timeout=60,
        #width = width_in, height=height_in,
        format='GeoTIFF')
    response

    #If I can figure out url, this might be better?
    #baseURL = r'https://data.isgs.illinois.edu/arcgis/services/Elevation/'+dataID+'/ImageServer/WCSServer'
    #addonRequestURL = '?request=GetCoverage&service=WCS&bbox='+bBox_str+'&srs='+dataCRS+'&format=GeoTIFF'+'&WIDTH='+width_in+'&HEIGHT='+height_in+')'
    #reqURL = baseURL+addonRequestURL
    #wcsData_rxr =  rxr.open_rasterio(reqURL)

    with MemoryFile(response) as memfile:
        with memfile.open() as dataset:
            wcsData_rxr =  rxr.open_rasterio(dataset)

    return wcsData_rxr



# Read wms service into rioxarray

[docs]
def read_wms(study_area, layer_name='IL_Statewide_Lidar_DEM_WGS:None', wms_url=lidarURL, srs='EPSG:3857', 
             clip_to_studyarea=True, bbox=[-9889002.615500,5134541.069716,-9737541.607038,5239029.627400],
             res_x=30, res_y=30, size_x=512, size_y=512, 
             format='image/tiff', verbose=False, log=False, **kwargs):
    """
    Reads a WebMapService from a url and returns a rioxarray dataset containing it.

    Parameters
    ----------
    study_area : geopandas.GeoDataFrame
        Dataframe containg study area polygon
    layer_name : str, default='IL_Statewide_Lidar_DEM_WGS:None'
        Represents the layer name in the WMS
    wms_url : str, default=lidarURL
        Represents the url for the WMS
    srs : str, default='EPSG:3857'
        Sets the srs
    clip_to_studyarea : bool, default=True
        Whether to clip to study area or not
    res_x : int, default=30
        Sets resolution for x axis
    res_y : int, default=512
        Sets resolution for y axis
    size_x : int, default=512
        Sets width of result
    size_y : int, default=512
        Sets height of result
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    wmsData_rxr : xarray.DataArray
        Holds the image from the WebMapService
    """
    if study_area is None:
        print('ERROR: study_area must be specified to use read_wms (currently set to {})'.format(study_area))
        return
    
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(read_wms, locals(), exclude_params=['study_area'])
    from owslib.wms import WebMapService
    # Define WMS endpoint URL
    if 'wms_url' in kwargs:
        wms_url = kwargs['wms_url']
    else:
        wms_url = wms_url

    # Create WMS connection object
    wms = WebMapService(wms_url)
    # Print available layers
    #print(wms.contents)

    # Select desired layer
    if 'layer_name' in kwargs:
        layer = kwargs['layer_name']
    else:
        layer = layer_name
    
    data = wms.contents#[layer]
    if 'srs' in kwargs:
        studyArea_proj = study_area.to_crs(kwargs['srs'])
        saBBox = studyArea_proj.total_bounds
    else:
        studyArea_proj = study_area.to_crs(srs)
    
    saBBox = studyArea_proj.total_bounds

    if layer == 'IL_Statewide_Lidar_DEM_WGS:None':
        dBBox = data['0'].boundingBox #Is this an error?

        gpdDict = {'Label': ['Surf Data Box'], 'geometry': [shapely.geometry.Polygon(((dBBox[0], dBBox[1]), (dBBox[0], dBBox[3]), (dBBox[2], dBBox[3]), (dBBox[2], dBBox[1]), (dBBox[0], dBBox[1])))]}
        dBBoxGDF = gpd.GeoDataFrame(gpdDict, crs=dBBox[4])
        dBBoxGDF.to_crs(srs)

        #In case study area bounding box goes outside data bounding box, use data bounding box values
        newBBox = []
        for i,c in enumerate(dBBox):
            if type(c) is str:
                pass
            elif i == 0 or i==2:
                if saBBox[i] < c:
                    newBBox.append(saBBox[i])
                else:
                    newBBox.append(c)
            else:
                if saBBox[i] > c:
                    newBBox.append(saBBox[i])
                else:
                    newBBox.append(c)

    saWidth = saBBox[2]-saBBox[0]
    saHeight = saBBox[3]-saBBox[1]    
    # Check kwargs for rest of parameters
    if 'size_x' in kwargs:
        size_x = kwargs['size_x']
    if 'size_y' in kwargs:
        size_y = kwargs['size_y']
    if 'format' in kwargs:
        format = kwargs['format']
    if 'clip_to_studyarea' in kwargs:
        clip_to_studyarea = kwargs['clip_to_studyarea']
   
    #get the wms
    if clip_to_studyarea:
        img = wms.getmap(layers=[layer], srs=srs, bbox=saBBox, size=(size_x, size_y), format=format, transparent=True, timeout=60)        
    else:
        img = wms.getmap(layers=[layer], srs=srs, bbox=bbox, size=(size_x, size_y), format=format, transparent=True, timeout=60)

    #Save wms in memory to a raster dataset
    with MemoryFile(img) as memfile:
        with memfile.open() as dataset:
            wmsData_rxr = rxr.open_rasterio(dataset)

    #if clip_to_studyarea:
    #    wmsData_rxr = wmsData_rxr.sel(x=slice(saBBox[0], saBBox[2]), y=slice(saBBox[3], saBBox[1]))#.sel(band=1)

    return wmsData_rxr



# Function to sample raster points to points specified in geodataframe

[docs]
def sample_raster_points(raster=None, points_df=None, 
            well_id_col='API_NUMBER', xcol='LONGITUDE', ycol='LATITUDE', new_col='SAMPLED', 
            verbose=False, log=False):  
    """Sample raster values to points from geopandas geodataframe.

    Parameters
    ----------
    raster : rioxarray data array
        Raster containing values to be sampled.
    points_df : geopandas.geodataframe
        Geopandas dataframe with geometry column containing point values to sample.
    well_id_col : str, default="API_NUMBER"
        Column that uniquely identifies each well so multiple sampling points are not taken per well
    xcol : str, default='LONGITUDE'
        Column containing name for x-column, by default 'LONGITUDE.'
        This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.
    ycol : str, default='LATITUDE'
        Column containing name for y-column, by default 'LATITUDE.'
        This is used to output (potentially) reprojected point coordinates so as not to overwrite the original.    new_col : str, optional
    new_col : str, default='SAMPLED'
        Name for name of new column containing points sampled from the raster, by default 'SAMPLED'.
    verbose : bool, default=True
        Whether to send to print() information about progress of function, by default True.
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    points_df : geopandas.geodataframe
        Same as points_df, but with sampled values and potentially with reprojected coordinates.
    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)

    if raster is None:
        raster = w4h.get_resources()['surf_elev']
    if points_df is None:
        points_df = w4h.get_resources()['well_data']

    if verbose:
        verbose_print(sample_raster_points, locals(), exclude_params=['raster', 'points_df'])
        print(f"\tSampling {new_col} grid for at all well locations.")

    #Project points to raster CRS
    rastercrsWKT=raster.spatial_ref.crs_wkt
    if rastercrsWKT != points_df.crs:
        if verbose:
            print("\tTemporarily reprojecting raster data to point data's CRS.")
        pointCRS = points_df.crs.to_epsg()
        if pointCRS is None:
            pointCRS = points_df.crs.to_wkt()

        raster = raster.rio.reproject(pointCRS)
        #points_df = points_df.to_crs(rastercrsWKT)

    xCOLOUT = xcol+'_PROJ'
    yCOLOUT = ycol+'_PROJ'
    points_df[xCOLOUT] = points_df['geometry'].x
    points_df[yCOLOUT] = points_df['geometry'].y
    #xData = np.array(points_df[xCOLOUT].values)
    #yData = np.array(points_df[yCOLOUT].values)
    zData = []
    zID = []
    zInd = []

    # Get unique well values to reduce sampling time
    uniqueWells = points_df.drop_duplicates(subset=[well_id_col])
    if verbose:
        print(f"\t{uniqueWells.shape[0]} unique wells idenfied using {well_id_col} column")
    
    # Potential update: use xarray indexing to sample data values
    xs = xr.DataArray(uniqueWells[xCOLOUT].values, dims="points")
    ys = xr.DataArray(uniqueWells[yCOLOUT].values, dims="points")

    zData = raster.sel(x=xs, y=ys, method="nearest").values
    zInd = np.arange(uniqueWells.shape[0])
    zData = zip(uniqueWells[well_id_col], zData)
    
    # Loop over DataFrame rows
    # print("LOOP ITERATION")
    #for i, row in uniqueWells.iterrows():
        # Select data from DataArray at current coordinates and append to list
    #    zInd.append(i)
    #    zData.append([row[well_id_col], raster.sel(x=row[xCOLOUT], y=row[yCOLOUT], method='nearest').item()])

    inputtype = points_df.dtypes[well_id_col]
    wellZDF = pd.DataFrame(zData, columns=[well_id_col, new_col], index=pd.Index(zInd))

    # Merge each unique well's data with all well intervals
    wellZDF[well_id_col].astype(inputtype, copy=False)
    points_df = points_df.merge(wellZDF, how='left', on=well_id_col)
    #points_df[new_col] = zData#sampleDF[new_col]
    return points_df



# Merge xyz dataframe into a metadata dataframe

[docs]
def xyz_metadata_merge(xyz, metadata, verbose=False, log=False):
    """Add elevation to header data file.

    Parameters
    ----------
    xyz : pandas.Dataframe
        Contains elevation for the points
    metadata : pandas dataframe
        Header data file
    log : bool, default = False
        Whether to log results to log file, by default False

    Returns
    -------
    headerXYZData : pandas.Dataframe
        Header dataset merged to get elevation values

    """
    logger_function(log, locals(), inspect.currentframe().f_code.co_name)
    if verbose:
        verbose_print(xyz_metadata_merge, locals(), exclude_params=['xyz'])
    headerXYZData = metadata.merge(xyz, how='left', on='API_NUMBER')
    headerXYZData.drop(['LATITUDE_x', 'LONGITUDE_x'], axis=1, inplace=True)
    headerXYZData.rename({'LATITUDE_y':'LATITUDE', 'LONGITUDE_y':'LONGITUDE'}, axis=1, inplace=True)
    return headerXYZData
Source code for w4h.mapping

w4h

Navigation

Related Topics