skmap.misc.sample_groups
- sample_groups(points, *group_element_columns, spatial_resolution=None, temporal_resolution=None, date_column='date')[source]
Construct group IDs for spatial and temporal cross-validation.
Groups point samples into tiles of spatial_resolution width and height and/or intervals of temporal_resolution size. group_element_columns are also concatenated into the final group ID of each sample.
- Parameters:
points (GeoDataFrame) – GeoDataFrame containing point samples.
*group_element_columns – Names of additional columns to be concatenated into the final group IDs.
spatial_resolution (Union[int, float, None]) – Tile size (both x and y) for grouping, in sample CRS units.
temporal_resolution (Optional[timedelta]) – Interval size for grouping.
date_column (str) – Name of the column containing sample timestamps (as datetime objects).
- Return type:
ndarray
- Returns:
1D string array containing the group id of each sample.
Examples
>>> import geopandas as gp
>>> import shapely
>>> import numpy as np
>>> from datetime import datetime, timedelta
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.model_selection import cross_val_score, GroupKFold
>>>
>>> from skmap.misc import sample_groups
>>>
>>> np.random.seed(42)
>>>
>>> # construct some synthetic point data
>>> coords = np.random.random((1000, 2)) * 4000
>>> dates = datetime.now() + np.array([*map(
...     timedelta,
...     range(1000),
... )])
>>>
>>> points = gp.GeoDataFrame({
...     'geometry': shapely.points(coords),
...     'date': dates,
...     'group': np.random.choice(['a', 'b'], size=1000),
...     'predictor': np.random.random(1000),
...     'target': np.random.randint(2, size=1000),
... })
>>>
>>> # get the point groups
>>> groups = sample_groups(
...     points,
...     'group',
...     spatial_resolution=1000,
...     temporal_resolution=timedelta(days=365),
... )
>>>
>>> print(np.unique(groups))
['ax0y0t0' 'ax0y0t1' 'ax0y0t2' 'ax0y1t0' 'ax0y1t1' 'ax0y1t2' 'ax0y2t0'
 'ax0y2t1' 'ax0y2t2' 'ax0y3t0' 'ax0y3t1' 'ax0y3t2' 'ax1y0t0' 'ax1y0t1'
 'ax1y0t2' 'ax1y1t0' 'ax1y1t1' 'ax1y1t2' 'ax1y2t0' 'ax1y2t1' 'ax1y2t2'
 'ax1y3t0' 'ax1y3t1' 'ax1y3t2' 'ax2y0t0' 'ax2y0t1' 'ax2y0t2' 'ax2y1t0'
 'ax2y1t1' 'ax2y1t2' 'ax2y2t0' 'ax2y2t1' 'ax2y2t2' 'ax2y3t0' 'ax2y3t1'
 'ax2y3t2' 'ax3y0t0' 'ax3y0t1' 'ax3y0t2' 'ax3y1t0' 'ax3y1t1' 'ax3y1t2'
 'ax3y2t0' 'ax3y2t1' 'ax3y2t2' 'ax3y3t0' 'ax3y3t1' 'ax3y3t2' 'bx0y0t0'
 'bx0y0t1' 'bx0y0t2' 'bx0y1t0' 'bx0y1t1' 'bx0y1t2' 'bx0y2t0' 'bx0y2t1'
 'bx0y2t2' 'bx0y3t0' 'bx0y3t1' 'bx0y3t2' 'bx1y0t0' 'bx1y0t1' 'bx1y0t2'
 'bx1y1t0' 'bx1y1t1' 'bx1y1t2' 'bx1y2t0' 'bx1y2t1' 'bx1y2t2' 'bx1y3t0'
 'bx1y3t1' 'bx1y3t2' 'bx2y0t0' 'bx2y0t1' 'bx2y0t2' 'bx2y1t0' 'bx2y1t1'
 'bx2y1t2' 'bx2y2t0' 'bx2y2t1' 'bx2y2t2' 'bx2y3t0' 'bx2y3t1' 'bx2y3t2'
 'bx3y0t0' 'bx3y0t1' 'bx3y0t2' 'bx3y1t0' 'bx3y1t1' 'bx3y1t2' 'bx3y2t0'
 'bx3y2t1' 'bx3y2t2' 'bx3y3t0' 'bx3y3t1' 'bx3y3t2']
>>>
>>> kfold = GroupKFold(n_splits=5)
>>>
>>> # cross validate a classifier
>>> print(cross_val_score(
...     estimator=LogisticRegression(),
...     X=points.predictor.values.reshape(-1, 1),
...     y=points.target,
...     scoring='f1',
...     groups=groups,  # our groups go here
... ))
[0.67549669 0.63309353 0.55084746 0.6        0.67109635]