|
2 | 2 | Query operations. |
3 | 3 |
|
4 | 4 | range_query, |
5 | | -knn_query |
| 5 | +knn_query, |
| 6 | +query_all_points_by_range, |
6 | 7 |
|
7 | 8 | """ |
8 | | -from __future__ import annotations |
| 9 | +from datetime import timedelta |
9 | 10 |
|
10 | 11 | import numpy as np |
11 | 12 | import pandas as pd |
@@ -62,6 +63,25 @@ def range_query( |
62 | 63 | ------ |
63 | 64 | ValueError: if distance measure is invalid |
64 | 65 |
|
| 66 | + Examples |
| 67 | + -------- |
| 68 | + >>> from pymove.query.query import range_query |
| 69 | + >>> traj_df |
| 70 | + lat lon datetime id |
| 71 | + 0 16.4 -54.9 2014-10-11 18:00:00 1 |
| 72 | + 1 16.4 -55.9 2014-10-12 00:00:00 1 |
| 73 | + 2 16.4 -56.9 2014-10-12 06:00:00 1 |
| 74 | + >>> move_df |
| 75 | + lat lon datetime id |
| 76 | + 0 33.1 -77.0 2012-05-19 00:00:00 2 |
| 77 | + 1 32.8 -77.1 2012-05-19 06:00:00 3 |
| 78 | + 2 32.5 -77.3 2012-05-19 12:00:00 4 |
| 79 | + >>> range_query( |
| 80 | + >>> traj_df, move_df, min_dist=80.5 |
| 81 | + >>> ) |
| 82 | + lat lon datetime id |
| 83 | + 1 32.8 -77.1 2012-05-19 06:00:00 3 |
| 84 | + 2 32.5 -77.3 2012-05-19 12:00:00 4 |
65 | 85 | """ |
66 | 86 | result = traj.copy() |
67 | 87 | result.drop(result.index, inplace=True) |
@@ -136,6 +156,27 @@ def knn_query( |
136 | 156 | ------ |
137 | 157 | ValueError: if distance measure is invalid |
138 | 158 |
|
| 159 | + Examples |
| 160 | + -------- |
| 161 | + >>> from pymove.query.query import knn_query |
| 162 | + >>> traj_df |
| 163 | + lat lon datetime id |
| 164 | + 0 16.4 -54.9 2014-10-11 18:00:00 1 |
| 165 | + 1 16.4 -55.9 2014-10-12 00:00:00 1 |
| 166 | + 2 16.4 -56.9 2014-10-12 06:00:00 1 |
| 167 | + >>> move_df |
| 168 | + lat lon datetime id |
| 169 | + 0 33.1 -77.0 2012-05-19 00:00:00 2 |
| 170 | + 1 32.8 -77.1 2012-05-19 06:00:00 3 |
| 171 | + 2 32.5 -77.3 2012-05-19 12:00:00 4 |
| 172 | + >>> knn_query( |
| 173 | + >>> traj_df, move_df, k=1 |
| 174 | + >>> ) |
| 175 | + lat lon datetime id |
| 176 | + 0 16.4 -54.9 2014-10-11 18:00:00 1 |
| 177 | + 1 16.4 -55.9 2014-10-12 00:00:00 1 |
| 178 | + 2 16.4 -56.9 2014-10-12 06:00:00 1 |
| 179 | + 2 32.5 -77.3 2012-05-19 12:00:00 4 |
139 | 180 | """ |
140 | 181 | k_list = pd.DataFrame([[np.Inf, 'empty']] * k, columns=['distance', TRAJ_ID]) |
141 | 182 |
|
@@ -176,3 +217,200 @@ def dist_measure(traj, this, latitude, longitude, datetime): |
176 | 217 | ) |
177 | 218 |
|
178 | 219 | return result |
| 220 | + |
| 221 | + |
def _datetime_filter(
    row: DataFrame,
    move_df: DataFrame,
    minimum_distance: timedelta
) -> DataFrame:
    """
    Returns all the points of the DataFrame which are in a temporal distance.

    Given a row referencing a point, a DataFrame with multiple points
    and a temporal threshold, it returns all the points of the DataFrame
    whose absolute temporal distance to the point is strictly smaller
    than the threshold.

    The input ``move_df`` is not modified: the distance column is added
    to a new frame.

    Parameters
    ----------
    row: dataframe
        The input of one point of a trajectory. It is indexed by label
        ('datetime', 'id', 'lat', 'lon'), e.g. a row yielded by
        ``DataFrame.iterrows()``.
    move_df: dataframe
        The input trajectory data. Must contain a 'datetime' column.
    minimum_distance: datetime.timedelta
        the temporal threshold; only points strictly closer in time
        than this value are kept.

    Returns
    -------
    DataFrame
        the points of move_df within the temporal threshold, keeping
        their original index, with an extra 'temporal_distance' column.
        When at least one point matches, the columns 'target_id',
        'target_lat', 'target_lon' and 'target_datetime' (copied from
        ``row``) are also added.

    Examples
    --------
    >>> from pymove.query.query import _datetime_filter
    >>> point
    lat                        16.4
    lon                       -54.9
    datetime    2014-10-11 18:00:00
    id                            1
    Name: 0, dtype: object
    >>> move_df
        lat    lon            datetime  id
    0  33.1  -77.0 2012-05-19 00:00:00   2
    1  32.8  -77.1 2012-05-19 06:00:00   3
    2  32.5  -77.3 2012-05-19 12:00:00   4
    >>> result = _datetime_filter(point, move_df, timedelta(hours=21010))
    >>> result[['id', 'temporal_distance', 'target_id']]
       id temporal_distance  target_id
    2   4 875 days 06:00:00          1
    """
    temporal_distance = (move_df['datetime'] - row['datetime']).abs()

    # `assign` returns a new frame, so the caller's move_df is left untouched.
    with_distance = move_df.assign(temporal_distance=temporal_distance)

    # The distance is already absolute, so a single upper-bound test suffices.
    # The explicit copy makes the result independent of `with_distance`, which
    # keeps the column assignments below free of SettingWithCopyWarning.
    filtered = with_distance[
        with_distance['temporal_distance'] < minimum_distance
    ].copy()

    # Target columns are only attached when something matched, so an empty
    # result keeps the same columns it had before this fix.
    if len(filtered) > 0:
        filtered['target_id'] = row['id']
        filtered['target_lat'] = row['lat']
        filtered['target_lon'] = row['lon']
        filtered['target_datetime'] = row['datetime']

    return filtered
| 283 | + |
| 284 | + |
def _meters_filter(
    row: DataFrame,
    move_df: DataFrame,
    minimum_distance: float
) -> DataFrame:
    """
    Returns all the points of the DataFrame which are in a spatial distance.

    Given a row referencing a point, a DataFrame with multiple points
    and a spatial threshold, it returns all the points of the DataFrame
    whose distance to the point, as computed by
    ``distances.euclidean_distance_in_meters``, is strictly smaller
    than the threshold.

    The input ``move_df`` is not modified: the distance column is added
    to a new frame.

    Parameters
    ----------
    row: dataframe
        The input of one point of a trajectory. It is indexed by the
        LATITUDE, LONGITUDE, DATETIME and TRAJ_ID labels, e.g. a row
        yielded by ``DataFrame.iterrows()``.
    move_df: dataframe
        The input trajectory data. Must contain the LATITUDE and
        LONGITUDE columns.
    minimum_distance: float
        the spatial threshold in meters; only points strictly closer
        than this value are kept.

    Returns
    -------
    DataFrame
        the points of move_df within the spatial threshold, keeping
        their original index, with an extra 'spatial_distance' column.
        When at least one point matches, the columns 'target_id',
        'target_lat', 'target_lon' and 'target_datetime' (copied from
        ``row``) are also added.

    Examples
    --------
    >>> from pymove.query.query import _meters_filter
    >>> point
    lat                        16.4
    lon                       -54.9
    datetime    2014-10-11 18:00:00
    id                            1
    Name: 0, dtype: object
    >>> move_df
        lat    lon            datetime  id
    0  33.1  -77.0 2012-05-19 00:00:00   2
    1  32.8  -77.1 2012-05-19 06:00:00   3
    2  32.5  -77.3 2012-05-19 12:00:00   4
    >>> result = _meters_filter(point, move_df, 3190000)
    >>> result[['id', 'spatial_distance', 'target_id']]
       id  spatial_distance  target_id
    2   4      3.182834e+06          1
    """
    spatial_distance = distances.euclidean_distance_in_meters(
        lat1=row[LATITUDE],
        lon1=row[LONGITUDE],
        lat2=move_df[LATITUDE],
        lon2=move_df[LONGITUDE]
    )

    # `assign` returns a new frame, so the caller's move_df is left untouched.
    with_distance = move_df.assign(spatial_distance=spatial_distance)

    # The explicit copy makes the result independent of `with_distance`, which
    # keeps the column assignments below free of SettingWithCopyWarning.
    filtered = with_distance[
        with_distance['spatial_distance'] < minimum_distance
    ].copy()

    # Target columns are only attached when something matched, so an empty
    # result keeps the same columns it had before this fix.
    if len(filtered) > 0:
        filtered['target_id'] = row[TRAJ_ID]
        filtered['target_lat'] = row[LATITUDE]
        filtered['target_lon'] = row[LONGITUDE]
        filtered['target_datetime'] = row[DATETIME]

    return filtered
| 346 | + |
| 347 | + |
def query_all_points_by_range(
    traj1: DataFrame,
    move_df: DataFrame,
    minimum_meters: float = 100,
    minimum_time: timedelta = None
) -> DataFrame:
    """
    Queries closest point within a spatial range based on meters and a temporal range.

    For every point of ``traj1``, selects the points of ``move_df`` that
    pass both the spatial filter (``_meters_filter``) and the temporal
    filter (``_datetime_filter``), and gathers all matches in a single
    DataFrame.

    Parameters
    ----------
    traj1: dataframe
        The input of a trajectory data.
    move_df: dataframe
        The input of another trajectory data.
    minimum_meters: float, optional
        the spatial threshold, in meters, between the points, by default 100
    minimum_time: datetime.timedelta, optional
        the temporal threshold between the points,
        by default None, which is replaced by timedelta(minutes=2)

    Returns
    -------
    DataFrame
        dataframe with all the points of move_df which are within both
        the spatial and the temporal thresholds of some point of traj1,
        together with the columns added by the filters. Matches for later
        points of traj1 come first. An empty DataFrame is returned when
        traj1 has no rows.

    Examples
    --------
    >>> from pymove.query.query import query_all_points_by_range
    >>> traj_df
        lat    lon            datetime  id
    0  16.4  -54.9 2014-10-11 18:00:00   1
    1  16.4  -55.9 2014-10-12 00:00:00   1
    2  16.4  -56.9 2014-10-12 06:00:00   1
    >>> move_df
        lat    lon            datetime  id
    0  33.1  -77.0 2012-05-19 00:00:00   2
    1  32.8  -77.1 2012-05-19 06:00:00   3
    2  32.5  -77.3 2012-05-19 12:00:00   4
    >>> result = query_all_points_by_range(
    ...     traj_df, move_df,
    ...     minimum_meters=3190000, minimum_time=timedelta(hours=21010)
    ... )
    >>> result[['id', 'spatial_distance', 'temporal_distance', 'target_id']]
       id  spatial_distance temporal_distance  target_id
    2   4      3.182834e+06 875 days 06:00:00          1
    """
    if minimum_time is None:
        minimum_time = timedelta(minutes=2)

    # Collect the per-point matches and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated result on every iteration.
    matches = []
    for _, row in progress_bar(
        traj1.iterrows(),
        desc='Querying all points by temporal and spatial distance',
        total=traj1.shape[0]
    ):
        coinc_points = _meters_filter(row, move_df, minimum_meters)
        coinc_points = _datetime_filter(row, coinc_points, minimum_time)
        matches.append(coinc_points)

    # pd.concat raises on an empty list, so keep the previous behavior of
    # returning an empty frame when traj1 has no rows.
    if not matches:
        return DataFrame([])

    # The previous implementation prepended each new batch to the result,
    # so the batches are concatenated in reverse to keep the same row order.
    return pd.concat(matches[::-1])
0 commit comments