Skip to content

Commit 6e046cd

Browse files
authored
Merge pull request #198 from InsightLab/insert-parcial-similarity-functions
Insert parcial similarity functions
2 parents 4e13445 + e55099f commit 6e046cd

File tree

3 files changed

+475
-4
lines changed

3 files changed

+475
-4
lines changed

pymove/query/query.py

Lines changed: 240 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
Query operations.
33
44
range_query,
5-
knn_query
5+
knn_query,
6+
query_all_points_by_range,
67
78
"""
8-
from __future__ import annotations
9+
from datetime import timedelta
910

1011
import numpy as np
1112
import pandas as pd
@@ -62,6 +63,25 @@ def range_query(
6263
------
6364
ValueError: if distance measure is invalid
6465
66+
Examples
67+
--------
68+
>>> from pymove.query.query import range_query
69+
>>> traj_df
70+
lat lon datetime id
71+
0 16.4 -54.9 2014-10-11 18:00:00 1
72+
1 16.4 -55.9 2014-10-12 00:00:00 1
73+
2 16.4 -56.9 2014-10-12 06:00:00 1
74+
>>> move_df
75+
lat lon datetime id
76+
0 33.1 -77.0 2012-05-19 00:00:00 2
77+
1 32.8 -77.1 2012-05-19 06:00:00 3
78+
2 32.5 -77.3 2012-05-19 12:00:00 4
79+
>>> range_query(
80+
>>> traj_df, move_df, min_dist=80.5
81+
>>> )
82+
lat lon datetime id
83+
1 32.8 -77.1 2012-05-19 06:00:00 3
84+
2 32.5 -77.3 2012-05-19 12:00:00 4
6585
"""
6686
result = traj.copy()
6787
result.drop(result.index, inplace=True)
@@ -136,6 +156,27 @@ def knn_query(
136156
------
137157
ValueError: if distance measure is invalid
138158
159+
Examples
160+
--------
161+
>>> from pymove.query.query import knn_query
162+
>>> traj_df
163+
lat lon datetime id
164+
0 16.4 -54.9 2014-10-11 18:00:00 1
165+
1 16.4 -55.9 2014-10-12 00:00:00 1
166+
2 16.4 -56.9 2014-10-12 06:00:00 1
167+
>>> move_df
168+
lat lon datetime id
169+
0 33.1 -77.0 2012-05-19 00:00:00 2
170+
1 32.8 -77.1 2012-05-19 06:00:00 3
171+
2 32.5 -77.3 2012-05-19 12:00:00 4
172+
>>> knn_query(
173+
>>> traj_df, move_df, k=1
174+
>>> )
175+
lat lon datetime id
176+
0 16.4 -54.9 2014-10-11 18:00:00 1
177+
1 16.4 -55.9 2014-10-12 00:00:00 1
178+
2 16.4 -56.9 2014-10-12 06:00:00 1
179+
2 32.5 -77.3 2012-05-19 12:00:00 4
139180
"""
140181
k_list = pd.DataFrame([[np.Inf, 'empty']] * k, columns=['distance', TRAJ_ID])
141182

@@ -176,3 +217,200 @@ def dist_measure(traj, this, latitude, longitude, datetime):
176217
)
177218

178219
return result
220+
221+
222+
def _datetime_filter(
223+
row: DataFrame,
224+
move_df: DataFrame,
225+
minimum_distance: timedelta
226+
) -> DataFrame:
227+
"""
228+
Returns all the points of the DataFrame which are in a temporal distance.
229+
230+
Given a row referencing to a point, a DataFrame with
231+
multiple points and a minimum distance, it returns
232+
all the points of the DataFrame which are in a temporal
233+
distance equal or smaller than the minimum distance
234+
parameter.
235+
236+
Parameters
237+
----------
238+
row: dataframe
239+
The input of one point of a trajectory.
240+
move_df: dataframe
241+
The input trajectory data.
242+
minimum_distance: datetime.timedelta
243+
the minimum temporal distance between the points.
244+
245+
Returns
246+
-------
247+
DataFrame
248+
dataframe with all the points of move_df which are in
249+
a temporal distance equal or smaller than the minimum
250+
distance parameter.
251+
252+
Examples
253+
--------
254+
>>> from pymove.query.query import _datetime_filter
255+
>>>> point
256+
lat lon datetime id
257+
0 16.4 -54.9 2014-10-11 18:00:00 1
258+
>>> move_df
259+
lat lon datetime id
260+
0 33.1 -77.0 2012-05-19 00:00:00 2
261+
1 32.8 -77.1 2012-05-19 06:00:00 3
262+
2 32.5 -77.3 2012-05-19 12:00:00 4
263+
>>> _datetime_filter(point, move_df, timedelta(hours=21010))
264+
lat lon datetime id temporal_distance\
265+
target_id target_lat target_lon target_datetime
266+
0 32.5 -77.3 2012-05-19 12:00:00 4 875 days 06:00:00\
267+
1 16.4 -54.9 2014-10-11 18:00:00
268+
"""
269+
datetime = row['datetime']
270+
move_df['temporal_distance'] = (move_df['datetime'] - datetime).abs()
271+
filtered = move_df[
272+
(move_df['temporal_distance'] < minimum_distance)
273+
& (move_df['temporal_distance'] > -minimum_distance)
274+
]
275+
276+
if (filtered.shape[0] > 0):
277+
filtered['target_id'] = row['id']
278+
filtered['target_lat'] = row['lat']
279+
filtered['target_lon'] = row['lon']
280+
filtered['target_datetime'] = row['datetime']
281+
282+
return filtered
283+
284+
285+
def _meters_filter(
    row: DataFrame,
    move_df: DataFrame,
    minimum_distance: float
) -> DataFrame:
    """
    Returns the points of the DataFrame that are spatially close to a point.

    Given a row referencing a point, a DataFrame with multiple points
    and a spatial threshold, it returns the points of the DataFrame
    whose distance to the row (in meters, as computed by
    distances.euclidean_distance_in_meters) is strictly smaller than
    the threshold. The input DataFrame is left untouched; the result
    is an independent copy.

    Parameters
    ----------
    row: dataframe
        The input of one point of a trajectory. It is read through the
        TRAJ_ID, LATITUDE, LONGITUDE and DATETIME labels
        (query_all_points_by_range passes a row yielded by iterrows).
    move_df: dataframe
        The input trajectory data. Must have the LATITUDE and
        LONGITUDE columns.
    minimum_distance: float
        the spatial threshold in meters; only points strictly closer
        than it are kept.

    Returns
    -------
    DataFrame
        copy of the points of move_df which are in a spatial distance
        smaller than the threshold, with an extra 'spatial_distance'
        column. When at least one point is selected, the columns
        'target_id', 'target_lat', 'target_lon' and 'target_datetime'
        are also filled with the values of row.

    Examples
    --------
    >>> from pymove.query.query import _meters_filter
    >>> point
        lat    lon              datetime  id
    0  16.4  -54.9   2014-10-11 18:00:00   1
    >>> move_df
        lat    lon              datetime  id
    0  33.1  -77.0   2012-05-19 00:00:00   2
    1  32.8  -77.1   2012-05-19 06:00:00   3
    2  32.5  -77.3   2012-05-19 12:00:00   4
    >>> _meters_filter(point, move_df, 3190000)
        lat    lon              datetime  id  spatial_distance\
            target_id  target_lat  target_lon      target_datetime
    2  32.5  -77.3   2012-05-19 12:00:00   4      3.182834e+06\
                    1        16.4       -54.9  2014-10-11 18:00:00
    """
    spatial_distance = distances.euclidean_distance_in_meters(
        lat1=row[LATITUDE],
        lon1=row[LONGITUDE],
        lat2=move_df[LATITUDE],
        lon2=move_df[LONGITUDE]
    )
    # NOTE(review): the helper is assumed to return one distance per row of
    # move_df (Series or array); np.asarray makes both cases positional.
    is_close = np.asarray(spatial_distance < minimum_distance)

    # Copy the selection so that the caller's DataFrame is never modified and
    # the column assignments below do not write into a slice of it.
    filtered = move_df[is_close].copy()
    filtered['spatial_distance'] = np.asarray(spatial_distance)[is_close]

    # The target columns are only added when there is something to annotate,
    # which keeps the schema of an empty result unchanged.
    if not filtered.empty:
        filtered['target_id'] = row[TRAJ_ID]
        filtered['target_lat'] = row[LATITUDE]
        filtered['target_lon'] = row[LONGITUDE]
        filtered['target_datetime'] = row[DATETIME]

    return filtered
346+
347+
348+
def query_all_points_by_range(
    traj1: DataFrame,
    move_df: DataFrame,
    minimum_meters: float = 100,
    minimum_time: timedelta = None
) -> DataFrame:
    """
    Queries all points within a spatial range based on meters and a temporal range.

    For every point of traj1, selects the points of move_df that are
    both spatially (in meters) and temporally closer to it than the
    given thresholds, and gathers all the selections in one DataFrame.

    Parameters
    ----------
    traj1: dataframe
        The input of a trajectory data.
    move_df: dataframe
        The input of another trajectory data.
    minimum_meters: float, optional
        the spatial threshold, in meters, between the points, by default 100
    minimum_time: datetime.timedelta, optional
        the temporal threshold between the points, by default timedelta(minutes=2)

    Returns
    -------
    DataFrame
        dataframe with all the points of move_df which are in
        a spatial distance and temporal distance smaller than
        the threshold parameters of some point of traj1. The matches
        of the last point of traj1 come first, the matches of its
        first point come last. An empty DataFrame is returned
        when traj1 has no points.

    Examples
    --------
    >>> from datetime import timedelta
    >>> from pymove.query.query import query_all_points_by_range
    >>> traj_df
        lat    lon              datetime  id
    0  16.4  -54.9   2014-10-11 18:00:00   1
    1  16.4  -55.9   2014-10-12 00:00:00   1
    2  16.4  -56.9   2014-10-12 06:00:00   1
    >>> move_df
        lat    lon              datetime  id
    0  33.1  -77.0   2012-05-19 00:00:00   2
    1  32.8  -77.1   2012-05-19 06:00:00   3
    2  32.5  -77.3   2012-05-19 12:00:00   4
    >>> query_all_points_by_range(
    ...     traj_df, move_df, minimum_meters=3190000,
    ...     minimum_time=timedelta(hours=21010)
    ... )
        lat    lon              datetime  id  spatial_distance  target_id\
            target_lat  target_lon      target_datetime  temporal_distance
    2  32.5  -77.3   2012-05-19 12:00:00   4      3.182834e+06          1\
                  16.4       -54.9  2014-10-11 18:00:00  875 days 06:00:00
    """
    if minimum_time is None:
        minimum_time = timedelta(minutes=2)

    # Collect the per-point matches and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated result on every iteration.
    matches = []
    for _, row in progress_bar(
        traj1.iterrows(),
        desc='Querying all points by temporal and spatial distance',
        total=traj1.shape[0]
    ):
        coinc_points = _meters_filter(row, move_df, minimum_meters)
        coinc_points = _datetime_filter(row, coinc_points, minimum_time)
        matches.append(coinc_points)

    # pd.concat refuses an empty list, so an empty traj1 is handled here.
    if not matches:
        return DataFrame([])

    # Reversed so that the latest matches come first, which is the row order
    # the previous prepend-style accumulation produced.
    return pd.concat(matches[::-1])

0 commit comments

Comments
 (0)