Skip to content

Commit f6728e1

Browse files
authored
feat(spans-migration): create translation function for Discover -> Explore queries (#98940)
I've created the translation function for the Discover -> Explore saved queries migration. The function takes a Discover query and maps and translates its fields into the new Explore query format. Almost all of the translation logic lives in the translation layer (`translate_mep_to_eap()`); the only part that does not is the orderby conversion. In Discover, function orderbys are stored in an underscore format, e.g. `-count_unique(user.id)` is represented as `-count_unique_user_id`. The translation layer accepts the function format but not the underscore format. Since this is a Discover-specific format, I've done that conversion in this function: the orderby is matched against the entries of the fields array with all special characters stripped, and the matching field (in function format) is what gets passed into the orderby query part. There are a couple of things that still need to be done for the Discover migration (I'll do them in separate PRs): (1) return the dropped fields from the translation layer and populate the `changed_reason` field in Explore saved queries; (2) make a script for running the migration as a job, (2a) migrating the projects -> queries mapping as well.
1 parent d4fdb9e commit f6728e1

File tree

2 files changed

+639
-0
lines changed

2 files changed

+639
-0
lines changed
Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
import re
2+
from typing import Any
3+
4+
from sentry.discover.arithmetic import is_equation
5+
from sentry.discover.models import DiscoverSavedQuery
6+
from sentry.discover.translation.mep_to_eap import (
7+
INDEXED_EQUATIONS_PATTERN,
8+
DroppedFields,
9+
QueryParts,
10+
translate_mep_to_eap,
11+
)
12+
from sentry.explore.models import ExploreSavedQuery, ExploreSavedQueryDataset
13+
from sentry.integrations.slack.unfurl.discover import is_aggregate
14+
from sentry.search.events.fields import get_function_alias_with_columns, is_function, parse_function
15+
16+
# We keep Discover's numeric chart-type codes when migrating to Explore:
# bar = 0, line = 1, area = 2
# Keys are the Discover `display` values, values are the chart-type code to use.
CHART_TYPES: dict[str, int] = {
    "default": 2,
    "previous": 2,
    "top5": 2,
    "daily": 0,
    "dailytop5": 0,
    "bar": 0,
}
26+
27+
28+
def strip_negative_from_orderby(orderby: str) -> tuple[str, bool]:
    """
    Split a leading "-" off an orderby item.

    Returns an ``(item, is_negated)`` tuple. When ``orderby`` starts with "-",
    ``item`` is ``orderby`` without that first character and ``is_negated`` is
    ``True``; otherwise ``orderby`` is returned unchanged with ``False``.
    Only a single leading "-" is removed.
    """
    if orderby.startswith("-"):
        return orderby[1:], True
    return orderby, False
35+
36+
37+
def _get_translated_orderby_item(orderby, columns, is_negated):
    """
    Translate an orderby item from function underscore notation to regular function notation.

    Discover can store a function orderby in underscore form (e.g. ``count_unique_user_id``
    for ``count_unique(user.id)``). To find the column it refers to, the orderby and every
    entry of ``columns`` are normalised the same way: anything ``is_function`` recognises is
    parsed and rebuilt with ``get_function_alias_with_columns``; anything else is compared
    as-is. The first column whose normalised form equals the normalised orderby wins.

    :param orderby: the orderby item, with any leading "-" already removed by the caller.
    :param columns: the selected columns (functions and plain fields) to match against.
    :param is_negated: whether the original orderby item carried a leading "-".
    :returns: the matching entry of ``columns`` in its original notation, prefixed with "-"
        when ``is_negated`` is true, or ``None`` when no column matches (the orderby
        should then be dropped).
    """

    def _underscore_form(item):
        # non-function columns don't change format
        if not is_function(item):
            return item
        aggregate, fields, _alias = parse_function(item)
        return get_function_alias_with_columns(aggregate, fields)

    joined_orderby_item = _underscore_form(orderby)
    for column in columns:
        if _underscore_form(column) == joined_orderby_item:
            return f"-{column}" if is_negated else column

    # if the orderby item is not in the columns, it should be dropped anyways
    return None
69+
70+
71+
def _format_orderby_for_translation(orderby, columns):
    """
    Put a Discover orderby into the format the translation layer accepts.

    :param orderby: a single orderby string or a list of orderby strings; each item may
        carry a leading "-".
    :param columns: the selected (non-equation) columns, used to resolve function orderbys
        stored in underscore notation.
    :returns: a list of orderby items. Items that match ``INDEXED_EQUATIONS_PATTERN``, are
        equations, or appear verbatim in ``columns`` are passed through untouched; any other
        item is replaced by the column it resolves to. Returns ``None`` as soon as one item
        cannot be resolved, and an empty list when ``orderby`` is neither a string nor a list.
    """
    if isinstance(orderby, str):
        orderby = [orderby]
    if not isinstance(orderby, list):
        return []

    orderby_converted_list = []
    for orderby_item in orderby:
        stripped_orderby_item, is_negated = strip_negative_from_orderby(orderby_item)

        # These forms are kept exactly as stored:
        # - indexed equations (the translation layer handles the indexed format)
        # - full equations
        # - items that are literally one of the selected columns, i.e. existing fields
        if (
            re.match(INDEXED_EQUATIONS_PATTERN, stripped_orderby_item)
            or is_equation(stripped_orderby_item)
            or stripped_orderby_item in columns
        ):
            orderby_converted_list.append(orderby_item)
            continue

        # orderby functions can be formatted in all underscores, like -count_unique_user_id
        # for count_unique(user.id); this does not apply to fields and equations
        translated_orderby_item = _get_translated_orderby_item(
            stripped_orderby_item, columns, is_negated
        )
        if translated_orderby_item is None:
            return None
        orderby_converted_list.append(translated_orderby_item)

    return orderby_converted_list
99+
100+
101+
def _resolve_explore_orderby(translated_orderbys, translated_equations):
    """
    Pick the single orderby carried over to Explore and its aggregate counterpart.

    :param translated_orderbys: the orderby list returned by the translation layer
        (may be ``None`` or empty).
    :param translated_equations: the translated equations, used to resolve indexed
        equation orderbys.
    :returns: ``(translated_orderby, aggregate_orderby)``. Both are ``None`` when there is no
        orderby or an indexed equation cannot be resolved; ``aggregate_orderby`` is ``None``
        when the orderby is neither an aggregate nor an equation.
    """
    if not translated_orderbys:
        return None, None

    # only the first orderby is carried over
    translated_orderby = translated_orderbys[0]
    stripped_translated_orderby, is_negated = strip_negative_from_orderby(translated_orderby)

    if re.match(INDEXED_EQUATIONS_PATTERN, stripped_translated_orderby):
        # the number between "[" and "]" is used as an index into the translated equations
        try:
            translated_equation_index = int(
                stripped_translated_orderby.split("[")[1].split("]")[0]
            )
            orderby_equation = translated_equations[translated_equation_index]
        except (IndexError, ValueError):
            return None, None
        # if the orderby is an equation there's only aggregate orderby
        resolved_orderby = f"-{orderby_equation}" if is_negated else orderby_equation
        return resolved_orderby, resolved_orderby

    if is_aggregate(stripped_translated_orderby) or is_equation(stripped_translated_orderby):
        return translated_orderby, translated_orderby
    return translated_orderby, None


def _translate_discover_query_field_to_explore_query_schema(
    query: dict[str, Any],
) -> tuple[dict[str, Any], DroppedFields]:
    """
    Build the Explore saved-query ``query`` payload from a Discover saved-query ``query`` dict.

    The Discover ``fields`` and ``yAxis`` entries are merged into one column list (equations
    are taken from ``fields`` only), the orderby is normalised, and everything is passed
    through ``translate_mep_to_eap``. The translated parts are then arranged into the Explore
    schema: ``mode``, ``fields`` (always including "id"), ``aggregateField`` (one ``groupBy``
    entry per non-aggregate column and one ``yAxes`` entry per aggregate/equation), and
    either ``orderby`` or ``aggregateOrderby``.

    :param query: the Discover query dict. Keys read here: ``query``, ``fields``, ``yAxis``,
        ``orderby``, ``display``, ``environment``, ``start``, ``end``, ``range``.
    :returns: ``(explore_query, dropped_fields)`` where ``dropped_fields`` is whatever
        ``translate_mep_to_eap`` reported.
    """
    conditions = query.get("query", "")

    # have to separate equations and fields
    # (`or []` also covers a key that is present but stored as None)
    fields = query.get("fields") or []

    # some yAxis fields can be a single string; a missing/None yAxis means nothing is visualized
    yAxis_fields = query.get("yAxis")
    if yAxis_fields is None:
        visualized_fields = []
    elif isinstance(yAxis_fields, list):
        visualized_fields = yAxis_fields
    else:
        visualized_fields = [yAxis_fields]

    # in explore there is no concept of chart only (yaxis) fields or table only fields,
    # so we're just adding all the fields into the columns/equations lists
    base_fields = [field for field in fields if not is_equation(field)]
    # add visualized_fields that are not equations and not already in fields
    additional_visualized_fields = [
        field for field in visualized_fields if not is_equation(field) and field not in base_fields
    ]
    columns = base_fields + additional_visualized_fields

    # all equations in the visualized_fields have to be in the fields list
    equations = [field for field in fields if is_equation(field)]

    orderby = query.get("orderby", "")
    # need to make sure all orderby functions are in the correct format
    # (i.e. not in -count_unique_user_id format)
    orderby_converted_list = _format_orderby_for_translation(orderby, columns)

    translated_query_parts, dropped_fields_from_translation = translate_mep_to_eap(
        QueryParts(
            selected_columns=columns,
            query=conditions,
            equations=equations,
            orderby=orderby_converted_list,
        )
    )

    translated_columns = translated_query_parts["selected_columns"] or []
    translated_aggregate_columns = [field for field in translated_columns if is_aggregate(field)]
    translated_non_aggregate_columns = [
        field for field in translated_columns if not is_aggregate(field)
    ]
    translated_equations = list(translated_query_parts["equations"] or [])

    # if we have any aggregates or equation we should be in aggregate mode
    mode = "aggregate" if translated_aggregate_columns or translated_equations else "samples"

    display = query.get("display", "default")
    # display values not listed in CHART_TYPES (including an explicit None) fall back to the
    # default chart type instead of raising KeyError
    chart_type = CHART_TYPES.get(display, CHART_TYPES["default"])
    # only intervals that matter are the daily ones, rest can be defaulted to explore default
    interval = "1d" if display in ("daily", "dailytop5") else None

    y_axes = translated_aggregate_columns + translated_equations
    # aggregate fields parameter contains groupBys and yAxes
    aggregate_fields = [
        {"groupBy": translated_column} for translated_column in translated_non_aggregate_columns
    ] + [{"yAxes": [y_axis], "chartType": chart_type} for y_axis in y_axes]

    # we want to make sure the id field is always included in samples mode
    # because without it the 'id' field is not sortable on the samples table
    fields_with_id = (
        (["id"] + translated_non_aggregate_columns)
        if "id" not in translated_non_aggregate_columns
        else translated_non_aggregate_columns
    )

    translated_orderby, aggregate_orderby = _resolve_explore_orderby(
        translated_query_parts["orderby"], translated_equations
    )

    query_list = [
        {
            "query": translated_query_parts["query"],
            "fields": fields_with_id,
            # a sort is stored either as the plain orderby or as the aggregate one, never both
            "orderby": (translated_orderby if aggregate_orderby is None else None),
            "mode": mode,
            "aggregateField": aggregate_fields,
            "aggregateOrderby": aggregate_orderby if mode == "aggregate" else None,
        }
    ]

    explore_query = {
        "environment": query.get("environment", []),
        "start": query.get("start", None),
        "end": query.get("end", None),
        "range": query.get("range", None),
        "interval": interval,
        "query": query_list,
    }

    return explore_query, dropped_fields_from_translation
231+
232+
233+
def translate_discover_query_to_explore_query(
    discover_query: DiscoverSavedQuery,
) -> ExploreSavedQuery:
    """
    Create or refresh the Explore saved query that corresponds to a Discover saved query.

    The Discover ``query`` payload is translated to the Explore schema. If
    ``discover_query.explore_query`` is already set, only its ``query`` and
    ``changed_reason`` are overwritten and saved. Otherwise a new ``ExploreSavedQuery`` is
    saved, copying name, organization, creator, visit data and dates from the Discover
    query, and the Discover query is saved with ``explore_query_id`` pointing at it.

    :param discover_query: the Discover saved query to migrate.
    :returns: the saved Explore query (the existing one when it was refreshed).
    """
    translated_query_field, dropped_fields_from_translation = (
        _translate_discover_query_field_to_explore_query_schema(discover_query.query)
    )

    # record what the translation layer had to drop
    changed_reason = {
        "equations": dropped_fields_from_translation["equations"],
        "columns": dropped_fields_from_translation["selected_columns"],
        "orderby": dropped_fields_from_translation["orderby"],
    }

    existing_explore_query = discover_query.explore_query
    if existing_explore_query is not None:
        existing_explore_query.changed_reason = changed_reason
        existing_explore_query.query = translated_query_field
        existing_explore_query.save()
        return existing_explore_query

    # The creation attributes are only read on this path; the update path above never
    # needs them.
    # NOTE(review): `organization` is presumably a relation that may be fetched on access - confirm.
    new_explore_query = ExploreSavedQuery(
        date_updated=discover_query.date_updated,
        date_added=discover_query.date_created,
        created_by_id=discover_query.created_by_id,
        visits=discover_query.visits,
        last_visited=discover_query.last_visited,
        dataset=ExploreSavedQueryDataset.SEGMENT_SPANS,
        is_multi_query=False,
        organization=discover_query.organization,
        name=discover_query.name,
        query=translated_query_field,
        changed_reason=changed_reason,
    )
    new_explore_query.save()

    # link the Discover query to its new Explore counterpart
    discover_query.explore_query_id = new_explore_query.id
    discover_query.save()

    return new_explore_query

0 commit comments

Comments
 (0)