Skip to content

Commit 76544f2

Browse files
authored
Merge pull request #19943 from asgerf/approximate-related-location
Support approximate related locations
2 parents 7421399 + 3ffda2f commit 76544f2

File tree

4 files changed

+92
-45
lines changed

4 files changed

+92
-45
lines changed

java/ql/lib/semmle/code/java/security/regexp/PolynomialReDoSQuery.qll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ module PolynomialRedosConfig implements DataFlow::ConfigSig {
4747
node instanceof SimpleTypeSanitizer or
4848
node.asExpr().(MethodCall).getMethod() instanceof LengthRestrictedMethod
4949
}
50+
51+
predicate observeDiffInformedIncrementalMode() { any() }
52+
53+
Location getASelectedSinkLocation(DataFlow::Node sink) {
54+
exists(SuperlinearBackTracking::PolynomialBackTrackingTerm regexp |
55+
regexp.getRootTerm() = sink.(PolynomialRedosSink).getRegExp()
56+
|
57+
result = sink.getLocation()
58+
or
59+
result = regexp.getLocation()
60+
)
61+
}
5062
}
5163

5264
module PolynomialRedosFlow = TaintTracking::Global<PolynomialRedosConfig>;

python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSQuery.qll

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,7 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig {
1818

1919
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
2020

21-
// Diff-informed incremental mode is currently disabled for this query due to
22-
// API limitations. The query exposes sink.getABacktrackingTerm() as an alert
23-
// location, but there is no way to express that information through
24-
// getASelectedSinkLocation() because there is no @location in the CodeQL
25-
// database that corresponds to a term inside a regular expression. As a
26-
// result, this query could miss alerts in diff-informed incremental mode.
27-
//
28-
// To address this problem, we need to have a version of
29-
// getASelectedSinkLocation() that uses hasLocationInfo() instead of
30-
// returning Location objects.
31-
predicate observeDiffInformedIncrementalMode() { none() }
21+
predicate observeDiffInformedIncrementalMode() { any() }
3222

3323
Location getASelectedSinkLocation(DataFlow::Node sink) {
3424
result = sink.(Sink).getHighlight().getLocation()

ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSQuery.qll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ private module PolynomialReDoSConfig implements DataFlow::ConfigSig {
1818
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
1919

2020
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
21+
22+
// Diff-informedness is disabled because of RegExpTerms having incorrect locations when
23+
// the regexp is parsed from a string arising from constant folding.
24+
predicate observeDiffInformedIncrementalMode() { none() }
25+
26+
Location getASelectedSinkLocation(DataFlow::Node sink) {
27+
result = sink.(Sink).getHighlight().getLocation()
28+
or
29+
result = sink.(Sink).getRegExp().getRootTerm().getLocation()
30+
}
2131
}
2232

2333
/**

shared/util/codeql/util/AlertFiltering.qll

Lines changed: 69 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,14 @@ module;
88
private import codeql.util.Location
99

1010
/**
11-
* Holds if the query should produce alerts that match the given line ranges.
11+
* Holds if the query may restrict its computation to only produce alerts that match the given line
12+
* ranges. This predicate is used for implementing _diff-informed queries_ for pull requests in
13+
* GitHub Code Scanning.
1214
*
1315
* This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no
14-
* effect. If it is active, it accepts any alert that has at least one matching location.
15-
*
16-
* Note that an alert that is not accepted by this filtering predicate may still be included in the
17-
* query results if it is accepted by another active filtering predicate in this module. An alert is
18-
* excluded from the query results if only if (1) there is at least one active filtering predicate,
19-
* and (2) it is not accepted by any active filtering predicate.
16+
* effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_
17+
* location (in SARIF terminology) whose start line is within a specified range. Queries are allowed
18+
* to produce alerts outside this range.
2019
*
2120
* An alert location is a match if it matches a row in this predicate. If `startLineStart` and
2221
* `startLineEnd` are both 0, the row specifies a whole-file match, and a location is a match if
@@ -29,26 +28,24 @@ private import codeql.util.Location
2928
* - startLineStart: inclusive start of the range for alert location start line number (1-based).
3029
* - startLineEnd: inclusive end of the range for alert location start line number (1-based).
3130
*
32-
* A query should either perform no alert filtering, or adhere to all the filtering rules in this
33-
* module and return all and only the accepted alerts.
34-
*
35-
* This predicate is suitable for situations where we want to filter alerts at line granularity,
36-
* such as based on the pull request diff.
31+
* Note that an alert that is not accepted by this filtering predicate may still be included in the
32+
* query results if it is accepted by another active filtering predicate in this module. An alert is
33+
* excluded from the query results if only if (1) there is at least one active filtering predicate,
34+
* and (2) it is not accepted by any active filtering predicate.
3735
*
3836
* See also: `restrictAlertsToExactLocation`.
3937
*/
4038
extensible predicate restrictAlertsTo(string filePath, int startLineStart, int startLineEnd);
4139

4240
/**
43-
* Holds if the query should produce alerts that match the given locations.
41+
* Holds if the query may restrict its computation to only produce alerts that match the given
42+
* character ranges. This predicate is suitable for testing, where we want to filter by the exact
43+
* alert location, distinguishing between alerts on the same line.
4444
*
4545
* This predicate is active if and only if it is nonempty. If this predicate is inactive, it has no
46-
* effect. If it is active, it accepts any alert that has at least one matching location.
47-
*
48-
* Note that an alert that is not accepted by this filtering predicate may still be included in the
49-
* query results if it is accepted by another active filtering predicate in this module. An alert is
50-
* excluded from the query results if only if (1) there is at least one active filtering predicate,
51-
* and (2) it is not accepted by any active filtering predicate.
46+
* effect. If it is active, queries may omit alerts that don't have a _primary_ or _related_
47+
* location (in SARIF terminology) whose location equals a tuple from this predicate. Queries are
48+
* allowed to produce alerts outside this range.
5249
*
5350
* An alert location is a match if it matches a row in this predicate. Each row specifies an exact
5451
* location: an alert location is a match if its file path matches `filePath`, its start line and
@@ -61,11 +58,10 @@ extensible predicate restrictAlertsTo(string filePath, int startLineStart, int s
6158
* - endLine: alert location end line number (1-based).
6259
* - endColumn: alert location end column number (1-based).
6360
*
64-
* A query should either perform no alert filtering, or adhere to all the filtering rules in this
65-
* module and return all and only the accepted alerts.
66-
*
67-
* This predicate is suitable for situations where we want to filter by the exact alert location,
68-
* distinguishing between alerts on the same line.
61+
* Note that an alert that is not accepted by this filtering predicate may still be included in the
62+
* query results if it is accepted by another active filtering predicate in this module. An alert is
63+
* excluded from the query results if only if (1) there is at least one active filtering predicate,
64+
* and (2) it is not accepted by any active filtering predicate.
6965
*
7066
* See also: `restrictAlertsTo`.
7167
*/
@@ -75,25 +71,64 @@ extensible predicate restrictAlertsToExactLocation(
7571

7672
/** Module for applying alert location filtering. */
7773
module AlertFilteringImpl<LocationSig Location> {
78-
/** Applies alert filtering to the given location. */
74+
pragma[nomagic]
75+
private predicate restrictAlertsToEntireFile(string filePath) { restrictAlertsTo(filePath, 0, 0) }
76+
77+
pragma[nomagic]
78+
private predicate restrictAlertsToLine(string filePath, int line) {
79+
exists(int startLineStart, int startLineEnd |
80+
restrictAlertsTo(filePath, startLineStart, startLineEnd) and
81+
line = [startLineStart .. startLineEnd]
82+
)
83+
}
84+
85+
/**
86+
* Holds if the given location intersects the diff range. When that is the
87+
* case, ensuring that alerts mentioning this location are included in the
88+
* query results is a valid overapproximation of the requirements for
89+
* _diff-informed queries_ as documented under `restrictAlertsTo`.
90+
*
91+
* Testing for full intersection rather than only matching the start line
92+
* means that this predicate is more broadly useful than just checking whether
93+
* a specific element is considered to be in the diff range of GitHub Code
94+
* Scanning:
95+
* - If it's inconvenient to pass the exact `Location` of the element of
96+
* interest, it's valid to use a `Location` of an enclosing element.
97+
* - This predicate could be useful for other systems of alert presentation
98+
* where the rules don't exactly match GitHub Code Scanning.
99+
*
100+
* If there is no diff range, this predicate holds for all locations. Note
101+
* that this predicate has a bindingset and will therefore be inlined;
102+
* callers should include enough context to ensure efficient evaluation.
103+
*/
79104
bindingset[location]
80105
predicate filterByLocation(Location location) {
81106
not restrictAlertsTo(_, _, _) and not restrictAlertsToExactLocation(_, _, _, _, _)
82107
or
83-
exists(string filePath, int startLineStart, int startLineEnd |
84-
restrictAlertsTo(filePath, startLineStart, startLineEnd)
85-
|
86-
startLineStart = 0 and
87-
startLineEnd = 0 and
108+
exists(string filePath |
109+
restrictAlertsToEntireFile(filePath) and
88110
location.hasLocationInfo(filePath, _, _, _, _)
89111
or
90-
location.hasLocationInfo(filePath, [startLineStart .. startLineEnd], _, _, _)
112+
exists(int locStartLine, int locEndLine |
113+
location.hasLocationInfo(filePath, locStartLine, _, locEndLine, _)
114+
|
115+
restrictAlertsToLine(pragma[only_bind_into](filePath), [locStartLine .. locEndLine])
116+
)
91117
)
92118
or
93-
exists(string filePath, int startLine, int startColumn, int endLine, int endColumn |
94-
restrictAlertsToExactLocation(filePath, startLine, startColumn, endLine, endColumn)
119+
// Check if an exact filter-location is fully contained in `location`.
120+
// This is slow but only used for testing.
121+
exists(
122+
string filePath, int startLine, int startColumn, int endLine, int endColumn,
123+
int filterStartLine, int filterStartColumn, int filterEndLine, int filterEndColumn
95124
|
96-
location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn)
125+
location.hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) and
126+
restrictAlertsToExactLocation(filePath, filterStartLine, filterStartColumn, filterEndLine,
127+
filterEndColumn) and
128+
startLine <= filterStartLine and
129+
(startLine != filterStartLine or startColumn <= filterStartColumn) and
130+
endLine >= filterEndLine and
131+
(endLine != filterEndLine or endColumn >= filterEndColumn)
97132
)
98133
}
99134
}

0 commit comments

Comments
 (0)