diff --git a/README-zh_CN.md b/README-zh_CN.md index b82cc61cc..8ec501475 100644 --- a/README-zh_CN.md +++ b/README-zh_CN.md @@ -356,6 +356,55 @@ console.log(sqlSlices) 行列号信息不是必传的,如果传了行列号信息,那么收集到的实体中,如果实体位于对应行列号所在的语句下,那么实体的所属的语句对象上会带有 `isContainCaret` 标识,这在与自动补全功能结合时,可以帮助你快速筛选出需要的实体信息。 + +### 获取语义上下文信息 +调用 SQL 实例上的 `getSemanticContextAtCaretPosition` 方法,传入 sql 文本和指定位置的行列号, 例如: +```typescript +import { HiveSQL } from 'dt-sql-parser'; + +const hive = new HiveSQL(); +const sql = 'SELECT * FROM tb;'; +const pos = { lineNumber: 1, column: 18 }; // 'tb;' 的后面 +const semanticContext = hive.getSemanticContextAtCaretPosition(sql, pos); + +console.log(semanticContext); +``` + +*输出* + +```typescript +/* +{ + isStatementBeginning: true, +} +*/ +``` + +目前能收集到的语义上下文信息如下,如果有更多的需求,欢迎提[issue](https://github.com/DTStack/dt-sql-parser/issues) +- `isStatementBeginning` 当前输入位置是否为一条语句的开头 + +默认情况下,`isStatementBeginning` 的收集策略为`SqlSplitStrategy.STRICT` + +有两种可选策略: +- `SqlSplitStrategy.STRICT` 严格策略, 仅以语句分隔符`;`作为上一条语句结束的标识 +- `SqlSplitStrategy.LOOSE` 宽松策略, 以语法解析树为基础分割SQL + +两种策略的差异: +如输入SQL为 +```sql +CREATE TABLE tb (id INT) + +SELECT +``` +CREATE语句后未添加分号,那么当获取SELECT后的语义上下文时, +在`SqlSplitStrategy.STRICT`策略下`isStatementBeginning` 为`false`, 因为CREATE语句未以分号结尾,那么会被认为这条语句尚未结束; +在`SqlSplitStrategy.LOOSE`策略下`isStatementBeginning` 为`true`, 因为语法解析树中这条SQL被拆分成了CREATE独立语句与SELECT独立语句。 + +可以通过第三个`options`参数设置策略: +```typescript +hive.getSemanticContextAtCaretPosition(sql, pos, { sqlSplitStrategy: SqlSplitStrategy.LOOSE }); +``` + ### 其他 API - `createLexer` 创建一个 Antlr4 Lexer 实例并返回; diff --git a/README.md b/README.md index 7d65aa8d0..30a472b18 100644 --- a/README.md +++ b/README.md @@ -357,6 +357,57 @@ Call the `getAllEntities` method on the SQL instance, and pass in the sql text a Position is not required, if the position is passed, then in the collected entities, if the entity is located under the statement where the corresponding position is located, then the statement object to which the entity belongs 
will be marked with `isContainCaret`, which can help you quickly filter out the required entities when combined with the code completion function. +### Get semantic context information + +Call the `getSemanticContextAtCaretPosition` method on the SQL instance, passing in the sql text and the line and column numbers at the specified position, for example: + +```typescript +import { HiveSQL } from 'dt-sql-parser'; + +const hive = new HiveSQL(); +const sql = 'SELECT * FROM tb;'; +const pos = { lineNumber: 1, column: 18 }; // after 'tb;' +const semanticContext = hive.getSemanticContextAtCaretPosition(sql, pos); + +console.log(semanticContext); +``` + +*output* + +```typescript +/* +{ + isStatementBeginning: true, +} +*/ +``` + +Currently, the semantic context information that can be collected is as follows. If there are more requirements, please submit an [issue](https://github.com/DTStack/dt-sql-parser/issues). + +- `isStatementBeginning` Whether the current input position is the beginning of a statement + +The **default strategy** for `isStatementBeginning` is `SqlSplitStrategy.STRICT`. + +There are two optional strategies: +- `SqlSplitStrategy.STRICT` Strict strategy, only the statement delimiter `;` is used as the identifier for the end of the previous statement +- `SqlSplitStrategy.LOOSE` Loose strategy, based on the syntax parsing tree to split SQL + +The difference between the two strategies: +For example, if the input SQL is: +```sql +CREATE TABLE tb (id INT) + +SELECT +``` +The CREATE statement has no trailing semicolon, so when the semantic context is requested after `SELECT`: under the `SqlSplitStrategy.STRICT` strategy, `isStatementBeginning` is `false`, because the CREATE statement is not terminated by a semicolon and is therefore considered unfinished. + +Under the `SqlSplitStrategy.LOOSE` strategy, `isStatementBeginning` is `true`, because the syntax parsing tree splits the SQL into two independent statements: CREATE and SELECT. 
+ +You can set the strategy through the third `options` parameter: +```typescript +hive.getSemanticContextAtCaretPosition(sql, pos, { sqlSplitStrategy: SqlSplitStrategy.LOOSE }); +``` + ### Other API - `createLexer` Create an instance of Antlr4 Lexer and return it; diff --git a/benchmark/benchmark.config.ts b/benchmark/benchmark.config.ts index f77731eb9..9a1b10c74 100644 --- a/benchmark/benchmark.config.ts +++ b/benchmark/benchmark.config.ts @@ -80,6 +80,11 @@ const testFiles: TestFile[] = [ includes: ['flink'], testTypes: ['getSuggestionAtCaretPosition'], }, + { + name: 'Collect Semantics', + sqlFileName: 'select.sql', + testTypes: ['getSemanticContextAtCaretPosition'], + }, ]; export default { diff --git a/benchmark/data/params.json b/benchmark/data/params.json index 5e4525c15..5ceb91e37 100644 --- a/benchmark/data/params.json +++ b/benchmark/data/params.json @@ -6,5 +6,8 @@ "suggestion_flink": { "getAllEntities": ["$sql", { "lineNumber": 1020, "column": 38 }], "getSuggestionAtCaretPosition": ["$sql", { "lineNumber": 1020, "column": 38 }] + }, + "select": { + "getSemanticContextAtCaretPosition": ["$sql", { "lineNumber": 997, "column": 25 }] } } diff --git a/benchmark_reports/cold_start/flink.benchmark.md b/benchmark_reports/cold_start/flink.benchmark.md index b286177b8..d3764583a 100644 --- a/benchmark_reports/cold_start/flink.benchmark.md +++ b/benchmark_reports/cold_start/flink.benchmark.md @@ -4,16 +4,16 @@ FlinkSQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,16 +21,17 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 227 | -|Query Collection| validate | 1015 | 221 | -| Insert Columns 
| getAllTokens | 1001 | 65 | -| Insert Columns | validate | 1001 | 65 | -| Create Table | getAllTokens | 1004 | 27 | -| Create Table | validate | 1004 | 26 | -| Split SQL | splitSQLByStatement | 999 | 52 | -|Collect Entities| getAllEntities | 1056 | 141 | -| Suggestion |getSuggestionAtCaretPosition| 1056 | 131 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 257 | +| Query Collection| validate | 1015 | 277 | +| Insert Columns | getAllTokens | 1001 | 66 | +| Insert Columns | validate | 1001 | 67 | +| Create Table | getAllTokens | 1004 | 27 | +| Create Table | validate | 1004 | 28 | +| Split SQL | splitSQLByStatement | 999 | 53 | +| Collect Entities| getAllEntities | 1056 | 191 | +| Suggestion | getSuggestionAtCaretPosition | 1056 | 185 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 247 | diff --git a/benchmark_reports/cold_start/hive.benchmark.md b/benchmark_reports/cold_start/hive.benchmark.md index 59aef79d0..bd318df84 100644 --- a/benchmark_reports/cold_start/hive.benchmark.md +++ b/benchmark_reports/cold_start/hive.benchmark.md @@ -4,16 +4,16 @@ HiveSQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 185 | -|Query Collection| validate | 1015 | 179 | -| Update Table | getAllTokens | 1011 | 112 | -| Update Table | validate | 1011 | 109 | -| Insert Columns | getAllTokens | 1001 | 329 | -| Insert Columns | validate | 1001 | 329 | -| Create Table | getAllTokens | 1002 | 21 | -| Create 
Table | validate | 1002 | 20 | -| Split SQL | splitSQLByStatement | 1001 | 72 | -|Collect Entities| getAllEntities | 1066 | 106 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 100 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 194 | +| Query Collection| validate | 1015 | 194 | +| Update Table | getAllTokens | 1011 | 126 | +| Update Table | validate | 1011 | 119 | +| Insert Columns | getAllTokens | 1001 | 326 | +| Insert Columns | validate | 1001 | 323 | +| Create Table | getAllTokens | 1002 | 21 | +| Create Table | validate | 1002 | 20 | +| Split SQL | splitSQLByStatement | 1001 | 71 | +| Collect Entities| getAllEntities | 1066 | 338 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 148 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 201 | diff --git a/benchmark_reports/cold_start/impala.benchmark.md b/benchmark_reports/cold_start/impala.benchmark.md index c9b5f0ed4..4c4cb6f61 100644 --- a/benchmark_reports/cold_start/impala.benchmark.md +++ b/benchmark_reports/cold_start/impala.benchmark.md @@ -4,16 +4,16 @@ ImpalaSQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 71 | -|Query Collection| validate | 1015 | 71 | -| Update Table | getAllTokens | 1011 | 113 | -| Update Table | validate | 1011 | 108 | -| Insert Columns | getAllTokens | 1001 | 208 | -| Insert Columns | validate | 1001 | 213 | -| Create Table | getAllTokens | 1002 | 23 | -| Create Table | validate | 1002 | 23 
| -| Split SQL | splitSQLByStatement | 1001 | 65 | -|Collect Entities| getAllEntities | 1066 | 82 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 83 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 77 | +| Query Collection| validate | 1015 | 72 | +| Update Table | getAllTokens | 1011 | 120 | +| Update Table | validate | 1011 | 121 | +| Insert Columns | getAllTokens | 1001 | 218 | +| Insert Columns | validate | 1001 | 217 | +| Create Table | getAllTokens | 1002 | 25 | +| Create Table | validate | 1002 | 25 | +| Split SQL | splitSQLByStatement | 1001 | 67 | +| Collect Entities| getAllEntities | 1066 | 93 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 101 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 80 | diff --git a/benchmark_reports/cold_start/mysql.benchmark.md b/benchmark_reports/cold_start/mysql.benchmark.md index 93bbc7b22..184191377 100644 --- a/benchmark_reports/cold_start/mysql.benchmark.md +++ b/benchmark_reports/cold_start/mysql.benchmark.md @@ -4,16 +4,16 @@ MySQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 1281 | -|Query Collection| validate | 1015 | 1254 | -| Update Table | getAllTokens | 1011 | 876 | -| Update Table | validate | 1011 | 842 | -| Insert Columns | getAllTokens | 1001 | 261 | -| Insert Columns | validate | 1001 | 266 | -| Create Table | getAllTokens | 1002 | 48 | -| Create Table | validate | 1002 | 45 | -| Split SQL | splitSQLByStatement | 
1001 | 287 | -|Collect Entities| getAllEntities | 1066 | 474 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 462 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 1339 | +| Query Collection| validate | 1015 | 1305 | +| Update Table | getAllTokens | 1011 | 860 | +| Update Table | validate | 1011 | 898 | +| Insert Columns | getAllTokens | 1001 | 282 | +| Insert Columns | validate | 1001 | 284 | +| Create Table | getAllTokens | 1002 | 48 | +| Create Table | validate | 1002 | 50 | +| Split SQL | splitSQLByStatement | 1001 | 305 | +| Collect Entities| getAllEntities | 1066 | 653 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 637 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 1418 | diff --git a/benchmark_reports/cold_start/postgresql.benchmark.md b/benchmark_reports/cold_start/postgresql.benchmark.md index 2065d5e5e..4a3810e74 100644 --- a/benchmark_reports/cold_start/postgresql.benchmark.md +++ b/benchmark_reports/cold_start/postgresql.benchmark.md @@ -4,16 +4,16 @@ PostgreSQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 1086 | -|Query Collection| validate | 1015 | 1078 | -| Update Table | getAllTokens | 1011 | 1193 | -| Update Table | validate | 1011 | 1183 | -| Insert Columns | getAllTokens | 1001 | 539 | -| Insert Columns | validate | 1001 | 565 | -| Create Table | getAllTokens | 1002 | 294 | -| Create Table | validate | 1002 | 275 | -| Split SQL | splitSQLByStatement 
| 1001 | 597 | -|Collect Entities| getAllEntities | 1066 | 797 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 776 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 1008 | +| Query Collection| validate | 1015 | 955 | +| Update Table | getAllTokens | 1011 | 941 | +| Update Table | validate | 1011 | 936 | +| Insert Columns | getAllTokens | 1001 | 534 | +| Insert Columns | validate | 1001 | 547 | +| Create Table | getAllTokens | 1002 | 288 | +| Create Table | validate | 1002 | 288 | +| Split SQL | splitSQLByStatement | 1001 | 522 | +| Collect Entities| getAllEntities | 1066 | 744 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 719 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 941 | diff --git a/benchmark_reports/cold_start/spark.benchmark.md b/benchmark_reports/cold_start/spark.benchmark.md index 73da08c66..f2a3b611b 100644 --- a/benchmark_reports/cold_start/spark.benchmark.md +++ b/benchmark_reports/cold_start/spark.benchmark.md @@ -4,16 +4,16 @@ SparkSQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 268 | -|Query Collection| validate | 1015 | 259 | -| Update Table | getAllTokens | 1011 | 232 | -| Update Table | validate | 1011 | 226 | -| Insert Columns | getAllTokens | 1001 | 198 | -| Insert Columns | validate | 1001 | 200 | -| Create Table | getAllTokens | 1002 | 29 | -| Create Table | validate | 1002 | 30 | -| Split SQL | splitSQLByStatement | 1001 | 111 | -|Collect 
Entities| getAllEntities | 1066 | 170 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 164 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 287 | +| Query Collection| validate | 1015 | 277 | +| Update Table | getAllTokens | 1011 | 264 | +| Update Table | validate | 1011 | 253 | +| Insert Columns | getAllTokens | 1001 | 216 | +| Insert Columns | validate | 1001 | 213 | +| Create Table | getAllTokens | 1002 | 29 | +| Create Table | validate | 1002 | 30 | +| Split SQL | splitSQLByStatement | 1001 | 132 | +| Collect Entities| getAllEntities | 1066 | 298 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 263 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 319 | diff --git a/benchmark_reports/cold_start/trino.benchmark.md b/benchmark_reports/cold_start/trino.benchmark.md index f6be16797..8c62f0b74 100644 --- a/benchmark_reports/cold_start/trino.benchmark.md +++ b/benchmark_reports/cold_start/trino.benchmark.md @@ -4,16 +4,16 @@ TrinoSQL ### Report Time -2024/9/9 19:55:03 +2024/12/18 14:50:08 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Cold Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 89 | -|Query Collection| validate | 1015 | 90 | -| Update Table | getAllTokens | 1011 | 109 | -| Update Table | validate | 1011 | 104 | -| Insert Columns | getAllTokens | 1001 | 202 | -| Insert Columns | validate | 1001 | 202 | -| Create Table | getAllTokens | 1002 | 27 | -| Create Table | validate | 1002 | 26 | -| Split SQL | splitSQLByStatement | 1001 | 71 | -|Collect Entities| getAllEntities | 1066 
| 101 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 94 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 91 | +| Query Collection| validate | 1015 | 94 | +| Update Table | getAllTokens | 1011 | 114 | +| Update Table | validate | 1011 | 127 | +| Insert Columns | getAllTokens | 1001 | 214 | +| Insert Columns | validate | 1001 | 204 | +| Create Table | getAllTokens | 1002 | 28 | +| Create Table | validate | 1002 | 28 | +| Split SQL | splitSQLByStatement | 1001 | 72 | +| Collect Entities| getAllEntities | 1066 | 119 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 117 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 108 | diff --git a/benchmark_reports/hot_start/flink.benchmark.md b/benchmark_reports/hot_start/flink.benchmark.md index 5d42130f2..472d2dbc1 100644 --- a/benchmark_reports/hot_start/flink.benchmark.md +++ b/benchmark_reports/hot_start/flink.benchmark.md @@ -4,16 +4,16 @@ FlinkSQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,16 +21,17 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 26 | -|Query Collection| validate | 1015 | 29 | -| Insert Columns | getAllTokens | 1001 | 61 | -| Insert Columns | validate | 1001 | 58 | -| Create Table | getAllTokens | 1004 | 16 | -| Create Table | validate | 1004 | 15 | -| Split SQL | splitSQLByStatement | 999 | 22 | -|Collect Entities| getAllEntities | 1056 | 30 | -| Suggestion |getSuggestionAtCaretPosition| 1056 | 28 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| 
+|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 25 | +| Query Collection| validate | 1015 | 25 | +| Insert Columns | getAllTokens | 1001 | 60 | +| Insert Columns | validate | 1001 | 61 | +| Create Table | getAllTokens | 1004 | 18 | +| Create Table | validate | 1004 | 17 | +| Split SQL | splitSQLByStatement | 999 | 22 | +| Collect Entities| getAllEntities | 1056 | 70 | +| Suggestion | getSuggestionAtCaretPosition | 1056 | 70 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 26 | diff --git a/benchmark_reports/hot_start/hive.benchmark.md b/benchmark_reports/hot_start/hive.benchmark.md index 061a23970..b421ba2e1 100644 --- a/benchmark_reports/hot_start/hive.benchmark.md +++ b/benchmark_reports/hot_start/hive.benchmark.md @@ -4,16 +4,16 @@ HiveSQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 21 | -|Query Collection| validate | 1015 | 20 | -| Update Table | getAllTokens | 1011 | 22 | -| Update Table | validate | 1011 | 22 | -| Insert Columns | getAllTokens | 1001 | 293 | -| Insert Columns | validate | 1001 | 287 | -| Create Table | getAllTokens | 1002 | 12 | -| Create Table | validate | 1002 | 12 | -| Split SQL | splitSQLByStatement | 1001 | 23 | -|Collect Entities| getAllEntities | 1066 | 20 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 17 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 20 | +| Query 
Collection| validate | 1015 | 20 | +| Update Table | getAllTokens | 1011 | 21 | +| Update Table | validate | 1011 | 21 | +| Insert Columns | getAllTokens | 1001 | 294 | +| Insert Columns | validate | 1001 | 293 | +| Create Table | getAllTokens | 1002 | 12 | +| Create Table | validate | 1002 | 12 | +| Split SQL | splitSQLByStatement | 1001 | 24 | +| Collect Entities| getAllEntities | 1066 | 233 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 58 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 29 | diff --git a/benchmark_reports/hot_start/impala.benchmark.md b/benchmark_reports/hot_start/impala.benchmark.md index 83accc433..f876e0e8e 100644 --- a/benchmark_reports/hot_start/impala.benchmark.md +++ b/benchmark_reports/hot_start/impala.benchmark.md @@ -4,16 +4,16 @@ ImpalaSQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 25 | -|Query Collection| validate | 1015 | 24 | -| Update Table | getAllTokens | 1011 | 24 | -| Update Table | validate | 1011 | 23 | -| Insert Columns | getAllTokens | 1001 | 186 | -| Insert Columns | validate | 1001 | 187 | -| Create Table | getAllTokens | 1002 | 16 | -| Create Table | validate | 1002 | 15 | -| Split SQL | splitSQLByStatement | 1001 | 23 | -|Collect Entities| getAllEntities | 1066 | 21 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 18 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 27 | +| Query Collection| validate | 1015 | 25 | +| 
Update Table | getAllTokens | 1011 | 26 | +| Update Table | validate | 1011 | 24 | +| Insert Columns | getAllTokens | 1001 | 190 | +| Insert Columns | validate | 1001 | 191 | +| Create Table | getAllTokens | 1002 | 15 | +| Create Table | validate | 1002 | 14 | +| Split SQL | splitSQLByStatement | 1001 | 22 | +| Collect Entities| getAllEntities | 1066 | 30 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 27 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 26 | diff --git a/benchmark_reports/hot_start/mysql.benchmark.md b/benchmark_reports/hot_start/mysql.benchmark.md index b0999ee29..51c9a8bf6 100644 --- a/benchmark_reports/hot_start/mysql.benchmark.md +++ b/benchmark_reports/hot_start/mysql.benchmark.md @@ -4,16 +4,16 @@ MySQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 28 | -|Query Collection| validate | 1015 | 29 | -| Update Table | getAllTokens | 1011 | 26 | -| Update Table | validate | 1011 | 26 | -| Insert Columns | getAllTokens | 1001 | 184 | -| Insert Columns | validate | 1001 | 188 | -| Create Table | getAllTokens | 1002 | 23 | -| Create Table | validate | 1002 | 19 | -| Split SQL | splitSQLByStatement | 1001 | 27 | -|Collect Entities| getAllEntities | 1066 | 29 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 23 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 29 | +| Query Collection| validate | 1015 | 29 | +| Update Table | getAllTokens | 1011 | 27 | +| Update 
Table | validate | 1011 | 27 | +| Insert Columns | getAllTokens | 1001 | 181 | +| Insert Columns | validate | 1001 | 184 | +| Create Table | getAllTokens | 1002 | 19 | +| Create Table | validate | 1002 | 20 | +| Split SQL | splitSQLByStatement | 1001 | 28 | +| Collect Entities| getAllEntities | 1066 | 106 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 75 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 31 | diff --git a/benchmark_reports/hot_start/postgresql.benchmark.md b/benchmark_reports/hot_start/postgresql.benchmark.md index a4fc43f09..87973b803 100644 --- a/benchmark_reports/hot_start/postgresql.benchmark.md +++ b/benchmark_reports/hot_start/postgresql.benchmark.md @@ -4,16 +4,16 @@ PostgreSQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 36 | -|Query Collection| validate | 1015 | 37 | -| Update Table | getAllTokens | 1011 | 32 | -| Update Table | validate | 1011 | 31 | -| Insert Columns | getAllTokens | 1001 | 213 | -| Insert Columns | validate | 1001 | 214 | -| Create Table | getAllTokens | 1002 | 18 | -| Create Table | validate | 1002 | 19 | -| Split SQL | splitSQLByStatement | 1001 | 30 | -|Collect Entities| getAllEntities | 1066 | 31 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 24 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 40 | +| Query Collection| validate | 1015 | 44 | +| Update Table | getAllTokens | 1011 | 33 | +| Update Table | validate | 1011 | 
34 | +| Insert Columns | getAllTokens | 1001 | 223 | +| Insert Columns | validate | 1001 | 230 | +| Create Table | getAllTokens | 1002 | 21 | +| Create Table | validate | 1002 | 20 | +| Split SQL | splitSQLByStatement | 1001 | 29 | +| Collect Entities| getAllEntities | 1066 | 41 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 40 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 41 | diff --git a/benchmark_reports/hot_start/spark.benchmark.md b/benchmark_reports/hot_start/spark.benchmark.md index f859015b4..344e4495c 100644 --- a/benchmark_reports/hot_start/spark.benchmark.md +++ b/benchmark_reports/hot_start/spark.benchmark.md @@ -4,16 +4,16 @@ SparkSQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 24 | -|Query Collection| validate | 1015 | 28 | -| Update Table | getAllTokens | 1011 | 22 | -| Update Table | validate | 1011 | 22 | -| Insert Columns | getAllTokens | 1001 | 167 | -| Insert Columns | validate | 1001 | 172 | -| Create Table | getAllTokens | 1002 | 13 | -| Create Table | validate | 1002 | 14 | -| Split SQL | splitSQLByStatement | 1001 | 23 | -|Collect Entities| getAllEntities | 1066 | 30 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 24 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 24 | +| Query Collection| validate | 1015 | 24 | +| Update Table | getAllTokens | 1011 | 23 | +| Update Table | validate | 1011 | 22 | +| Insert Columns | getAllTokens | 1001 | 
172 | +| Insert Columns | validate | 1001 | 168 | +| Create Table | getAllTokens | 1002 | 13 | +| Create Table | validate | 1002 | 16 | +| Split SQL | splitSQLByStatement | 1001 | 25 | +| Collect Entities| getAllEntities | 1066 | 127 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 118 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 27 | diff --git a/benchmark_reports/hot_start/trino.benchmark.md b/benchmark_reports/hot_start/trino.benchmark.md index b14e5fd06..b950d9a30 100644 --- a/benchmark_reports/hot_start/trino.benchmark.md +++ b/benchmark_reports/hot_start/trino.benchmark.md @@ -4,16 +4,16 @@ TrinoSQL ### Report Time -2024/9/9 19:47:32 +2024/12/18 14:59:09 ### Device -macOS 14.4.1 +macOS 15.0.1 (8) arm64 Apple M1 Pro 16.00 GB ### Version `nodejs`: v21.6.1 -`dt-sql-parser`: v4.0.2 +`dt-sql-parser`: v4.1.0-beta.0 `antlr4-c3`: v3.3.7 `antlr4ng`: v2.0.11 @@ -21,18 +21,19 @@ macOS 14.4.1 Hot Start ### Report -| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| -|----------------|----------------------------|--------|----------------| -|Query Collection| getAllTokens | 1015 | 24 | -|Query Collection| validate | 1015 | 24 | -| Update Table | getAllTokens | 1011 | 23 | -| Update Table | validate | 1011 | 25 | -| Insert Columns | getAllTokens | 1001 | 181 | -| Insert Columns | validate | 1001 | 182 | -| Create Table | getAllTokens | 1002 | 17 | -| Create Table | validate | 1002 | 16 | -| Split SQL | splitSQLByStatement | 1001 | 24 | -|Collect Entities| getAllEntities | 1066 | 21 | -| Suggestion |getSuggestionAtCaretPosition| 1066 | 19 | +| Benchmark Name | Method Name |SQL Rows|Average Time(ms)| +|-----------------|---------------------------------|--------|----------------| +| Query Collection| getAllTokens | 1015 | 27 | +| Query Collection| validate | 1015 | 28 | +| Update Table | getAllTokens | 1011 | 25 | +| Update Table | validate | 1011 | 30 | +| Insert Columns | getAllTokens | 1001 | 193 | +| Insert Columns | validate | 1001 | 189 
| +| Create Table | getAllTokens | 1002 | 16 | +| Create Table | validate | 1002 | 16 | +| Split SQL | splitSQLByStatement | 1001 | 25 | +| Collect Entities| getAllEntities | 1066 | 33 | +| Suggestion | getSuggestionAtCaretPosition | 1066 | 30 | +|Collect Semantics|getSemanticContextAtCaretPosition| 1015 | 27 | diff --git a/src/parser/common/basicSQL.ts b/src/parser/common/basicSQL.ts index ccf36e72b..2cc236d99 100644 --- a/src/parser/common/basicSQL.ts +++ b/src/parser/common/basicSQL.ts @@ -14,12 +14,19 @@ import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3'; import { SQLParserBase } from '../../lib/SQLParserBase'; import { findCaretTokenIndex } from './findCaretTokenIndex'; import { ctxToText, tokenToWord, WordRange, TextSlice } from './textAndWord'; -import { CaretPosition, LOCALE_TYPE, Suggestions, SyntaxSuggestion } from './types'; +import { + CaretPosition, + LOCALE_TYPE, + SemanticCollectOptions, + Suggestions, + SyntaxSuggestion, +} from './types'; import { ParseError, ErrorListener } from './parseErrorListener'; import { ErrorStrategy } from './errorStrategy'; import type { SplitListener } from './splitListener'; import type { EntityCollector } from './entityCollector'; import { EntityContext } from './entityCollector'; +import SemanticContextCollector from './semanticContextCollector'; /** * Basic SQL class, every sql needs extends it. @@ -95,6 +102,16 @@ export abstract class BasicSQL< public locale: LOCALE_TYPE = 'en_US'; + /** + * Get a new semanticContextCollector instance. + */ + protected abstract createSemanticContextCollector( + input: string, + caretPosition: CaretPosition, + allTokens: Token[], + options?: SemanticCollectOptions + ): SemanticContextCollector; + /** * Create an antlr4 lexer from input. 
* @param input string @@ -452,4 +469,29 @@ export abstract class BasicSQL< return collectListener.getEntities(); } + + /** + * Get semantic context infos + * @param input source string + * @param caretPosition caret position, such as cursor position + * @param options semantic context options + * @returns analyzed semantic context + */ + public getSemanticContextAtCaretPosition( + input: string, + caretPosition: CaretPosition, + options?: SemanticCollectOptions + ) { + const allTokens = this.getAllTokens(input); + const parseTree = this.parseWithCache(input); + const statementContextListener = this.createSemanticContextCollector( + input, + caretPosition, + allTokens, + options + ); + this.listen(statementContextListener, parseTree); + + return statementContextListener.semanticContext; + } } diff --git a/src/parser/common/semanticContextCollector.ts b/src/parser/common/semanticContextCollector.ts new file mode 100644 index 000000000..23e109bda --- /dev/null +++ b/src/parser/common/semanticContextCollector.ts @@ -0,0 +1,268 @@ +import { ErrorNode, ParserRuleContext, TerminalNode, Token } from 'antlr4ng'; +import { findCaretTokenIndex } from '../common/findCaretTokenIndex'; +import { + CaretPosition, + SemanticCollectOptions, + SemanticContext, + SqlSplitStrategy, +} from '../common/types'; + +export const SQL_SPLIT_SYMBOL_TEXT = ';'; + +abstract class SemanticContextCollector { + constructor( + _input: string, + caretPosition: CaretPosition, + allTokens: Token[], + options?: SemanticCollectOptions + ) { + // If caretPosition token is whiteSpace, tokenIndex may be undefined. + const tokenIndex = findCaretTokenIndex(caretPosition, allTokens); + + if (tokenIndex !== undefined) { + this._tokenIndex = tokenIndex; + } + this._allTokens = allTokens; + this.options = { + ...this.options, + ...options, + }; + + if (allTokens?.length) { + let i = tokenIndex ? tokenIndex - 1 : allTokens.length - 1; + /** + * Link to @case4 and @case5 + * Find the previous unhidden token. 
+ * If can't find tokenIndex or current token is whiteSpace at caretPosition, + * prevTokenIndex is useful to help us determine if it is beginning of statement. + */ + while (i >= 0) { + if ( + allTokens[i].channel !== Token.HIDDEN_CHANNEL && + (allTokens[i].line < caretPosition.lineNumber || + (allTokens[i].line === caretPosition.lineNumber && + allTokens[i].column < caretPosition.column)) + ) { + this._prevTokenIndex = allTokens[i].tokenIndex; + break; + } + i--; + } + + /** + * We can directly conclude beginning of statement semantics when current token is + * the first token of tokenStream or the previous token is semicolon + */ + if ( + tokenIndex === 0 || + i === -1 || + (this._prevTokenIndex && + this._allTokens[this._prevTokenIndex].text === SQL_SPLIT_SYMBOL_TEXT) + ) { + this._isStatementBeginning = true; + } + } + } + + public readonly options: SemanticCollectOptions = { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + }; + + private _tokenIndex: number; + private _allTokens: Token[] = []; + + /** + * If current caret position is in a beginning of statement semantics, it needs to follow some cases: + * @case1 there is no statement node with an error before the current statement in the parse tree; + * + * @case2 if it is an uncomplete keyword, it will be parsed as an `ErrorNode` + * and need be a direct child node of `program`; + * + * @case3 if it is a complete keyword, the parsed TerminalNode or ErrorNode should be + * the first leaf node of current statement rule; + * + * @case4 if it is whiteSpace in caret position, we can't visit it in antlr4 listener, + * so we find the first unhidden token before the whiteSpace token, and the unhidden token + * should be the last leaf node of statement its belongs to; + * + * @case5 if the previous token is split symbol like `;`, ignore case1 and forcefully judged as beginning of statement. 
+ */ + private _isStatementBeginning: boolean = false; + + /** + * Prev tokenIndex that not white space before current tokenIndex or caret position + */ + private _prevTokenIndex: number; + + public get semanticContext(): SemanticContext { + return { + isStatementBeginning: this._isStatementBeginning, + }; + } + + abstract getWhiteSpaceRuleType(): number; + + abstract getStatementRuleType(): number; + + private prevStatementHasError(node: TerminalNode | ErrorNode | ParserRuleContext) { + let parent = node.parent as ParserRuleContext; + if (!parent) return false; + + const currentNodeIndex = parent.children!.findIndex((child) => child === node); + if (currentNodeIndex <= 0) return false; + + for (let i = currentNodeIndex - 1; i >= 0; i--) { + const prevNode = parent.children![i]; + if ( + prevNode instanceof ErrorNode || + (prevNode instanceof ParserRuleContext && prevNode.exception !== null) + ) + return true; + } + + return false; + } + + /** + * Most root rule is `program`. + */ + private isRootRule(node: TerminalNode | ErrorNode | ParserRuleContext) { + return node instanceof ParserRuleContext && node?.parent === null; + } + + /** + * link to @case4 + * It should be called in each language's own `enterStatement`. + */ + protected visitStatement(ctx: ParserRuleContext) { + if (this.options.sqlSplitStrategy === SqlSplitStrategy.STRICT) return; + + const isWhiteSpaceToken = + this._tokenIndex === undefined || + this._allTokens[this._tokenIndex]?.type === this.getWhiteSpaceRuleType() || + // PostgreSQL whiteSpace not inlcudes '\n' symbol + this._allTokens[this._tokenIndex]?.text === '\n'; + + const isPrevTokenEndOfStatement = + this._prevTokenIndex && ctx.stop?.tokenIndex === this._prevTokenIndex; + + if (isWhiteSpaceToken && isPrevTokenEndOfStatement && ctx.exception === null) { + this._isStatementBeginning = !this.prevStatementHasError(ctx) + ? 
true + : this._isStatementBeginning; + } + } + + /** + * Uncomplete keyword will be error node + */ + visitErrorNode(node: ErrorNode): void { + if ( + node.symbol.tokenIndex !== this._tokenIndex || + this._isStatementBeginning || + this.options.sqlSplitStrategy === SqlSplitStrategy.STRICT + ) + return; + + let parent: ParserRuleContext | null = node.parent as ParserRuleContext; + let currentNode: TerminalNode | ParserRuleContext = node; + + /** + * Link to @case2 + * The error node is a direct child node of the program node + */ + if (this.isRootRule(parent)) { + this._isStatementBeginning = !this.prevStatementHasError(currentNode); + return; + } + + /** + * Link to @case3 + * Error node must be the first leaf node of the statement parse tree. + **/ + while (parent !== null && parent.ruleIndex !== this.getStatementRuleType()) { + if (parent.children?.[0] !== currentNode) { + this._isStatementBeginning = false; + return; + } + + currentNode = parent; + parent = currentNode.parent; + } + + let isStatementBeginning = true; + + /** + * Link to @case1 + * Previous statement must have no exception + */ + if (parent?.ruleIndex === this.getStatementRuleType()) { + const programRule = parent.parent; + const currentStatementRuleIndex = + programRule?.children?.findIndex((node) => node === parent) || -1; + if (currentStatementRuleIndex > 0) { + /** + * When you typed a keyword and doesn't match any rule, you will get a EOF error, + * For example, just typed 'CREATE', 'INSERT'. + */ + const isStatementEOF = parent.exception?.offendingToken?.text === ''; + isStatementBeginning = + this.prevStatementHasError(parent) && !isStatementEOF + ? 
false + : isStatementBeginning; + } + } + + this._isStatementBeginning = isStatementBeginning; + } + + visitTerminal(node: TerminalNode): void { + if ( + node.symbol.tokenIndex !== this._tokenIndex || + this._isStatementBeginning || + this.options.sqlSplitStrategy === SqlSplitStrategy.STRICT + ) + return; + + let currentNode: TerminalNode | ParserRuleContext = node; + let parent = node.parent as ParserRuleContext | null; + + /** + * Link to @case3 + * Current terminal node must be the first leaf node of the statement parse tree. + **/ + while (parent !== null && parent.ruleIndex !== this.getStatementRuleType()) { + if (parent.children?.[0] !== currentNode) { + this._isStatementBeginning = false; + return; + } + + currentNode = parent; + parent = currentNode.parent!; + } + + let isStatementBeginning = true; + + /** + * Link to @case1 + * Previous statement must have no exception + */ + if (parent?.ruleIndex === this.getStatementRuleType()) { + const programRule = parent.parent; + const currentStatementRuleIndex = + programRule?.children?.findIndex((node) => node === parent) || -1; + if (currentStatementRuleIndex > 0) { + isStatementBeginning = this.prevStatementHasError(parent) + ? 
false + : isStatementBeginning; + } + } + + this._isStatementBeginning = isStatementBeginning; + } + + enterEveryRule(_node: ParserRuleContext): void {} + exitEveryRule(_node: ParserRuleContext): void {} +} + +export default SemanticContextCollector; diff --git a/src/parser/common/types.ts b/src/parser/common/types.ts index ffb7bdb65..8d2d2e67f 100644 --- a/src/parser/common/types.ts +++ b/src/parser/common/types.ts @@ -69,3 +69,31 @@ export interface Suggestions { } export type LOCALE_TYPE = 'zh_CN' | 'en_US'; + +export interface SemanticContext { + isStatementBeginning: boolean; +} + +export enum SqlSplitStrategy { + /** Only end the statement with semicolon symbol */ + STRICT, + /** Based on parse tree to split statements */ + LOOSE, +} + +export interface SemanticCollectOptions { + /** + * `sqlSplitStrategy` will affects the result of `isStatementBeginning`; + * + * For example: + * + * The sql is "select id from t1 create\" + * + * - `SqlSplitStrategy.STRICT`: split symbol `;` is missing after select statement so that it considerd as one statement, and `isStatementBeginning` is false + * + * - `SqlSplitStrategy.LOOSE`: in parse tree, it will parse to "select id from t1" and "create" two single statement, so `isStatementBeginning` is true + * + * @default SqlSplitStrategy.STRICT + */ + sqlSplitStrategy?: SqlSplitStrategy; +} diff --git a/src/parser/flink/flinkSemanticContextCollector.ts b/src/parser/flink/flinkSemanticContextCollector.ts new file mode 100644 index 000000000..2a73c14fe --- /dev/null +++ b/src/parser/flink/flinkSemanticContextCollector.ts @@ -0,0 +1,20 @@ +import { FlinkSqlParserListener } from '../../lib'; +import { FlinkSqlParser, SingleStatementContext } from '../../lib/flink/FlinkSqlParser'; +import SemanticContextCollector from '../common/semanticContextCollector'; + +class FlinkSemanticContextCollector + extends SemanticContextCollector + implements FlinkSqlParserListener +{ + override getWhiteSpaceRuleType(): number { + return 
FlinkSqlParser.SPACE; + } + override getStatementRuleType(): number { + return FlinkSqlParser.RULE_singleStatement; + } + enterSingleStatement(ctx: SingleStatementContext) { + this.visitStatement(ctx); + } +} + +export { FlinkSemanticContextCollector }; diff --git a/src/parser/flink/index.ts b/src/parser/flink/index.ts index 17c6af3d1..54f1203b4 100644 --- a/src/parser/flink/index.ts +++ b/src/parser/flink/index.ts @@ -3,13 +3,20 @@ import { CharStream, CommonTokenStream, Token } from 'antlr4ng'; import { processTokenCandidates } from '../common/tokenUtils'; import { FlinkSqlLexer } from '../../lib/flink/FlinkSqlLexer'; import { FlinkSqlParser, ProgramContext } from '../../lib/flink/FlinkSqlParser'; +import { + CaretPosition, + EntityContextType, + SemanticCollectOptions, + Suggestions, + SyntaxSuggestion, +} from '../common/types'; import { BasicSQL } from '../common/basicSQL'; import { StmtContextType } from '../common/entityCollector'; import { ErrorListener } from '../common/parseErrorListener'; -import { EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types'; import { FlinkEntityCollector } from './flinkEntityCollector'; import { FlinkErrorListener } from './flinkErrorListener'; import { FlinkSqlSplitListener } from './flinkSplitListener'; +import { FlinkSemanticContextCollector } from './flinkSemanticContextCollector'; export { FlinkEntityCollector, FlinkSqlSplitListener }; @@ -48,6 +55,15 @@ export class FlinkSQL extends BasicSQL { return new MySqlEntityCollector(input, allTokens, caretTokenIndex); } + protected createSemanticContextCollector( + input: string, + caretPosition: CaretPosition, + allTokens: Token[], + options?: SemanticCollectOptions + ) { + return new MySqlSemanticContextCollector(input, caretPosition, allTokens, options); + } + protected processCandidates( candidates: CandidatesCollection, allTokens: Token[], diff --git a/src/parser/mysql/mysqlSemanticContextCollector.ts b/src/parser/mysql/mysqlSemanticContextCollector.ts 
new file mode 100644 index 000000000..27b3a9ec9 --- /dev/null +++ b/src/parser/mysql/mysqlSemanticContextCollector.ts @@ -0,0 +1,20 @@ +import { MySqlParserListener } from '../../lib'; +import { MySqlParser, SingleStatementContext } from '../../lib/mysql/MySqlParser'; +import SemanticContextCollector from '../common/semanticContextCollector'; + +class MySqlSemanticContextCollector + extends SemanticContextCollector + implements MySqlParserListener +{ + override getWhiteSpaceRuleType(): number { + return MySqlParser.SPACE; + } + override getStatementRuleType(): number { + return MySqlParser.RULE_singleStatement; + } + enterSingleStatement(ctx: SingleStatementContext) { + this.visitStatement(ctx); + } +} + +export { MySqlSemanticContextCollector }; diff --git a/src/parser/postgresql/index.ts b/src/parser/postgresql/index.ts index 779610926..03fc659fd 100644 --- a/src/parser/postgresql/index.ts +++ b/src/parser/postgresql/index.ts @@ -4,13 +4,20 @@ import { processTokenCandidates } from '../common/tokenUtils'; import { PostgreSqlLexer } from '../../lib/postgresql/PostgreSqlLexer'; import { PostgreSqlParser, ProgramContext } from '../../lib/postgresql/PostgreSqlParser'; +import { + CaretPosition, + EntityContextType, + SemanticCollectOptions, + Suggestions, + SyntaxSuggestion, +} from '../common/types'; import { BasicSQL } from '../common/basicSQL'; import { StmtContextType } from '../common/entityCollector'; import { ErrorListener } from '../common/parseErrorListener'; -import { EntityContextType, Suggestions, SyntaxSuggestion } from '../common/types'; import { PostgreSqlEntityCollector } from './postgreEntityCollector'; import { PostgreSqlErrorListener } from './postgreErrorListener'; import { PostgreSqlSplitListener } from './postgreSplitListener'; +import { PostgreSemanticContextCollector } from './postgreSemanticContextCollector'; export { PostgreSqlEntityCollector, PostgreSqlSplitListener }; @@ -52,6 +59,15 @@ export class PostgreSQL extends BasicSQL = new Set([ 
TrinoSqlParser.RULE_catalogRef, TrinoSqlParser.RULE_catalogNameCreate, diff --git a/src/parser/trino/trinoSemanticContextCollector.ts b/src/parser/trino/trinoSemanticContextCollector.ts new file mode 100644 index 000000000..3ba9cb5ff --- /dev/null +++ b/src/parser/trino/trinoSemanticContextCollector.ts @@ -0,0 +1,17 @@ +import { TrinoSqlListener } from '../../lib'; +import { StatementsContext, TrinoSqlParser } from '../../lib/trino/TrinoSqlParser'; +import SemanticContextCollector from '../common/semanticContextCollector'; + +class TrinoSemanticContextCollector extends SemanticContextCollector implements TrinoSqlListener { + override getWhiteSpaceRuleType(): number { + return TrinoSqlParser.WS; + } + override getStatementRuleType(): number { + return TrinoSqlParser.RULE_statements; + } + enterStatements(ctx: StatementsContext) { + this.visitStatement(ctx); + } +} + +export { TrinoSemanticContextCollector }; diff --git a/test/helper.ts b/test/helper.ts index 146bdc362..05868f4f9 100644 --- a/test/helper.ts +++ b/test/helper.ts @@ -32,6 +32,67 @@ export const readSQL = (dirname: string, fileName: string) => { return result; }; +/** + * Read a sql string with special range + * @param range line and column start from 1 + */ +export const readSQLByRange = ( + sqlSource: { + dirname?: string; + fileName?: string; + sql?: string; + }, + range: { startLine: number; endLine: number; startColumn?: number; endColumn?: number } +) => { + const { dirname, fileName, sql } = sqlSource; + const { startLine, endLine, startColumn, endColumn } = range; + + if (endLine < startLine) throw new RangeError('endLine must greater or equal than startLine!'); + if (!sql && (!dirname || !fileName)) + throw new Error('A sql input or file info params is required!'); + + const content = + sql !== undefined + ? 
sql + : fs.readFileSync(path.join(dirname, 'fixtures', fileName), 'utf-8'); + let index = 0; + let middleText = ''; + let startLineText = ''; + let endLineText = ''; + let currLine = 1; + + while (index < content.length && currLine <= endLine) { + const char = content[index]; + if (char === '\n') { + currLine++; + } + + if (currLine === startLine) { + // The line break at the beginning needs to be discarded. + if (!(char === '\n' && startLineText === '')) { + startLineText += char; + } + } else if ( + currLine > startLine && + (currLine < endLine || (currLine === endLine && char === '\n')) + ) { + middleText += char; + } else if (currLine === endLine && startLine !== endLine) { + endLineText += char; + } + + index++; + } + + startLineText = startLineText.slice( + startColumn !== undefined ? startColumn - 1 : 0, + endLine === startLine && endColumn !== undefined ? endColumn - 1 : undefined + ); + endLineText = endLineText.slice(0, endColumn !== undefined ? endColumn - 1 : undefined); + + return startLineText + middleText + endLineText; +}; + export function commentOtherLine(sqlContent: string, line: number) { const slices = sqlContent.split('\n').map((item, index) => { if (index !== line - 1) { diff --git a/test/parser/flink/contextCollect/fixtures/semantic.sql b/test/parser/flink/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..233da1b8e --- /dev/null +++ b/test/parser/flink/contextCollect/fixtures/semantic.sql @@ -0,0 +1,35 @@ +CREA + +CREATE + +INSERT INTO t1 SEL + +INSERT INTO t1 SELECT + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE t1 ( + id INT, +) WITH ( + 'connector' = 'kafka', +) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT) WITH ('connector' = 'kafka'); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) WITH ('connector' = 'kafka'); +CREATE TABLE +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) WITH ('connector' = 'kafka') +CREATE \ No newline at end of file diff --git 
a/test/parser/flink/contextCollect/semanticContextCollector.test.ts b/test/parser/flink/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..e02e8c8ae --- /dev/null +++ b/test/parser/flink/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ +import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { FlinkSQL } from 'src/parser/flink'; +import { readSQLByRange } from 'test/helper'; + +describe('Flink semantic context collector tests', () => { + const flinkSql = new FlinkSQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 3, endLine: 3 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 22, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 22, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space 
after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 22 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 6, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 24, endLine: 24 }); + // typed keyword + const ctx1 = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = 
flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 32 }); + const { isStatementBeginning } = flinkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 13, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 34, endLine: 35 }); + const { isStatementBeginning: isStatementBeginning1 } = + flinkSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + flinkSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +}); diff --git a/test/parser/hive/contextCollect/fixtures/semantic.sql b/test/parser/hive/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..f75cfb8e3 --- /dev/null +++ b/test/parser/hive/contextCollect/fixtures/semantic.sql @@ -0,0 +1,31 @@ +CREA + +CREATE + +CREATE TABLE a A + +CREATE TABLE a AS + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE IF NOT EXISTS a1(id INT, ) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT); +CREATE VIEW +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) +CREATE VIEW diff --git a/test/parser/hive/contextCollect/semanticContextCollector.test.ts b/test/parser/hive/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..5c2854444 --- /dev/null +++ b/test/parser/hive/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ 
+import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { HiveSQL } from 'src/parser/hive'; +import { readSQLByRange } from 'test/helper'; + +describe('Hive semantic context collector tests', () => { + const hiveSql = new HiveSQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 3, endLine: 3 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 22, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 18, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement 
after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 18 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 20, endLine: 20 }); + // typed keyword + const ctx1 = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 22, endLine: 24 }); + const { isStatementBeginning } = hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = 
hiveSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 31 }); + const { isStatementBeginning: isStatementBeginning1 } = + hiveSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + hiveSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +}); diff --git a/test/parser/impala/contextCollect/fixtures/semantic.sql b/test/parser/impala/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..9dfc27172 --- /dev/null +++ b/test/parser/impala/contextCollect/fixtures/semantic.sql @@ -0,0 +1,31 @@ +CREA + +CREATE + +CREATE TABLE a AS SEL + +CREATE TABLE a AS + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE IF NOT EXISTS a1(id INT, ) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT); +CREATE VIEW +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) +CREATE VIEW \ No newline at end of file diff --git a/test/parser/impala/contextCollect/semanticContextCollector.test.ts b/test/parser/impala/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..ac7a13fd6 --- /dev/null +++ b/test/parser/impala/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ +import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { ImpalaSQL } from 'src/parser/impala'; +import { readSQLByRange } from 'test/helper'; + +describe('Impala semantic context collector tests', () => { + const 
impalaSql = new ImpalaSQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 3, endLine: 3 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 17, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 18, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + 
expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 18 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 20, endLine: 20 }); + // typed keyword + const ctx1 = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 22, endLine: 24 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = impalaSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 31 
}); + const { isStatementBeginning: isStatementBeginning1 } = + impalaSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + impalaSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +}); diff --git a/test/parser/mysql/contextCollect/fixtures/semantic.sql b/test/parser/mysql/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..0e255e192 --- /dev/null +++ b/test/parser/mysql/contextCollect/fixtures/semantic.sql @@ -0,0 +1,31 @@ +CREA + +CREATE + +INSERT IN + +CREATE TABLE a AS + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE IF NOT EXISTS a1(id INT, ) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT); +CREATE VIEW +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) +CREATE VIEW \ No newline at end of file diff --git a/test/parser/mysql/contextCollect/semanticContextCollector.test.ts b/test/parser/mysql/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..f281c5b48 --- /dev/null +++ b/test/parser/mysql/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ +import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { MySQL } from 'src/parser/mysql'; +import { readSQLByRange } from 'test/helper'; + +describe('MySQL semantic context collector tests', () => { + const mySQL = new MySQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { 
isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 3, endLine: 3 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 10, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 18, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 
2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 18 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 20, endLine: 20 }); + // typed keyword + const ctx1 = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 22, endLine: 24 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = mySQL.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 31 }); + const { isStatementBeginning: isStatementBeginning1 } = + mySQL.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + 
mySQL.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +}); diff --git a/test/parser/postgresql/contextCollect/fixtures/semantic.sql b/test/parser/postgresql/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..279f09b0f --- /dev/null +++ b/test/parser/postgresql/contextCollect/fixtures/semantic.sql @@ -0,0 +1,31 @@ +CREA + +CREATE + +ALTER TAB + +ALTER TABLE + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE IF NOT EXISTS a1(id INT, ) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT); +CREATE VIEW +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) +CREATE VIEW \ No newline at end of file diff --git a/test/parser/postgresql/contextCollect/semanticContextCollector.test.ts b/test/parser/postgresql/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..ff3774e35 --- /dev/null +++ b/test/parser/postgresql/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ +import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { PostgreSQL } from 'src/parser/postgresql'; +import { readSQLByRange } from 'test/helper'; + +describe('PostgreSQL semantic context collector tests', () => { + const postgreSql = new PostgreSQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { 
startLine: 3, endLine: 3 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 10, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 18 }); + const 
{ isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 20, endLine: 20 }); + // typed keyword + const ctx1 = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 22, endLine: 24 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = postgreSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 31 }); + const { isStatementBeginning: isStatementBeginning1 } = + postgreSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + postgreSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +}); diff --git 
a/test/parser/spark/contextCollect/fixtures/semantic.sql b/test/parser/spark/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..88dbd568f --- /dev/null +++ b/test/parser/spark/contextCollect/fixtures/semantic.sql @@ -0,0 +1,31 @@ +CREA + +CREATE + +CREATE TABLE a A + +CREATE TABLE a AS + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE IF NOT EXISTS a1(id INT, ) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT); +CREATE VIEW +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) +CREATE VIEW \ No newline at end of file diff --git a/test/parser/spark/contextCollect/semanticContextCollector.test.ts b/test/parser/spark/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..ca9878bfb --- /dev/null +++ b/test/parser/spark/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ +import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { SparkSQL } from 'src/parser/spark'; +import { readSQLByRange } from 'test/helper'; + +describe('Spark semantic context collector tests', () => { + const sparkSql = new SparkSQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 3, endLine: 3 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete 
keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 22, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 18, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 18 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + 
const sql = readSQLByRange({ sql: text }, { startLine: 20, endLine: 20 }); + // typed keyword + const ctx1 = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 22, endLine: 24 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = sparkSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 31 }); + const { isStatementBeginning: isStatementBeginning1 } = + sparkSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + sparkSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +}); diff --git a/test/parser/trino/contextCollect/fixtures/semantic.sql b/test/parser/trino/contextCollect/fixtures/semantic.sql new file mode 100644 index 000000000..88dbd568f --- /dev/null +++ b/test/parser/trino/contextCollect/fixtures/semantic.sql @@ -0,0 +1,31 @@ +CREA + +CREATE + +CREATE TABLE a A 
+ +CREATE TABLE a AS + +INSERT + +SELECT id FROM t1; +CRE + +SELECT id FROM t2; + + +CREATE TABLE IF NOT EXISTS a1(id INT, ) +SEL + +CREATE; SEL + +CREATE TABLE a1(id INT); +SEL +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT); +CREATE VIEW +INSERT INTO t1 VALUES(1); + +CREATE TABLE a1(id INT) +CREATE VIEW \ No newline at end of file diff --git a/test/parser/trino/contextCollect/semanticContextCollector.test.ts b/test/parser/trino/contextCollect/semanticContextCollector.test.ts new file mode 100644 index 000000000..540a1216a --- /dev/null +++ b/test/parser/trino/contextCollect/semanticContextCollector.test.ts @@ -0,0 +1,146 @@ +import fs from 'fs'; +import path from 'path'; +import { SqlSplitStrategy } from 'src/parser/common/types'; +import { TrinoSQL } from 'src/parser/trino'; +import { readSQLByRange } from 'test/helper'; + +describe('Trino semantic context collector tests', () => { + const trinoSql = new TrinoSQL(); + const text = fs.readFileSync(path.join(__dirname, 'fixtures', 'semantic.sql'), 'utf-8'); + + test('beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 1, endLine: 1 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 5, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 3, endLine: 3 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 7, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement with uncomplete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 5, endLine: 5 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 22, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning 
of statement with complete keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 7, endLine: 7 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 18, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('not beginning of statement if type white space after keyword', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 9, endLine: 9 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 8, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement after an exists statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 11, endLine: 12 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement after an exists statement and typed white space', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 14, endLine: 15 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 2, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement if previous statement exists error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 17, endLine: 18 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('beginning of statement if previous token text is semicolon even if has error', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 20, endLine: 20 }); + // typed keyword + const ctx1 = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 13, + }); + expect(ctx1.isStatementBeginning).toBeTruthy(); + + // typed white space + const ctx2 = 
trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 1, + column: 9, + }); + expect(ctx2.isStatementBeginning).toBeTruthy(); + }); + + test('beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 22, endLine: 24 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 4, + }); + expect(isStatementBeginning).toBeTruthy(); + }); + + test('not beginning of statement between two statement', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 26, endLine: 28 }); + const { isStatementBeginning } = trinoSql.getSemanticContextAtCaretPosition(sql, { + lineNumber: 2, + column: 12, + }); + expect(isStatementBeginning).toBeFalsy(); + }); + + test('test sqlSplitStrategy', () => { + const sql = readSQLByRange({ sql: text }, { startLine: 30, endLine: 31 }); + const { isStatementBeginning: isStatementBeginning1 } = + trinoSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.LOOSE, + } + ); + expect(isStatementBeginning1).toBeTruthy(); + + const { isStatementBeginning: isStatementBeginning2 } = + trinoSql.getSemanticContextAtCaretPosition( + sql, + { + lineNumber: 2, + column: 7, + }, + { + sqlSplitStrategy: SqlSplitStrategy.STRICT, + } + ); + expect(isStatementBeginning2).toBeFalsy(); + }); +});