Skip to content

Commit a0cfbf4

Browse files
committed
Adding spider2 schema parsing.
1 parent 6fa2786 commit a0cfbf4

File tree

6 files changed

+284
-0
lines changed

6 files changed

+284
-0
lines changed

t2sql/repo3/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Thirdparty repos
22

33
The `data` directory contains the databases from spider and spider2.
4+
- `data/spider` spider dataset, unzipped here
5+
- `data/spider2` spider2-lite databases, unzipped here
46
`Spider2` is the spider2 git repo.

t2sql/t2sql/common/common.go

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package common
2+
3+
import (
4+
"database/sql"
5+
"flag"
6+
"fmt"
7+
"path/filepath"
8+
"runtime"
9+
10+
_ "github.com/go-sql-driver/mysql"
11+
_ "github.com/mattn/go-sqlite3"
12+
)
13+
14+
// Command-line configuration. Every variable is bound to a flag by ParseArgs
// and holds its zero value until ParseArgs has been called.
var (
	// Suit is the t2sql suit to run (flag -suit, default "spider2").
	Suit string
	// SuitDb is the suit database to use (flag -suitdb); per the flag's help
	// text an empty value means ALL.
	SuitDb string
	// MoHost is the mo database host:port (flag -h).
	MoHost string
	// MoUser is the mo database user (flag -u).
	MoUser string
	// MoPasswd is the mo database password (flag -p).
	MoPasswd string
	// MoDb is the initial database to connect to (flag -d).
	MoDb string
)

// ParseArgs binds the package-level configuration variables to their
// command-line flags on the default flag set and then parses the command line.
//
// It is safe to call more than once: the flags are registered only on the
// first call, because registering an already-defined flag name panics. That
// matters when a test calling ParseArgs is executed repeatedly in one process
// (for example with `go test -count=2`).
func ParseArgs() {
	if flag.Lookup("suit") == nil {
		flag.StringVar(&Suit, "suit", "spider2", "t2sql suit")
		flag.StringVar(&SuitDb, "suitdb", "", "t2sql suit database, default to ALL")
		flag.StringVar(&MoHost, "h", "127.0.0.1:6001", "mo database host:port")
		flag.StringVar(&MoUser, "u", "dump", "mo database user")
		flag.StringVar(&MoPasswd, "p", "111", "mo database password")
		flag.StringVar(&MoDb, "d", "mysql", "initial database to connect to")
	}
	flag.Parse()
}
30+
31+
// ProjectRoot returns the absolute path of the directory two levels above the
// directory that holds this source file, as reported by runtime.Caller.
//
// The "ok" result of runtime.Caller and the error of filepath.Abs are both
// discarded, so the function always returns whatever path could be derived.
func ProjectRoot() string {
	_, thisFile, _, _ := runtime.Caller(0)
	twoUp := filepath.Join(filepath.Dir(thisFile), "..", "..")
	abs, _ := filepath.Abs(twoUp)
	return abs
}
37+
38+
// TableInfo describes a single table of a suit database.
type TableInfo struct {
	// Name is the table name.
	Name string
	// Sql is the CREATE TABLE statement for the table.
	Sql string
}
42+
43+
type DbInfo struct {
44+
Name string // database name
45+
SchemaSql string // schema.sql file path
46+
SqlLite string // sqlite file path
47+
TableInfos []TableInfo
48+
}
49+
50+
func LoadDbInfo() (map[string]*DbInfo, error) {
51+
switch Suit {
52+
case "spider2":
53+
return Spider2LoadDbInfo()
54+
default:
55+
return nil, fmt.Errorf("unknown suit: %s", Suit)
56+
}
57+
}
58+
59+
func OpenMoDB() (*sql.DB, error) {
60+
return sql.Open("mysql", fmt.Sprintf("%s:%s@tcp(%s)/%s", MoUser, MoPasswd, MoHost, MoDb))
61+
}
62+
63+
// OpenSqliteDB opens a database handle with the "sqlite3" driver, passing
// name through unchanged as the data source name.
func OpenSqliteDB(name string) (*sql.DB, error) {
	const driverName = "sqlite3"
	db, err := sql.Open(driverName, name)
	return db, err
}
66+
67+
// MustExec runs the statement sql with args on db and panics with the
// returned error if execution fails. The statement's result is discarded.
func MustExec(db *sql.DB, sql string, args ...any) {
	if _, err := db.Exec(sql, args...); err != nil {
		panic(err)
	}
}

t2sql/t2sql/common/common_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package common
2+
3+
import (
4+
"strings"
5+
"testing"
6+
)
7+
8+
func TestProjectRoot(t *testing.T) {
9+
root := ProjectRoot()
10+
if !strings.HasSuffix(root, "mojo/t2sql") {
11+
t.Errorf("ProjectRoot() = %s, want %s", root, "mojo/t2sql")
12+
}
13+
}

t2sql/t2sql/common/spider2.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package common
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"os"
7+
"path/filepath"
8+
"strings"
9+
)
10+
11+
// ColInfo is the decoded form of one per-table JSON description file.
type ColInfo struct {
	// TableName is the table name (JSON key "table_name").
	TableName string `json:"table_name"`
	// Colnames lists the column names (JSON key "column_names").
	Colnames []string `json:"column_names"`
	// Coltypes lists the column types (JSON key "column_types"); entry i is
	// the type of Colnames[i].
	Coltypes []string `json:"column_types"`
	// Description is decoded from the JSON key "description".
	Description []string `json:"description"`
}
17+
18+
func loadOneDbInfo(dbInfoDir string, sqliteDir string, dbName string) (*DbInfo, error) {
19+
dbDir := filepath.Join(dbInfoDir, dbName)
20+
files, err := os.ReadDir(dbDir)
21+
if err != nil {
22+
return nil, fmt.Errorf("failed to read dbDir: %s, %w", dbDir, err)
23+
}
24+
25+
dbInfo := DbInfo{
26+
Name: dbName,
27+
SqlLite: filepath.Join(sqliteDir, dbName+".sqlite"),
28+
}
29+
30+
for _, file := range files {
31+
if strings.HasSuffix(file.Name(), ".json") {
32+
jsonFile := filepath.Join(dbDir, file.Name())
33+
jsonData, err := os.ReadFile(jsonFile)
34+
if err != nil {
35+
return nil, fmt.Errorf("cannot read json file: %s, %w", jsonFile, err)
36+
}
37+
38+
// replace NaN with null, NaN is not a valid json value
39+
jsonData = []byte(strings.ReplaceAll(string(jsonData), ": NaN", ": null"))
40+
41+
var colInfo ColInfo
42+
if err := json.Unmarshal(jsonData, &colInfo); err != nil {
43+
return nil, fmt.Errorf("cannot unmarshal json file: %s, %w", jsonFile, err)
44+
}
45+
46+
var tableInfo TableInfo
47+
tableInfo.Name = colInfo.TableName
48+
tableInfo.Sql = fmt.Sprintf("CREATE TABLE %s (", tableInfo.Name)
49+
50+
for i, colname := range colInfo.Colnames {
51+
coltype := colInfo.Coltypes[i]
52+
switch colname {
53+
case "rank":
54+
colname = fmt.Sprintf("%s_rank", tableInfo.Name)
55+
}
56+
if coltype == "" {
57+
coltype = "text"
58+
}
59+
if coltype == "NUM" {
60+
coltype = "float"
61+
}
62+
63+
tableInfo.Sql += fmt.Sprintf("%s %s", colname, coltype)
64+
if i == len(colInfo.Colnames)-1 {
65+
tableInfo.Sql += ");\n"
66+
} else {
67+
tableInfo.Sql += ",\n"
68+
}
69+
}
70+
dbInfo.TableInfos = append(dbInfo.TableInfos, tableInfo)
71+
}
72+
}
73+
return &dbInfo, nil
74+
}
75+
76+
func Spider2LoadDbInfo() (map[string]*DbInfo, error) {
77+
dbInfos := make(map[string]*DbInfo)
78+
79+
rootDir := ProjectRoot()
80+
dbInfoDir := filepath.Join(rootDir, "repo3/Spider2/spider2-lite/resource/databases/sqlite")
81+
sqliteDir := filepath.Join(rootDir, "repo3/data/spider2")
82+
files, err := os.ReadDir(dbInfoDir)
83+
if err != nil {
84+
return nil, fmt.Errorf("failed to read dbInfoDir: %s, %w", dbInfoDir, err)
85+
}
86+
87+
for _, file := range files {
88+
dbInfo, err := loadOneDbInfo(dbInfoDir, sqliteDir, file.Name())
89+
if err != nil {
90+
return nil, err
91+
}
92+
dbInfos[dbInfo.Name] = dbInfo
93+
}
94+
95+
return dbInfos, nil
96+
}

t2sql/t2sql/common/spider2_test.go

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package common
2+
3+
import (
4+
"database/sql"
5+
"testing"
6+
)
7+
8+
func TestLoadDbInfo(t *testing.T) {
9+
dbInfos, err := Spider2LoadDbInfo()
10+
if err != nil {
11+
t.Fatal(err)
12+
}
13+
14+
t.Logf("Total DBs: %d\n", len(dbInfos))
15+
16+
for _, dbInfo := range dbInfos {
17+
t.Logf("DB: %s\n", dbInfo.Name)
18+
for _, tableInfo := range dbInfo.TableInfos {
19+
t.Logf(" Table: %s\n", tableInfo.Name)
20+
}
21+
}
22+
}
23+
24+
func TestSqliteRead(t *testing.T) {
25+
dbInfos, err := Spider2LoadDbInfo()
26+
f1db := dbInfos["f1"]
27+
if f1db == nil {
28+
t.Fatal("f1db not found")
29+
}
30+
31+
t.Logf("Opening sqlite database: %s\n", f1db.SqlLite)
32+
sqliteDB, err := OpenSqliteDB(f1db.SqlLite)
33+
if err != nil {
34+
t.Fatal(err)
35+
}
36+
defer sqliteDB.Close()
37+
38+
rows, err := sqliteDB.Query("SELECT * FROM drivers LIMIT 10")
39+
if err != nil {
40+
t.Fatal(err)
41+
}
42+
defer rows.Close()
43+
44+
for rows.Next() {
45+
data := make([]sql.NullString, 10)
46+
datap := make([]any, 10)
47+
for i := 0; i < 10; i++ {
48+
datap[i] = &data[i]
49+
}
50+
51+
err = rows.Scan(datap...)
52+
if err != nil {
53+
t.Fatal(err)
54+
}
55+
t.Logf("data: %v\n", data)
56+
}
57+
}
58+
59+
func TestCreateMoTables(t *testing.T) {
60+
ParseArgs()
61+
62+
t.Logf("MoHost: %s\n", MoHost)
63+
t.Logf("MoUser: %s\n", MoUser)
64+
t.Logf("MoPasswd: %s\n", MoPasswd)
65+
t.Logf("MoDb: %s\n", MoDb)
66+
67+
dbInfos, err := Spider2LoadDbInfo()
68+
if err != nil {
69+
t.Fatal(err)
70+
}
71+
72+
f1db := dbInfos["f1"]
73+
if f1db == nil {
74+
t.Fatal("f1db not found")
75+
}
76+
77+
mo, err := OpenMoDB()
78+
if err != nil {
79+
t.Fatal(err)
80+
}
81+
defer mo.Close()
82+
83+
MustExec(mo, "DROP DATABASE IF EXISTS f1")
84+
MustExec(mo, "CREATE DATABASE f1")
85+
MustExec(mo, "USE f1")
86+
87+
for _, tableInfo := range f1db.TableInfos {
88+
_, err = mo.Exec(tableInfo.Sql)
89+
if err != nil {
90+
t.Logf("Error creating table %s:\n%s\n", tableInfo.Name, tableInfo.Sql)
91+
t.Fatal(err)
92+
}
93+
}
94+
}

t2sql/t2sql/go.mod

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
11
module github.com/matrixorigin/mojo/t2sql/t2sql
22

33
go 1.25.0
4+
5+
require (
6+
github.com/go-sql-driver/mysql v1.9.3
7+
github.com/mattn/go-sqlite3 v1.14.32
8+
)
9+
10+
require filippo.io/edwards25519 v1.1.0 // indirect

0 commit comments

Comments
 (0)