Skip to content

Commit 09a051d

Browse files
committed
Schema cleaning.
1 parent a0cfbf4 commit 09a051d

File tree

3 files changed

+83
-33
lines changed

3 files changed

+83
-33
lines changed

t2sql/t2sql/common/common.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,16 @@ func ProjectRoot() string {
3535
return root
3636
}
3737

38+
// ColInfo describes a single column of a table.
type ColInfo struct {
	Name        string // column name
	Type        string // column type
	Description string // free-text description of the column
}
43+
3844
type TableInfo struct {
39-
Name string // table name
40-
Sql string // create table sql statement
45+
Name string // table name
46+
Sql string // create table sql statement
47+
ColInfos []ColInfo
4148
}
4249

4350
type DbInfo struct {

t2sql/t2sql/common/spider2.go

Lines changed: 71 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"strings"
99
)
1010

11-
type ColInfo struct {
11+
type ColInfoParsed struct {
1212
TableName string `json:"table_name"`
1313
Colnames []string `json:"column_names"`
1414
Coltypes []string `json:"column_types"`
@@ -38,34 +38,71 @@ func loadOneDbInfo(dbInfoDir string, sqliteDir string, dbName string) (*DbInfo,
3838
// replace NaN with null, NaN is not a valid json value
3939
jsonData = []byte(strings.ReplaceAll(string(jsonData), ": NaN", ": null"))
4040

41-
var colInfo ColInfo
42-
if err := json.Unmarshal(jsonData, &colInfo); err != nil {
41+
var colParse ColInfoParsed
42+
if err := json.Unmarshal(jsonData, &colParse); err != nil {
4343
return nil, fmt.Errorf("cannot unmarshal json file: %s, %w", jsonFile, err)
4444
}
4545

4646
var tableInfo TableInfo
47-
tableInfo.Name = colInfo.TableName
48-
tableInfo.Sql = fmt.Sprintf("CREATE TABLE %s (", tableInfo.Name)
47+
tableInfo.Name = colParse.TableName
48+
switch strings.ToLower(tableInfo.Name) {
49+
case "match":
50+
tableInfo.Name = "match_table"
51+
}
4952

50-
for i, colname := range colInfo.Colnames {
51-
coltype := colInfo.Coltypes[i]
53+
tableInfo.Sql = fmt.Sprintf("CREATE TABLE %s (", tableInfo.Name)
54+
for i, colname := range colParse.Colnames {
55+
coltype := colParse.Coltypes[i]
5256
switch colname {
53-
case "rank":
54-
colname = fmt.Sprintf("%s_rank", tableInfo.Name)
57+
case "rank", "index", "table", "column", "group", "range",
58+
"cross", "change":
59+
colname = fmt.Sprintf("%s_%s", tableInfo.Name, colname)
5560
}
56-
if coltype == "" {
57-
coltype = "text"
61+
62+
if strings.HasPrefix(colname, "Unnamed: ") {
63+
colname = fmt.Sprintf("unnamed_%s", colname[len("Unnamed: "):])
64+
}
65+
66+
colname = strings.ReplaceAll(colname, "%", "percent")
67+
colname = strings.ReplaceAll(colname, "/", "_over_")
68+
colname = strings.ReplaceAll(colname, "(", "_")
69+
colname = strings.ReplaceAll(colname, ")", "_")
70+
colname = strings.ReplaceAll(colname, "-", "_")
71+
72+
if strings.HasSuffix(colname, "%") {
73+
colname = fmt.Sprintf("%s_percent", colname[:len(colname)-1])
5874
}
59-
if coltype == "NUM" {
75+
76+
switch coltype {
77+
case "NUM":
6078
coltype = "float"
79+
case "", "BLOB SUB_TYPE TEXT", "point":
80+
coltype = "text"
81+
case "jsonb":
82+
coltype = "json"
83+
case "timestamp with time zone":
84+
coltype = "timestamp"
85+
}
86+
87+
if strings.HasPrefix(strings.ToLower(coltype), "nvarchar") {
88+
coltype = "text"
89+
}
90+
91+
if strings.HasPrefix(strings.ToLower(coltype), "character") {
92+
coltype = "varchar(255)"
6193
}
6294

6395
tableInfo.Sql += fmt.Sprintf("%s %s", colname, coltype)
64-
if i == len(colInfo.Colnames)-1 {
96+
if i == len(colParse.Colnames)-1 {
6597
tableInfo.Sql += ");\n"
6698
} else {
6799
tableInfo.Sql += ",\n"
68100
}
101+
tableInfo.ColInfos = append(tableInfo.ColInfos, ColInfo{
102+
Name: colname,
103+
Type: coltype,
104+
Description: colParse.Description[i],
105+
})
69106
}
70107
dbInfo.TableInfos = append(dbInfo.TableInfos, tableInfo)
71108
}
@@ -94,3 +131,24 @@ func Spider2LoadDbInfo() (map[string]*DbInfo, error) {
94131

95132
return dbInfos, nil
96133
}
134+
135+
func Spider2CreateMoTables(dbInfo *DbInfo) error {
136+
dbName := dbInfo.Name
137+
mo, err := OpenMoDB()
138+
if err != nil {
139+
return fmt.Errorf("failed to open mo db: %s, %w", dbName, err)
140+
}
141+
defer mo.Close()
142+
143+
MustExec(mo, "DROP DATABASE IF EXISTS "+dbName)
144+
MustExec(mo, "CREATE DATABASE "+dbName)
145+
MustExec(mo, "USE "+dbName)
146+
147+
for _, tableInfo := range dbInfo.TableInfos {
148+
_, err = mo.Exec(tableInfo.Sql)
149+
if err != nil {
150+
return fmt.Errorf("failed to create db %s, table %s: %w", dbName, tableInfo.Name, err)
151+
}
152+
}
153+
return nil
154+
}

t2sql/t2sql/common/spider2_test.go

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,25 +69,10 @@ func TestCreateMoTables(t *testing.T) {
6969
t.Fatal(err)
7070
}
7171

72-
f1db := dbInfos["f1"]
73-
if f1db == nil {
74-
t.Fatal("f1db not found")
75-
}
76-
77-
mo, err := OpenMoDB()
78-
if err != nil {
79-
t.Fatal(err)
80-
}
81-
defer mo.Close()
82-
83-
MustExec(mo, "DROP DATABASE IF EXISTS f1")
84-
MustExec(mo, "CREATE DATABASE f1")
85-
MustExec(mo, "USE f1")
86-
87-
for _, tableInfo := range f1db.TableInfos {
88-
_, err = mo.Exec(tableInfo.Sql)
72+
for _, dbInfo := range dbInfos {
73+
t.Logf("Creating tables for %s\n", dbInfo.Name)
74+
err = Spider2CreateMoTables(dbInfo)
8975
if err != nil {
90-
t.Logf("Error creating table %s:\n%s\n", tableInfo.Name, tableInfo.Sql)
9176
t.Fatal(err)
9277
}
9378
}

0 commit comments

Comments
 (0)