diff --git a/notebooks/singlestore-cheat-sheet/notebook.ipynb b/notebooks/singlestore-cheat-sheet/notebook.ipynb
index d0863e0..f90818f 100644
--- a/notebooks/singlestore-cheat-sheet/notebook.ipynb
+++ b/notebooks/singlestore-cheat-sheet/notebook.ipynb
@@ -33,32 +33,32 @@
{
"attachments": {},
"cell_type": "markdown",
+ "id": "e8caa6d0",
"metadata": {},
"source": [
"# SingleStore Database Cheat Sheet\n",
"\n",
"List of useful commands for SingleStore SQL and Kai (MongoDB API) operations"
- ],
- "id": "e8caa6d0"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "7ebe210f",
"metadata": {},
"source": [
"## Important Notes\n",
"\n",
"### SingleStore Core Concepts\n",
- "1. Every distributed and columnstore table MUST have a SHARD KEY defined\n",
- "2. Reference tables don't need a SHARD KEY as they are replicated to all nodes\n",
- "3. SingleStore supports both rowstore (default) and columnstore table types\n",
- "4. Hash indexes are recommended for equality comparisons\n",
- "5. JSON operations are optimized for performance in SingleStore\n",
- "6. Use Reference tables for lookup data that needs to be available on all nodes\n",
+ "1. Reference tables don't need a SHARD KEY as they are replicated to all nodes\n",
+ "2. SingleStore supports both rowstore and columnstore (default) table types\n",
+ "3. Hash indexes are recommended for fast equality lookups on large tables\n",
+ "4. JSON operations are optimized for performance in SingleStore\n",
+ "5. Use Reference tables for lookup data that needs to be available on all nodes\n",
"\n",
"### Vector Operations Tips\n",
"1. Vector dimensions must be specified at table creation\n",
- "2. Normalize vectors before cosine similarity calculations\n",
+ "2. For cosine similarity, normalize vectors to length 1 before inserting them into the database (many models already produce length-1 vectors, so this is often unnecessary; check your model's documentation)\n",
"3. Choose appropriate index metric based on your use case\n",
"4. Vector operations support AI/ML workloads\n",
"5. Combine with full-text search for hybrid search capabilities\n",
@@ -67,108 +67,80 @@
"---\n",
"\n",
"*For the most up-to-date information, refer to the official SingleStore documentation at https://singlestore.com/docs.*"
- ],
- "id": "7ebe210f"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "994769ca",
"metadata": {},
"source": [
"## Database Operations"
- ],
- "id": "994769ca"
+ ]
},
{
"cell_type": "code",
"execution_count": 1,
+ "id": "f445a4eb",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "4 rows affected.",
- "text/plain": "4 rows affected."
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": "
\n \n \n | Database | \n
\n \n \n \n | cluster | \n
\n \n | information_schema | \n
\n \n | memsql | \n
\n \n | nocody | \n
\n \n
",
- "text/plain": "+--------------------+\n| Database |\n+--------------------+\n| cluster |\n| information_schema |\n| memsql |\n| nocody |\n+--------------------+"
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Show Databases\n",
"SHOW DATABASES;"
- ],
- "id": "f445a4eb"
+ ]
},
{
"cell_type": "code",
"execution_count": 2,
+ "id": "9731332f",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"# Create Database\n",
- "CREATE DATABASE database_name; --Note this will not work on free tier due to one DB constraint"
- ],
- "id": "9731332f"
+ "CREATE DATABASE database_name; -- Note this will not work on free tier due to one DB constraint"
+ ]
},
{
"cell_type": "code",
"execution_count": 3,
+ "id": "88bd53f5",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"# Use Database\n",
"USE database_name;"
- ],
- "id": "88bd53f5"
+ ]
},
{
"cell_type": "code",
"execution_count": 4,
+ "id": "faaa24d9",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"# Drop Database\n",
"DROP DATABASE database_name; -- Use with extreme caution"
- ],
- "id": "faaa24d9"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "ce2ae430",
"metadata": {},
"source": [
"## Table Operations"
- ],
- "id": "ce2ae430"
+ ]
},
{
"cell_type": "code",
"execution_count": 5,
+ "id": "01dcf093",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "",
- "text/plain": "++\n||\n++\n++"
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Create Distributed Table\n",
@@ -177,27 +149,17 @@
" title VARCHAR(255),\n",
" body TEXT,\n",
" category VARCHAR(50),\n",
- " created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,\n",
+ " created_at DATETIME DEFAULT CURRENT_TIMESTAMP,\n",
" SHARD KEY (id)\n",
");"
- ],
- "id": "01dcf093"
+ ]
},
{
"cell_type": "code",
"execution_count": 6,
+ "id": "f887f7d8",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "",
- "text/plain": "++\n||\n++\n++"
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Create Reference Table\n",
@@ -206,78 +168,42 @@
" name VARCHAR(50)\n",
" -- No SHARD KEY needed for reference tables\n",
");"
- ],
- "id": "f887f7d8"
+ ]
},
{
"cell_type": "code",
"execution_count": 7,
+ "id": "235629be",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "",
- "text/plain": "++\n||\n++\n++"
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Create Columnstore Table\n",
"CREATE TABLE analytics (\n",
" id BIGINT,\n",
" event_type VARCHAR(50),\n",
- " timestamp TIMESTAMP,\n",
+ " ts DATETIME,\n",
" data JSON,\n",
- " SORT KEY (timestamp),\n",
+ " SORT KEY (ts),\n",
- " SHARD KEY (id) -- Required for columnstore tables\n",
- ") ENGINE=columnstore;"
- ],
- "id": "235629be"
+ " SHARD KEY (id)\n",
+ ");"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "e2a6e1b6",
"metadata": {},
"source": [
"### Table Management"
- ],
- "id": "e2a6e1b6"
+ ]
},
{
"cell_type": "code",
"execution_count": 8,
+ "id": "749f2f14",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "3 rows affected.",
- "text/plain": "3 rows affected."
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": "5 rows affected.",
- "text/plain": "5 rows affected."
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": "",
- "text/plain": "++\n||\n++\n++"
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Show tables\n",
@@ -288,21 +214,21 @@
"\n",
"# Drop table\n",
"DROP TABLE posts;"
- ],
- "id": "749f2f14"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "68db244a",
"metadata": {},
"source": [
"## Data Manipulation"
- ],
- "id": "68db244a"
+ ]
},
{
"cell_type": "code",
"execution_count": 9,
+ "id": "462e376a",
"metadata": {},
"outputs": [],
"source": [
@@ -315,12 +241,12 @@
"INSERT INTO posts (title, body, category) VALUES\n",
" ('Post Two', 'Body of post two', 'Technology'),\n",
" ('Post Three', 'Body of post three', 'News');"
- ],
- "id": "462e376a"
+ ]
},
{
"cell_type": "code",
"execution_count": 10,
+ "id": "099a9b83",
"metadata": {},
"outputs": [],
"source": [
@@ -334,12 +260,12 @@
"\n",
"# Select with condition\n",
"SELECT * FROM posts WHERE category = 'News';"
- ],
- "id": "099a9b83"
+ ]
},
{
"cell_type": "code",
"execution_count": 11,
+ "id": "00d6e198",
"metadata": {},
"outputs": [],
"source": [
@@ -348,51 +274,51 @@
"UPDATE posts\n",
"SET body = 'Updated body'\n",
"WHERE title = 'Post One';"
- ],
- "id": "00d6e198"
+ ]
},
{
"cell_type": "code",
"execution_count": 12,
+ "id": "1fcc7ac5",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"# Delete Data\n",
"DELETE FROM posts WHERE title = 'Post One';"
- ],
- "id": "1fcc7ac5"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "6f274e44",
"metadata": {},
"source": [
"## SingleStore Pipelines"
- ],
- "id": "6f274e44"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "af023547",
"metadata": {},
"source": [
"Pipelines are used to bring data into SingleStore tables from different sources, for example an S3 bucket"
- ],
- "id": "af023547"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "cd46a8dc",
"metadata": {},
"source": [
"### Create Pipeline"
- ],
- "id": "cd46a8dc"
+ ]
},
{
"cell_type": "code",
"execution_count": 13,
+ "id": "19719e9d",
"metadata": {},
"outputs": [],
"source": [
@@ -405,149 +331,113 @@
"FIELDS TERMINATED BY ','\n",
"LINES TERMINATED BY '\\n'\n",
"IGNORE 1 lines;"
- ],
- "id": "19719e9d"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "102d3b7e",
"metadata": {},
"source": [
"### Start Pipeline"
- ],
- "id": "102d3b7e"
+ ]
},
{
"cell_type": "code",
"execution_count": 14,
+ "id": "bc89a633",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"START PIPELINE SalesData_Pipeline;"
- ],
- "id": "bc89a633"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "9169c221",
"metadata": {},
"source": [
"### Check pipeline status"
- ],
- "id": "9169c221"
+ ]
},
{
"cell_type": "code",
"execution_count": 15,
+ "id": "6de90734",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "\n \n \n | DATABASE_NAME | \n PIPELINE_NAME | \n SOURCE_TYPE | \n FILE_NAME | \n FILE_SIZE | \n FILE_STATE | \n
\n \n \n \n
",
- "text/plain": "+---------------+---------------+-------------+-----------+-----------+------------+\n| DATABASE_NAME | PIPELINE_NAME | SOURCE_TYPE | FILE_NAME | FILE_SIZE | FILE_STATE |\n+---------------+---------------+-------------+-----------+-----------+------------+\n+---------------+---------------+-------------+-----------+-----------+------------+"
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"SELECT * FROM information_schema.pipelines_files\n",
"WHERE pipeline_name = \"SalesData_Pipeline\";"
- ],
- "id": "6de90734"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "18eb7d77",
"metadata": {},
"source": [
"### Stop pipeline"
- ],
- "id": "18eb7d77"
+ ]
},
{
"cell_type": "code",
"execution_count": 16,
+ "id": "2d4effec",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"STOP PIPELINE [IF RUNNING] pipeline_name [DETACH];"
- ],
- "id": "2d4effec"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "12c17368",
"metadata": {},
"source": [
"### Drop Pipeline"
- ],
- "id": "12c17368"
+ ]
},
{
"cell_type": "code",
"execution_count": 17,
+ "id": "2b73d563",
"metadata": {},
"outputs": [],
"source": [
"%%sql\n",
"DROP PIPELINE [IF EXISTS] pipeline_name;"
- ],
- "id": "2b73d563"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "0f86aae7",
"metadata": {},
"source": [
"## SingleStore Specific Features"
- ],
- "id": "0f86aae7"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "438cd519",
"metadata": {},
"source": [
"### JSON Operations"
- ],
- "id": "438cd519"
+ ]
},
{
"cell_type": "code",
"execution_count": 18,
+ "id": "43bdef37",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "1 rows affected.",
- "text/plain": "1 rows affected."
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": "1 rows affected.",
- "text/plain": "1 rows affected."
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": "\n \n \n | title | \n
\n \n \n \n | None | \n
\n \n
",
- "text/plain": "+-------+\n| title |\n+-------+\n| None |\n+-------+"
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Create table with JSON column\n",
@@ -562,35 +452,25 @@
"VALUES ('{\"title\": \"Post One\", \"tags\": [\"news\", \"events\"]}');\n",
"\n",
"# Query JSON\n",
- "SELECT JSON_EXTRACT_JSON(data, '$.title') as title\n",
+ "SELECT JSON_EXTRACT_STRING(data, 'title') as title\n",
"FROM json_posts;"
- ],
- "id": "43bdef37"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "22715046",
"metadata": {},
"source": [
"### Vector Operations"
- ],
- "id": "22715046"
+ ]
},
{
"cell_type": "code",
"execution_count": 19,
+ "id": "16203c7f",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "",
- "text/plain": "++\n||\n++\n++"
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
"# Create table with vector column\n",
@@ -600,35 +480,25 @@
" embedding VECTOR(1536), -- Specify vector dimension\n",
" SHARD KEY (id)\n",
");"
- ],
- "id": "16203c7f"
+ ]
},
{
"cell_type": "code",
"execution_count": 20,
+ "id": "c4242edd",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": "",
- "text/plain": "++\n||\n++\n++"
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"%%sql\n",
- "# Create vector index using dot product\n",
+ "# Create vector index using dot product as distance metric\n",
"ALTER TABLE embeddings ADD VECTOR INDEX idx_embedding (embedding)\n",
"INDEX_OPTIONS '{\"metric_type\": \"DOT_PRODUCT\"}';"
- ],
- "id": "c4242edd"
+ ]
},
{
"cell_type": "code",
"execution_count": 21,
+ "id": "c6ea6895",
"metadata": {},
"outputs": [],
"source": [
@@ -640,19 +510,22 @@
"ORDER BY similarity DESC\n",
"LIMIT 10;\n",
"\n",
+ "# Create a full-text index\n",
+ "ALTER TABLE embeddings ADD FULLTEXT USING VERSION 2 fts_idx(description);\n",
+ "\n",
"# Hybrid search combining full-text and vector search\n",
"SELECT id, description,\n",
" DOT_PRODUCT(embedding, '[0.1, 0.2, ...]') as vector_score,\n",
- " MATCH(description) 'search terms' as text_score\n",
+ " MATCH(table embeddings) AGAINST('description:(\"search terms\")') as text_score\n",
"FROM embeddings\n",
- "WHERE MATCH(description) 'search terms'\n",
+ "WHERE MATCH(table embeddings) AGAINST('description:(\"search terms\")')\n",
"ORDER BY (vector_score * 0.7 + text_score * 0.3) DESC;"
- ],
- "id": "c6ea6895"
+ ]
},
{
"attachments": {},
"cell_type": "markdown",
+ "id": "7405b7e6",
"metadata": {},
"source": [
"## SingleStore Kai (MongoDB API)\n",
@@ -661,17 +534,17 @@
"```\n",
"mongodb://username:password@hostname:27017/database\n",
"```"
- ],
- "id": "7405b7e6"
+ ]
},
{
"cell_type": "code",
"execution_count": 22,
+ "id": "2d5df461",
"metadata": {},
"outputs": [],
"source": [
- "%%sql\n",
"# MongoDB-style commands\n",
+ "\n",
"# Show databases\n",
"show dbs\n",
"\n",
@@ -683,8 +556,7 @@
"\n",
"# Create collection\n",
"db.createCollection('users')"
- ],
- "id": "2d5df461"
+ ]
},
{
"id": "2ab70470",