Skip to content

Commit 4c9ee9a

Browse files
authored
add in a full text index creation on bulk load (#39)
1 parent fc7cbac commit 4c9ee9a

File tree

3 files changed

+48
-1
lines changed

3 files changed

+48
-1
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ python3 redisgraph_bulk_loader/bulk_insert.py GRAPHNAME [OPTIONS]
5555
| -b | --max-buffer-size INT | (Debug argument) Max batch size (MBs) of each Redis query (default 2048) |
5656
| -t | --max-token-size INT | (Debug argument) Max size (MBs) of each token sent to Redis (default 500) |
5757
| -i | --index Label:Property | After bulk import, create an Index on provided Label:Property pair (optional) |
58+
| -f | --full-text-index Label:Property | After bulk import, create a full-text index on the provided Label:Property pair (optional) |
5859

5960

6061
The only required arguments are the name to give the newly-created graph (which can appear anywhere) and at least one node CSV file.

redisgraph_bulk_loader/bulk_insert.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ def process_entities(entities):
6363
@click.option('--max-buffer-size', '-b', default=2048, help='max buffer size in megabytes (default 2048)')
6464
@click.option('--max-token-size', '-t', default=500, help='max size of each token in megabytes (default 500, max 512)')
6565
@click.option('--index', '-i', multiple=True, help='Label:Propery on which to create an index')
66-
def bulk_insert(graph, host, port, password, nodes, nodes_with_label, relations, relations_with_type, separator, enforce_schema, skip_invalid_nodes, skip_invalid_edges, quote, max_token_count, max_buffer_size, max_token_size, index):
66+
@click.option('--full-text-index', '-f', multiple=True, help='Label:Propery on which to create an full text search index')
67+
def bulk_insert(graph, host, port, password, nodes, nodes_with_label, relations, relations_with_type, separator, enforce_schema, skip_invalid_nodes, skip_invalid_edges, quote, max_token_count, max_buffer_size, max_token_size, index, full_text_index):
6768
if sys.version_info[0] < 3:
6869
raise Exception("Python 3 is required for the RedisGraph bulk loader.")
6970

@@ -116,6 +117,7 @@ def bulk_insert(graph, host, port, password, nodes, nodes_with_label, relations,
116117
end_time = timer()
117118
query_buf.report_completion(end_time - start_time)
118119

120+
# Add in Graph Indices after graph creation
119121
for i in index:
120122
l, p = i.split(":")
121123
print("Creating Index on Label: %s, Property: %s" %(l, p))
@@ -127,5 +129,18 @@ def bulk_insert(graph, host, port, password, nodes, nodes_with_label, relations,
127129
print("Unable to create Index on Label: %s, Property %s" %(l, p))
128130
print(e)
129131

132+
# Add in Full Text Search Indices after graph creation
133+
for i in full_text_index:
134+
l, p = i.split(":")
135+
print("Creating Full Text Search Index on Label: %s, Property: %s" %(l, p))
136+
try:
137+
index_create = client.execute_command("GRAPH.QUERY", graph, "CALL db.idx.fulltext.createNodeIndex('%s', '%s')" %(l, p))
138+
print(index_create[-1][0].decode("utf-8"))
139+
except redis.exceptions.ResponseError as e:
140+
print("Unable to create Full Text Search Index on Label: %s, Property %s" %(l, p))
141+
print(e)
142+
except:
143+
print("Unknown Error: Unable to create Full Text Search Index on Label: %s, Property %s" %(l, p))
144+
130145
if __name__ == '__main__':
131146
bulk_insert()

test/test_bulk_loader.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ def tearDownClass(cls):
4141
"""Delete temporary files"""
4242
os.remove('/tmp/nodes.tmp')
4343
os.remove('/tmp/relations.tmp')
44+
os.remove('/tmp/nodes_index.tmp')
45+
os.remove('/tmp/nodes_full_text_index.tmp')
4446
cls.redis_con.flushall()
4547

4648
def validate_exception(self, res, expected_msg):
@@ -685,6 +687,35 @@ def test17_ensure_index_is_created(self):
685687
res = r.execute_command("GRAPH.EXPLAIN", graphname, 'MATCH (p:Person) WHERE p.age > 16 RETURN p')
686688
self.assertIn(' Index Scan | (p:Person)', res)
687689

690+
def test18_ensure_full_text_index_is_created(self):
    """Bulk-load labelled nodes with ``--full-text-index`` and query the index.

    Writes a one-column CSV of four ``Monkeys`` names, runs the bulk loader
    with ``--full-text-index Monkeys:name``, and then checks that:

    * the loader exits cleanly and reports 4 nodes and 1 index created;
    * a full-text query for ``tamarin`` returns exactly the three tamarin
      names and not the baboon.
    """
    graphname = "index_full_text_test"
    csv_path = '/tmp/nodes_full_text_index.tmp'

    # The with-block closes the file on exit; no explicit close() is needed.
    with open(csv_path, mode='w') as csv_file:
        out = csv.writer(csv_file, delimiter='|')
        out.writerow(['name:STRING'])
        out.writerow(['Emperor Tamarin'])
        out.writerow(['Golden Lion Tamarin'])
        out.writerow(['Cotton-top Tamarin'])
        out.writerow(['Olive Baboon'])

    runner = CliRunner()
    res = runner.invoke(bulk_insert, ['--nodes-with-label', 'Monkeys', csv_path,
                                      '--full-text-index', 'Monkeys:name',
                                      '--enforce-schema',
                                      graphname], catch_exceptions=False)

    self.assertEqual(res.exit_code, 0)
    self.assertIn('4 nodes created', res.output)
    self.assertIn('Indices created: 1', res.output)

    graph = Graph(graphname, self.redis_con)
    query_result = graph.query("CALL db.idx.fulltext.queryNodes('Monkeys', 'tamarin') YIELD node RETURN node.name")
    expected_result = [['Emperor Tamarin'], ['Golden Lion Tamarin'], ['Cotton-top Tamarin']]

    # We should find only the tamarins. This test cares about which nodes
    # match, not the order the search returns them in, so compare sorted rows.
    self.assertEqual(sorted(query_result.result_set), sorted(expected_result))
717+
718+
688719

689720
if __name__ == '__main__':
690721
unittest.main()

0 commit comments

Comments
 (0)