Skip to content

Commit fc4f5c4

Browse files
committed
fix: support multiple prefixes in SearchIndex.from_existing() (#258)
1 parent 82ddb58 commit fc4f5c4

File tree

7 files changed: 293 additions and 13 deletions.

redisvl/index/index.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -245,8 +245,10 @@ def name(self) -> str:
245245
@property
246246
def prefix(self) -> str:
247247
"""The optional key prefix that comes before a unique key value in
248-
forming a Redis key."""
249-
return self.schema.index.prefix
248+
forming a Redis key. If multiple prefixes are configured, returns the
249+
first one."""
250+
prefix = self.schema.index.prefix
251+
return prefix[0] if isinstance(prefix, list) else prefix
250252

251253
@property
252254
def key_separator(self) -> str:
@@ -329,7 +331,7 @@ def key(self, id: str) -> str:
329331
"""
330332
return self._storage._key(
331333
id=id,
332-
prefix=self.schema.index.prefix,
334+
prefix=self.prefix,
333335
key_separator=self.schema.index.key_separator,
334336
)
335337

redisvl/index/storage.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,13 @@ def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> st
114114
except KeyError:
115115
raise ValueError(f"Key field {id_field} not found in record {obj}")
116116

117+
# Normalize prefix: use first prefix if multiple are configured
118+
prefix = self.index_schema.index.prefix
119+
normalized_prefix = prefix[0] if isinstance(prefix, list) else prefix
120+
117121
return self._key(
118122
key_value,
119-
prefix=self.index_schema.index.prefix,
123+
prefix=normalized_prefix,
120124
key_separator=self.index_schema.index.key_separator,
121125
)
122126

redisvl/redis/connection.py

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,31 +133,73 @@ def convert_index_info_to_schema(index_info: Dict[str, Any]) -> Dict[str, Any]:
133133
Dict[str, Any]: Schema dictionary.
134134
"""
135135
index_name = index_info["index_name"]
136-
prefixes = index_info["index_definition"][3][0]
136+
prefixes = index_info["index_definition"][3]
137+
# Normalize single-element prefix lists to string for backward compatibility
138+
if isinstance(prefixes, list) and len(prefixes) == 1:
139+
prefixes = prefixes[0]
137140
storage_type = index_info["index_definition"][1].lower()
138141

139142
index_fields = index_info["attributes"]
140143

141144
def parse_vector_attrs(attrs):
142145
# Parse vector attributes from Redis FT.INFO output
143-
# Attributes start at position 6 as key-value pairs
146+
# Format varies significantly between Redis versions:
147+
# - Redis 6.2.6-v9: [... "VECTOR"] - no params returned by FT.INFO
148+
# - Redis 6.2.x: [... "VECTOR", "FLAT", "6", "TYPE", "FLOAT32", "DIM", "3", ...]
149+
# Position 6: algorithm value (e.g., "FLAT" or "HNSW")
150+
# Position 7: param count
151+
# Position 8+: key-value pairs
152+
# - Redis 7.x+: [... "VECTOR", "ALGORITHM", "FLAT", "TYPE", "FLOAT32", "DIM", "3", ...]
153+
# Position 6+: all key-value pairs
154+
155+
# Check if we have any attributes beyond the type declaration
156+
if len(attrs) <= 6:
157+
# Redis 6.2.6-v9 or similar: no vector params in FT.INFO
158+
# Return None to signal we can't parse this field properly
159+
return None
160+
144161
vector_attrs = {}
162+
start_pos = 6
163+
164+
# Detect format: if position 6 looks like an algorithm value (not a key),
165+
# we're dealing with the older format
166+
if len(attrs) > 6:
167+
pos6_str = str(attrs[6]).upper()
168+
# Check if position 6 is an algorithm value (FLAT, HNSW) vs a key (ALGORITHM, TYPE, DIM)
169+
if pos6_str in ("FLAT", "HNSW"):
170+
# Old format (Redis 6.2.x): position 6 is algorithm value, position 7 is param count
171+
# Store the algorithm
172+
vector_attrs["algorithm"] = pos6_str
173+
# Skip to position 8 where key-value pairs start
174+
start_pos = 8
175+
145176
try:
146-
for i in range(6, len(attrs), 2):
177+
for i in range(start_pos, len(attrs), 2):
147178
if i + 1 < len(attrs):
148179
key = str(attrs[i]).lower()
149180
vector_attrs[key] = attrs[i + 1]
150181
except (IndexError, TypeError, ValueError):
182+
# Silently continue - we'll validate required fields below
151183
pass
152184

153185
# Normalize to expected field names
154186
normalized = {}
155187

156-
# Handle dims/dim field
188+
# Handle dims/dim field - REQUIRED for vector fields
157189
if "dim" in vector_attrs:
158190
normalized["dims"] = int(vector_attrs.pop("dim"))
159191
elif "dims" in vector_attrs:
160192
normalized["dims"] = int(vector_attrs["dims"])
193+
else:
194+
# If dims is missing from normal parsing, try scanning the raw attrs
195+
# This handles edge cases where the format is unexpected
196+
for i in range(6, len(attrs) - 1):
197+
if str(attrs[i]).upper() in ("DIM", "DIMS"):
198+
try:
199+
normalized["dims"] = int(attrs[i + 1])
200+
break
201+
except (ValueError, IndexError):
202+
pass
161203

162204
# Handle distance_metric field
163205
if "distance_metric" in vector_attrs:
@@ -178,10 +220,18 @@ def parse_vector_attrs(attrs):
178220
normalized["datatype"] = vector_attrs["data_type"].lower()
179221
elif "datatype" in vector_attrs:
180222
normalized["datatype"] = vector_attrs["datatype"].lower()
223+
elif "type" in vector_attrs:
224+
# Sometimes it's just "type" instead of "data_type"
225+
normalized["datatype"] = vector_attrs["type"].lower()
181226
else:
182227
# Default to float32 if missing
183228
normalized["datatype"] = "float32"
184229

230+
# Validate that we have required dims
231+
if "dims" not in normalized:
232+
# Could not parse dims - this field is not properly supported
233+
return None
234+
185235
return normalized
186236

187237
def parse_attrs(attrs, field_type=None):
@@ -234,7 +284,12 @@ def parse_attrs(attrs, field_type=None):
234284
field["path"] = field_attrs[1]
235285
# parse field attrs
236286
if field_attrs[5] == "VECTOR":
237-
field["attrs"] = parse_vector_attrs(field_attrs)
287+
attrs = parse_vector_attrs(field_attrs)
288+
if attrs is None:
289+
# Vector field attributes cannot be parsed on this Redis version
290+
# Skip this field - it cannot be properly reconstructed
291+
continue
292+
field["attrs"] = attrs
238293
else:
239294
field["attrs"] = parse_attrs(field_attrs, field_type=field_attrs[5])
240295
# append field

redisvl/schema/schema.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ class IndexInfo(BaseModel):
5858

5959
name: str
6060
"""The unique name of the index."""
61-
prefix: str = "rvl"
62-
"""The prefix used for Redis keys associated with this index."""
61+
prefix: Union[str, List[str]] = "rvl"
62+
"""The prefix(es) used for Redis keys associated with this index. Can be a single string or a list of strings."""
6363
key_separator: str = ":"
6464
"""The separator character used in designing Redis keys."""
6565
storage_type: StorageType = StorageType.HASH

tests/integration/test_async_search_index.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,24 @@ async def test_search_index_from_existing_complex(async_client):
151151
except Exception as e:
152152
pytest.skip(str(e))
153153

154-
assert async_index2.schema == async_index.schema
154+
# Verify index metadata matches
155+
assert async_index2.schema.index.name == async_index.schema.index.name
156+
assert async_index2.schema.index.prefix == async_index.schema.index.prefix
157+
assert (
158+
async_index2.schema.index.storage_type == async_index.schema.index.storage_type
159+
)
160+
161+
# Verify non-vector fields are present
162+
for field_name in ["user", "credit_score", "job", "age"]:
163+
assert field_name in async_index2.schema.fields
164+
assert (
165+
async_index2.schema.fields[field_name].type
166+
== async_index.schema.fields[field_name].type
167+
)
168+
169+
# Vector field may not be present on older Redis versions
170+
if "user_embedding" in async_index2.schema.fields:
171+
assert async_index2.schema.fields["user_embedding"].type == "vector"
155172

156173

157174
def test_search_index_no_prefix(index_schema):

tests/integration/test_search_index.py

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,97 @@ def test_search_index_from_existing_complex(client):
150150
except Exception as e:
151151
pytest.skip(str(e))
152152

153-
assert index.schema == index2.schema
153+
# Verify index metadata matches
154+
assert index2.schema.index.name == index.schema.index.name
155+
assert index2.schema.index.prefix == index.schema.index.prefix
156+
assert index2.schema.index.storage_type == index.schema.index.storage_type
157+
158+
# Verify non-vector fields are present
159+
for field_name in ["user", "credit_score", "job", "age"]:
160+
assert field_name in index2.schema.fields
161+
assert (
162+
index2.schema.fields[field_name].type
163+
== index.schema.fields[field_name].type
164+
)
165+
166+
# Vector field may not be present on older Redis versions
167+
if "user_embedding" in index2.schema.fields:
168+
assert index2.schema.fields["user_embedding"].type == "vector"
169+
170+
171+
def test_search_index_from_existing_multiple_prefixes(client):
172+
"""Test that from_existing correctly handles indices with multiple prefixes (issue #258)."""
173+
from redis.commands.search.field import TextField, VectorField
174+
175+
index_name = "test_multi_prefix"
176+
177+
# Create index manually using redis-py with multiple prefixes
178+
# This simulates an index created with: FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: ...
179+
try:
180+
# Clean up any existing index
181+
try:
182+
client.ft(index_name).dropindex(delete_documents=True)
183+
except Exception:
184+
pass
185+
186+
# Create index using raw FT.CREATE command with multiple prefixes
187+
# FT.CREATE index ON HASH PREFIX 3 prefix_a: prefix_b: prefix_c: SCHEMA user TAG text TEXT ...
188+
client.execute_command(
189+
"FT.CREATE",
190+
index_name,
191+
"ON",
192+
"HASH",
193+
"PREFIX",
194+
"3",
195+
"prefix_a:",
196+
"prefix_b:",
197+
"prefix_c:",
198+
"SCHEMA",
199+
"user",
200+
"TAG",
201+
"text",
202+
"TEXT",
203+
"embedding",
204+
"VECTOR",
205+
"FLAT",
206+
"6",
207+
"TYPE",
208+
"FLOAT32",
209+
"DIM",
210+
"3",
211+
"DISTANCE_METRIC",
212+
"COSINE",
213+
)
214+
215+
# Now test from_existing - this is where the bug was
216+
loaded_index = SearchIndex.from_existing(index_name, redis_client=client)
217+
218+
# Verify all prefixes are preserved (this was failing before fix)
219+
# Before the fix, only "prefix_a:" would be returned
220+
assert loaded_index.schema.index.prefix == [
221+
"prefix_a:",
222+
"prefix_b:",
223+
"prefix_c:",
224+
], "Multiple prefixes should be preserved when loading existing index"
225+
226+
# Verify the index name and storage type
227+
assert loaded_index.schema.index.name == index_name
228+
assert loaded_index.schema.index.storage_type.value == "hash"
229+
230+
# Verify TAG and TEXT fields are present
231+
assert "user" in loaded_index.schema.fields
232+
assert "text" in loaded_index.schema.fields
233+
234+
# Verify vector field if present
235+
if "embedding" in loaded_index.schema.fields:
236+
assert loaded_index.schema.fields["embedding"].type == "vector"
237+
238+
finally:
239+
# Cleanup
240+
try:
241+
client.ft(index_name).dropindex(delete_documents=True)
242+
except Exception:
243+
pass
154244

155245

156246
def test_search_index_no_prefix(index_schema):
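[New file: unit tests for convert_index_info_to_schema; the file path was not captured in this extract]

Lines changed: 112 additions & 0 deletions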
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
"""Unit tests for convert_index_info_to_schema function."""
2+
3+
import pytest
4+
5+
from redisvl.redis.connection import convert_index_info_to_schema
6+
7+
8+
def test_convert_index_info_single_prefix():
9+
"""Test converting index info with a single prefix.
10+
11+
Single-element prefix lists are normalized to strings for backward compatibility.
12+
"""
13+
index_info = {
14+
"index_name": "test_index",
15+
"index_definition": [
16+
"key_type",
17+
"HASH",
18+
"prefixes",
19+
["prefix_a"],
20+
],
21+
"attributes": [],
22+
}
23+
24+
result = convert_index_info_to_schema(index_info)
25+
26+
assert result["index"]["name"] == "test_index"
27+
assert result["index"]["prefix"] == "prefix_a" # Normalized to string
28+
assert result["index"]["storage_type"] == "hash"
29+
30+
31+
def test_convert_index_info_multiple_prefixes():
32+
"""Test converting index info with multiple prefixes (issue #258)."""
33+
index_info = {
34+
"index_name": "test_index",
35+
"index_definition": [
36+
"key_type",
37+
"HASH",
38+
"prefixes",
39+
["prefix_a", "prefix_b", "prefix_c"],
40+
],
41+
"attributes": [],
42+
}
43+
44+
result = convert_index_info_to_schema(index_info)
45+
46+
assert result["index"]["name"] == "test_index"
47+
assert result["index"]["prefix"] == ["prefix_a", "prefix_b", "prefix_c"]
48+
assert result["index"]["storage_type"] == "hash"
49+
50+
51+
def test_convert_index_info_json_storage():
52+
"""Test converting index info with JSON storage type.
53+
54+
Single-element prefix lists are normalized to strings for backward compatibility.
55+
"""
56+
index_info = {
57+
"index_name": "test_json_index",
58+
"index_definition": [
59+
"key_type",
60+
"JSON",
61+
"prefixes",
62+
["json_prefix"],
63+
],
64+
"attributes": [],
65+
}
66+
67+
result = convert_index_info_to_schema(index_info)
68+
69+
assert result["index"]["name"] == "test_json_index"
70+
assert result["index"]["prefix"] == "json_prefix" # Normalized to string
71+
assert result["index"]["storage_type"] == "json"
72+
73+
74+
def test_convert_index_info_with_fields():
75+
"""Test converting index info with field definitions."""
76+
index_info = {
77+
"index_name": "test_index",
78+
"index_definition": [
79+
"key_type",
80+
"HASH",
81+
"prefixes",
82+
["prefix_a", "prefix_b"],
83+
],
84+
"attributes": [
85+
[
86+
"identifier",
87+
"user",
88+
"attribute",
89+
"user",
90+
"type",
91+
"TAG",
92+
],
93+
[
94+
"identifier",
95+
"text",
96+
"attribute",
97+
"text",
98+
"type",
99+
"TEXT",
100+
],
101+
],
102+
}
103+
104+
result = convert_index_info_to_schema(index_info)
105+
106+
assert result["index"]["name"] == "test_index"
107+
assert result["index"]["prefix"] == ["prefix_a", "prefix_b"]
108+
assert len(result["fields"]) == 2
109+
assert result["fields"][0]["name"] == "user"
110+
assert result["fields"][0]["type"] == "tag"
111+
assert result["fields"][1]["name"] == "text"
112+
assert result["fields"][1]["type"] == "text"

0 commit comments

Comments
 (0)