Skip to content

Changed script to work with Elastic 6.0.0. #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
854c69b
Changed script to work with Elastic 6.0.0. Inline scripts are now dep…
Jan 30, 2019
7cc89d0
renamed engine appropriately
Zakkery Jan 31, 2019
ec09b19
optimize plugin and use float32 (#19)
ran22 Feb 10, 2019
41961da
updated jackson version
Feb 10, 2019
f0c27c5
added a cosine score test
Feb 25, 2019
f688c3a
Broke it back into multiple files for simplicity, added support for e…
Zakkery Apr 4, 2019
2aa0f87
Removed unused import
Zakkery Apr 4, 2019
44fafbd
Update README.md
lior-k Apr 4, 2019
3678458
Removed unused method
Zakkery Apr 4, 2019
63d84a3
changed a double usage in a test to float. as part of the move to flo…
Apr 23, 2019
d9e04a1
Fixed vector to be float instead of double
Zakkery Apr 23, 2019
71fae4e
Changed script to work with Elastic 6.0.0. Inline scripts are now dep…
Jan 30, 2019
343ee31
renamed engine appropriately
Zakkery Jan 31, 2019
9fdb85f
Broke it back into multiple files for simplicity, added support for e…
Zakkery Apr 4, 2019
b97a7c6
Removed unused import
Zakkery Apr 4, 2019
4479ef2
Removed unused method
Zakkery Apr 4, 2019
028c8b3
Fixed vector to be float instead of double
Zakkery Apr 23, 2019
4a2ab17
Merge branch 'es-6.0' of https://github.com/Zakkery/fast-elasticsearc…
Zakkery Apr 23, 2019
aa8be17
Changed script to work with Elastic 6.0.0. Inline scripts are now dep…
Jan 30, 2019
51032b5
renamed engine appropriately
Zakkery Jan 31, 2019
a0365c4
Broke it back into multiple files for simplicity, added support for e…
Zakkery Apr 4, 2019
ac7efae
Removed unused import
Zakkery Apr 4, 2019
79a8753
Removed unused method
Zakkery Apr 4, 2019
3489091
Fixed vector to be float instead of double
Zakkery Apr 23, 2019
c5b2843
Fixed vector to be float instead of double
Zakkery Apr 23, 2019
cd5caa1
Merge branch 'es-6.0' of https://github.com/Zakkery/fast-elasticsearc…
Zakkery Apr 25, 2019
1f1d5cd
Fixed testing and checking of sizes
Zakkery Apr 25, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ give it a try.


## Elasticsearch version
* Currently designed for Elasticsearch 5.6.0.
* Currently designed for Elasticsearch 6.0.0.
* for Elasticsearch 5.2.2 use branch `es-5.2.2`
* for Elasticsearch 2.4.4 use branch `es-2.4.4`

Expand Down Expand Up @@ -42,43 +42,44 @@ give it a try.
* The vector can be of any dimension

### Converting a vector to Base64
to convert an array of doubles to a base64 string we use these example methods:
To convert an array of float32 values to a Base64 string, use these example methods:

**Java**
```
public static final String convertArrayToBase64(double[] array) {
final int capacity = 8 * array.length;
final ByteBuffer bb = ByteBuffer.allocate(capacity);
for (int i = 0; i < array.length; i++) {
bb.putDouble(array[i]);
}
bb.rewind();
final ByteBuffer encodedBB = Base64.getEncoder().encode(bb);
return new String(encodedBB.array());
public static float[] convertBase64ToArray(String base64Str) {
final byte[] decode = Base64.getDecoder().decode(base64Str.getBytes());
final FloatBuffer floatBuffer = ByteBuffer.wrap(decode).asFloatBuffer();
final float[] dims = new float[floatBuffer.capacity()];
floatBuffer.get(dims);

return dims;
}

public static double[] convertBase64ToArray(String base64Str) {
final byte[] decode = Base64.getDecoder().decode(base64Str.getBytes());
final DoubleBuffer doubleBuffer = ByteBuffer.wrap(decode).asDoubleBuffer();
public static String convertArrayToBase64(float[] array) {
final int capacity = Float.BYTES * array.length;
final ByteBuffer bb = ByteBuffer.allocate(capacity);
for (float v : array) {
bb.putFloat(v);
}
bb.rewind();
final ByteBuffer encodedBB = Base64.getEncoder().encode(bb);

final double[] dims = new double[doubleBuffer.capacity()];
doubleBuffer.get(dims);
return dims;
return new String(encodedBB.array());
}
```
**Python**
```
import base64
import numpy as np

dbig = np.dtype('>f8')
dfloat32 = np.dtype('>f4')

def decode_float_list(base64_string):
bytes = base64.b64decode(base64_string)
return np.frombuffer(bytes, dtype=dbig).tolist()
return np.frombuffer(bytes, dtype=dfloat32).tolist()

def encode_array(arr):
base64_str = base64.b64encode(np.array(arr).astype(dbig)).decode("utf-8")
base64_str = base64.b64encode(np.array(arr).astype(dfloat32)).decode("utf-8")
return base64_str
```

Expand All @@ -87,11 +88,11 @@ def encode_array(arr):
require 'base64'

def decode_float_list(base64_string)
Base64.strict_decode64(base64_string).unpack('G*')
Base64.strict_decode64(base64_string).unpack('g*')
end

def encode_array(arr)
Base64.strict_encode64(arr.pack('G*'))
Base64.strict_encode64(arr.pack('g*'))
end
```

Expand All @@ -103,31 +104,31 @@ import(
"encoding/base64"
)

func convertArrayToBase64(array []float64) string {
bytes := make([]byte, 0, 8*len(array))
func convertArrayToBase64(array []float32) string {
bytes := make([]byte, 0, 4*len(array))
for _, a := range array {
bits := math.Float64bits(a)
b := make([]byte, 8)
binary.BigEndian.PutUint64(b, bits)
bits := math.Float32bits(a)
b := make([]byte, 4)
binary.BigEndian.PutUint32(b, bits)
bytes = append(bytes, b...)
}

encoded := base64.StdEncoding.EncodeToString(bytes)
return encoded
}

func convertBase64ToArray(base64Str string) ([]float64, error) {
func convertBase64ToArray(base64Str string) ([]float32, error) {
decoded, err := base64.StdEncoding.DecodeString(base64Str)
if err != nil {
return nil, err
}

length := len(decoded)
array := make([]float64, 0, length/8)
array := make([]float32, 0, length/4)

for i := 0; i < len(decoded); i += 8 {
bits := binary.BigEndian.Uint64(decoded[i : i+8])
f := math.Float64frombits(bits)
for i := 0; i < len(decoded); i += 4 {
bits := binary.BigEndian.Uint32(decoded[i : i+4])
f := math.Float32frombits(bits)
array = append(array, f)
}
return array, nil
Expand All @@ -146,7 +147,7 @@ func convertBase64ToArray(base64Str string) ([]float64, error) {
"boost_mode": "replace",
"script_score": {
"script": {
"inline": "binary_vector_score",
"source": "binary_vector_score",
"lang": "knn",
"params": {
"cosine": false,
Expand Down
14 changes: 4 additions & 10 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<name>elasticsearch-binary-vector-scoring</name>
<groupId>com.liorkn.elasticsearch</groupId>
<artifactId>elasticsearch-binary-vector-scoring</artifactId>
<version>5.6.0</version>
<version>6.0.0</version>
<description>ElasticSearch Plugin for Binary Vector Scoring</description>

<licenses>
Expand All @@ -27,11 +27,11 @@
<elasticsearch.license.headerDefinition>${project.basedir}/src/main/resources/license-check/license_header_definition.xml</elasticsearch.license.headerDefinition>

<tests.ifNoTests>warn</tests.ifNoTests>
<elasticsearch.version>5.6.0</elasticsearch.version>
<elasticsearch.version>6.0.0</elasticsearch.version>
<commons-io.version>2.4</commons-io.version>
<httpcore.version>4.4.8</httpcore.version>
<junit.version>4.12</junit.version>
<jackson.version>2.7.4</jackson.version>
<jackson.version>2.8.11.3</jackson.version>
</properties>


Expand Down Expand Up @@ -65,7 +65,7 @@

<dependency>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>transport-netty3-client</artifactId>
<artifactId>transport-netty4-client</artifactId>
<version>${elasticsearch.version}</version>
<scope>test</scope>
</dependency>
Expand All @@ -86,12 +86,6 @@
</exclusion>
</exclusions>
</dependency>
<!--<dependency>-->
<!--<groupId>org.elasticsearch.plugin</groupId>-->
<!--<artifactId>transport-netty3-client</artifactId>-->
<!--<version>${elasticsearch.version}</version>-->
<!--<scope>test</scope>-->
<!--</dependency>-->
<dependency>
<groupId>org.codelibs.elasticsearch.module</groupId>
<artifactId>lang-painless</artifactId>
Expand Down
19 changes: 10 additions & 9 deletions src/main/java/com/liorkn/elasticsearch/Util.java
Original file line number Diff line number Diff line change
@@ -1,31 +1,32 @@
package com.liorkn.elasticsearch;

import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.nio.FloatBuffer;
import java.util.Base64;

/**
* Created by Lior Knaany on 4/7/18.
*/
public class Util {

public static final double[] convertBase64ToArray(String base64Str) {
public static float[] convertBase64ToArray(String base64Str) {
final byte[] decode = Base64.getDecoder().decode(base64Str.getBytes());
final DoubleBuffer doubleBuffer = ByteBuffer.wrap(decode).asDoubleBuffer();
final FloatBuffer floatBuffer = ByteBuffer.wrap(decode).asFloatBuffer();
final float[] dims = new float[floatBuffer.capacity()];
floatBuffer.get(dims);

final double[] dims = new double[doubleBuffer.capacity()];
doubleBuffer.get(dims);
return dims;
}

public static final String convertArrayToBase64(double[] array) {
final int capacity = 8 * array.length;
public static String convertArrayToBase64(float[] array) {
final int capacity = Float.BYTES * array.length;
final ByteBuffer bb = ByteBuffer.allocate(capacity);
for (int i = 0; i < array.length; i++) {
bb.putDouble(array[i]);
for (double v : array) {
bb.putFloat((float) v);
}
bb.rewind();
final ByteBuffer encodedBB = Base64.getEncoder().encode(bb);

return new String(encodedBB.array());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package com.liorkn.elasticsearch.engine;

import com.liorkn.elasticsearch.script.VectorScoreScript;

import java.util.Map;

import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.script.SearchScript;

/**
 * {@link ScriptEngine} registered under the language name {@value #NAME}.
 *
 * <p>The engine recognizes exactly one script, identified by its source text
 * {@code binary_vector_score}, and only compiles it for the search script context.
 * Compilation does not parse anything: it simply hands back a factory that creates
 * {@link VectorScoreScript} instances, which perform the actual document scoring.
 */
public class VectorScoringScriptEngine implements ScriptEngine {

    /** Script language name that requests must use in their {@code lang} field. */
    public static final String NAME = "knn";

    /** The only script source this engine accepts. */
    private static final String SCRIPT_SOURCE = "binary_vector_score";

    /**
     * @return the script language name handled by this engine
     */
    @Override
    public String getType() {
        return NAME;
    }

    /**
     * Resolves the requested script to the vector-score script factory.
     *
     * @param scriptName   name of the script; not used by this engine
     * @param scriptSource script source, used here as the script identifier
     * @param context      script context the caller wants a factory for
     * @param params       compile-time parameters; not used by this engine
     * @return the vector-score factory, cast to the factory type of {@code context}
     * @throws IllegalArgumentException if {@code context} is not the search context, or
     *                                  if {@code scriptSource} is not the supported script
     */
    @Override
    public <T> T compile(String scriptName, String scriptSource, ScriptContext<T> context, Map<String, String> params) {
        final boolean searchContext = context.equals(SearchScript.CONTEXT);
        if (!searchContext) {
            throw new IllegalArgumentException(getType() + " scripts cannot be used for context [" + context.name + "]");
        }

        // The script "source" doubles as the script identifier, so it must match exactly.
        final boolean knownScript = SCRIPT_SOURCE.equals(scriptSource);
        if (!knownScript) {
            throw new IllegalArgumentException("Unknown script name " + scriptSource);
        }

        final SearchScript.Factory scoreFactory = VectorScoreScript.VectorScoreScriptFactory::new;
        return context.factoryClazz.cast(scoreFactory);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,24 @@
*/
package com.liorkn.elasticsearch.plugin;

import com.liorkn.elasticsearch.service.VectorScoringScriptEngineService;
import com.liorkn.elasticsearch.engine.VectorScoringScriptEngine;

import java.util.Collection;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.ScriptPlugin;
import org.elasticsearch.script.ScriptEngineService;

import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptEngine;
/**
* This class is instantiated when Elasticsearch loads the plugin for the
* first time. If you change the name of this plugin, make sure to update
* src/main/resources/es-plugin.properties file that points to this class.
*/
public final class VectorScoringPlugin extends Plugin implements ScriptPlugin {

public final ScriptEngineService getScriptEngineService(Settings settings) {
return new VectorScoringScriptEngineService(settings);
@Override
public ScriptEngine getScriptEngine(Settings settings, Collection<ScriptContext<?>> contexts) {
return new VectorScoringScriptEngine();
}


}
Loading