From 490e935381b1dc67a8c967c9dec1ff9e7e23bf35 Mon Sep 17 00:00:00 2001 From: Albert Weichselbraun Date: Tue, 11 Oct 2022 11:12:34 +0200 Subject: [PATCH 1/3] chg: reformalized word constraints. --- .../stringsearcher/StringSearcherConfig.java | 31 ++++++---- .../neosearch/stringsearcher/trie/Trie.java | 60 +++++-------------- 2 files changed, 34 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java b/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java index 2ec5874..51c6b1b 100644 --- a/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java +++ b/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java @@ -1,5 +1,7 @@ package org.neosearch.stringsearcher; +import java.util.function.Predicate; + /** * Configures options for matching strings. * @@ -11,12 +13,10 @@ public class StringSearcherConfig { private boolean allowOverlaps = true; - private boolean onlyWholeWords = false; - - private boolean onlyWholeWordsWhiteSpaceSeparated = false; - private boolean stopOnHit = false; + private Predicate isInWordCharacter = null; + /** * Returns true if the matching should be case insensitive. */ @@ -42,7 +42,8 @@ public boolean isStopOnHit() { /** * Configures it he StringSearcher should stop on hit. - * @param stopOnHit true, if the StringSearch should stop on hit. False + * + * @param stopOnHit true, if the StringSearch should stop on hit. False */ public void setStopOnHit(boolean stopOnHit) { this.stopOnHit = stopOnHit; @@ -56,20 +57,24 @@ public void setAllowOverlaps(boolean allowOverlaps) { this.allowOverlaps = allowOverlaps; } - public boolean isOnlyWholeWords() { - return onlyWholeWords; + public void setOnlyWholeWords(boolean onlyWholeWords) { + this.isInWordCharacter = onlyWholeWords ? ch -> Character.isAlphabetic(ch) : null; } - public void setOnlyWholeWords(boolean onlyWholeWords) { - this.onlyWholeWords = onlyWholeWords; + public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { + this.isInWordCharacter = onlyWholeWordsWhiteSpaceSeparated ? ch -> !Character.isWhitespace(ch) : null; } - public boolean isOnlyWholeWordsWhiteSpaceSeparated() { - return onlyWholeWordsWhiteSpaceSeparated; + public void setIsInWordCharacter(Predicate isInWordCharacter) { + this.isInWordCharacter = isInWordCharacter; } - public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { - this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated; + public Predicate isInWordCharacter() { + return this.isInWordCharacter; + } + + public boolean isOnlyWholeWords() { + return isInWordCharacter != null; } } diff --git a/src/main/java/org/neosearch/stringsearcher/trie/Trie.java b/src/main/java/org/neosearch/stringsearcher/trie/Trie.java index feb041d..845e302 100644 --- a/src/main/java/org/neosearch/stringsearcher/trie/Trie.java +++ b/src/main/java/org/neosearch/stringsearcher/trie/Trie.java @@ -1,13 +1,10 @@ package org.neosearch.stringsearcher.trie; -import static java.lang.Character.isWhitespace; - import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Queue; - import org.neosearch.stringsearcher.Emit; import org.neosearch.stringsearcher.EmitHandler; import org.neosearch.stringsearcher.FragmentToken; @@ -24,12 +21,11 @@ import org.neosearch.stringsearcher.trie.util.ListElementRemoval.RemoveElementPredicate; /** - * A trie implementation, based on the Aho-Corasick white paper, Bell - * technologies: http://cr.yp.to/bib/1975/aho.pdf + * A trie implementation, based on the Aho-Corasick white paper, Bell technologies: + * http://cr.yp.to/bib/1975/aho.pdf *

* - * The payload trie adds the possibility to specify emitted payloads for each - * added keyword. + * The payload trie adds the possibility to specify emitted payloads for each added keyword. * * @author Daniel Beck * @param The type of the supplied of the payload @@ -49,7 +45,7 @@ public Trie(final StringSearcherConfig trieConfig) { * Used by the builder to add a text search keyword with a emit payload. * * @param keyword The search term to add to the list of search terms. - * @param emit the payload to emit for this search term. + * @param emit the payload to emit for this search term. * @throws NullPointerException if the keyword is null. */ public void addSearchString(String keyword, T emit) { @@ -124,7 +120,8 @@ public Collection> tokenize(final String text) { } private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); + return new FragmentToken( + text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); } private Token createMatch(Emit emit, String text) { @@ -142,10 +139,9 @@ public Collection> parseText(final CharSequence text) { } /** - * Tokenizes the specified text by using a custom EmitHandler and returns the - * emitted outputs. + * Tokenizes the specified text by using a custom EmitHandler and returns the emitted outputs. * - * @param text The character sequence to tokenize. + * @param text The character sequence to tokenize. * @param emitHandler The emit handler that will be used to parse the text. * @return A collection of emits. */ @@ -155,14 +151,9 @@ public Collection> parseText(final CharSequence text, final StatefulEmit final List> collectedEmits = emitHandler.getEmits(); - if (trieConfig.isOnlyWholeWords()) { + if (trieConfig.isInWordCharacter() != null) { removePartialMatches(text, collectedEmits); } - - if (trieConfig.isOnlyWholeWordsWhiteSpaceSeparated()) { - removePartialMatchesWhiteSpaceSeparated(text, collectedEmits); - } - if (!trieConfig.isAllowOverlaps()) { IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); @@ -172,22 +163,19 @@ public Collection> parseText(final CharSequence text, final StatefulEmit } /** - * Returns true if the text contains contains one of the search terms. Else, - * returns false. + * Returns true if the text contains contains one of the search terms. Else, returns false. * * @param text Specified text. - * @return true if the text contains one of the search terms. Else, returns - * false. + * @return true if the text contains one of the search terms. Else, returns false. */ public boolean containsMatch(final CharSequence text) { return firstMatch(text) != null; } /** - * Tokenizes the specified text by using a custom EmitHandler and returns the - * emitted outputs. + * Tokenizes the specified text by using a custom EmitHandler and returns the emitted outputs. * - * @param text The character sequence to tokenize. + * @param text The character sequence to tokenize. * @param emitHandler The emit handler that will be used to parse the text. */ @@ -258,8 +246,9 @@ public Emit firstMatch(final CharSequence text) { } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { - return (emit.getStart() != 0 && Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) - || (emit.getEnd() + 1 != searchText.length() && Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + return (emit.getStart() != 0 && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getStart() - 1))) + || (emit.getEnd() + 1 != searchText.length() + && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List> collectedEmits) { @@ -276,23 +265,6 @@ public boolean remove(Emit emit) { ListElementRemoval.removeIf(collectedEmits, predicate); } - private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List> collectedEmits) { - final long size = searchText.length(); - final List> removeEmits = new ArrayList<>(); - - for (final Emit emit : collectedEmits) { - if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) - && (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { - continue; - } - removeEmits.add(emit); - } - - for (final Emit removeEmit : removeEmits) { - collectedEmits.remove(removeEmit); - } - } - private State getState(State currentState, final Character character) { State newCurrentState = currentState.nextState(character); From 477bad0af05515e0a1bf2459d729a676e5bd5525 Mon Sep 17 00:00:00 2001 From: Albert Weichselbraun Date: Tue, 11 Oct 2022 13:19:55 +0200 Subject: [PATCH 2/3] add: support for custom String boundaries. --- .../SimpleStringSearcherBuilder.java | 15 ++++- .../stringsearcher/StringSearcherBuilder.java | 15 ++++- .../stringsearcher/StringSearcherConfig.java | 3 +- .../neosearch/stringsearcher/trie/Trie.java | 57 ++++++++++------- .../stringsearcher/StringBoundaryTest.java | 61 +++++++++++++++++++ 5 files changed, 126 insertions(+), 25 deletions(-) create mode 100644 src/test/java/org/neosearch/stringsearcher/StringBoundaryTest.java diff --git a/src/main/java/org/neosearch/stringsearcher/SimpleStringSearcherBuilder.java b/src/main/java/org/neosearch/stringsearcher/SimpleStringSearcherBuilder.java index 2309551..fb67d58 100644 --- a/src/main/java/org/neosearch/stringsearcher/SimpleStringSearcherBuilder.java +++ b/src/main/java/org/neosearch/stringsearcher/SimpleStringSearcherBuilder.java @@ -1,6 +1,7 @@ package org.neosearch.stringsearcher; import java.util.Collection; +import java.util.function.Predicate; /** * Builder class to create a StringMatcher instance. Several algorithms can be @@ -115,6 +116,18 @@ public SimpleStringSearcherBuilder stopOnHit() { return this; } + /** + * Configure the Trie to match keywords based on the given predicate which + * returns true for all characters that are considered in-word characters. + * + * @return This builder. + */ + public SimpleStringSearcherBuilder setIsInWordCharacter( + Predicate isInWordCharacter) { + this.stringSearcherBuilder.setInWordCharacters(isInWordCharacter); + return this; + } + /** * Configure the PayloadTrie based on the builder settings. * @@ -123,4 +136,4 @@ public SimpleStringSearcherBuilder stopOnHit() { public StringSearcher build() { return this.stringSearcherBuilder.build(); } -} \ No newline at end of file +} diff --git a/src/main/java/org/neosearch/stringsearcher/StringSearcherBuilder.java b/src/main/java/org/neosearch/stringsearcher/StringSearcherBuilder.java index cae89bc..e2e5f91 100644 --- a/src/main/java/org/neosearch/stringsearcher/StringSearcherBuilder.java +++ b/src/main/java/org/neosearch/stringsearcher/StringSearcherBuilder.java @@ -5,7 +5,7 @@ import java.util.LinkedList; import java.util.Map.Entry; import java.util.Queue; - +import java.util.function.Predicate; import org.neosearch.stringsearcher.trie.Trie; /** @@ -167,6 +167,17 @@ public StringSearcherBuilder onlyWholeWordsWhiteSpaceSeparated() { return this; } + /** + * Configure the Trie to match whole keywords based on the given predicate which + * returns true for all characters that are considered in-word characters. + * + * @return This builder. + */ + public StringSearcherBuilder setInWordCharacters(Predicate isInWordCharacter) { + this.config.setIsInWordCharacter(isInWordCharacter); + return this; + } + /** * Configure the Trie to stop after the first keyword is found in the text. * @@ -193,4 +204,4 @@ public StringSearcher build() { } return null; } -} \ No newline at end of file +} diff --git a/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java b/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java index 51c6b1b..9be4b9d 100644 --- a/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java +++ b/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java @@ -62,7 +62,8 @@ public void setOnlyWholeWords(boolean onlyWholeWords) { } public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { - this.isInWordCharacter = onlyWholeWordsWhiteSpaceSeparated ? ch -> !Character.isWhitespace(ch) : null; + this.isInWordCharacter = + onlyWholeWordsWhiteSpaceSeparated ? ch -> !Character.isWhitespace(ch) : null; } public void setIsInWordCharacter(Predicate isInWordCharacter) { diff --git a/src/main/java/org/neosearch/stringsearcher/trie/Trie.java b/src/main/java/org/neosearch/stringsearcher/trie/Trie.java index 845e302..0dcfe6d 100644 --- a/src/main/java/org/neosearch/stringsearcher/trie/Trie.java +++ b/src/main/java/org/neosearch/stringsearcher/trie/Trie.java @@ -21,11 +21,12 @@ import org.neosearch.stringsearcher.trie.util.ListElementRemoval.RemoveElementPredicate; /** - * A trie implementation, based on the Aho-Corasick white paper, Bell technologies: - * http://cr.yp.to/bib/1975/aho.pdf + * A trie implementation, based on the Aho-Corasick white paper, Bell + * technologies: http://cr.yp.to/bib/1975/aho.pdf *

* - * The payload trie adds the possibility to specify emitted payloads for each added keyword. + * The payload trie adds the possibility to specify emitted payloads for each + * added keyword. * * @author Daniel Beck * @param The type of the supplied of the payload @@ -119,9 +120,10 @@ public Collection> tokenize(final String text) { return tokens; } - private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { - return new FragmentToken( - text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); + private Token createFragment(final Emit emit, final String text, + final int lastCollectedPosition) { + return new FragmentToken(text.substring(lastCollectedPosition + 1, + emit == null ? text.length() : emit.getStart())); } private Token createMatch(Emit emit, String text) { @@ -139,14 +141,16 @@ public Collection> parseText(final CharSequence text) { } /** - * Tokenizes the specified text by using a custom EmitHandler and returns the emitted outputs. + * Tokenizes the specified text by using a custom EmitHandler and returns the + * emitted outputs. * * @param text The character sequence to tokenize. * @param emitHandler The emit handler that will be used to parse the text. * @return A collection of emits. */ @SuppressWarnings("unchecked") - public Collection> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) { + public Collection> parseText(final CharSequence text, + final StatefulEmitHandler emitHandler) { parseText(text, (EmitHandler) emitHandler); final List> collectedEmits = emitHandler.getEmits(); @@ -155,7 +159,8 @@ public Collection> parseText(final CharSequence text, final StatefulEmit removePartialMatches(text, collectedEmits); } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); + IntervalTree intervalTree = + new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -163,17 +168,20 @@ public Collection> parseText(final CharSequence text, final StatefulEmit } /** - * Returns true if the text contains contains one of the search terms. Else, returns false. + * Returns true if the text contains contains one of the search terms. Else, + * returns false. * * @param text Specified text. - * @return true if the text contains one of the search terms. Else, returns false. + * @return true if the text contains one of the search terms. Else, returns + * false. */ public boolean containsMatch(final CharSequence text) { return firstMatch(text) != null; } /** - * Tokenizes the specified text by using a custom EmitHandler and returns the emitted outputs. + * Tokenizes the specified text by using a custom EmitHandler and returns the + * emitted outputs. * * @param text The character sequence to tokenize. * @param emitHandler The emit handler that will be used to parse the text. @@ -228,8 +236,9 @@ public Emit firstMatch(final CharSequence text) { if (payloads != null && !payloads.isEmpty()) { for (final Payload payload : payloads) { - final Emit emit = new Emit<>(position - payload.getKeyword().length() + 1, position, - payload.getKeyword(), payload.getData()); + final Emit emit = + new Emit<>(position - payload.getKeyword().length() + 1, position, + payload.getKeyword(), payload.getData()); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { return emit; @@ -246,12 +255,14 @@ public Emit firstMatch(final CharSequence text) { } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { - return (emit.getStart() != 0 && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getStart() - 1))) - || (emit.getEnd() + 1 != searchText.length() - && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getEnd() + 1))); + return (emit.getStart() != 0 + && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getStart() - 1))) + || (emit.getEnd() + 1 != searchText.length() && trieConfig.isInWordCharacter() + .test(searchText.charAt(emit.getEnd() + 1))); } - private void removePartialMatches(final CharSequence searchText, final List> collectedEmits) { + private void removePartialMatches(final CharSequence searchText, + final List> collectedEmits) { final RemoveElementPredicate> predicate = new RemoveElementPredicate>() { @@ -307,15 +318,19 @@ public Trie build() { return this; } - private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { + private boolean storeEmits(final int position, final State currentState, + final EmitHandler emitHandler) { boolean emitted = false; final Collection> payloads = currentState.emit(); // TODO: The check for empty might be superfluous. if (payloads != null && !payloads.isEmpty()) { for (final Payload payload : payloads) { - emitted = emitHandler.emit(new Emit(position - payload.getKeyword().length() + 1, position, - payload.getKeyword(), payload.getData())) || emitted; + emitted = + emitHandler + .emit(new Emit(position - payload.getKeyword().length() + 1, + position, payload.getKeyword(), payload.getData())) + || emitted; if (emitted && trieConfig.isStopOnHit()) { break; diff --git a/src/test/java/org/neosearch/stringsearcher/StringBoundaryTest.java b/src/test/java/org/neosearch/stringsearcher/StringBoundaryTest.java new file mode 100644 index 0000000..dbf2879 --- /dev/null +++ b/src/test/java/org/neosearch/stringsearcher/StringBoundaryTest.java @@ -0,0 +1,61 @@ +package org.neosearch.stringsearcher; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import java.util.Iterator; +import java.util.function.Predicate; +import org.junit.Test; + +public class StringBoundaryTest { + + private final static Predicate IN_WORD_CHARACTERS = + ch -> Character.isAlphabetic(ch) || Character.isDigit(ch) || ch == '-' || ch == '_'; + + @Test + public void testWordBoundariesForNumbers() { + final String text = "Plida C2 / TELC C2 C3 and the programming language C."; + + StringSearcher searcher = StringSearcher.builder().addSearchString("C") + .addSearchString("C2").setIsInWordCharacter(IN_WORD_CHARACTERS).build(); + Iterator resultIterator = searcher.parseText(text).iterator(); + checkEmit(resultIterator.next(), 6, 7, "C2"); + checkEmit(resultIterator.next(), 16, 17, "C2"); + checkEmit(resultIterator.next(), 51, 51, "C"); + assertFalse("The iterator shouldn't have found more elements", resultIterator.hasNext()); + } + + @Test + public void testWordBoundariesWithPunctuation() { + StringSearcher searcher = + StringSearcher.builder().addSearchString("MySQL").addSearchString("MariaDB") + .addSearchString("Database").addSearchString("Database Systems") + .ignoreOverlaps().setIsInWordCharacter(IN_WORD_CHARACTERS).build(); + Iterator resultIterator = + searcher.parseText("Database Systems: MariaDB;MySQL").iterator(); + checkEmit(resultIterator.next(), 0, 15, "Database Systems"); + checkEmit(resultIterator.next(), 18, 24, "MariaDB"); + checkEmit(resultIterator.next(), 26, 30, "MySQL"); + assertFalse("The iterator shouldn't have found more elements", resultIterator.hasNext()); + } + + @Test + public void testWordsWithSpacesAndHyphens() { + StringSearcher searcher = StringSearcher.builder().addSearchString("ER-Models") + .addSearchString("Database").addSearchString("Database Systems").ignoreOverlaps() + .setIsInWordCharacter(IN_WORD_CHARACTERS).build(); + Iterator resultIterator = + searcher.parseText("Knowledge of ER-Models and Database Systems:-)").iterator(); + checkEmit(resultIterator.next(), 13, 21, "ER-Models"); + checkEmit(resultIterator.next(), 27, 42, "Database Systems"); + assertFalse("The iterator shouldn't have found more elements", resultIterator.hasNext()); + } + + private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) { + assertEquals("Start of emit should have been " + expectedStart, expectedStart, + next.getStart()); + assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); + assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, + next.getSearchString()); + } + +} From fb46da76c9ff5162c0dafd2957086e96d3e5546c Mon Sep 17 00:00:00 2001 From: Albert Weichselbraun Date: Tue, 11 Oct 2022 13:33:18 +0200 Subject: [PATCH 3/3] chg: adjusted code style to reduce the number of reported changes. --- .../stringsearcher/StringSearcherConfig.java | 1 - .../neosearch/stringsearcher/trie/Trie.java | 42 +++++++------------ 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java b/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java index 9be4b9d..c76777e 100644 --- a/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java +++ b/src/main/java/org/neosearch/stringsearcher/StringSearcherConfig.java @@ -42,7 +42,6 @@ public boolean isStopOnHit() { /** * Configures it he StringSearcher should stop on hit. - * * @param stopOnHit true, if the StringSearch should stop on hit. False */ public void setStopOnHit(boolean stopOnHit) { diff --git a/src/main/java/org/neosearch/stringsearcher/trie/Trie.java b/src/main/java/org/neosearch/stringsearcher/trie/Trie.java index 0dcfe6d..527ba2c 100644 --- a/src/main/java/org/neosearch/stringsearcher/trie/Trie.java +++ b/src/main/java/org/neosearch/stringsearcher/trie/Trie.java @@ -46,7 +46,7 @@ public Trie(final StringSearcherConfig trieConfig) { * Used by the builder to add a text search keyword with a emit payload. * * @param keyword The search term to add to the list of search terms. - * @param emit the payload to emit for this search term. + * @param emit the payload to emit for this search term. * @throws NullPointerException if the keyword is null. */ public void addSearchString(String keyword, T emit) { @@ -120,10 +120,8 @@ public Collection> tokenize(final String text) { return tokens; } - private Token createFragment(final Emit emit, final String text, - final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition + 1, - emit == null ? text.length() : emit.getStart())); + private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { + return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); } private Token createMatch(Emit emit, String text) { @@ -144,13 +142,12 @@ public Collection> parseText(final CharSequence text) { * Tokenizes the specified text by using a custom EmitHandler and returns the * emitted outputs. * - * @param text The character sequence to tokenize. + * @param text The character sequence to tokenize. * @param emitHandler The emit handler that will be used to parse the text. * @return A collection of emits. */ @SuppressWarnings("unchecked") - public Collection> parseText(final CharSequence text, - final StatefulEmitHandler emitHandler) { + public Collection> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) { parseText(text, (EmitHandler) emitHandler); final List> collectedEmits = emitHandler.getEmits(); @@ -159,8 +156,7 @@ public Collection> parseText(final CharSequence text, removePartialMatches(text, collectedEmits); } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = - new IntervalTree((List) (List) collectedEmits); + IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -183,7 +179,7 @@ public boolean containsMatch(final CharSequence text) { * Tokenizes the specified text by using a custom EmitHandler and returns the * emitted outputs. * - * @param text The character sequence to tokenize. + * @param text The character sequence to tokenize. * @param emitHandler The emit handler that will be used to parse the text. */ @@ -236,9 +232,8 @@ public Emit firstMatch(final CharSequence text) { if (payloads != null && !payloads.isEmpty()) { for (final Payload payload : payloads) { - final Emit emit = - new Emit<>(position - payload.getKeyword().length() + 1, position, - payload.getKeyword(), payload.getData()); + final Emit emit = new Emit<>(position - payload.getKeyword().length() + 1, position, + payload.getKeyword(), payload.getData()); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { return emit; @@ -255,14 +250,11 @@ public Emit firstMatch(final CharSequence text) { } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { - return (emit.getStart() != 0 - && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getStart() - 1))) - || (emit.getEnd() + 1 != searchText.length() && trieConfig.isInWordCharacter() - .test(searchText.charAt(emit.getEnd() + 1))); + return (emit.getStart() != 0 && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getStart() - 1))) + || (emit.getEnd() + 1 != searchText.length() && trieConfig.isInWordCharacter().test(searchText.charAt(emit.getEnd() + 1))); } - private void removePartialMatches(final CharSequence searchText, - final List> collectedEmits) { + private void removePartialMatches(final CharSequence searchText, final List> collectedEmits) { final RemoveElementPredicate> predicate = new RemoveElementPredicate>() { @@ -318,19 +310,15 @@ public Trie build() { return this; } - private boolean storeEmits(final int position, final State currentState, - final EmitHandler emitHandler) { + private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { boolean emitted = false; final Collection> payloads = currentState.emit(); // TODO: The check for empty might be superfluous. if (payloads != null && !payloads.isEmpty()) { for (final Payload payload : payloads) { - emitted = - emitHandler - .emit(new Emit(position - payload.getKeyword().length() + 1, - position, payload.getKeyword(), payload.getData())) - || emitted; + emitted = emitHandler.emit(new Emit(position - payload.getKeyword().length() + 1, position, + payload.getKeyword(), payload.getData())) || emitted; if (emitted && trieConfig.isStopOnHit()) { break;