Skip to content

Commit f518a3b

Browse files
committed
Search: Updated indexer to handle non-breaking-spaces
Related to #5640
1 parent 0208f06 commit f518a3b

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

app/Search/SearchIndex.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,9 @@ protected function generateTermScoreMapFromHtml(string $html): array
160160
/** @var DOMNode $child */
161161
foreach ($doc->getBodyChildren() as $child) {
162162
$nodeName = $child->nodeName;
163-
$termCounts = $this->textToTermCountMap(trim($child->textContent));
163+
$text = trim($child->textContent);
164+
$text = str_replace("\u{00A0}", ' ', $text);
165+
$termCounts = $this->textToTermCountMap($text);
164166
foreach ($termCounts as $term => $count) {
165167
$scoreChange = $count * ($elementScoreAdjustmentMap[$nodeName] ?? 1);
166168
$scoresByTerm[$term] = ($scoresByTerm[$term] ?? 0) + $scoreChange;

tests/Search/SearchIndexingTest.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,14 @@ public function test_terms_containing_punctuation_within_retain_original_form_an
106106
$this->assertNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is not indexed");
107107
}
108108
}
109+
110+
/**
 * Checks that words separated only by a non-breaking space (written in the
 * page HTML as the &nbsp; entity) are still indexed as individual search terms.
 *
 * The words next to each &nbsp; ('tigerbadger', 'dangerous' and 'animal')
 * must each have an entry among the page's search terms.
 */
public function test_non_breaking_spaces_handled_as_spaces()
111+
{
112+
// Each &nbsp; sits directly between two words, with no regular space beside it.
// NOTE(review): presumably creating the page triggers search indexing — confirm against newPage().
$page = $this->entities->newPage(['html' => '<p>a&nbsp;tigerbadger is a dangerous&nbsp;animal</p>']);
113+
114+
// Scores are looked up by term below, so a null result means the term was not indexed.
$scoreByTerm = $page->searchTerms()->pluck('score', 'term');
115+
$this->assertNotNull($scoreByTerm->get('tigerbadger'));
116+
$this->assertNotNull($scoreByTerm->get('dangerous'));
117+
$this->assertNotNull($scoreByTerm->get('animal'));
118+
}
109119
}

0 commit comments

Comments
 (0)