From 00070b18fd130a59efd666880e6b6d44a7605fc0 Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Mon, 15 Feb 2021 00:29:11 -0500 Subject: [PATCH 1/9] reverse index search init --- src/assets/ts/datastore.ts | 77 ++++++++++++++++++++++++++++++++++++++ src/assets/ts/document.ts | 22 ++++++++--- src/assets/ts/searcher.ts | 59 +++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 6 deletions(-) create mode 100644 src/assets/ts/searcher.ts diff --git a/src/assets/ts/datastore.ts b/src/assets/ts/datastore.ts index 201f2bf5..545979fe 100644 --- a/src/assets/ts/datastore.ts +++ b/src/assets/ts/datastore.ts @@ -204,6 +204,83 @@ const decodeParents = (parents: number | Array): Array => { return parents; }; +export class SearchStore { + private prefix: string; + private docname: string; + private backend: DataBackend; + private cache: {[key: string]: any} = {}; + private use_cache: boolean = true; + + constructor(backend: DataBackend, docname = '') { + this.backend = backend; + this.docname = docname; + this.prefix = `${docname}save`; + } + + private async _get( + key: string, + default_value: T, + decode: (value: any) => T = fn_utils.id + ): Promise { + if (simulateDelay) { await timeout(simulateDelay * Math.random()); } + + if (this.use_cache) { + if (key in this.cache) { + return this.cache[key]; + } + } + let value: any = await this.backend.get(key); + try { + // need typeof check because of backwards compatibility plus stupidness like + // JSON.parse([106]) === 106 + if (typeof value === 'string') { + value = JSON.parse(value); + } + } catch (e) { /* do nothing - this should only happen for historical reasons */ } + let decodedValue: T; + if (value === null) { + decodedValue = default_value; + logger.debug('tried getting', key, 'defaulted to', decodedValue); + } else { + decodedValue = decode(value); + logger.debug('got from storage', key, decodedValue); + } + if (this.use_cache) { + this.cache[key] = decodedValue; + } + return decodedValue; + } + + private 
async _set( + key: string, value: any, encode: (value: any) => any = fn_utils.id + ): Promise { + if (simulateDelay) { await timeout(simulateDelay * Math.random()); } + + if (this.use_cache) { + this.cache[key] = value; + } + const encodedValue = encode(value); + logger.debug('setting to storage', key, encodedValue); + // NOTE: fire and forget + this.backend.set(key, JSON.stringify(encodedValue)).catch((err) => { + setTimeout(() => { throw err; }); + }); + } + + private _rowsKey_(token: string): string { + return `${this.prefix}:rows_${token}`; + } + + + // get last view (for page reload) + public async setRows(token: string, rows: Set) { + return this._set(this._rowsKey_(token), rows); + } + + public async getRows(token: string) { + return this._get(this._rowsKey_(token), new Set()); + } +} export class DocumentStore { private lastId: number | null; private prefix: string; diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index 18d7d4a5..1bf9edb6 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -7,11 +7,12 @@ import * as fn_utils from './utils/functional'; // import logger from './utils/logger'; import { isWhitespace } from './utils/text'; import Path from './path'; -import { DocumentStore } from './datastore'; +import { DocumentStore, SearchStore } from './datastore'; import { InMemory } from '../../shared/data_backend'; import { Row, Col, Char, Line, SerializedLine, SerializedBlock } from './types'; +import { Searcher } from './searcher'; type RowInfo = { readonly line: Line; @@ -210,13 +211,15 @@ export default class Document extends EventEmitter { public store: DocumentStore; public name: string; public root: Path; + private searcher: Searcher; - constructor(store: DocumentStore, name = '') { + constructor(store: DocumentStore, searchStore: SearchStore, name = '') { super(); this.cache = new DocumentCache(); this.store = store; this.name = name; this.root = Path.root(); + this.searcher = new Searcher(searchStore); 
return this; } @@ -746,9 +749,13 @@ export default class Document extends EventEmitter { const query_words = query.split(/\s/g).filter(x => x.length).map(canonicalize); - const paths = this.traverseSubtree(root); - for await (let path of paths) { - const text = await this.getText(path.row); + const possibleRows = await this.searcher.search(query_words); + if (possibleRows === null) { + return results; + } + const possibleRowsArr = Array.from(possibleRows); + for await (let row of possibleRowsArr) { + const text = await this.getText(row); const line = canonicalize(text); const matches: Array = []; if (_.every(query_words.map((word) => { @@ -759,7 +766,10 @@ export default class Document extends EventEmitter { } return true; }))) { - results.push({ path, matches }); + const path = await this.canonicalPath(row); + if (path) { + results.push({ path, matches }); + } } if (nresults > 0 && results.length === nresults) { break; diff --git a/src/assets/ts/searcher.ts b/src/assets/ts/searcher.ts new file mode 100644 index 00000000..b127ad8a --- /dev/null +++ b/src/assets/ts/searcher.ts @@ -0,0 +1,59 @@ +import * as _ from 'lodash'; +// import 'core-js/shim'; + +import { SearchStore } from './datastore'; +import { + Row, Chars +} from './types'; + +export class Searcher { + private searchStore: SearchStore; + private maxRowsStored: number; + constructor(searchStore: SearchStore) { + this.searchStore = searchStore; + this.maxRowsStored = 20000; + } + + public async update(row: Row, oldText: string, newText: string) { + // only updates changed words + const oldTokens = oldText.split(' '); + const newTokens = newText.split(' '); + const oldSet = new Set(oldTokens); + const newSet = new Set(newTokens); + return Promise.all(newTokens.map(async (token) => { + // add new tokens + if (!oldSet.has(token)) { + const rows = await this.searchStore.getRows(token); + rows.add(row); + return this.searchStore.setRows(token, rows); + } + }).concat(oldTokens.map(async (token) => { + // remove 
deleted tokens + if (!newSet.has(token)) { + const rows = await this.searchStore.getRows(token); + rows.delete(row); + return this.searchStore.setRows(token, rows); + } + }))); + } + + // returns a list of rows which could match the query. Returns null if too many results + public async search(queries: string[]): Promise | null> { + if (queries.length === 0) { + return new Set(); + } + let allRows = await Promise.all(queries.map(async (token) => { + return this.searchStore.getRows(token); + })); + + if (queries.length === 1) { + return allRows[0]; + } + + allRows = allRows.filter((rows) => (rows.size < this.maxRowsStored)); + if (allRows.length === 0) { + return null; + } + return allRows.reduce((a, b) => new Set(Array.from(a).filter(x => b.has(x)))); + } +} \ No newline at end of file From 1db48582a2ff7561ef58e9c340c388a7c9553f7f Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Mon, 15 Feb 2021 12:40:35 -0500 Subject: [PATCH 2/9] can search by words --- src/assets/ts/app.tsx | 38 ++++++++++++++++----------- src/assets/ts/components/settings.tsx | 5 ++-- src/assets/ts/document.ts | 8 ++++-- src/assets/ts/searcher.ts | 1 + 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/src/assets/ts/app.tsx b/src/assets/ts/app.tsx index 5154111e..f73974f4 100644 --- a/src/assets/ts/app.tsx +++ b/src/assets/ts/app.tsx @@ -27,8 +27,8 @@ import { RegisterTypes } from './register'; import KeyEmitter from './keyEmitter'; import KeyHandler from './keyHandler'; import KeyMappings from './keyMappings'; -import { ClientStore, DocumentStore } from './datastore'; -import { SynchronousInMemory, InMemory } from '../../shared/data_backend'; +import { ClientStore, DocumentStore, SearchStore } from './datastore'; +import DataBackend, { SynchronousInMemory, InMemory } from '../../shared/data_backend'; import { BackendType, SynchronousLocalStorageBackend, LocalStorageBackend, FirebaseBackend, ClientSocketBackend @@ -97,9 +97,14 @@ $(document).ready(async () => { renderMain(); // fire 
and forget }; + type Stores = { + docStore: DocumentStore, + searchStore: SearchStore + } + const noLocalStorage = (typeof localStorage === 'undefined' || localStorage === null); let clientStore: ClientStore; - let docStore: DocumentStore; + let docStore: Stores; let backend_type: BackendType; let doc; @@ -120,11 +125,15 @@ $(document).ready(async () => { const config: Config = vimConfig; - function getLocalStore(): DocumentStore { - return new DocumentStore(new LocalStorageBackend(docname), docname); + function getStores(backend: DataBackend, docname = '') { + return { docStore: new DocumentStore(backend, docname), searchStore: new SearchStore(backend, docname) }; } - async function getFirebaseStore(): Promise { + function getLocalStore(): Stores { + return getStores(new LocalStorageBackend(docname), docname); + } + + async function getFirebaseStore(): Promise { const firebaseId = clientStore.getDocSetting('firebaseId'); const firebaseApiKey = clientStore.getDocSetting('firebaseApiKey'); const firebaseUserEmail = clientStore.getDocSetting('firebaseUserEmail'); @@ -137,14 +146,13 @@ $(document).ready(async () => { throw new Error('No firebase API key found'); } const fb_backend = new FirebaseBackend(docname, firebaseId, firebaseApiKey); - const dStore = new DocumentStore(fb_backend, docname); await fb_backend.init(firebaseUserEmail || '', firebaseUserPassword || ''); logger.info(`Successfully initialized firebase connection: ${firebaseId}`); - return dStore; + return getStores(fb_backend, docname); } - async function getSocketServerStore(): Promise { + async function getSocketServerStore(): Promise { let socketServerHost; let socketServerDocument; let socketServerPassword; @@ -164,7 +172,7 @@ $(document).ready(async () => { const socket_backend = new ClientSocketBackend(); // NOTE: we don't pass docname to DocumentStore since we want keys // to not have prefixes - const dStore = new DocumentStore(socket_backend); + const dStore = getStores(socket_backend); while 
(true) { try { await socket_backend.init( @@ -205,7 +213,7 @@ $(document).ready(async () => { backend_type = 'local'; } } else if (backend_type === 'inmemory') { - docStore = new DocumentStore(new InMemory()); + docStore = getStores(new InMemory()); } else if (backend_type === 'socketserver') { try { docStore = await getSocketServerStore(); @@ -229,10 +237,10 @@ $(document).ready(async () => { backend_type = 'local'; } - doc = new Document(docStore, docname); + doc = new Document(docStore.docStore, docStore.searchStore, docname); let to_load: any = null; - if ((await docStore.getChildren(Path.rootRow())).length === 0) { + if ((await docStore.docStore.getChildren(Path.rootRow())).length === 0) { to_load = config.getDefaultData(); } @@ -330,7 +338,7 @@ $(document).ready(async () => { // load plugins const pluginManager = new PluginsManager(session, config, keyBindings); - let enabledPlugins = await docStore.getSetting('enabledPlugins'); + let enabledPlugins = await docStore.docStore.getSetting('enabledPlugins'); if (typeof enabledPlugins.slice === 'undefined') { // for backwards compatibility enabledPlugins = Object.keys(enabledPlugins); } @@ -466,7 +474,7 @@ $(document).ready(async () => { pluginManager.on('status', renderMain); // fire and forget pluginManager.on('enabledPluginsChange', function(enabled) { - docStore.setSetting('enabledPlugins', enabled); + docStore.docStore.setSetting('enabledPlugins', enabled); renderMain(); // fire and forget }); diff --git a/src/assets/ts/components/settings.tsx b/src/assets/ts/components/settings.tsx index 1acd2850..d4feb803 100644 --- a/src/assets/ts/components/settings.tsx +++ b/src/assets/ts/components/settings.tsx @@ -9,7 +9,7 @@ import { MODES } from '../modes'; import Path from '../path'; import Document from '../document'; -import { DocumentStore, ClientStore } from '../datastore'; +import { DocumentStore, ClientStore, SearchStore } from '../datastore'; import { InMemory } from '../../../shared/data_backend'; import 
Session from '../session'; import Menu from '../menu'; @@ -82,7 +82,8 @@ export default class SettingsComponent extends React.Component { this.initial_theme = getCurrentTheme(props.session.clientStore); (async () => { - const preview_document = new Document(new DocumentStore(new InMemory())); + const backend = new InMemory(); + const preview_document = new Document(new DocumentStore(backend), new SearchStore(backend)); await preview_document.load([ { text: 'Preview document', children: [ { text: 'Breadcrumbs', children: [ diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index 1bf9edb6..7a261512 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -320,6 +320,8 @@ export default class Document extends EventEmitter { } public async setLine(row: Row, line: Line) { + const oldLine = await this.getText(row); + await this.searcher.update(row, oldLine, line.join('')); this.cache.setLine(row, line); await this.store.setLine(row, line); } @@ -734,7 +736,8 @@ export default class Document extends EventEmitter { yield* await helper(root); } - public async search(root: Path, query: string, options: SearchOptions = {}) { + public async search(_root: Path, query: string, options: SearchOptions = {}) { + // TODO: implement local search const { nresults = 10, case_sensitive = false } = options; const results: Array<{ path: Path, @@ -901,6 +904,7 @@ export default class Document extends EventEmitter { export class InMemoryDocument extends Document { constructor() { - super(new DocumentStore(new InMemory())); + const backend = new InMemory(); + super(new DocumentStore(backend), new SearchStore(backend)); } } diff --git a/src/assets/ts/searcher.ts b/src/assets/ts/searcher.ts index b127ad8a..6bdc47ab 100644 --- a/src/assets/ts/searcher.ts +++ b/src/assets/ts/searcher.ts @@ -2,6 +2,7 @@ import * as _ from 'lodash'; // import 'core-js/shim'; import { SearchStore } from './datastore'; +import { all } from './plugins'; import { Row, Chars } from 
'./types'; From d6c138ae9b48eec8c00f1484d63114d7366776e6 Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Mon, 15 Feb 2021 16:02:17 -0500 Subject: [PATCH 3/9] search bugfixes --- src/assets/ts/datastore.ts | 33 +++++++++++++++++++++++++++++---- src/assets/ts/document.ts | 12 ++++++++++++ src/assets/ts/searcher.ts | 18 ++++++++++++++---- 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/src/assets/ts/datastore.ts b/src/assets/ts/datastore.ts index 545979fe..86a0b9c2 100644 --- a/src/assets/ts/datastore.ts +++ b/src/assets/ts/datastore.ts @@ -267,18 +267,39 @@ export class SearchStore { }); } + private hash(token: string) { + // https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript + let hash = 0, i, chr; + for (i = 0; i < token.length; i++) { + chr = token.charCodeAt(i); + hash = ((hash << 5) - hash) + chr; + hash |= 0; // Convert to 32bit integer + } + return hash; + } + private _rowsKey_(token: string): string { - return `${this.prefix}:rows_${token}`; + return `${this.prefix}:rows_${this.hash(token)}`; } + private _lastRowKey_(): string { + return `${this.prefix}:lastRow`; + } - // get last view (for page reload) public async setRows(token: string, rows: Set) { - return this._set(this._rowsKey_(token), rows); + return this._set(this._rowsKey_(token), Array.from(rows)); + } + + public async setLastRow(last: number) { + return this._set(this._lastRowKey_(), last); } public async getRows(token: string) { - return this._get(this._rowsKey_(token), new Set()); + return new Set(await this._get(this._rowsKey_(token), new Array())); + } + + public async getLastRow() { + return this._get(this._lastRowKey_(), -1); } } export class DocumentStore { @@ -445,6 +466,10 @@ export class DocumentStore { return await this._get(this._pluginDataKey_(plugin, key), default_value); } + public async getLastIDKey() { + return await this._get(this._lastIDKey_(), 0); + } + // get next row ID // public so test case can override public async getId(): 
Promise { diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index 7a261512..1c7982a0 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -220,6 +220,8 @@ export default class Document extends EventEmitter { this.name = name; this.root = Path.root(); this.searcher = new Searcher(searchStore); + + this.initSearcher(); return this; } @@ -736,6 +738,16 @@ export default class Document extends EventEmitter { yield* await helper(root); } + private async initSearcher() { + const lastInserted = await this.searcher.searchStore.getLastRow(); + const lastRow = await this.store.getLastIDKey(); + for (let i = lastInserted + 1; i <= lastRow; i++) { + console.log(i, lastRow); + await this.searcher.update(i, '', await this.getText(i)); + await this.searcher.searchStore.setLastRow(i); + } + } + public async search(_root: Path, query: string, options: SearchOptions = {}) { // TODO: implement local search const { nresults = 10, case_sensitive = false } = options; diff --git a/src/assets/ts/searcher.ts b/src/assets/ts/searcher.ts index 6bdc47ab..47e74f2e 100644 --- a/src/assets/ts/searcher.ts +++ b/src/assets/ts/searcher.ts @@ -7,8 +7,12 @@ import { Row, Chars } from './types'; +// remove punctuation https://stackoverflow.com/questions/4328500/how-can-i-strip-all-punctuation-from-a-string-in-javascript-using-regex +const punctRE = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/g; +const spaceRE = /\s+/g; + export class Searcher { - private searchStore: SearchStore; + public searchStore: SearchStore; private maxRowsStored: number; constructor(searchStore: SearchStore) { this.searchStore = searchStore; @@ -17,15 +21,20 @@ export class Searcher { public async update(row: Row, oldText: string, newText: string) { // only updates changed words - const oldTokens = oldText.split(' '); - const newTokens = newText.split(' '); + oldText = oldText.replace(punctRE, '').replace(spaceRE, ' '); + newText = newText.replace(punctRE, 
'').replace(spaceRE, ' '); + + const oldTokens = oldText.toLowerCase().split(' '); + const newTokens = newText.toLowerCase().split(' '); const oldSet = new Set(oldTokens); const newSet = new Set(newTokens); return Promise.all(newTokens.map(async (token) => { // add new tokens if (!oldSet.has(token)) { const rows = await this.searchStore.getRows(token); - rows.add(row); + if (rows.size < this.maxRowsStored) { + rows.add(row); + } return this.searchStore.setRows(token, rows); } }).concat(oldTokens.map(async (token) => { @@ -44,6 +53,7 @@ export class Searcher { return new Set(); } let allRows = await Promise.all(queries.map(async (token) => { + token = token.replace(punctRE, '').replace(spaceRE, ''); return this.searchStore.getRows(token); })); From dbcac48d69497a5df51d15e473f757f8d8edaa9c Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Tue, 16 Feb 2021 21:12:39 -0500 Subject: [PATCH 4/9] fix order of updates --- src/assets/ts/document.ts | 2 +- src/assets/ts/searcher.ts | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index 1c7982a0..c6ef281d 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -742,7 +742,7 @@ export default class Document extends EventEmitter { const lastInserted = await this.searcher.searchStore.getLastRow(); const lastRow = await this.store.getLastIDKey(); for (let i = lastInserted + 1; i <= lastRow; i++) { - console.log(i, lastRow); + console.log('inserting row', i, 'out of', lastRow); await this.searcher.update(i, '', await this.getText(i)); await this.searcher.searchStore.setLastRow(i); } diff --git a/src/assets/ts/searcher.ts b/src/assets/ts/searcher.ts index 47e74f2e..722b51a7 100644 --- a/src/assets/ts/searcher.ts +++ b/src/assets/ts/searcher.ts @@ -28,6 +28,14 @@ export class Searcher { const newTokens = newText.toLowerCase().split(' '); const oldSet = new Set(oldTokens); const newSet = new Set(newTokens); + await 
Promise.all(oldTokens.map(async (token) => { + // remove deleted tokens + if (!newSet.has(token)) { + const rows = await this.searchStore.getRows(token); + rows.delete(row); + return this.searchStore.setRows(token, rows); + } + })); return Promise.all(newTokens.map(async (token) => { // add new tokens if (!oldSet.has(token)) { @@ -37,14 +45,7 @@ export class Searcher { } return this.searchStore.setRows(token, rows); } - }).concat(oldTokens.map(async (token) => { - // remove deleted tokens - if (!newSet.has(token)) { - const rows = await this.searchStore.getRows(token); - rows.delete(row); - return this.searchStore.setRows(token, rows); - } - }))); + })); } // returns a list of rows which could match the query. Returns null if too many results From 89938d53ebaf707c7091286c6b0fa61f656fba44 Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Thu, 25 Feb 2021 01:49:56 -0500 Subject: [PATCH 5/9] only update index on row change --- src/assets/ts/cursor.ts | 1 + src/assets/ts/document.ts | 16 +++++++++++----- src/assets/ts/searcher.ts | 24 +++++++++++++++++++++--- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/assets/ts/cursor.ts b/src/assets/ts/cursor.ts index a7605429..0ac0c3e9 100644 --- a/src/assets/ts/cursor.ts +++ b/src/assets/ts/cursor.ts @@ -53,6 +53,7 @@ export default class Cursor extends EventEmitter { public async _setPath(path: Path) { await this.emitAsync('rowChange', this.path, path); + this.session.document.searcher.update(this.path.row); this.path = path; } diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index e923b251..f634d3bb 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -211,7 +211,7 @@ export default class Document extends EventEmitter { public store: DocumentStore; public name: string; public root: Path; - private searcher: Searcher; + public searcher: Searcher; constructor(store: DocumentStore, searchStore: SearchStore, name = '') { super(); @@ -323,7 +323,7 @@ export default class 
Document extends EventEmitter { public async setLine(row: Row, line: Line) { const oldLine = await this.getText(row); - await this.searcher.update(row, oldLine, line.join('')); + this.searcher.rowChange(row, oldLine, line.join('')); this.cache.setLine(row, line); await this.store.setLine(row, line); } @@ -743,12 +743,13 @@ export default class Document extends EventEmitter { const lastRow = await this.store.getLastIDKey(); for (let i = lastInserted + 1; i <= lastRow; i++) { console.log('inserting row', i, 'out of', lastRow); - await this.searcher.update(i, '', await this.getText(i)); + this.searcher.rowChange(i, '', await this.getText(i)); + await this.searcher.update(i); await this.searcher.searchStore.setLastRow(i); } } - public async search(_root: Path, query: string, options: SearchOptions = {}) { + public async search(root: Path, query: string, options: SearchOptions = {}) { // TODO: implement local search const { nresults = 10, case_sensitive = false } = options; const results: Array<{ @@ -773,6 +774,12 @@ export default class Document extends EventEmitter { const text = await this.getText(row); const line = canonicalize(text); const matches: Array = []; + const path = await this.canonicalPath(row); + + if (!root.isRoot() && (path === null || !root.isDescendant(path))) { // might not work with cloned rows + continue; + } + if (_.every(query_words.map((word) => { const index = line.indexOf(word); if (index === -1) { return false; } @@ -781,7 +788,6 @@ export default class Document extends EventEmitter { } return true; }))) { - const path = await this.canonicalPath(row); if (path) { results.push({ path, matches }); } diff --git a/src/assets/ts/searcher.ts b/src/assets/ts/searcher.ts index 722b51a7..93886aed 100644 --- a/src/assets/ts/searcher.ts +++ b/src/assets/ts/searcher.ts @@ -14,15 +14,33 @@ const spaceRE = /\s+/g; export class Searcher { public searchStore: SearchStore; private maxRowsStored: number; + private startText: {[row: number]: string}; + private 
endText: {[row: number]: string}; + constructor(searchStore: SearchStore) { this.searchStore = searchStore; this.maxRowsStored = 20000; + this.startText = {}; + this.endText = {}; + } + + public rowChange(row: Row, oldText: string, newText: string) { + if (!(row in this.startText)) { + this.startText[row] = oldText; + } + this.endText[row] = newText; } - public async update(row: Row, oldText: string, newText: string) { + public async update(row: Row) { + if (!(row in this.startText) || !(row in this.endText)) { + return; + } // only updates changed words - oldText = oldText.replace(punctRE, '').replace(spaceRE, ' '); - newText = newText.replace(punctRE, '').replace(spaceRE, ' '); + const oldText = this.startText[row].replace(punctRE, '').replace(spaceRE, ' '); + const newText = this.endText[row].replace(punctRE, '').replace(spaceRE, ' '); + + delete this.startText[row]; + delete this.endText[row]; const oldTokens = oldText.toLowerCase().split(' '); const newTokens = newText.toLowerCase().split(' '); From 1290410a9a6ef83ff417c57a2f9b3cf692a444cf Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Thu, 25 Feb 2021 12:35:53 -0500 Subject: [PATCH 6/9] load search results in batches --- src/assets/ts/document.ts | 48 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index f634d3bb..ead30087 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -750,7 +750,6 @@ export default class Document extends EventEmitter { } public async search(root: Path, query: string, options: SearchOptions = {}) { - // TODO: implement local search const { nresults = 10, case_sensitive = false } = options; const results: Array<{ path: Path, @@ -770,31 +769,34 @@ export default class Document extends EventEmitter { return results; } const possibleRowsArr = Array.from(possibleRows); - for await (let row of possibleRowsArr) { - const text = await this.getText(row); - const 
line = canonicalize(text); - const matches: Array = []; - const path = await this.canonicalPath(row); - - if (!root.isRoot() && (path === null || !root.isDescendant(path))) { // might not work with cloned rows - continue; + const chunkedRows = _.chunk(possibleRowsArr, 15); + for await (let chunk of chunkedRows) { + if (nresults > 0 && results.length >= nresults) { + break; } - - if (_.every(query_words.map((word) => { - const index = line.indexOf(word); - if (index === -1) { return false; } - for (let j = index; j < index + word.length; j++) { - matches.push(j); + await Promise.all(chunk.map(async (row) => { + const text = await this.getText(row); + const line = canonicalize(text); + const matches: Array = []; + const path = await this.canonicalPath(row); + + if (!root.isRoot() && (path === null || !root.isDescendant(path))) { // might not work with cloned rows + return; } - return true; - }))) { - if (path) { - results.push({ path, matches }); + + if (_.every(query_words.map((word) => { + const index = line.indexOf(word); + if (index === -1) { return false; } + for (let j = index; j < index + word.length; j++) { + matches.push(j); + } + return true; + }))) { + if (path && (nresults == 0 || results.length < nresults)) { + results.push({ path, matches }); + } } - } - if (nresults > 0 && results.length === nresults) { - break; - } + })); } return results; } From bd9b87727359715cb74dece45e8ed447d290351f Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Thu, 25 Feb 2021 15:09:08 -0500 Subject: [PATCH 7/9] dont insert deleted rows --- src/assets/ts/document.ts | 16 ++++++++++------ src/assets/ts/searcher.ts | 14 ++++++++++---- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index ead30087..d60602fa 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -742,10 +742,12 @@ export default class Document extends EventEmitter { const lastInserted = await 
this.searcher.searchStore.getLastRow(); const lastRow = await this.store.getLastIDKey(); for (let i = lastInserted + 1; i <= lastRow; i++) { - console.log('inserting row', i, 'out of', lastRow); - this.searcher.rowChange(i, '', await this.getText(i)); - await this.searcher.update(i); - await this.searcher.searchStore.setLastRow(i); + if (await this.isAttached(i)) { + console.log('inserting row', i, 'out of', lastRow); + this.searcher.rowChange(i, '', await this.getText(i)); + await this.searcher.update(i); + await this.searcher.searchStore.setLastRow(i); + } } } @@ -765,12 +767,13 @@ export default class Document extends EventEmitter { query.split(/\s/g).filter(x => x.length).map(canonicalize); const possibleRows = await this.searcher.search(query_words); + console.log('got rows') if (possibleRows === null) { return results; } const possibleRowsArr = Array.from(possibleRows); - const chunkedRows = _.chunk(possibleRowsArr, 15); - for await (let chunk of chunkedRows) { + const chunkedRows = _.chunk(possibleRowsArr, 20); + for (let chunk of chunkedRows) { if (nresults > 0 && results.length >= nresults) { break; } @@ -798,6 +801,7 @@ export default class Document extends EventEmitter { } })); } + console.log(results); return results; } diff --git a/src/assets/ts/searcher.ts b/src/assets/ts/searcher.ts index 93886aed..78d92512 100644 --- a/src/assets/ts/searcher.ts +++ b/src/assets/ts/searcher.ts @@ -44,9 +44,15 @@ export class Searcher { const oldTokens = oldText.toLowerCase().split(' '); const newTokens = newText.toLowerCase().split(' '); - const oldSet = new Set(oldTokens); - const newSet = new Set(newTokens); - await Promise.all(oldTokens.map(async (token) => { + + const getPrefixs = (token: string) => { + return _.range(token.length).map((idx) => token.slice(0, idx + 1)); + }; + const oldPrefixs = _.flatMap(oldTokens, (token) => getPrefixs(token)); + const newPrefixes = _.flatMap(newTokens, (token) => getPrefixs(token)); + const oldSet = new Set(oldPrefixs); + 
const newSet = new Set(newPrefixes); + await Promise.all(oldPrefixs.map(async (token) => { // remove deleted tokens if (!newSet.has(token)) { const rows = await this.searchStore.getRows(token); @@ -54,7 +60,7 @@ export class Searcher { return this.searchStore.setRows(token, rows); } })); - return Promise.all(newTokens.map(async (token) => { + return Promise.all(newPrefixes.map(async (token) => { // add new tokens if (!oldSet.has(token)) { const rows = await this.searchStore.getRows(token); From fcce81b1a333766b530072a5dce57f1301fc3027 Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Thu, 25 Feb 2021 15:37:28 -0500 Subject: [PATCH 8/9] search testcase uses prefixes --- src/assets/ts/document.ts | 4 +--- test/testcase.ts | 10 +++++++--- test/tests/search.ts | 25 ++++++++++++------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index d60602fa..56a5c4e7 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -738,7 +738,7 @@ export default class Document extends EventEmitter { yield* await helper(root); } - private async initSearcher() { + public async initSearcher() { const lastInserted = await this.searcher.searchStore.getLastRow(); const lastRow = await this.store.getLastIDKey(); for (let i = lastInserted + 1; i <= lastRow; i++) { @@ -767,7 +767,6 @@ export default class Document extends EventEmitter { query.split(/\s/g).filter(x => x.length).map(canonicalize); const possibleRows = await this.searcher.search(query_words); - console.log('got rows') if (possibleRows === null) { return results; } @@ -801,7 +800,6 @@ export default class Document extends EventEmitter { } })); } - console.log(results); return results; } diff --git a/test/testcase.ts b/test/testcase.ts index df157b0c..f793f9e5 100644 --- a/test/testcase.ts +++ b/test/testcase.ts @@ -3,7 +3,7 @@ import 'mocha'; import * as _ from 'lodash'; -import { DocumentStore, ClientStore } from '../src/assets/ts/datastore'; 
+import { DocumentStore, ClientStore, SearchStore } from '../src/assets/ts/datastore'; import { InMemory, SynchronousInMemory } from '../src/shared/data_backend'; import Document from '../src/assets/ts/document'; import Session from '../src/assets/ts/session'; @@ -35,6 +35,7 @@ type TestCaseOptions = { class TestCase { public docStore: DocumentStore; + private searchStore: SearchStore; private clientStore: ClientStore; protected document: Document; protected plugins: Array; @@ -45,8 +46,10 @@ class TestCase { protected prom: Promise; constructor(serialized: Array = [''], options: TestCaseOptions = {}) { - this.docStore = new DocumentStore(new InMemory()); - this.document = new Document(this.docStore); + const backend = new InMemory(); + this.docStore = new DocumentStore(backend); + this.searchStore = new SearchStore(backend); + this.document = new Document(this.docStore, this.searchStore); this.clientStore = new ClientStore(new SynchronousInMemory()); this.plugins = options.plugins || []; @@ -69,6 +72,7 @@ class TestCase { this._chain(async () => { await this.document.load(serialized); + await this.document.initSearcher(); // this must be *after* plugin loading because of plugins with state // e.g. 
marks needs the database to have the marks loaded diff --git a/test/tests/search.ts b/test/tests/search.ts index 32f3907f..4687e008 100644 --- a/test/tests/search.ts +++ b/test/tests/search.ts @@ -4,7 +4,7 @@ import TestCase from '../testcase'; const globalSearchKey = '/'; const localSearchKey = 'ctrl+/'; -describe('global search', function() { +describe.only('global search', function() { it('works in basic cases', async function() { let t = new TestCase([ 'blah', @@ -21,7 +21,7 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(5); + t.expectNumMenuResults(3); t.sendKey('enter'); t.sendKeys('dd'); t.expect([ @@ -38,7 +38,7 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(4); + t.expectNumMenuResults(2); await t.done(); }); @@ -58,14 +58,14 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(5); + t.expectNumMenuResults(3); t.sendKey('ctrl+j'); t.sendKey('enter'); t.sendKeys('dd'); t.expect([ 'blah', 'searchblah', - 'search', + 'blahsearchblah', 'surch', { text: 'blahsearch', children: [ 'blah', @@ -76,7 +76,7 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(4); + t.expectNumMenuResults(2); await t.done(); t = new TestCase([ @@ -94,7 +94,7 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(5); + t.expectNumMenuResults(3); t.sendKey('ctrl+j'); t.sendKey('ctrl+j'); t.sendKey('enter'); @@ -103,17 +103,16 @@ describe('global search', function() { 'blah', 'searchblah', 'blahsearchblah', + 'search', 'surch', { text: 'blahsearch', children: [ 'blah', ] }, - { text: 'blah', children: [ - 'search', - ] }, + 'blah' ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(4); + t.expectNumMenuResults(2); 
await t.done(); }); @@ -168,7 +167,7 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(5); + t.expectNumMenuResults(3); t.sendKey('ctrl+k'); t.sendKey('enter'); t.sendKeys('dd'); @@ -185,7 +184,7 @@ describe('global search', function() { ]); t.sendKey(globalSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(4); + t.expectNumMenuResults(2); await t.done(); }); From f9b26078668804deccf6641e771f2850faa3b29b Mon Sep 17 00:00:00 2001 From: Victor Tao Date: Thu, 25 Feb 2021 16:24:44 -0500 Subject: [PATCH 9/9] fix local search, cleanup --- src/assets/ts/document.ts | 23 +++-------------------- test/tests/search.ts | 9 +++++++-- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/assets/ts/document.ts b/src/assets/ts/document.ts index 56a5c4e7..c2497eba 100644 --- a/src/assets/ts/document.ts +++ b/src/assets/ts/document.ts @@ -720,30 +720,12 @@ export default class Document extends EventEmitter { return path.child(row); } - private async* traverseSubtree(root: Path): AsyncIterableIterator { - const visited_rows: {[row: number]: boolean} = {}; - let that = this; - - async function* helper(path: Path): AsyncIterableIterator { - if (path.row in visited_rows) { - return; - } - visited_rows[path.row] = true; - yield path; - const children = await that.getChildren(path); - for (let i = 0; i < children.length; i++) { - yield* await helper(children[i]); - } - } - yield* await helper(root); - } - public async initSearcher() { const lastInserted = await this.searcher.searchStore.getLastRow(); const lastRow = await this.store.getLastIDKey(); for (let i = lastInserted + 1; i <= lastRow; i++) { if (await this.isAttached(i)) { - console.log('inserting row', i, 'out of', lastRow); + //console.log('inserting row', i, 'out of', lastRow, 'into search store'); this.searcher.rowChange(i, '', await this.getText(i)); await this.searcher.update(i); await this.searcher.searchStore.setLastRow(i); @@ 
-782,7 +764,7 @@ export default class Document extends EventEmitter { const matches: Array = []; const path = await this.canonicalPath(row); - if (!root.isRoot() && (path === null || !root.isDescendant(path))) { // might not work with cloned rows + if (path === null || !path.isDescendant(root)) { // might not work with cloned rows return; } @@ -800,6 +782,7 @@ export default class Document extends EventEmitter { } })); } + //console.log('Search results', results); return results; } diff --git a/test/tests/search.ts b/test/tests/search.ts index 4687e008..6b1f096f 100644 --- a/test/tests/search.ts +++ b/test/tests/search.ts @@ -4,7 +4,7 @@ import TestCase from '../testcase'; const globalSearchKey = '/'; const localSearchKey = 'ctrl+/'; -describe.only('global search', function() { +describe('global search', function() { it('works in basic cases', async function() { let t = new TestCase([ 'blah', @@ -293,12 +293,17 @@ describe('local search', function() { t.sendKey('enter'); t.sendKey(localSearchKey); t.sendKeys('search'); - t.expectNumMenuResults(3); + t.expectNumMenuResults(1); t.sendKey('enter'); t.sendKeys('dd'); t.expect([ 'blah', 'searchblah', + { text: 'blahsearch', children: [ + 'blahsearchblah', + 'surch', + 'blah', + ] }, { text: 'blah', children: [ 'search', ] },