Skip to content

Commit 87e2052

Browse files
authored
Perform granular updates if reloading textual files (#366)
* Add tests that expect granular updates on reload and on append
* Use the stdlib `SequenceMatcher` to perform granular text updates
* Use `real_quick_ratio()` to fast-reject very dissimilar updates
* Use `insert()` for the `insert` opcode: `__setitem__` additionally checks whether the index is a number or a slice and then validates the slice range; those checks can be skipped because `i1 == i2` for `insert` opcodes
* Use `match`/`case` instead of an `if`/`elif` chain
1 parent a18f320 commit 87e2052

File tree

4 files changed

+229
-27
lines changed

4 files changed

+229
-27
lines changed

jupyter_ydoc/yunicode.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@
22
# Distributed under the terms of the Modified BSD License.
33

44
from collections.abc import Callable
5+
from difflib import SequenceMatcher
56
from functools import partial
67
from typing import Any
78

89
from pycrdt import Awareness, Doc, Text
910

1011
from .ybasedoc import YBaseDoc
1112

13+
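# Similarity cutoff for choosing granular updates over a full replace; the difflib
# documentation gives, as a rule of thumb, that a ratio() over 0.6 means a close match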
14+
SIMILARITY_THREESHOLD = 0.6
15+
1216

1317
class YUnicode(YBaseDoc):
1418
"""
@@ -35,7 +39,7 @@ def __init__(self, ydoc: Doc | None = None, awareness: Awareness | None = None):
3539
:type awareness: :class:`pycrdt.Awareness`, optional.
3640
"""
3741
super().__init__(ydoc, awareness)
38-
self._ysource = self._ydoc.get("source", type=Text)
42+
self._ysource: Text = self._ydoc.get("source", type=Text)
3943
self.undo_manager.expand_scope(self._ysource)
4044

4145
@property
@@ -64,17 +68,45 @@ def set(self, value: str) -> None:
6468
:param value: The content of the document.
6569
:type value: str
6670
"""
67-
if self.get() == value:
71+
old_value = self.get()
72+
if old_value == value:
6873
# no-op if the values are already the same,
6974
# to avoid side-effects such as cursor jumping to the top
7075
return
7176

7277
with self._ydoc.transaction():
73-
# clear document
74-
self._ysource.clear()
75-
# initialize document
76-
if value:
77-
self._ysource += value
78+
matcher = SequenceMatcher(a=old_value, b=value)
79+
80+
if (
81+
matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD
82+
and matcher.ratio() >= SIMILARITY_THREESHOLD
83+
):
84+
operations = matcher.get_opcodes()
85+
offset = 0
86+
for tag, i1, i2, j1, j2 in operations:
87+
match tag:
88+
case "replace":
89+
self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
90+
offset += (j2 - j1) - (i2 - i1)
91+
case "delete":
92+
del self._ysource[i1 + offset : i2 + offset]
93+
offset -= i2 - i1
94+
case "insert":
95+
self._ysource.insert(i1 + offset, value[j1:j2])
96+
offset += j2 - j1
97+
case "equal":
98+
pass
99+
case _:
100+
raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
101+
else:
102+
# for very different strings, just replace the whole content;
103+
# this avoids generating a huge number of operations
104+
105+
# clear document
106+
self._ysource.clear()
107+
# initialize document
108+
if value:
109+
self._ysource += value
78110

79111
def observe(self, callback: Callable[[str, Any], None]) -> None:
80112
"""

tests/test_ynotebook.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# Copyright (c) Jupyter Development Team.
22
# Distributed under the terms of the Modified BSD License.
33

4-
from dataclasses import dataclass
5-
64
from pycrdt import ArrayEvent, Map, MapEvent, TextEvent
75
from pytest import mark
6+
from utils import ExpectedEvent
87

98
from jupyter_ydoc import YNotebook
109

@@ -119,24 +118,6 @@ def record_changes(topic, event):
119118
]
120119

121120

122-
@dataclass
123-
class ExpectedEvent:
124-
kind: type
125-
path: str | None = None
126-
127-
def __eq__(self, other):
128-
if not isinstance(other, self.kind):
129-
return False
130-
if self.path is not None and self.path != other.path:
131-
return False
132-
return True
133-
134-
def __repr__(self):
135-
if self.path is not None:
136-
return f"ExpectedEvent({self.kind.__name__}, path={self.path!r})"
137-
return f"ExpectedEvent({self.kind.__name__})"
138-
139-
140121
@mark.parametrize(
141122
"modifications, expected_events",
142123
[

tests/test_yunicode.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Copyright (c) Jupyter Development Team.
22
# Distributed under the terms of the Modified BSD License.
33

4+
from pycrdt import TextEvent
5+
from utils import ExpectedEvent
6+
47
from jupyter_ydoc import YUnicode
58

69

@@ -25,3 +28,163 @@ def record_changes(topic, event):
2528

2629
# No changes should be observed at all
2730
assert changes == []
31+
32+
33+
def test_set_granular_changes():
34+
text = YUnicode()
35+
36+
text.set(
37+
"\n".join(
38+
[
39+
"Mary had a little lamb,",
40+
"Its fleece was white as snow.",
41+
"And everywhere that Mary went,",
42+
"The lamb was sure to go.",
43+
]
44+
)
45+
)
46+
47+
changes = []
48+
49+
def record_changes(topic, event):
50+
changes.append((topic, event)) # pragma: nocover
51+
52+
text.observe(record_changes)
53+
54+
# Call set with the bunny version
55+
text.set(
56+
"\n".join(
57+
[
58+
"Mary had a little bunny,",
59+
"Its fur was white as snow.",
60+
"And everywhere that Mary went,",
61+
"The bunny was sure to hop.",
62+
]
63+
)
64+
)
65+
66+
assert len(changes) == 1
67+
source_events = [e for t, e in changes if t == "source"]
68+
assert source_events == [
69+
ExpectedEvent(
70+
TextEvent,
71+
delta=[
72+
# "Mary had a little <delete:lam>b"
73+
{"retain": 18},
74+
{"delete": 3},
75+
{"retain": 1},
76+
# "Mary had a little b<insert:unny>"
77+
{"insert": "unny"},
78+
# ",↵ Its f<delete:leece>"
79+
{"retain": 7},
80+
{"delete": 5},
81+
# ",↵ Its f<insert:ur>"
82+
{"insert": "ur"},
83+
# " was white as snow.↵"
84+
# "And everywhere that Mary went,↵"
85+
# "The <delete:lam>b"
86+
{"retain": 55},
87+
{"delete": 3},
88+
{"retain": 1},
89+
# "The b<insert:unny> was sure to"
90+
{"insert": "unny"},
91+
{"retain": 13},
92+
# "<delete:g><insert:h>o<insert:p>"
93+
{"delete": 1},
94+
{"insert": "h"},
95+
{"retain": 1},
96+
{"insert": "p"},
97+
],
98+
)
99+
]
100+
101+
102+
def test_set_granular_append():
103+
text = YUnicode()
104+
105+
text.set(
106+
"\n".join(
107+
[
108+
"Mary had a little lamb,",
109+
"Its fleece was white as snow.",
110+
]
111+
)
112+
)
113+
114+
changes = []
115+
116+
def record_changes(topic, event):
117+
changes.append((topic, event)) # pragma: nocover
118+
119+
text.observe(record_changes)
120+
121+
# append a line
122+
text.set(
123+
"\n".join(
124+
[
125+
"Mary had a little lamb,",
126+
"Its fleece was white as snow.",
127+
"And everywhere that Mary went,",
128+
]
129+
)
130+
)
131+
132+
# append one more line
133+
text.set(
134+
"\n".join(
135+
[
136+
"Mary had a little lamb,",
137+
"Its fleece was white as snow.",
138+
"And everywhere that Mary went,",
139+
"The lamb was sure to go.",
140+
]
141+
)
142+
)
143+
144+
assert len(changes) == 2
145+
source_events = [e for t, e in changes if t == "source"]
146+
assert source_events == [
147+
ExpectedEvent(
148+
TextEvent, delta=[{"retain": 53}, {"insert": "\nAnd everywhere that Mary went,"}]
149+
),
150+
ExpectedEvent(TextEvent, delta=[{"retain": 84}, {"insert": "\nThe lamb was sure to go."}]),
151+
]
152+
153+
154+
def test_set_hard_reload_if_very_different():
155+
text = YUnicode()
156+
157+
text.set(
158+
"\n".join(
159+
[
160+
"Mary had a little lamb,",
161+
"Its fleece was white as snow.",
162+
"And everywhere that Mary went,",
163+
"The lamb was sure to go.",
164+
]
165+
)
166+
)
167+
168+
changes = []
169+
170+
def record_changes(topic, event):
171+
changes.append((topic, event)) # pragma: nocover
172+
173+
text.observe(record_changes)
174+
175+
# Call set with a very different nursery rhyme
176+
twinkle_lyrics = "\n".join(
177+
[
178+
"Twinkle, twinkle, little star,",
179+
"How I wonder what you are!",
180+
"Up above the world so high,",
181+
"Like a diamond in the sky.",
182+
]
183+
)
184+
text.set(twinkle_lyrics)
185+
186+
assert len(changes) == 1
187+
source_events = [e for t, e in changes if t == "source"]
188+
assert source_events == [
189+
ExpectedEvent(TextEvent, delta=[{"delete": 109}, {"insert": twinkle_lyrics}])
190+
]

tests/utils.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Copyright (c) Jupyter Development Team.
22
# Distributed under the terms of the Modified BSD License.
33

4+
from dataclasses import dataclass
5+
46
from anyio import Lock, connect_tcp
57

68

@@ -41,3 +43,27 @@ async def ensure_server_running(host: str, port: int) -> None:
4143
pass
4244
else:
4345
break
46+
47+
48+
@dataclass
49+
class ExpectedEvent:
50+
kind: type
51+
path: str | None = None
52+
delta: list[dict] | None = None
53+
54+
def __eq__(self, other):
55+
if not isinstance(other, self.kind):
56+
return False
57+
if self.path is not None and self.path != other.path:
58+
return False
59+
if self.delta is not None and self.delta != other.delta:
60+
return False
61+
return True
62+
63+
def __repr__(self):
64+
fragments = [self.kind.__name__]
65+
if self.path is not None:
66+
fragments.append(f"path={self.path!r}")
67+
if self.delta is not None:
68+
fragments.append(f"delta={self.delta!r}")
69+
return f"ExpectedEvent({', '.join(fragments)})"

0 commit comments

Comments
 (0)