@@ -1294,6 +1294,46 @@ public void diff_cleanupMerge(LinkedList<Diff> diffs) {
12941294 }
12951295 }
12961296
1297+ /**
1298+ * Rearrange diff boudnaries that split Unicode surrogate pairs.
1299+ * @param diffs Linked list of diff objects
1300+ */
1301+ public void diff_cleanupSplitSurrogates (List <Diff > diffs ) {
1302+ char lastEnd = 0 ;
1303+ boolean isFirst = true ;
1304+ HashSet <Diff > toRemove = new HashSet <Diff >();
1305+
1306+ for (Diff aDiff : diffs ) {
1307+ if (aDiff .text .isEmpty ()) {
1308+ toRemove .add (aDiff );
1309+ continue ;
1310+ }
1311+
1312+ char thisTop = aDiff .text .charAt (0 );
1313+ char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
1314+
1315+ if (Character .isHighSurrogate (thisEnd )) {
1316+ lastEnd = thisEnd ;
1317+ aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1318+ }
1319+
1320+ if (!isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1321+ aDiff .text = lastEnd + aDiff .text ;
1322+ }
1323+
1324+ isFirst = false ;
1325+
1326+ if ( aDiff .text .isEmpty () ) {
1327+ toRemove .add (aDiff );
1328+ continue ;
1329+ }
1330+ }
1331+
1332+ for (Diff aDiff : toRemove ) {
1333+ diffs .remove (aDiff );
1334+ }
1335+ }
1336+
12971337 /**
12981338 * loc is a location in text1, compute and return the equivalent location in
12991339 * text2.
@@ -1430,31 +1470,8 @@ public int diff_levenshtein(List<Diff> diffs) {
14301470 */
14311471 public String diff_toDelta (List <Diff > diffs ) {
14321472 StringBuilder text = new StringBuilder ();
1433- char lastEnd = 0 ;
1434- boolean isFirst = true ;
1473+ this .diff_cleanupSplitSurrogates (diffs );
14351474 for (Diff aDiff : diffs ) {
1436- if (aDiff .text .isEmpty ()) {
1437- continue ;
1438- }
1439-
1440- char thisTop = aDiff .text .charAt (0 );
1441- char thisEnd = aDiff .text .charAt (aDiff .text .length () - 1 );
1442-
1443- if (Character .isHighSurrogate (thisEnd )) {
1444- lastEnd = thisEnd ;
1445- aDiff .text = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1446- }
1447-
1448- if (! isFirst && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1449- aDiff .text = lastEnd + aDiff .text ;
1450- }
1451-
1452- isFirst = false ;
1453-
1454- if ( aDiff .text .isEmpty () ) {
1455- continue ;
1456- }
1457-
14581475 switch (aDiff .operation ) {
14591476 case INSERT :
14601477 try {
0 commit comments