From cfbadfde53ecfa6030f03928e112826e4b46e397 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Fri, 5 Jan 2024 16:57:04 +0000 Subject: [PATCH 1/7] Add a oneChangePerToken option to emit one change object per token instead of combining consecutive tokens into one change object --- src/diff/base.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/diff/base.js b/src/diff/base.js index 99f93d7a..aa8a2500 100644 --- a/src/diff/base.js +++ b/src/diff/base.js @@ -40,7 +40,7 @@ Diff.prototype = { let newPos = this.extractCommon(bestPath[0], newString, oldString, 0); if (bestPath[0].oldPos + 1 >= oldLen && newPos + 1 >= newLen) { // Identity per the equality and tokenizer - return done([{value: this.join(newString), count: newString.length}]); + return done(buildValues(self, bestPath[0].lastComponent, newString, oldString, self.useLongestToken)); } // Once we hit the right edge of the edit graph on some diagonal k, we can @@ -149,7 +149,7 @@ Diff.prototype = { addToPath(path, added, removed, oldPosInc) { let last = path.lastComponent; - if (last && last.added === added && last.removed === removed) { + if (last && !this.options.oneChangePerToken && last.added === added && last.removed === removed) { return { oldPos: path.oldPos + oldPosInc, lastComponent: {count: last.count + 1, added: added, removed: removed, previousComponent: last.previousComponent } @@ -172,9 +172,12 @@ Diff.prototype = { newPos++; oldPos++; commonCount++; + if (this.options.oneChangePerToken) { + basePath.lastComponent = {count: 1, previousComponent: basePath.lastComponent}; + } } - if (commonCount) { + if (commonCount && !this.options.oneChangePerToken) { basePath.lastComponent = {count: commonCount, previousComponent: basePath.lastComponent}; } From 9c3216b87dc824997620fca456da534b1a508bc5 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 8 Jan 2024 14:19:14 +0000 Subject: [PATCH 2/7] Document oneChangePerToken --- README.md | 1 + 1 file changed, 1 insertion(+) diff 
--git a/README.md b/README.md index bf0b8975..4b6c734f 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,7 @@ Certain options can be provided in the `options` object of *any* method that cal (Note that if the ONLY option you want to provide is a callback, you can pass the callback function directly as the `options` parameter instead of passing an object with a `callback` property.) * `maxEditLength`: a number specifying the maximum edit distance to consider between the old and new texts. If the edit distance is higher than this, jsdiff will return `undefined` instead of a diff. You can use this to limit the computational cost of diffing large, very different texts by giving up early if the cost will be huge. Works for functions that return change objects and also for `structuredPatch`, but not other patch-generation functions. +* `oneChangePerToken`: if `true`, the array of change objects returned will contain one change object per token (e.g. one per line if calling `diffLines`), instead of runs of consecutive tokens that are all added / all removed / all conserved being combined into a single change object. ### Defining custom diffing behaviors From f7de269b71504d1222f9e7200940dad96d9164f2 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 8 Jan 2024 14:40:16 +0000 Subject: [PATCH 3/7] Add release notes --- release-notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/release-notes.md b/release-notes.md index 3919c740..f9b6fd69 100644 --- a/release-notes.md +++ b/release-notes.md @@ -10,6 +10,7 @@ - [#344](https://github.com/kpdecker/jsdiff/issues/344) `diffLines`, `createTwoFilesPatch`, and other patch-creation methods now take an optional `stripTrailingCr: true` option which causes Windows-style `\r\n` line endings to be replaced with Unix-style `\n` line endings before calculating the diff, just like GNU `diff`'s `--strip-trailing-cr` flag. - [#451](https://github.com/kpdecker/jsdiff/pull/451) Added `diff.formatPatch`. 
- [#450](https://github.com/kpdecker/jsdiff/pull/450) Added `diff.reversePatch`. +- [#460][https://github.com/kpdecker/jsdiff/pull/460] Added `oneChangePerToken` option. ## v5.1.0 From b351b9678214d0c30e7126fb0bc6138d645ca70f Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 8 Jan 2024 14:57:18 +0000 Subject: [PATCH 4/7] Add test (which fails; needs changes from https://github.com/kpdecker/jsdiff/pull/439) --- test/diff/character.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/diff/character.js b/test/diff/character.js index 9061e69b..713a3d49 100644 --- a/test/diff/character.js +++ b/test/diff/character.js @@ -6,8 +6,16 @@ import {expect} from 'chai'; describe('diff/character', function() { describe('#diffChars', function() { it('Should diff chars', function() { - const diffResult = diffChars('New Value.', 'New ValueMoreData.'); - expect(convertChangesToXML(diffResult)).to.equal('New ValueMoreData.'); + const diffResult = diffChars('Old Value.', 'New ValueMoreData.'); + expect(convertChangesToXML(diffResult)).to.equal('OldNew ValueMoreData.'); + }); + + describe('oneChangePerToken option', function() { + it('emits one change per token', function() { + const diffResult = diffChars('Old Value.', 'New ValueMoreData.', {oneChangePerToken: true}); + expect(diffResult.length).to.equal(21); + expect(convertChangesToXML(diffResult)).to.equal('OldNew ValueMoreData.'); + }); }); describe('case insensitivity', function() { From ae2911d76b015241c9119b51c1930fdfb49e6ab9 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 8 Jan 2024 15:04:43 +0000 Subject: [PATCH 5/7] Add test of case with identical texts --- test/diff/character.js | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/diff/character.js b/test/diff/character.js index 713a3d49..d2ccf9a4 100644 --- a/test/diff/character.js +++ b/test/diff/character.js @@ -11,11 +11,20 @@ describe('diff/character', function() { }); describe('oneChangePerToken 
option', function() { - it('emits one change per token', function() { + it('emits one change per character', function() { const diffResult = diffChars('Old Value.', 'New ValueMoreData.', {oneChangePerToken: true}); expect(diffResult.length).to.equal(21); expect(convertChangesToXML(diffResult)).to.equal('OldNew ValueMoreData.'); }); + + it('correctly handles the case where the texts are identical', function() { + const diffResult = diffChars('foo bar baz qux', 'foo bar baz qux', {oneChangePerToken: true}); + expect(diffResult).to.deep.equal( + ['f', 'o', 'o', ' ', 'b', 'a', 'r', ' ', 'b', 'a', 'z', ' ', 'q', 'u', 'x'].map( + char => ({value: char, count: 1, added: false, removed: false}) + ) + ); + }); }); describe('case insensitivity', function() { From aaee2d252b26cdd0a4e92a1c6222cde73a7aa418 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 8 Jan 2024 15:08:23 +0000 Subject: [PATCH 6/7] Add another test --- test/diff/line.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/diff/line.js b/test/diff/line.js index 7052b2a1..abd0ede9 100644 --- a/test/diff/line.js +++ b/test/diff/line.js @@ -101,6 +101,25 @@ describe('diff/line', function() { }); }); + describe('oneChangePerToken option', function() { + it('emits one change per line', function() { + const diffResult = diffLines( + 'foo\nbar\nbaz\nqux\n', + 'fox\nbar\nbaz\nqux\n', + { oneChangePerToken: true } + ); + expect(diffResult).to.deep.equal( + [ + {value: 'foo\n', count: 1, added: false, removed: true}, + {value: 'fox\n', count: 1, added: true, removed: false}, + {value: 'bar\n', count: 1, added: false, removed: false}, + {value: 'baz\n', count: 1, added: false, removed: false}, + {value: 'qux\n', count: 1, added: false, removed: false} + ] + ); + }); + }); + // Trimmed Line Diff describe('#TrimmedLineDiff', function() { it('should diff lines', function() { From d8d65bb91313e4241ce45620db198ad4a7cf5117 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 8 Jan 2024 15:09:53 +0000 
Subject: [PATCH 7/7] Move release notes to correct place now that I'm planning this for 6.0.0 --- release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release-notes.md b/release-notes.md index 6c60e427..d543cdf1 100644 --- a/release-notes.md +++ b/release-notes.md @@ -8,6 +8,7 @@ - [#439](https://github.com/kpdecker/jsdiff/pull/439) Prefer diffs that order deletions before insertions. When faced with a choice between two diffs with an equal total edit distance, the Myers diff algorithm generally prefers one that does deletions before insertions rather than insertions before deletions. For instance, when diffing `abcd` against `acbd`, it will prefer a diff that says to delete the `b` and then insert a new `b` after the `c`, over a diff that says to insert a `c` before the `b` and then delete the existing `c`. JsDiff deviated from the published Myers algorithm in a way that led to it having the opposite preference in many cases, including that example. This is now fixed, meaning diffs output by JsDiff will more accurately reflect what the published Myers diff algorithm would output. - [#455](https://github.com/kpdecker/jsdiff/pull/455) The `added` and `removed` properties of change objects are now guaranteed to be set to a boolean value. (Previously, they would be set to `undefined` or omitted entirely instead of setting them to false.) - [#464](https://github.com/kpdecker/jsdiff/pull/464) Specifying `{maxEditLength: 0}` now sets a max edit length of 0 instead of no maximum. +- [#460](https://github.com/kpdecker/jsdiff/pull/460) Added `oneChangePerToken` option. 
## Development @@ -19,7 +20,6 @@ - [#344](https://github.com/kpdecker/jsdiff/issues/344) `diffLines`, `createTwoFilesPatch`, and other patch-creation methods now take an optional `stripTrailingCr: true` option which causes Windows-style `\r\n` line endings to be replaced with Unix-style `\n` line endings before calculating the diff, just like GNU `diff`'s `--strip-trailing-cr` flag. - [#451](https://github.com/kpdecker/jsdiff/pull/451) Added `diff.formatPatch`. - [#450](https://github.com/kpdecker/jsdiff/pull/450) Added `diff.reversePatch`. -- [#460][https://github.com/kpdecker/jsdiff/pull/460] Added `oneChangePerToken` option. ## v5.1.0