diff --git a/README.md b/README.md index 67010ac5..0b71855b 100644 --- a/README.md +++ b/README.md @@ -139,9 +139,13 @@ Broadly, jsdiff's diff functions all take an old text and a new text and perform Once all patches have been applied or an error occurs, the `options.complete(err)` callback is made. -* `Diff.parsePatch(diffStr)` - Parses a patch into structured data +* `Diff.parsePatch(diffStr[, options])` - Parses a unified diff format patch (possibly representing multiple diffs, to different files) into structured data. - Return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `Diff.structuredPatch`. + Return an array of objects, one per file diff, suitable for use with the `applyPatch` method. This parses to the same structure returned by `Diff.structuredPatch`, except that each object by default also has a `leadingGarbage` key containing any content preceding the `---` and `+++` file headers. + + The optional `options` argument may have the following keys: + + - `discardGarbage`: if true, `parsePatch` won't set the `leadingGarbage` property. * `Diff.reversePatch(patch)` - Returns a new structured patch which when applied will undo the original `patch`. diff --git a/release-notes.md b/release-notes.md index b905801e..03b80800 100644 --- a/release-notes.md +++ b/release-notes.md @@ -26,6 +26,7 @@ - [#489](github.com/kpdecker/jsdiff/pull/489) **`this.options` no longer exists on `Diff` objects.** Instead, `options` is now passed as an argument to methods that rely on options, like `equals(left, right, options)`. This fixes a race condition in async mode, where diffing behaviour could be changed mid-execution if a concurrent usage of the same `Diff` instances overwrote its `options`. - [#518](https://github.com/kpdecker/jsdiff/pull/518) **`linedelimiters` no longer exists** on patch objects; instead, when a patch with Windows-style CRLF line endings is parsed, **the lines in `lines` will end with `\r`**. There is now a **new `autoConvertLineEndings` option, on by default**, which makes it so that when a patch with Windows-style line endings is applied to a source file with Unix style line endings, the patch gets autoconverted to use Unix-style line endings, and when a patch with Unix-style line endings is applied to a source file with Windows-style line endings, it gets autoconverted to use Windows-style line endings. - [#521](https://github.com/kpdecker/jsdiff/pull/521) **the `callback` option is now supported by `structuredPatch`, `createPatch`, and `createTwoFilesPatch`** +- [#522](https://github.com/kpdecker/jsdiff/pull/522) **`parsePatch` can now parse patches where lines starting with `--` or `++` are deleted/inserted**; previously, there were edge cases where the parser would choke on valid patches or give wrong results. Also, by default **`parsePatch` now preserves "leading garbage" from before each file in the patch in a `leadingGarbage` key**, and takes an **optional `discardGarbage` option** to disable this. ## v5.2.0 diff --git a/src/patch/create.js b/src/patch/create.js index ed3a2718..462a171a 100644 --- a/src/patch/create.js +++ b/src/patch/create.js @@ -117,10 +117,19 @@ export function structuredPatch(oldFileName, newFileName, oldStr, newStr, oldHea } } + let leadingGarbage; + if (oldFileName == newFileName) { + leadingGarbage = `Index: ${oldFileName}\n===================================================================`; + } else { + leadingGarbage = '==================================================================='; + } + + return { - oldFileName: oldFileName, newFileName: newFileName, - oldHeader: oldHeader, newHeader: newHeader, - hunks: hunks + oldFileName, newFileName, + oldHeader, newHeader, + hunks, + leadingGarbage }; } } @@ -131,10 +140,9 @@ export function formatPatch(diff) { } const ret = []; - if (diff.oldFileName == diff.newFileName) { - ret.push('Index: ' + diff.oldFileName); + if (diff.leadingGarbage) { + ret.push(diff.leadingGarbage); } - ret.push('==================================================================='); ret.push('--- ' + diff.oldFileName + (typeof diff.oldHeader === 'undefined' ? '' : '\t' + diff.oldHeader)); ret.push('+++ ' + diff.newFileName + (typeof diff.newHeader === 'undefined' ? '' : '\t' + diff.newHeader)); diff --git a/src/patch/parse.js b/src/patch/parse.js index caaf7881..c86f736d 100755 --- a/src/patch/parse.js +++ b/src/patch/parse.js @@ -1,4 +1,4 @@ -export function parsePatch(uniDiff) { +export function parsePatch(uniDiff, options = {discardGarbage: false}) { let diffstr = uniDiff.split(/\n/), list = [], i = 0; @@ -7,12 +7,17 @@ export function parsePatch(uniDiff) { let index = {}; list.push(index); + const leadingGarbageLines = []; + // Parse diff metadata while (i < diffstr.length) { let line = diffstr[i]; // File header found, end parsing diff metadata if ((/^(\-\-\-|\+\+\+|@@)\s/).test(line)) { + if (!options.discardGarbage) { + index.leadingGarbage = leadingGarbageLines.join('\n'); + } break; } @@ -22,6 +27,7 @@ export function parsePatch(uniDiff) { index.index = header[1]; } + leadingGarbageLines.push(line); i++; } @@ -92,17 +98,12 @@ export function parsePatch(uniDiff) { let addCount = 0, removeCount = 0; - for (; i < diffstr.length; i++) { - // Lines starting with '---' could be mistaken for the "remove line" operation - // But they could be the header for the next file. Therefore prune such cases out. - if (diffstr[i].indexOf('--- ') === 0 - && (i + 2 < diffstr.length) - && diffstr[i + 1].indexOf('+++ ') === 0 - && diffstr[i + 2].indexOf('@@') === 0) { - break; - } + for ( + ; + i < diffstr.length && (removeCount < hunk.oldLines || addCount < hunk.newLines || diffstr[i]?.startsWith('\\')); + i++ + ) { let operation = (diffstr[i].length == 0 && i != (diffstr.length - 1)) ? ' ' : diffstr[i][0]; - if (operation === '+' || operation === '-' || operation === ' ' || operation === '\\') { hunk.lines.push(diffstr[i]); @@ -115,7 +116,7 @@ export function parsePatch(uniDiff) { removeCount++; } } else { - break; + throw new Error(`Hunk at line ${chunkHeaderIndex + 1} contained invalid line ${diffstr[i]}`); } } diff --git a/test/patch/create.js b/test/patch/create.js index 7811b3b8..895903a3 100644 --- a/test/patch/create.js +++ b/test/patch/create.js @@ -819,6 +819,7 @@ describe('patch/create', function() { expect(res).to.eql({ oldFileName: 'oldfile', newFileName: 'newfile', oldHeader: 'header1', newHeader: 'header2', + leadingGarbage: '===================================================================', hunks: [{ oldStart: 1, oldLines: 3, newStart: 1, newLines: 3, lines: [' line2', ' line3', '-line4', '+line5', '\\ No newline at end of file'] @@ -835,6 +836,7 @@ describe('patch/create', function() { expect(res).to.eql({ oldFileName: 'oldfile', newFileName: 'newfile', oldHeader: 'header1', newHeader: 'header2', + leadingGarbage: '===================================================================', hunks: [{ oldStart: 1, oldLines: 3, newStart: 1, newLines: 3, lines: [' foo', '-bar', '+barcelona', ' baz'] @@ -854,6 +856,7 @@ describe('patch/create', function() { expect(res).to.eql({ oldFileName: 'oldfile', newFileName: 'newfile', oldHeader: 'header1', newHeader: 'header2', + leadingGarbage: '===================================================================', hunks: [{ oldStart: 1, oldLines: 3, newStart: 1, newLines: 3, lines: [' foo', '-bar', '+barcelona', ' baz'] @@ -931,14 +934,13 @@ describe('patch/create', function() { } ]; expect(formatPatch(patch)).to.equal( - '===================================================================\n' + + // TODO: Verify this is a legit patch before merging this PR. What does diff -u emit? '--- foo\t2023-12-29 15:48:17.976616966 +0000\n' + '+++ bar\t2023-12-29 15:48:21.400516845 +0000\n' + '@@ -1,1 +1,1 @@\n' + '-xxx\n' + '+yyy\n' + '\n' + - '===================================================================\n' + '--- baz\t2023-12-29 15:48:29.376283616 +0000\n' + '+++ qux\t2023-12-29 15:48:32.908180343 +0000\n' + '@@ -1,1 +1,1 @@\n' + @@ -948,10 +950,8 @@ describe('patch/create', function() { }); it('should roughly be the inverse of parsePatch', function() { // There are so many differences in how a semantically-equivalent patch - // can be formatted in unified diff format, AND in JsDiff's structured - // patch format as long as https://github.com/kpdecker/jsdiff/issues/434 - // goes unresolved, that a stronger claim than "roughly the inverse" is - // sadly not possible here. + // can be formatted in unified diff format that a stronger claim than "roughly the inverse" + // is sadly not possible here. // Check 1: starting with a patch in uniform diff format, does // formatPatch(parsePatch(...)) round-trip? diff --git a/test/patch/parse.js b/test/patch/parse.js index 2a6bf986..8c5d297d 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -13,6 +13,7 @@ describe('patch/parse', function() { +line4 line5`)) .to.eql([{ + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 3, @@ -33,6 +34,7 @@ describe('patch/parse', function() { -line3 +line4`)) .to.eql([{ + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 1, @@ -58,6 +60,7 @@ describe('patch/parse', function() { -line4 line5`)) .to.eql([{ + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 3, @@ -82,6 +85,7 @@ describe('patch/parse', function() { ] }]); }); + it('should parse single index patches', function() { expect(parsePatch( `Index: test @@ -99,6 +103,7 @@ describe('patch/parse', function() { oldHeader: 'header1', newFileName: 'to', newHeader: 'header2', + leadingGarbage: 'Index: test\n===================================================================', hunks: [ { oldStart: 1, oldLines: 3, @@ -113,6 +118,7 @@ describe('patch/parse', function() { ] }]); }); + it('should parse multiple index files', function() { expect(parsePatch( `Index: test @@ -139,6 +145,7 @@ Index: test2 oldHeader: 'header1', newFileName: 'to', newHeader: 'header2', + leadingGarbage: 'Index: test\n===================================================================', hunks: [ { oldStart: 1, oldLines: 3, @@ -157,6 +164,7 @@ Index: test2 oldHeader: 'header1', newFileName: 'to', newHeader: 'header2', + leadingGarbage: 'Index: test2\n===================================================================', hunks: [ { oldStart: 1, oldLines: 3, @@ -193,6 +201,7 @@ Index: test2 oldHeader: 'header1', newFileName: 'to', newHeader: 'header2', + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 3, @@ -210,6 +219,7 @@ Index: test2 oldHeader: 'header1', newFileName: 'to', newHeader: 'header2', + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 3, @@ -225,7 +235,7 @@ Index: test2 }]); }); - it('should parse patches with filenames having quotes and back slashes', function() { + it('should parse patches with filenames having quotes and back slashes', function() { expect(parsePatch( `Index: test =================================================================== @@ -242,6 +252,7 @@ Index: test2 oldHeader: 'header1', newFileName: 'to\\a\\file\\with\\quotes\\and\\backslash', newHeader: 'header2', + leadingGarbage: 'Index: test\n===================================================================', hunks: [ { oldStart: 1, oldLines: 3, @@ -263,6 +274,7 @@ Index: test2 -line5 \\ No newline at end of file`)) .to.eql([{ + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 1, @@ -275,12 +287,14 @@ Index: test2 ] }]); }); + it('should note removed EOFNL', function() { expect(parsePatch( `@@ -0,0 +1 @@ +line5 \\ No newline at end of file`)) .to.eql([{ + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 0, @@ -293,6 +307,7 @@ Index: test2 ] }]); }); + it('should ignore context no EOFNL', function() { expect(parsePatch( `@@ -1 +1,2 @@ @@ -300,6 +315,7 @@ Index: test2 line5 \\ No newline at end of file`)) .to.eql([{ + leadingGarbage: '', hunks: [ { oldStart: 1, oldLines: 1, @@ -332,6 +348,7 @@ Index: test2 index: 'foo' }]); }); + it('should throw on invalid input', function() { expect(function() { parsePatch('Index: foo\n+++ bar\nblah'); @@ -374,6 +391,7 @@ Index: test2 oldHeader: '2023-12-20 16:11:20.908225554 +0000', newFileName: 'bar', newHeader: '2023-12-20 16:11:34.391473579 +0000', + leadingGarbage: '', hunks: [ { oldStart: 1, @@ -406,6 +424,7 @@ Index: test2 newFileName: 'foo', newHeader: '', index: 'foo', + leadingGarbage: 'Index: foo\n===================================================================', hunks: [ { oldStart: 1, @@ -426,12 +445,89 @@ Index: test2 ]); }); + it('should preserve leading garbage unless discardGarbage: true is specified', () => { + const patchStr = `diff --git a/bar b/bar +index dccca17..5b1bf3f 100644 +--- a/bar ++++ b/bar +@@ -2,3 +2,5 @@ wiggly + wobbly + squiggly + squabbly ++flippety ++floppety +diff --git a/foo b/foo +index 7a4a73a..38d82a3 100644 +--- a/foo ++++ b/foo +@@ -1,4 +1,4 @@ + first line + second line +-third line ++some other line + fourth line +`; + const expectedResult = [ + { + oldFileName: 'a/bar', + oldHeader: '', + newFileName: 'b/bar', + newHeader: '', + leadingGarbage: 'diff --git a/bar b/bar\nindex dccca17..5b1bf3f 100644', + hunks: [ + { + oldStart: 2, + oldLines: 3, + newStart: 2, + newLines: 5, + lines: [ + ' wobbly', + ' squiggly', + ' squabbly', + '+flippety', + '+floppety' + ] + } + ] + }, + { + oldFileName: 'a/foo', + oldHeader: '', + newFileName: 'b/foo', + newHeader: '', + leadingGarbage: 'diff --git a/foo b/foo\nindex 7a4a73a..38d82a3 100644', + hunks: [ + { + oldStart: 1, + oldLines: 4, + newStart: 1, + newLines: 4, + lines: [ + ' first line', + ' second line', + '-third line', + '+some other line', + ' fourth line' + ] + } + ] + } + ]; + + expect(parsePatch(patchStr)).to.eql(expectedResult); + + for (const file of expectedResult) { + delete file.leadingGarbage; + } + expect(parsePatch(patchStr, {discardGarbage: true})).to.eql(expectedResult); + }); + it('should tolerate patches with extra trailing newlines after hunks', () => { // Regression test for https://github.com/kpdecker/jsdiff/issues/524 // Not only are these considered valid by GNU patch, but jsdiff's own formatPatch method // emits patches like this, which jsdiff used to then be unable to parse! - const patchStr = `--- foo 2024-06-14 22:16:31.444276792 +0100 -+++ bar 2024-06-14 22:17:14.910611219 +0100 + const patchStr = `--- foo\t2024-06-14 22:16:31.444276792 +0100 ++++ bar\t2024-06-14 22:17:14.910611219 +0100 @@ -1,7 +1,7 @@ first second @@ -449,6 +545,7 @@ Index: test2 oldHeader: '2024-06-14 22:16:31.444276792 +0100', newFileName: 'bar', newHeader: '2024-06-14 22:17:14.910611219 +0100', + leadingGarbage: '', hunks: [ { oldStart: 1, @@ -503,8 +600,8 @@ Index: test2 // determine where a hunk or file ends in a unified diff patch without heeding those line // counts. - const patchStr = `--- foo 2024-06-14 21:57:04.341065736 +0100 -+++ bar 2024-06-14 22:00:57.988080321 +0100 + const patchStr = `--- foo\t2024-06-14 21:57:04.341065736 +0100 ++++ bar\t2024-06-14 22:00:57.988080321 +0100 @@ -4 +4 @@ --- bla +++ bla @@ -513,15 +610,120 @@ Index: test2 `; expect(parsePatch(patchStr)).to.eql([{ - oldFileName: 'foo 2024-06-14 21:57:04.341065736 +0100', - oldHeader: '', - newFileName: 'bar 2024-06-14 22:00:57.988080321 +0100', - newHeader: '', + oldFileName: 'foo', + oldHeader: '2024-06-14 21:57:04.341065736 +0100', + newFileName: 'bar', + newHeader: '2024-06-14 22:00:57.988080321 +0100', + leadingGarbage: '', hunks: [ { oldStart: 4, oldLines: 1, newStart: 4, newLines: 1, lines: ['--- bla', '+++ bla'] }, { oldStart: 7, oldLines: 0, newStart: 7, newLines: 1, lines: ['+seventh'] } ] }]); }); + + it('should emit an error if a hunk contains an invalid line', () => { + // Within a hunk, every line must either start with '+' (insertion), '-' (deletion), + // ' ' (context line, i.e. not deleted nor inserted) or a backslash (for + // '\\ No newline at end of file' lines). Seeing anything else before the end of the hunk is + // an error. + + const patchStr = `Index: test +=================================================================== +--- from\theader1 ++++ to\theader2 +@@ -1,3 +1,4 @@ + line2 +line3 ++line4 + line5`; + + // eslint-disable-next-line dot-notation + expect(() => {parsePatch(patchStr);}).to.throw('Hunk at line 5 contained invalid line line3'); + }); + + it("shouldn't choke on a diffx diff", () => { + // Taken from https://diffx.org/_/downloads/en/latest/pdf/. + // diffx is little-used and probably unimportant in and of itself but it's a good test of + // unusual but realistic leading garbage. + const patchStr = `#diffx: encoding=utf-8, version=1.0 +#.change: +#..preamble: indent=4, length=319, mimetype=text/markdown + Convert legacy header building code to Python 3. + Header building for messages used old Python 2.6-era list comprehensions + with tuples rather than modern dictionary comprehensions in order to build + a message list. This change modernizes that, and swaps out six for a + 3-friendly \`.items()\` call. +#..meta: format=json, length=270 +{ + "author": "Christian Hammond ", + "committer": "Christian Hammond ", + "committer date": "2021-06-02T13:12:06-07:00", + "date": "2021-06-01T19:26:31-07:00", + "id": "a25e7b28af5e3184946068f432122c68c1a30b23" +} +#..file: +#...meta: format=json, length=176 +{ + "path": "/src/message.py", + "revision": { + "new": "f814cf74766ba3e6d175254996072233ca18a690", + "old": "9f6a412b3aee0a55808928b43f848202b4ee0f8d" + } +} +#...diff: length=629 +--- /src/message.py ++++ /src/message.py +@@ -164,10 +164,10 @@ + not isinstance(headers, MultiValueDict)): + # Instantiating a MultiValueDict from a dict does not ensure that + # values are lists, so we have to ensure that ourselves. +- headers = MultiValueDict(dict( +- (key, [value]) +- for key, value in six.iteritems(headers) +- )) ++ headers = MultiValueDict({ ++ key: [value] ++ for key, value in headers.items() ++ }) + + if in_reply_to: + headers['In-Reply-To'] = in_reply_to +`; + + expect(parsePatch(patchStr)).to.eql([ + { + leadingGarbage: '#diffx: encoding=utf-8, version=1.0\n#.change:\n#..preamble: indent=4, length=319, mimetype=text/markdown\n Convert legacy header building code to Python 3.\n Header building for messages used old Python 2.6-era list comprehensions\n with tuples rather than modern dictionary comprehensions in order to build\n a message list. This change modernizes that, and swaps out six for a\n 3-friendly `.items()` call.\n#..meta: format=json, length=270\n{\n \"author\": \"Christian Hammond \",\n \"committer\": \"Christian Hammond \",\n \"committer date\": \"2021-06-02T13:12:06-07:00\",\n \"date\": \"2021-06-01T19:26:31-07:00\",\n \"id\": \"a25e7b28af5e3184946068f432122c68c1a30b23\"\n}\n#..file:\n#...meta: format=json, length=176\n{\n \"path\": \"/src/message.py\",\n \"revision\": {\n \"new\": \"f814cf74766ba3e6d175254996072233ca18a690\",\n \"old\": \"9f6a412b3aee0a55808928b43f848202b4ee0f8d\"\n }\n}\n#...diff: length=629', + oldFileName: '/src/message.py', + oldHeader: '', + newFileName: '/src/message.py', + newHeader: '', + hunks: [ + { + oldStart: 164, + oldLines: 10, + newStart: 164, + newLines: 10, + lines: [ + ' not isinstance(headers, MultiValueDict)):', + ' # Instantiating a MultiValueDict from a dict does not ensure that', + ' # values are lists, so we have to ensure that ourselves.', + '- headers = MultiValueDict(dict(', + '- (key, [value])', + '- for key, value in six.iteritems(headers)', + '- ))', + '+ headers = MultiValueDict({', + '+ key: [value]', + '+ for key, value in headers.items()', + '+ })', + '', + ' if in_reply_to:', + " headers['In-Reply-To'] = in_reply_to" + ] + } + ] + } + ]); + }); }); }); diff --git a/test/patch/reverse.js b/test/patch/reverse.js index 1781aa1d..c9b64c7b 100644 --- a/test/patch/reverse.js +++ b/test/patch/reverse.js @@ -61,7 +61,8 @@ describe('patch/reverse', function() { '+bar\n' ); expect(formatPatch(reversePatch(patch))).to.equal( - '===================================================================\n' + + 'diff --git a/README.md b/README.md\n' + + 'index 06eebfa..40919a6 100644\n' + '--- b/README.md\t\n' + '+++ a/README.md\t\n' + '@@ -1,7 +1,5 @@\n' + @@ -79,7 +80,8 @@ describe('patch/reverse', function() { '-\n' + '-bar\n' + '\n' + - '===================================================================\n' + + 'diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md\n' + + 'index 20b807a..4a96aff 100644\n' + '--- b/CONTRIBUTING.md\t\n' + '+++ a/CONTRIBUTING.md\t\n' + '@@ -2,8 +2,6 @@\n' +