From aaf375107156b98e66c9eac4b96115e75099a082 Mon Sep 17 00:00:00 2001 From: whitty Date: Mon, 8 Dec 2025 12:31:53 +1100 Subject: [PATCH 1/5] Refactor parse_samples/parse_wild_samples to reduce duplication and add known failures (#10) * add another sample * parse_samples: parameterize test code prior to adding failing test * tests: add wild-samples-fail directory for work in progress with merge-conflict example * tests: add wild-samples of submodule diffs --------- Co-authored-by: Wolf Vollprecht --- tests/parse_samples.rs | 93 ++++++++++---------- tests/wild-samples-fail/.gitattributes | 2 + tests/wild-samples-fail/merge-conflict.patch | 74 ++++++++++++++++ tests/wild-samples-fail/submodule-log.patch | 5 ++ tests/wild-samples/sample2.patch | 53 +++++++++++ tests/wild-samples/submodule-hex.patch | 7 ++ 6 files changed, 189 insertions(+), 45 deletions(-) create mode 100644 tests/wild-samples-fail/.gitattributes create mode 100644 tests/wild-samples-fail/merge-conflict.patch create mode 100644 tests/wild-samples-fail/submodule-log.patch create mode 100644 tests/wild-samples/sample2.patch create mode 100644 tests/wild-samples/submodule-hex.patch diff --git a/tests/parse_samples.rs b/tests/parse_samples.rs index 50e223a..c062dea 100644 --- a/tests/parse_samples.rs +++ b/tests/parse_samples.rs @@ -5,61 +5,64 @@ use pretty_assertions::assert_eq; use gitpatch::Patch; -#[test] -fn parse_samples() { - let samples_path = PathBuf::from(file!()).parent().unwrap().join("samples"); +fn visit_patches(test_dir: &str, extension: &str, f: F) +where + F: Fn(&str, &PathBuf), +{ + let samples_path = PathBuf::from(file!()).parent().unwrap().join(test_dir); for file in fs::read_dir(samples_path).unwrap() { let path = file.unwrap().path(); - if path.extension().unwrap_or_default() != "diff" { + if path.extension().unwrap_or_default() != extension { continue; } - let data = fs::read_to_string(dbg!(&path)).unwrap(); - let patches = Patch::from_multiple(&data) - .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); - - // Make sure that the patch file we produce parses to the same information as the original - // patch file. - #[allow(clippy::format_collect)] // Display::fmt is the only way to resolve Patch->str - let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); - println!("{}", patch_file); - let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { - panic!( - "failed to re-parse {:?} after formatting, error: {}", - path, err - ) - }); - assert_eq!(patches, patches2); + f(fs::read_to_string(dbg!(&path)).unwrap().as_str(), &path); } } +// Make sure that the patch file we produce parses to the same information as the original +// patch file. +fn verify_patch_roundtrip(data: &str, path: &PathBuf) { + let patches = Patch::from_multiple(data) + .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); + + #[allow(clippy::format_collect)] // Display::fmt is the only way to resolve Patch->str + let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); + println!("{}", patch_file); + let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { + panic!( + "failed to re-parse {:?} after formatting, error: {}", + path, err + ) + }); + assert_eq!(patches, patches2); +} + #[test] -fn parse_wild_samples() { - let samples_path = PathBuf::from(file!()) - .parent() - .unwrap() - .join("wild-samples"); - for file in fs::read_dir(samples_path).unwrap() { - let path = file.unwrap().path(); - if path.extension().unwrap_or_default() != "patch" { - continue; - } +fn parse_samples() { + visit_patches("samples", "diff", verify_patch_roundtrip); +} - let data = fs::read_to_string(dbg!(&path)).unwrap(); - let patches = Patch::from_multiple(&data) - .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); +#[test] +fn parse_wild_samples() { + visit_patches("wild-samples", "patch", verify_patch_roundtrip); +} - // Make sure that the patch file we produce parses to the same information as the original - // patch file. - #[allow(clippy::format_collect)] // Display::fmt is the only way to resolve Patch->str - let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); +fn expect_parse_failure(data: &str, path: &PathBuf) { + println!("Expecting failure reading {:?}", path); + assert!( + Patch::from_multiple(data).is_err(), + "Expected failure parsing {:?}. Has this been fixed? \ + Consider moving it to wild-samples for full regression test", + path + ); +} - let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { - panic!( - "failed to re-parse {:?} after formatting, error: {}", - path, err - ) - }); - assert_eq!(patches, patches2); - } +// We have a zoo of known failing examples in wild-samples-fail. +// +// If we fix the underlying problem, this test will start to FAIL and patches +// should be migrated out of the failing list to the wild-samples collection. +#[test] +fn parse_failing_wild_samples() { + visit_patches("wild-samples-fail", "patch", expect_parse_failure); } diff --git a/tests/wild-samples-fail/.gitattributes b/tests/wild-samples-fail/.gitattributes new file mode 100644 index 0000000..3e07e53 --- /dev/null +++ b/tests/wild-samples-fail/.gitattributes @@ -0,0 +1,2 @@ +# Disable text processing on these wild sample - eg preserve their wild CRLF format +*.patch -text diff --git a/tests/wild-samples-fail/merge-conflict.patch b/tests/wild-samples-fail/merge-conflict.patch new file mode 100644 index 0000000..15726e0 --- /dev/null +++ b/tests/wild-samples-fail/merge-conflict.patch @@ -0,0 +1,74 @@ +diff --cc src/parser.rs +index cfc5da4,ac7ada2..0000000 +--- a/src/parser.rs ++++ b/src/parser.rs +@@@ -104,15 -95,13 +104,22 @@@ fn multiple_patches(input: Input) -> IR + many1(patch)(input) + } + +++<<<<<<< HEAD + +fn patch(input: Input) -> IResult { + + if let Ok(patch) = binary_files_differ(input) { + + return Ok(patch); + + } + + if let Ok(patch) = file_rename_only(input) { + + return Ok(patch); + + } +++======= ++ fn patch(input: Input<'_>) -> IResult, Patch> { ++ if let Ok(patch) = binary_files_differ(input) { ++ return Ok(patch); ++ } +++>>>>>>> 0b90edb (Handle "Binary files differ" lines) + let (input, files) = headers(input)?; + let (input, hunks) = chunks(input)?; + - let (input, no_newline_indicator) = no_newline_indicator(input)?; + // Ignore trailing empty lines produced by some diff programs + let (input, _) = many0(line_ending)(input)?; + +@@@ -200,10 -117,43 +207,43 @@@ fn file_rename_only(input: Input<'_>) - + )) + } + ++ /// Recognize a "binary files XX and YY differ" line as an empty patch. ++ fn binary_files_differ(input: Input<'_>) -> IResult, Patch> { ++ // let (input, _) = context("Binary file line", tag("Binary files "))(input)?; ++ // The names aren't quoted so this seems to require lookahead and then ++ // parsing the identified string. ++ let (input, (old, new)) = context( ++ "Binary file line", ++ delimited( ++ tag("Binary files "), ++ map_opt(take_until(" differ\n"), |names: Input<'_>| { ++ names.split_once(" and ") ++ }), ++ pair(tag(" differ"), line_ending), ++ ), ++ )(input)?; ++ dbg!(&old, &new); ++ Ok(( ++ input, ++ Patch { ++ old: File { ++ path: Cow::Borrowed(old), ++ meta: None, ++ }, ++ new: File { ++ path: Cow::Borrowed(new), ++ meta: None, ++ }, ++ hunks: Vec::new(), ++ end_newline: false, ++ }, ++ )) ++ } ++ + // Header lines + -fn headers(input: Input<'_>) -> IResult, (File, File)> { + +fn headers(input: Input) -> IResult { + // Ignore any preamble lines in produced diffs + - let (input, _) = take_until("---")(input)?; + + let (input, _) = take_until("--- ")(input)?; + let (input, _) = tag("--- ")(input)?; + let (input, oldfile) = header_line_content(input)?; + let (input, _) = line_ending(input)?; diff --git a/tests/wild-samples-fail/submodule-log.patch b/tests/wild-samples-fail/submodule-log.patch new file mode 100644 index 0000000..c2d2464 --- /dev/null +++ b/tests/wild-samples-fail/submodule-log.patch @@ -0,0 +1,5 @@ +Submodule submodule b0007f5..212f9bb: + < update README.md with Known Issues section + > add --verbose + > add --cached + > main: return error if we didn't pop diff --git a/tests/wild-samples/sample2.patch b/tests/wild-samples/sample2.patch new file mode 100644 index 0000000..857997d --- /dev/null +++ b/tests/wild-samples/sample2.patch @@ -0,0 +1,53 @@ +# HG changeset patch +# Parent 13ba6cbdb304cd251fbc22466cadb21019ee817f +# User Bill McCloskey + +diff --git a/content/base/src/nsContentUtils.cpp b/content/base/src/nsContentUtils.cpp +--- a/content/base/src/nsContentUtils.cpp ++++ b/content/base/src/nsContentUtils.cpp +@@ -6369,17 +6369,17 @@ public: + nsCycleCollectionParticipant* helper) + { + } + + NS_IMETHOD_(void) NoteNextEdgeName(const char* name) + { + } + +- NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* val) ++ NS_IMETHOD_(void) NoteWeakMapping(void* map, void* key, void* kdelegate, void* val) + { + } + + bool mFound; + + private: + void* mWrapper; + }; +diff --git a/js/src/jsfriendapi.cpp b/js/src/jsfriendapi.cpp +--- a/js/src/jsfriendapi.cpp ++++ b/js/src/jsfriendapi.cpp +@@ -527,16 +527,24 @@ js::VisitGrayWrapperTargets(JSCompartmen + { + for (WrapperMap::Enum e(comp->crossCompartmentWrappers); !e.empty(); e.popFront()) { + gc::Cell *thing = e.front().key.wrapped; + if (thing->isMarked(gc::GRAY)) + callback(closure, thing); + } + } + ++JS_FRIEND_API(JSObject *) ++js::GetWeakmapKeyDelegate(JSObject *key) ++{ ++ if (JSWeakmapKeyDelegateOp op = key->getClass()->ext.weakmapKeyDelegateOp) ++ return op(key); ++ return NULL; ++} ++ + JS_FRIEND_API(void) + JS_SetAccumulateTelemetryCallback(JSRuntime *rt, JSAccumulateTelemetryDataCallback callback) + { + rt->telemetryCallback = callback; + } + + JS_FRIEND_API(JSObject *) diff --git a/tests/wild-samples/submodule-hex.patch b/tests/wild-samples/submodule-hex.patch new file mode 100644 index 0000000..f14a66d --- /dev/null +++ b/tests/wild-samples/submodule-hex.patch @@ -0,0 +1,7 @@ +diff --git a/submodule b/submodule +index 744c3350..e1e93dd3 160000 +--- a/submodule ++++ b/submodule +@@ -1 +1 @@ +-Subproject commit 744c3350dd3ca95ac6ffadf294112d3b85f535d2 ++Subproject commit e1e93dd3218a9ea2ac6300e61c09f0b282e7b5bc From 8ef098f6c33d0960fc09d871fdc9363079f5a1bc Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Mon, 8 Dec 2025 08:17:33 -0800 Subject: [PATCH 2/5] Don't error on empty input (#14) * clippy: remove useless_vec This currently breaks CI in e.g. https://github.com/gitpatch-rs/gitpatch/actions/runs/20013661647/job/57387162629#step:5:70 * Don't error on empty input Fixes #13 * Simplify the check that multiple_patches consumes the whole file --- CHANGELOG.md | 4 ++++ src/ast.rs | 6 ++++-- src/parser.rs | 24 ++++++++++++------------ tests/samples/empty.diff | 0 4 files changed, 20 insertions(+), 14 deletions(-) create mode 100644 tests/samples/empty.diff diff --git a/CHANGELOG.md b/CHANGELOG.md index 537c263..a43dde0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # CHANGELOG ## [Unreleased] +### Breaking + +- `Patch::from_multiple` no longer returns an error on an input that contains no patches, including an empty string. It instead returns an empty vector. + ### Changed ## [v0.7] diff --git a/src/ast.rs b/src/ast.rs index d923f10..332b856 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -81,7 +81,9 @@ impl<'a> Patch<'a> { } /// Attempt to parse as many patches as possible from the given string. This is useful for when - /// you have a complete diff of many files. String must contain at least one patch. + /// you have a complete diff of many files. + /// + /// It is not an error if the string contains no patches: this returns an empty vector. /// /// # Example /// @@ -299,7 +301,7 @@ mod tests { range_hint: "", lines: vec![], }; - for (input, expected) in vec![ + for (input, expected) in [ ("", None), (" ", None), (" ", None), diff --git a/src/parser.rs b/src/parser.rs index 487f3e2..dd1b2c9 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,13 +2,12 @@ use std::borrow::Cow; use std::error::Error; use chrono::DateTime; -use nom::combinator::verify; use nom::*; use nom::{ branch::alt, bytes::complete::{is_not, tag, take_until}, character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of}, - combinator::{map, map_opt, not, opt}, + combinator::{all_consuming, map, map_opt, not, opt, verify}, error::context, multi::{many0, many1}, sequence::{delimited, preceded, terminated, tuple}, @@ -82,20 +81,15 @@ pub(crate) fn parse_single_patch(s: &str) -> Result, ParseError<'_>> { pub(crate) fn parse_multiple_patches(s: &str) -> Result>, ParseError<'_>> { let (remaining_input, patches) = multiple_patches(Input::new(s))?; - // Parser should return an error instead of producing remaining input - if !remaining_input.fragment().is_empty() { - return Err(ParseError { - line: remaining_input.location_line(), - offset: remaining_input.location_offset(), - fragment: remaining_input.fragment(), - kind: nom::error::ErrorKind::Eof, - }); - } + debug_assert!( + remaining_input.fragment().is_empty(), + "all_consuming should not have left over input" + ); Ok(patches) } fn multiple_patches(input: Input) -> IResult> { - many1(patch)(input) + all_consuming(many0(patch))(input) } fn patch(input: Input) -> IResult { @@ -424,6 +418,12 @@ mod tests { Ok(()) } + #[test] + fn test_empty_input() -> ParseResult<'static, ()> { + test_parser!(multiple_patches("") -> @("", Vec::new())); + Ok(()) + } + #[test] fn test_filename() -> ParseResult<'static, ()> { // bare diff --git a/tests/samples/empty.diff b/tests/samples/empty.diff new file mode 100644 index 0000000..e69de29 From 57eb5acca946a0d2c3830c0bb9f6e518edda9c1f Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Wed, 10 Dec 2025 06:59:00 -0800 Subject: [PATCH 3/5] clippy: remove useless_vec (#16) This currently breaks CI in e.g. https://github.com/gitpatch-rs/gitpatch/actions/runs/20013661647/job/57387162629#step:5:70 From ea05b24892ef5a2c03a4fb7ad6b78952b1b6f4ae Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Wed, 10 Dec 2025 07:36:08 -0800 Subject: [PATCH 4/5] Modernize GitHub actions and declare MSRV (#17) * Update to actions/checkout@v6 * Use dtolnay/rust-toolchain instead of deprecated actions-rs * Fold style checks into the same workers * Remove use of deprecated actions-rs * Just build once, including the examples * Also test on nightly * Edition 2021: Supported by every toolchain that we support, and doesn't actually require any changes to the codebase. * Declare MSRV 1.71, and test it: Seems to be the earliest version that actually builds. --- .github/workflows/checks.yml | 56 +++++++++--------------------------- Cargo.toml | 3 +- 2 files changed, 16 insertions(+), 43 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 7a52634..e43be4a 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -2,9 +2,9 @@ name: checks on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ main ] + branches: [main] # allow manual trigger workflow_dispatch: @@ -20,51 +20,23 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] rust: + - 1.71 # MSRV from Cargo.toml - 1.84.1 # pre edition2024 - 1.87 - stable + - nightly steps: - - uses: actions/checkout@v2 - - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: ${{matrix.rust}} - override: true - - - name: Build lib - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose - - name: Build examples - run: cargo build --example '*' - - style: - name: Style checks for ${{matrix.rust}} on ${{matrix.os}} - runs-on: ${{matrix.os}} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - rust: - - 1.84.1 # pre edition2024 - - 1.87 # clippy became more picky after this - - stable - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@v1 with: - profile: minimal - toolchain: ${{matrix.rust}} - override: true components: rustfmt, clippy + toolchain: ${{matrix.rust}} + - name: Build lib + run: cargo build --verbose --all-targets --all-features + - name: Run tests + run: cargo test --verbose - name: fmt - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --all -- --check + run: cargo fmt --all -- --check - name: clippy - uses: actions-rs/cargo@v1 - with: - command: clippy - args: --all-targets --all-features -- -D warnings + if: ${{ matrix.rust == 'stable' }} # to avoid churn about lints being unknown etc + run: cargo clippy --all-targets --all-features -- -D warnings diff --git a/Cargo.toml b/Cargo.toml index f1d99c1..33361c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,8 @@ repository = "https://github.com/gitpatch-rs/gitpatch" readme = "README.md" keywords = ["patch", "diff", "parse", "nom"] license = "MIT" -edition = "2018" +edition = "2021" +rust-version = "1.71" # matches .github/workflows/checks.yml [dependencies] nom = "7.1.0" From 5131c5df98c185b78cc1502e0d294c5901ff7e75 Mon Sep 17 00:00:00 2001 From: romamik Date: Sun, 11 Jan 2026 22:22:38 +0100 Subject: [PATCH 5/5] Better handling of "No newline at end of file" marker (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #4 Co-authored-by: Vinícius Miguel <36349314+vrmiguel@users.noreply.github.com> Co-authored-by: vrmiguel --- CHANGELOG.md | 3 + examples/apply.rs | 7 +- src/ast.rs | 55 ++++++++---- src/parser.rs | 193 ++++++++++++++++++++++++++++++++----------- tests/parse_patch.rs | 107 +++++++++++++++++------- tests/regressions.rs | 3 +- 6 files changed, 272 insertions(+), 96 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a43dde0..79cb37e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ - `Patch::from_multiple` no longer returns an error on an input that contains no patches, including an empty string. It instead returns an empty vector. +### Fixed +- Issue #4: Fixed parsing of “No newline at end of file” markers so they are recognized even when not the final line of a hunk. + ### Changed ## [v0.7] diff --git a/examples/apply.rs b/examples/apply.rs index 150365e..d3811e6 100644 --- a/examples/apply.rs +++ b/examples/apply.rs @@ -14,11 +14,16 @@ fn apply(diff: Patch, old: &str) -> String { old_line += hunk.old_range.count; for line in hunk.lines { match line { - Line::Add(s) | Line::Context(s) => out.push(s), + Line::Add(s) | Line::Context(s) => { + out.push(s); + } Line::Remove(_) => {} } } } + if !diff.new_missing_newline { + out.push(""); + } out.join("\n") } diff --git a/src/ast.rs b/src/ast.rs index 332b856..ae036c2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -14,11 +14,10 @@ pub struct Patch<'a> { pub new: File<'a>, /// hunks of differences; each hunk shows one area where the files differ pub hunks: Vec>, - /// true if the last line of the file ends in a newline character - /// - /// This will only be false if at the end of the patch we encounter the text: - /// `\ No newline at end of file` - pub end_newline: bool, + /// If there was a `No newline at end of file` indicator after the last line of the old version of the file + pub old_missing_newline: bool, + /// If there was a `No newline at end of file` indicator after the last line of the new version of the file + pub new_missing_newline: bool, } impl fmt::Display for Patch<'_> { @@ -28,11 +27,13 @@ impl fmt::Display for Patch<'_> { write!(f, "--- {}", self.old)?; write!(f, "\n+++ {}", self.new)?; - for hunk in &self.hunks { - write!(f, "\n{}", hunk)?; - } - if !self.end_newline { - write!(f, "\n\\ No newline at end of file")?; + for (i, hunk) in self.hunks.iter().enumerate() { + writeln!(f)?; + if i == self.hunks.len() - 1 { + hunk.fmt(f, self.old_missing_newline, self.new_missing_newline)?; + } else { + hunk.fmt(f, false, false)?; + } } Ok(()) } @@ -72,7 +73,8 @@ impl<'a> Patch<'a> { /// let patch = Patch::from_single(sample)?; /// assert_eq!(&patch.old.path, "lao"); /// assert_eq!(&patch.new.path, "tzu"); - /// assert_eq!(patch.end_newline, false); + /// assert_eq!(patch.old_missing_newline, false); + /// assert_eq!(patch.new_missing_newline, true); /// # Ok(()) /// # } /// ``` @@ -234,18 +236,41 @@ impl Hunk<'_> { Some(h) } } -} -impl fmt::Display for Hunk<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt( + &self, + f: &mut fmt::Formatter, + old_missing_newline: bool, + new_missing_newline: bool, + ) -> fmt::Result { write!( f, "@@ -{} +{} @@{}", self.old_range, self.new_range, self.range_hint )?; - for line in &self.lines { + // compute line indices to put "No newline at end of file" indicator after + let last_old_idx = old_missing_newline + .then(|| { + self.lines + .iter() + .rposition(|l| matches!(l, Line::Remove(_) | Line::Context(_))) + }) + .flatten(); + let last_new_idx = new_missing_newline + .then(|| { + self.lines + .iter() + .rposition(|l| matches!(l, Line::Add(_) | Line::Context(_))) + }) + .flatten(); + + for (i, line) in self.lines.iter().enumerate() { write!(f, "\n{}", line)?; + if Some(i) == last_old_idx || Some(i) == last_new_idx { + writeln!(f)?; + write!(f, "\\ No newline at end of file")?; + } } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index dd1b2c9..2ca42c7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -62,9 +62,15 @@ impl Error for ParseError<'_> { } } -fn consume_content_line(input: Input<'_>) -> IResult, &str> { +fn consume_content_line(input: Input<'_>) -> IResult, (&str, bool)> { let (input, raw) = terminated(not_line_ending, line_ending)(input)?; - Ok((input, raw.fragment())) + + let (input, missing_newline) = opt(terminated( + tag("\\ No newline at end of file"), + opt(line_ending), + ))(input)?; + + Ok((input, (raw.fragment(), missing_newline.is_some()))) } pub(crate) fn parse_single_patch(s: &str) -> Result, ParseError<'_>> { @@ -100,8 +106,14 @@ fn patch(input: Input) -> IResult { return Ok(patch); } let (input, files) = headers(input)?; - let (input, hunks) = chunks(input)?; - let (input, no_newline_indicator) = no_newline_indicator(input)?; + let ( + input, + ParsedHunks { + hunks, + old_missing_newline, + new_missing_newline, + }, + ) = chunks(input)?; // Ignore trailing empty lines produced by some diff programs let (input, _) = many0(line_ending)(input)?; @@ -112,7 +124,8 @@ fn patch(input: Input) -> IResult { old, new, hunks, - end_newline: !no_newline_indicator, + old_missing_newline, + new_missing_newline, }, )) } @@ -146,7 +159,8 @@ fn binary_files_differ(input: Input) -> IResult { meta: None, }, hunks: Vec::new(), - end_newline: false, + old_missing_newline: false, + new_missing_newline: false, }, )) } @@ -176,7 +190,8 @@ fn file_rename_only(input: Input<'_>) -> IResult, Patch<'_>> { meta: None, }, hunks: Vec::new(), - end_newline: false, + old_missing_newline: false, + new_missing_newline: false, }, )) } @@ -216,9 +231,29 @@ fn header_line_content(input: Input) -> IResult { )) } +struct ParsedHunks<'a> { + hunks: Vec>, + old_missing_newline: bool, + new_missing_newline: bool, +} + // Hunks of the file differences -fn chunks(input: Input) -> IResult> { - many1(chunk)(input) +fn chunks(input: Input) -> IResult { + let (span, hunks) = many1(chunk)(input)?; + + let (old_missing_newline, new_missing_newline) = hunks + .last() + .map(|hunk| (hunk.old_missing_newline, hunk.new_missing_newline)) + .unwrap_or_default(); + let hunks = hunks.into_iter().map(|hunk| hunk.hunk).collect(); + Ok(( + span, + ParsedHunks { + hunks, + old_missing_newline, + new_missing_newline, + }, + )) } fn is_next_header(input: Input<'_>) -> bool { @@ -229,6 +264,48 @@ fn is_next_header(input: Input<'_>) -> bool { || input.starts_with("@@ ") } +#[derive(Debug, PartialEq)] +struct ParsedHunk<'a> { + hunk: Hunk<'a>, + old_missing_newline: bool, + new_missing_newline: bool, +} + +enum LineOrEmptyLine<'a> { + Line(Line<'a>, bool), + EmptyLine, +} + +impl<'a> LineOrEmptyLine<'a> { + fn is_new(&self) -> bool { + matches!( + self, + Self::EmptyLine | Self::Line(Line::Context(_), _) | Self::Line(Line::Add(_), _) + ) + } + + fn is_old(&self) -> bool { + matches!( + self, + Self::EmptyLine | Self::Line(Line::Context(_), _) | Self::Line(Line::Remove(_), _) + ) + } + + fn missing_new_line(&self) -> bool { + match self { + Self::Line(_, missing_new_line) => *missing_new_line, + Self::EmptyLine => false, + } + } + + fn into_line(self) -> Line<'a> { + match self { + Self::Line(line, _) => line, + Self::EmptyLine => Line::Context(""), + } + } +} + /// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check. /// /// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a @@ -257,39 +334,67 @@ fn is_next_header(input: Input<'_>) -> bool { ///FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need /// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()? ///FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed. -fn chunk(input: Input) -> IResult { +fn chunk(input: Input) -> IResult { let (input, ranges) = chunk_header(input)?; // Parse chunk lines, using the range information to guide parsing - let (input, lines) = many0(verify( + let (input, mut lines) = many0(verify( alt(( // Detect added lines map( preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line), - Line::Add, + |(line, missing_new_line)| LineOrEmptyLine::Line(Line::Add(line), missing_new_line), ), // Detect removed lines map( preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line), - Line::Remove, + |(line, missing_new_line)| { + LineOrEmptyLine::Line(Line::Remove(line), missing_new_line) + }, ), // Detect context lines - map(preceded(char(' '), consume_content_line), Line::Context), + map( + preceded(char(' '), consume_content_line), + |(line, missing_new_line)| { + LineOrEmptyLine::Line(Line::Context(line), missing_new_line) + }, + ), // Handle empty lines within the chunk - map(tag("\n"), |_| Line::Context("")), + map(tag("\n"), |_| LineOrEmptyLine::EmptyLine), )), // Stop parsing when we detect the next header or have parsed the expected number of lines |_| !is_next_header(input), ))(input)?; + // remove trailing empty lines + while matches!(lines.last(), Some(LineOrEmptyLine::EmptyLine)) { + lines.pop(); + } + + // was there "missing new line" indicator for any of the lines? + let old_missing_newline = lines + .iter() + .filter(|line| line.is_old()) + .any(|line| line.missing_new_line()); + let new_missing_newline = lines + .iter() + .filter(|line| line.is_new()) + .any(|line| line.missing_new_line()); + + let lines = lines.into_iter().map(LineOrEmptyLine::into_line).collect(); + let (old_range, new_range, range_hint) = ranges; Ok(( input, - Hunk { - old_range, - new_range, - range_hint, - lines, + ParsedHunk { + hunk: Hunk { + old_range, + new_range, + range_hint, + lines, + }, + old_missing_newline, + new_missing_newline, }, )) } @@ -320,17 +425,6 @@ fn u64_digit(input: Input<'_>) -> IResult, u64> { Ok((input, num)) } -// Trailing newline indicator -fn no_newline_indicator(input: Input<'_>) -> IResult, bool> { - map( - opt(terminated( - tag("\\ No newline at end of file"), - opt(line_ending), - )), - |matched| matched.is_some(), - )(input) -} - fn filename(input: Input) -> IResult> { alt((quoted, bare))(input) } @@ -578,21 +672,25 @@ mod tests { Therefore let there always be non-being, so we may see their subtlety, And let there always be being,\n"; - let expected = Hunk { - old_range: Range { start: 1, count: 7 }, - new_range: Range { start: 1, count: 6 }, - range_hint: "", - lines: vec![ - Line::Remove("The Way that can be told of is not the eternal Way;"), - Line::Remove("The name that can be named is not the eternal name."), - Line::Context("The Nameless is the origin of Heaven and Earth;"), - Line::Remove("The Named is the mother of all things."), - Line::Add("The named is the mother of all things."), - Line::Add(""), - Line::Context("Therefore let there always be non-being,"), - Line::Context(" so we may see their subtlety,"), - Line::Context("And let there always be being,"), - ], + let expected = ParsedHunk { + hunk: Hunk { + old_range: Range { start: 1, count: 7 }, + new_range: Range { start: 1, count: 6 }, + range_hint: "", + lines: vec![ + Line::Remove("The Way that can be told of is not the eternal Way;"), + Line::Remove("The name that can be named is not the eternal name."), + Line::Context("The Nameless is the origin of Heaven and Earth;"), + Line::Remove("The Named is the mother of all things."), + Line::Add("The named is the mother of all things."), + Line::Add(""), + Line::Context("Therefore let there always be non-being,"), + Line::Context(" so we may see their subtlety,"), + Line::Context("And let there always be being,"), + ], + }, + old_missing_newline: false, + new_missing_newline: false, }; test_parser!(chunk(sample) -> expected); Ok(()) @@ -666,7 +764,8 @@ mod tests { ], }, ], - end_newline: true, + old_missing_newline: false, + new_missing_newline: false, }; test_parser!(patch(sample) -> expected); diff --git a/tests/parse_patch.rs b/tests/parse_patch.rs index da25ada..d10362d 100644 --- a/tests/parse_patch.rs +++ b/tests/parse_patch.rs @@ -31,7 +31,7 @@ fn test_parse() -> Result<(), ParseError<'static>> { meta: None } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(format!("{}\n", patch), sample); @@ -41,35 +41,78 @@ fn test_parse() -> Result<(), ParseError<'static>> { #[test] fn test_parse_no_newline_indicator() -> Result<(), ParseError<'static>> { let sample = "\ ---- before.py -+++ after.py -@@ -1,4 +1,4 @@ --bacon --eggs --ham -+python -+eggy -+hamster - guido +--- a/bar.txt ++++ b/bar.txt +@@ -1,3 +1,3 @@ + bar + Bar +-BAR ++BAR +\\ No newline at end of file +--- a/baz.txt ++++ b/baz.txt +@@ -1,3 +1,3 @@ + baz + Baz +-BAZ +\\ No newline at end of file ++ZAB +\\ No newline at end of file +--- a/foo.txt ++++ b/foo.txt +@@ -1,3 +1,3 @@ + foo + Foo +-FOO +\\ No newline at end of file ++FOO +--- a/foobar.txt ++++ b/foobar.txt +@@ -1,3 +1,3 @@ + foobar +-FooBar ++BarFoo + FOOBAR \\ No newline at end of file\n"; - let patch = Patch::from_single(sample)?; - assert_eq!( - patch.old, - File { - path: "before.py".into(), - meta: None - } - ); + let patches = Patch::from_multiple(sample)?; + + assert_eq!(patches.len(), 4); + + assert_eq!(patches[0].old.path, "a/bar.txt"); + assert_eq!(patches[0].new.path, "b/bar.txt"); + assert_eq!(patches[0].hunks.len(), 1); + assert_eq!(patches[0].hunks[0].lines.len(), 4); + assert_eq!(patches[0].old_missing_newline, false); + assert_eq!(patches[0].new_missing_newline, true); + + assert_eq!(patches[1].old.path, "a/baz.txt"); + assert_eq!(patches[1].new.path, "b/baz.txt"); + assert_eq!(patches[1].hunks.len(), 1); + assert_eq!(patches[1].hunks[0].lines.len(), 4); + assert_eq!(patches[1].old_missing_newline, true); + assert_eq!(patches[1].new_missing_newline, true); + + assert_eq!(patches[2].old.path, "a/foo.txt"); + assert_eq!(patches[2].new.path, "b/foo.txt"); + assert_eq!(patches[2].hunks.len(), 1); + assert_eq!(patches[2].hunks[0].lines.len(), 4); + assert_eq!(patches[2].old_missing_newline, true); + assert_eq!(patches[2].new_missing_newline, false); + + assert_eq!(patches[3].old.path, "a/foobar.txt"); + assert_eq!(patches[3].new.path, "b/foobar.txt"); + assert_eq!(patches[3].hunks.len(), 1); + assert_eq!(patches[3].hunks[0].lines.len(), 4); + assert_eq!(patches[3].old_missing_newline, true); + assert_eq!(patches[3].new_missing_newline, true); + assert_eq!( - patch.new, - File { - path: "after.py".into(), - meta: None - } + patches + .iter() + .map(|patch| format!("{}\n", patch)) + .collect::(), + sample ); - assert!(!patch.end_newline); - - assert_eq!(format!("{}\n", patch), sample); Ok(()) } @@ -107,7 +150,7 @@ fn test_parse_timestamps() -> Result<(), ParseError<'static>> { )), } ); - assert!(!patch.end_newline); + assert!(patch.old_missing_newline && patch.new_missing_newline); // to_string() uses Display but adds no trailing newline assert_eq!(patch.to_string(), sample); @@ -147,7 +190,7 @@ fn test_parse_other() -> Result<(), ParseError<'static>> { )), } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(format!("{}\n", patch), sample); @@ -184,7 +227,7 @@ fn test_parse_escaped() -> Result<(), ParseError<'static>> { )), } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(format!("{}\n", patch), sample); @@ -226,7 +269,7 @@ fn test_parse_triple_plus_minus() -> Result<(), ParseError<'static>> { meta: None } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(patch.hunks.len(), 1); assert_eq!(patch.hunks[0].lines.len(), 8); @@ -278,7 +321,7 @@ fn test_parse_triple_plus_minus_hack() { meta: None } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(patch.hunks.len(), 1); assert_eq!(patch.hunks[0].lines.len(), 8); diff --git a/tests/regressions.rs b/tests/regressions.rs index 04cd59f..f02afb7 100644 --- a/tests/regressions.rs +++ b/tests/regressions.rs @@ -41,7 +41,8 @@ fn crlf_breaks_stuff_17() -> Result<(), ParseError<'static>> { range_hint: "", lines: vec![Line::Context("x")], }], - end_newline: true, + old_missing_newline: false, + new_missing_newline: false } ); Ok(())