From 8d65c2baddac1226cc6d172acf3998b24d47e722 Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Mon, 4 Mar 2024 21:41:27 +0100 Subject: [PATCH 1/4] Implement -t/--expand-tabs option --- Cargo.lock | 7 +++ Cargo.toml | 1 + src/context_diff.rs | 29 +++++++++--- src/ed_diff.rs | 23 +++++---- src/lib.rs | 1 + src/main.rs | 16 +++++-- src/normal_diff.rs | 26 +++++----- src/params.rs | 53 +++++++++++++++++++++ src/unified_diff.rs | 21 +++++++-- src/utils.rs | 112 ++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 255 insertions(+), 34 deletions(-) create mode 100644 src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 92ad218..9dc8153 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,6 +83,7 @@ dependencies = [ "pretty_assertions", "same-file", "tempfile", + "unicode-width", ] [[package]] @@ -313,6 +314,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + [[package]] name = "wait-timeout" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 3804a81..7eeb35b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ path = "src/main.rs" [dependencies] diff = "0.1.10" same-file = "1.0.6" +unicode-width = "0.1.11" [dev-dependencies] pretty_assertions = "1" diff --git a/src/context_diff.rs b/src/context_diff.rs index af262a3..408821f 100644 --- a/src/context_diff.rs +++ b/src/context_diff.rs @@ -6,6 +6,8 @@ use std::collections::VecDeque; use std::io::Write; +use crate::utils::do_write_line; + #[derive(Debug, PartialEq)] pub enum DiffLine { Context(Vec), @@ -270,6 +272,7 @@ pub fn diff( actual_filename: &str, context_size: usize, stop_early: bool, + 
expand_tabs: bool, ) -> Vec { let mut output = format!("*** {expected_filename}\t\n--- {actual_filename}\t\n").into_bytes(); let diff_results = make_diff(expected, actual, context_size, stop_early); @@ -314,17 +317,20 @@ pub fn diff( match line { DiffLine::Context(e) => { write!(output, " ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Change(e) => { write!(output, "! ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Add(e) => { write!(output, "- ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } } @@ -341,17 +347,20 @@ pub fn diff( match line { DiffLine::Context(e) => { write!(output, " ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Change(e) => { write!(output, "! 
").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Add(e) => { write!(output, "+ ").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } } @@ -424,6 +433,7 @@ mod tests { &format!("{target}/alef"), 2, false, + false, ); File::create(&format!("{target}/ab.diff")) .unwrap() @@ -503,6 +513,7 @@ mod tests { &format!("{target}/alef_"), 2, false, + false, ); File::create(&format!("{target}/ab_.diff")) .unwrap() @@ -585,6 +596,7 @@ mod tests { &format!("{target}/alefx"), 2, false, + false, ); File::create(&format!("{target}/abx.diff")) .unwrap() @@ -670,6 +682,7 @@ mod tests { &format!("{target}/alefr"), 2, false, + false, ); File::create(&format!("{target}/abr.diff")) .unwrap() @@ -715,6 +728,7 @@ mod tests { to_filename, context_size, false, + false, ); let expected_full = [ "*** foo\t", @@ -740,6 +754,7 @@ mod tests { to_filename, context_size, true, + false, ); let expected_brief = ["*** foo\t", "--- bar\t", ""].join("\n"); assert_eq!(diff_brief, expected_brief.as_bytes()); @@ -751,6 +766,7 @@ mod tests { to_filename, context_size, false, + false, ); assert!(nodiff_full.is_empty()); @@ -761,6 +777,7 @@ mod tests { to_filename, context_size, true, + false, ); assert!(nodiff_brief.is_empty()); } diff --git a/src/ed_diff.rs b/src/ed_diff.rs index 7613b22..6d47b9f 100644 --- a/src/ed_diff.rs +++ b/src/ed_diff.rs @@ -5,6 +5,8 @@ use std::io::Write; +use crate::utils::do_write_line; + #[derive(Debug, PartialEq)] struct Mismatch { pub line_number_expected: usize, @@ -107,7 +109,12 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result Result, DiffError> { +pub fn diff( + expected: &[u8], + actual: &[u8], + stop_early: 
bool, + expand_tabs: bool, +) -> Result, DiffError> { let mut output = Vec::new(); let diff_results = make_diff(expected, actual, stop_early)?; if stop_early && !diff_results.is_empty() { @@ -145,7 +152,7 @@ pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result, if actual == b"." { writeln!(&mut output, "..\n.\ns/.//\na").unwrap(); } else { - output.write_all(actual).unwrap(); + do_write_line(&mut output, actual, expand_tabs).unwrap(); writeln!(&mut output).unwrap(); } } @@ -160,7 +167,7 @@ mod tests { use super::*; use pretty_assertions::assert_eq; pub fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { - let mut output = diff(expected, actual, false)?; + let mut output = diff(expected, actual, false, false)?; writeln!(&mut output, "w {filename}").unwrap(); Ok(output) } @@ -169,7 +176,7 @@ mod tests { fn test_basic() { let from = b"a\n"; let to = b"b\n"; - let diff = diff(from, to, false).unwrap(); + let diff = diff(from, to, false, false).unwrap(); let expected = ["1c", "b", ".", ""].join("\n"); assert_eq!(diff, expected.as_bytes()); } @@ -404,18 +411,18 @@ mod tests { let from = ["a", "b", "c", ""].join("\n"); let to = ["a", "d", "c", ""].join("\n"); - let diff_full = diff(from.as_bytes(), to.as_bytes(), false).unwrap(); + let diff_full = diff(from.as_bytes(), to.as_bytes(), false, false).unwrap(); let expected_full = ["2c", "d", ".", ""].join("\n"); assert_eq!(diff_full, expected_full.as_bytes()); - let diff_brief = diff(from.as_bytes(), to.as_bytes(), true).unwrap(); + let diff_brief = diff(from.as_bytes(), to.as_bytes(), true, false).unwrap(); let expected_brief = "\0".as_bytes(); assert_eq!(diff_brief, expected_brief); - let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false).unwrap(); + let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false, false).unwrap(); assert!(nodiff_full.is_empty()); - let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true).unwrap(); + let nodiff_brief = 
diff(from.as_bytes(), from.as_bytes(), true, false).unwrap(); assert!(nodiff_brief.is_empty()); } } diff --git a/src/lib.rs b/src/lib.rs index a78b64d..faf5df2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod context_diff; pub mod ed_diff; pub mod normal_diff; pub mod unified_diff; +pub mod utils; // Re-export the public functions/types you need pub use context_diff::diff as context_diff; diff --git a/src/main.rs b/src/main.rs index 6ff2a0f..f074cb1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,6 +15,7 @@ mod ed_diff; mod normal_diff; mod params; mod unified_diff; +mod utils; // Exit codes are documented at // https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. @@ -30,6 +31,7 @@ fn main() -> ExitCode { format, report_identical_files, brief, + expand_tabs, } = parse_params(opts).unwrap_or_else(|error| { eprintln!("{error}"); exit(2); @@ -65,7 +67,7 @@ fn main() -> ExitCode { }; // run diff let result: Vec = match format { - Format::Normal => normal_diff::diff(&from_content, &to_content, brief), + Format::Normal => normal_diff::diff(&from_content, &to_content, brief, expand_tabs), Format::Unified => unified_diff::diff( &from_content, &from.to_string_lossy(), @@ -73,6 +75,7 @@ fn main() -> ExitCode { &to.to_string_lossy(), context_count, brief, + expand_tabs, ), Format::Context => context_diff::diff( &from_content, @@ -81,11 +84,14 @@ fn main() -> ExitCode { &to.to_string_lossy(), context_count, brief, + expand_tabs, ), - Format::Ed => ed_diff::diff(&from_content, &to_content, brief).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }), + Format::Ed => { + ed_diff::diff(&from_content, &to_content, brief, expand_tabs).unwrap_or_else(|error| { + eprintln!("{error}"); + exit(2); + }) + } }; if brief && !result.is_empty() { println!( diff --git a/src/normal_diff.rs b/src/normal_diff.rs index aeef145..e25a6c6 100644 --- a/src/normal_diff.rs +++ b/src/normal_diff.rs @@ -5,6 
+5,8 @@ use std::io::Write; +use crate::utils::do_write_line; + #[derive(Debug, PartialEq)] struct Mismatch { pub line_number_expected: usize, @@ -114,7 +116,7 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec } #[must_use] -pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec { +pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool, expand_tabs: bool) -> Vec { // See https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Normal.html // for details on the syntax of the normal format. let mut output = Vec::new(); @@ -188,7 +190,7 @@ pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec { } for expected in &result.expected { write!(&mut output, "< ").unwrap(); - output.write_all(expected).unwrap(); + do_write_line(&mut output, expected, expand_tabs).unwrap(); writeln!(&mut output).unwrap(); } if result.expected_missing_nl { @@ -199,7 +201,7 @@ pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec { } for actual in &result.actual { write!(&mut output, "> ").unwrap(); - output.write_all(actual).unwrap(); + do_write_line(&mut output, actual, expand_tabs).unwrap(); writeln!(&mut output).unwrap(); } if result.actual_missing_nl { @@ -220,7 +222,7 @@ mod tests { a.write_all(b"a\n").unwrap(); let mut b = Vec::new(); b.write_all(b"b\n").unwrap(); - let diff = diff(&a, &b, false); + let diff = diff(&a, &b, false, false); let expected = b"1c1\n< a\n---\n> b\n".to_vec(); assert_eq!(diff, expected); } @@ -273,7 +275,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false); + let diff = diff(&alef, &bet, false, false); File::create(&format!("{target}/ab.diff")) .unwrap() .write_all(&diff) @@ -365,7 +367,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. 
- let diff = diff(&alef, &bet, false); + let diff = diff(&alef, &bet, false, false); File::create(&format!("{target}/abn.diff")) .unwrap() .write_all(&diff) @@ -439,7 +441,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false); + let diff = diff(&alef, &bet, false, false); File::create(&format!("{target}/ab_.diff")) .unwrap() .write_all(&diff) @@ -517,7 +519,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false); + let diff = diff(&alef, &bet, false, false); File::create(&format!("{target}/abr.diff")) .unwrap() .write_all(&diff) @@ -552,18 +554,18 @@ mod tests { let from = ["a", "b", "c"].join("\n"); let to = ["a", "d", "c"].join("\n"); - let diff_full = diff(from.as_bytes(), to.as_bytes(), false); + let diff_full = diff(from.as_bytes(), to.as_bytes(), false, false); let expected_full = ["2c2", "< b", "---", "> d", ""].join("\n"); assert_eq!(diff_full, expected_full.as_bytes()); - let diff_brief = diff(from.as_bytes(), to.as_bytes(), true); + let diff_brief = diff(from.as_bytes(), to.as_bytes(), true, false); let expected_brief = "\0".as_bytes(); assert_eq!(diff_brief, expected_brief); - let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false); + let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false, false); assert!(nodiff_full.is_empty()); - let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true); + let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true, false); assert!(nodiff_brief.is_empty()); } } diff --git a/src/params.rs b/src/params.rs index 661cd37..a576f3d 100644 --- a/src/params.rs +++ b/src/params.rs @@ -27,6 +27,7 @@ pub struct Params { pub context_count: usize, pub report_identical_files: bool, pub brief: bool, + pub expand_tabs: bool, } pub fn parse_params>(opts: I) -> Result { @@ -42,6 +43,7 @@ pub fn parse_params>(opts: I) -> Result>(opts: I) -> 
Result>(opts: I) -> Result), @@ -241,6 +243,7 @@ pub fn diff( actual_filename: &str, context_size: usize, stop_early: bool, + expand_tabs: bool, ) -> Vec { let mut output = format!("--- {expected_filename}\t\n+++ {actual_filename}\t\n").into_bytes(); let diff_results = make_diff(expected, actual, context_size, stop_early); @@ -371,17 +374,20 @@ pub fn diff( match line { DiffLine::Expected(e) => { write!(output, "-").expect("write to Vec is infallible"); - output.write_all(&e).expect("write to Vec is infallible"); + do_write_line(&mut output, &e, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Context(c) => { write!(output, " ").expect("write to Vec is infallible"); - output.write_all(&c).expect("write to Vec is infallible"); + do_write_line(&mut output, &c, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Actual(r) => { write!(output, "+",).expect("write to Vec is infallible"); - output.write_all(&r).expect("write to Vec is infallible"); + do_write_line(&mut output, &r, expand_tabs) + .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::MissingNL => { @@ -454,6 +460,7 @@ mod tests { &format!("{target}/alef"), 2, false, + false, ); File::create(&format!("{target}/ab.diff")) .unwrap() @@ -568,6 +575,7 @@ mod tests { &format!("{target}/alefn"), 2, false, + false, ); File::create(&format!("{target}/abn.diff")) .unwrap() @@ -662,6 +670,7 @@ mod tests { &format!("{target}/alef_"), 2, false, + false, ); File::create(&format!("{target}/ab_.diff")) .unwrap() @@ -741,6 +750,7 @@ mod tests { &format!("{target}/alefx"), 2, false, + false, ); File::create(&format!("{target}/abx.diff")) .unwrap() @@ -825,6 +835,7 @@ mod tests { &format!("{target}/alefr"), 2, false, + false, ); File::create(&format!("{target}/abr.diff")) .unwrap() @@ -869,6 +880,7 @@ mod tests { to_filename, context_size, false, + false, ); let expected_full = [ "--- foo\t", @@ -890,6 +902,7 @@ mod 
tests { to_filename, context_size, true, + false, ); let expected_brief = ["--- foo\t", "+++ bar\t", ""].join("\n"); assert_eq!(diff_brief, expected_brief.as_bytes()); @@ -901,6 +914,7 @@ mod tests { to_filename, context_size, false, + false, ); assert!(nodiff_full.is_empty()); @@ -911,6 +925,7 @@ mod tests { to_filename, context_size, true, + false, ); assert!(nodiff_brief.is_empty()); } diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..1d13682 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,112 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +use std::io::Write; + +use unicode_width::UnicodeWidthStr; + +/// Replace tabs by spaces in the input line. +/// Correctly handle multi-bytes characters. +/// This assumes that line does not contain any line breaks (if it does, the result is undefined). +pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { + let tab = b'\t'; + let ntabs = line.iter().filter(|c| **c == tab).count(); + if ntabs == 0 { + return line.to_vec(); + } + let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut offset = 0; + + let mut iter = line.split(|c| *c == tab).peekable(); + while let Some(chunk) = iter.next() { + match String::from_utf8(chunk.to_vec()) { + Ok(s) => offset += UnicodeWidthStr::width(s.as_str()), + Err(_) => offset += chunk.len(), + } + result.extend_from_slice(chunk); + if iter.peek().is_some() { + result.resize(result.len() + tabsize - offset % tabsize, b' '); + offset = 0; + } + } + + result +} + +/// Write a single line to an output stream, expanding tabs to space if necessary. +/// This assumes that line does not contain any line breaks +/// (if it does and tabs are to be expanded to spaces, the result is undefined). 
+pub fn do_write_line(output: &mut Vec, line: &[u8], expand_tabs: bool) -> std::io::Result<()> { + if expand_tabs { + output.write_all(do_expand_tabs(line, 8).as_slice()) + } else { + output.write_all(line) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + mod expand_tabs { + use super::*; + use pretty_assertions::assert_eq; + + fn assert_tab_expansion(line: &str, tabsize: usize, expected: &str) { + assert_eq!( + do_expand_tabs(line.as_bytes(), tabsize), + expected.as_bytes() + ); + } + + #[test] + fn basics() { + assert_tab_expansion("foo barr baz", 8, "foo barr baz"); + assert_tab_expansion("foo\tbarr\tbaz", 8, "foo barr baz"); + assert_tab_expansion("foo\tbarr\tbaz", 5, "foo barr baz"); + assert_tab_expansion("foo\tbarr\tbaz", 2, "foo barr baz"); + } + + #[test] + fn multibyte_chars() { + assert_tab_expansion("foo\tépée\tbaz", 8, "foo épée baz"); + assert_tab_expansion("foo\t😉\tbaz", 5, "foo 😉 baz"); + + // Note: The Woman Scientist emoji (👩‍🔬) is a ZWJ sequence combining + // the Woman emoji (👩) and the Microscope emoji (🔬). On supported platforms + // it is displayed as a single emoji and should have a print size of 2 columns, + // but terminal emulators tend to not support this, and display the two emojis + // side by side, thus accounting for a print size of 4 columns. 
+ assert_tab_expansion("foo\t👩‍🔬\tbaz", 6, "foo 👩‍🔬 baz"); + } + + #[test] + fn invalid_utf8() { + // [240, 240, 152, 137] is an invalid UTF-8 sequence, so it is handled as 4 bytes + assert_eq!( + do_expand_tabs(&[240, 240, 152, 137, 9, 102, 111, 111], 8), + &[240, 240, 152, 137, 32, 32, 32, 32, 102, 111, 111] + ); + } + } + + mod write_line { + use super::*; + use pretty_assertions::assert_eq; + + fn assert_line_written(line: &str, expand_tabs: bool, expected: &str) { + let mut output: Vec = Vec::new(); + assert!(do_write_line(&mut output, line.as_bytes(), expand_tabs).is_ok()); + assert_eq!(output, expected.as_bytes()); + } + + #[test] + fn basics() { + assert_line_written("foo bar baz", false, "foo bar baz"); + assert_line_written("foo bar\tbaz", false, "foo bar\tbaz"); + assert_line_written("foo bar\tbaz", true, "foo bar baz"); + } + } +} From e0283083f297cd8f9f6e39e6f24cd18ab18e8b02 Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Tue, 5 Mar 2024 18:52:04 +0100 Subject: [PATCH 2/4] Implement --tabsize option --- Cargo.lock | 1 + Cargo.toml | 1 + src/context_diff.rs | 21 +++++-- src/ed_diff.rs | 15 ++--- src/main.rs | 14 +++-- src/normal_diff.rs | 30 ++++++---- src/params.rs | 130 ++++++++++++++++++++++++++++++++++++++++++++ src/unified_diff.rs | 16 +++++- src/utils.rs | 19 ++++--- 9 files changed, 207 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9dc8153..15c81e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,6 +81,7 @@ dependencies = [ "diff", "predicates", "pretty_assertions", + "regex", "same-file", "tempfile", "unicode-width", diff --git a/Cargo.toml b/Cargo.toml index 7eeb35b..4ddf5fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ path = "src/main.rs" [dependencies] diff = "0.1.10" +regex = "1.10.3" same-file = "1.0.6" unicode-width = "0.1.11" diff --git a/src/context_diff.rs b/src/context_diff.rs index 408821f..9f1db55 100644 --- a/src/context_diff.rs +++ b/src/context_diff.rs @@ -273,6 +273,7 @@ pub fn diff( 
context_size: usize, stop_early: bool, expand_tabs: bool, + tabsize: usize, ) -> Vec { let mut output = format!("*** {expected_filename}\t\n--- {actual_filename}\t\n").into_bytes(); let diff_results = make_diff(expected, actual, context_size, stop_early); @@ -317,19 +318,19 @@ pub fn diff( match line { DiffLine::Context(e) => { write!(output, " ").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Change(e) => { write!(output, "! ").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Add(e) => { write!(output, "- ").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } @@ -347,19 +348,19 @@ pub fn diff( match line { DiffLine::Context(e) => { write!(output, " ").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Change(e) => { write!(output, "! 
").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Add(e) => { write!(output, "+ ").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } @@ -434,6 +435,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/ab.diff")) .unwrap() @@ -514,6 +516,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/ab_.diff")) .unwrap() @@ -597,6 +600,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/abx.diff")) .unwrap() @@ -683,6 +687,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/abr.diff")) .unwrap() @@ -729,6 +734,7 @@ mod tests { context_size, false, false, + 8, ); let expected_full = [ "*** foo\t", @@ -755,6 +761,7 @@ mod tests { context_size, true, false, + 8, ); let expected_brief = ["*** foo\t", "--- bar\t", ""].join("\n"); assert_eq!(diff_brief, expected_brief.as_bytes()); @@ -767,6 +774,7 @@ mod tests { context_size, false, false, + 8, ); assert!(nodiff_full.is_empty()); @@ -778,6 +786,7 @@ mod tests { context_size, true, false, + 8, ); assert!(nodiff_brief.is_empty()); } diff --git a/src/ed_diff.rs b/src/ed_diff.rs index 6d47b9f..c02289c 100644 --- a/src/ed_diff.rs +++ b/src/ed_diff.rs @@ -114,6 +114,7 @@ pub fn diff( actual: &[u8], stop_early: bool, expand_tabs: bool, + tabsize: usize, ) -> Result, DiffError> { let mut output = Vec::new(); let diff_results = make_diff(expected, actual, stop_early)?; @@ -152,7 +153,7 @@ pub fn diff( if actual == b"." 
{ writeln!(&mut output, "..\n.\ns/.//\na").unwrap(); } else { - do_write_line(&mut output, actual, expand_tabs).unwrap(); + do_write_line(&mut output, actual, expand_tabs, tabsize).unwrap(); writeln!(&mut output).unwrap(); } } @@ -167,7 +168,7 @@ mod tests { use super::*; use pretty_assertions::assert_eq; pub fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { - let mut output = diff(expected, actual, false, false)?; + let mut output = diff(expected, actual, false, false, 8)?; writeln!(&mut output, "w {filename}").unwrap(); Ok(output) } @@ -176,7 +177,7 @@ mod tests { fn test_basic() { let from = b"a\n"; let to = b"b\n"; - let diff = diff(from, to, false, false).unwrap(); + let diff = diff(from, to, false, false, 8).unwrap(); let expected = ["1c", "b", ".", ""].join("\n"); assert_eq!(diff, expected.as_bytes()); } @@ -411,18 +412,18 @@ mod tests { let from = ["a", "b", "c", ""].join("\n"); let to = ["a", "d", "c", ""].join("\n"); - let diff_full = diff(from.as_bytes(), to.as_bytes(), false, false).unwrap(); + let diff_full = diff(from.as_bytes(), to.as_bytes(), false, false, 8).unwrap(); let expected_full = ["2c", "d", ".", ""].join("\n"); assert_eq!(diff_full, expected_full.as_bytes()); - let diff_brief = diff(from.as_bytes(), to.as_bytes(), true, false).unwrap(); + let diff_brief = diff(from.as_bytes(), to.as_bytes(), true, false, 8).unwrap(); let expected_brief = "\0".as_bytes(); assert_eq!(diff_brief, expected_brief); - let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false, false).unwrap(); + let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false, false, 8).unwrap(); assert!(nodiff_full.is_empty()); - let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true, false).unwrap(); + let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true, false, 8).unwrap(); assert!(nodiff_brief.is_empty()); } } diff --git a/src/main.rs b/src/main.rs index f074cb1..2a6d4ca 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 
+32,7 @@ fn main() -> ExitCode { report_identical_files, brief, expand_tabs, + tabsize, } = parse_params(opts).unwrap_or_else(|error| { eprintln!("{error}"); exit(2); @@ -67,7 +68,9 @@ fn main() -> ExitCode { }; // run diff let result: Vec = match format { - Format::Normal => normal_diff::diff(&from_content, &to_content, brief, expand_tabs), + Format::Normal => { + normal_diff::diff(&from_content, &to_content, brief, expand_tabs, tabsize) + } Format::Unified => unified_diff::diff( &from_content, &from.to_string_lossy(), @@ -76,6 +79,7 @@ fn main() -> ExitCode { context_count, brief, expand_tabs, + tabsize, ), Format::Context => context_diff::diff( &from_content, @@ -85,13 +89,13 @@ fn main() -> ExitCode { context_count, brief, expand_tabs, + tabsize, ), - Format::Ed => { - ed_diff::diff(&from_content, &to_content, brief, expand_tabs).unwrap_or_else(|error| { + Format::Ed => ed_diff::diff(&from_content, &to_content, brief, expand_tabs, tabsize) + .unwrap_or_else(|error| { eprintln!("{error}"); exit(2); - }) - } + }), }; if brief && !result.is_empty() { println!( diff --git a/src/normal_diff.rs b/src/normal_diff.rs index e25a6c6..b26de77 100644 --- a/src/normal_diff.rs +++ b/src/normal_diff.rs @@ -116,7 +116,13 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec } #[must_use] -pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool, expand_tabs: bool) -> Vec { +pub fn diff( + expected: &[u8], + actual: &[u8], + stop_early: bool, + expand_tabs: bool, + tabsize: usize, +) -> Vec { // See https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Normal.html // for details on the syntax of the normal format. 
let mut output = Vec::new(); @@ -190,7 +196,7 @@ pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool, expand_tabs: bool) } for expected in &result.expected { write!(&mut output, "< ").unwrap(); - do_write_line(&mut output, expected, expand_tabs).unwrap(); + do_write_line(&mut output, expected, expand_tabs, tabsize).unwrap(); writeln!(&mut output).unwrap(); } if result.expected_missing_nl { @@ -201,7 +207,7 @@ pub fn diff(expected: &[u8], actual: &[u8], stop_early: bool, expand_tabs: bool) } for actual in &result.actual { write!(&mut output, "> ").unwrap(); - do_write_line(&mut output, actual, expand_tabs).unwrap(); + do_write_line(&mut output, actual, expand_tabs, tabsize).unwrap(); writeln!(&mut output).unwrap(); } if result.actual_missing_nl { @@ -222,7 +228,7 @@ mod tests { a.write_all(b"a\n").unwrap(); let mut b = Vec::new(); b.write_all(b"b\n").unwrap(); - let diff = diff(&a, &b, false, false); + let diff = diff(&a, &b, false, false, 8); let expected = b"1c1\n< a\n---\n> b\n".to_vec(); assert_eq!(diff, expected); } @@ -275,7 +281,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false, false); + let diff = diff(&alef, &bet, false, false, 8); File::create(&format!("{target}/ab.diff")) .unwrap() .write_all(&diff) @@ -367,7 +373,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false, false); + let diff = diff(&alef, &bet, false, false, 8); File::create(&format!("{target}/abn.diff")) .unwrap() .write_all(&diff) @@ -441,7 +447,7 @@ mod tests { } // This test diff is intentionally reversed. // We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false, false); + let diff = diff(&alef, &bet, false, false, 8); File::create(&format!("{target}/ab_.diff")) .unwrap() .write_all(&diff) @@ -519,7 +525,7 @@ mod tests { } // This test diff is intentionally reversed. 
// We want it to turn the alef into bet. - let diff = diff(&alef, &bet, false, false); + let diff = diff(&alef, &bet, false, false, 8); File::create(&format!("{target}/abr.diff")) .unwrap() .write_all(&diff) @@ -554,18 +560,18 @@ mod tests { let from = ["a", "b", "c"].join("\n"); let to = ["a", "d", "c"].join("\n"); - let diff_full = diff(from.as_bytes(), to.as_bytes(), false, false); + let diff_full = diff(from.as_bytes(), to.as_bytes(), false, false, 8); let expected_full = ["2c2", "< b", "---", "> d", ""].join("\n"); assert_eq!(diff_full, expected_full.as_bytes()); - let diff_brief = diff(from.as_bytes(), to.as_bytes(), true, false); + let diff_brief = diff(from.as_bytes(), to.as_bytes(), true, false, 8); let expected_brief = "\0".as_bytes(); assert_eq!(diff_brief, expected_brief); - let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false, false); + let nodiff_full = diff(from.as_bytes(), from.as_bytes(), false, false, 8); assert!(nodiff_full.is_empty()); - let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true, false); + let nodiff_brief = diff(from.as_bytes(), from.as_bytes(), true, false, 8); assert!(nodiff_brief.is_empty()); } } diff --git a/src/params.rs b/src/params.rs index a576f3d..f511e7c 100644 --- a/src/params.rs +++ b/src/params.rs @@ -1,5 +1,7 @@ use std::ffi::{OsStr, OsString}; +use regex::Regex; + #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Format { Normal, @@ -8,6 +10,8 @@ pub enum Format { Ed, } +const DEFAULT_TABSIZE: usize = 8; + #[cfg(unix)] fn osstr_bytes(osstr: &OsStr) -> &[u8] { use std::os::unix::ffi::OsStrExt; @@ -28,6 +32,7 @@ pub struct Params { pub report_identical_files: bool, pub brief: bool, pub expand_tabs: bool, + pub tabsize: usize, } pub fn parse_params>(opts: I) -> Result { @@ -44,6 +49,8 @@ pub fn parse_params>(opts: I) -> Result\d+)$").unwrap(); + let mut tabsize = DEFAULT_TABSIZE; while let Some(param) = opts.next() { if param == "--" { break; @@ -70,6 +77,22 @@ pub fn parse_params>(opts: I) -> 
Result() { + Ok(num) => num, + Err(_) => return Err(format!("invalid tabsize «{}»", tabsize_str)), + }; + continue; + } let p = osstr_bytes(¶m); if p.first() == Some(&b'-') && p.get(1) != Some(&b'-') { let mut bit = p[1..].iter().copied().peekable(); @@ -154,6 +177,7 @@ pub fn parse_params>(opts: I) -> Result Vec { let mut output = format!("--- {expected_filename}\t\n+++ {actual_filename}\t\n").into_bytes(); let diff_results = make_diff(expected, actual, context_size, stop_early); @@ -374,19 +375,19 @@ pub fn diff( match line { DiffLine::Expected(e) => { write!(output, "-").expect("write to Vec is infallible"); - do_write_line(&mut output, &e, expand_tabs) + do_write_line(&mut output, &e, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Context(c) => { write!(output, " ").expect("write to Vec is infallible"); - do_write_line(&mut output, &c, expand_tabs) + do_write_line(&mut output, &c, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } DiffLine::Actual(r) => { write!(output, "+",).expect("write to Vec is infallible"); - do_write_line(&mut output, &r, expand_tabs) + do_write_line(&mut output, &r, expand_tabs, tabsize) .expect("write to Vec is infallible"); writeln!(output).unwrap(); } @@ -461,6 +462,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/ab.diff")) .unwrap() @@ -576,6 +578,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/abn.diff")) .unwrap() @@ -671,6 +674,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/ab_.diff")) .unwrap() @@ -751,6 +755,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/abx.diff")) .unwrap() @@ -836,6 +841,7 @@ mod tests { 2, false, false, + 8, ); File::create(&format!("{target}/abr.diff")) .unwrap() @@ -881,6 +887,7 @@ mod tests { context_size, false, false, + 8, ); let expected_full = [ "--- foo\t", @@ -903,6 +910,7 @@ mod tests { context_size, 
true, false, + 8, ); let expected_brief = ["--- foo\t", "+++ bar\t", ""].join("\n"); assert_eq!(diff_brief, expected_brief.as_bytes()); @@ -915,6 +923,7 @@ mod tests { context_size, false, false, + 8, ); assert!(nodiff_full.is_empty()); @@ -926,6 +935,7 @@ mod tests { context_size, true, false, + 8, ); assert!(nodiff_brief.is_empty()); } diff --git a/src/utils.rs b/src/utils.rs index 1d13682..94d950f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -38,9 +38,14 @@ pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { /// Write a single line to an output stream, expanding tabs to space if necessary. /// This assumes that line does not contain any line breaks /// (if it does and tabs are to be expanded to spaces, the result is undefined). -pub fn do_write_line(output: &mut Vec, line: &[u8], expand_tabs: bool) -> std::io::Result<()> { +pub fn do_write_line( + output: &mut Vec, + line: &[u8], + expand_tabs: bool, + tabsize: usize, +) -> std::io::Result<()> { if expand_tabs { - output.write_all(do_expand_tabs(line, 8).as_slice()) + output.write_all(do_expand_tabs(line, tabsize).as_slice()) } else { output.write_all(line) } @@ -96,17 +101,17 @@ mod tests { use super::*; use pretty_assertions::assert_eq; - fn assert_line_written(line: &str, expand_tabs: bool, expected: &str) { + fn assert_line_written(line: &str, expand_tabs: bool, tabsize: usize, expected: &str) { let mut output: Vec = Vec::new(); - assert!(do_write_line(&mut output, line.as_bytes(), expand_tabs).is_ok()); + assert!(do_write_line(&mut output, line.as_bytes(), expand_tabs, tabsize).is_ok()); assert_eq!(output, expected.as_bytes()); } #[test] fn basics() { - assert_line_written("foo bar baz", false, "foo bar baz"); - assert_line_written("foo bar\tbaz", false, "foo bar\tbaz"); - assert_line_written("foo bar\tbaz", true, "foo bar baz"); + assert_line_written("foo bar baz", false, 8, "foo bar baz"); + assert_line_written("foo bar\tbaz", false, 8, "foo bar\tbaz"); + assert_line_written("foo bar\tbaz", 
true, 8, "foo bar baz"); } } } From cfc68d58bcd0bfd1b339a84cb70950fbca875569 Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Tue, 19 Mar 2024 19:00:39 +0100 Subject: [PATCH 3/4] Fix fuzzers' invocations --- fuzz/fuzz_targets/fuzz_ed.rs | 2 +- fuzz/fuzz_targets/fuzz_normal.rs | 2 +- fuzz/fuzz_targets/fuzz_patch.rs | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_ed.rs b/fuzz/fuzz_targets/fuzz_ed.rs index 5c5132e..69461d1 100644 --- a/fuzz/fuzz_targets/fuzz_ed.rs +++ b/fuzz/fuzz_targets/fuzz_ed.rs @@ -8,7 +8,7 @@ use std::io::Write; use std::process::Command; fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { - let mut output = ed_diff::diff(expected, actual)?; + let mut output = ed_diff::diff(expected, actual, false, false, 8)?; writeln!(&mut output, "w {filename}").unwrap(); Ok(output) } diff --git a/fuzz/fuzz_targets/fuzz_normal.rs b/fuzz/fuzz_targets/fuzz_normal.rs index a44ece3..2d38641 100644 --- a/fuzz/fuzz_targets/fuzz_normal.rs +++ b/fuzz/fuzz_targets/fuzz_normal.rs @@ -21,7 +21,7 @@ fuzz_target!(|x: (Vec, Vec)| { } else { return }*/ - let diff = normal_diff::diff(&from, &to); + let diff = normal_diff::diff(&from, &to, false, false, 8); File::create("target/fuzz.file.original") .unwrap() .write_all(&from) diff --git a/fuzz/fuzz_targets/fuzz_patch.rs b/fuzz/fuzz_targets/fuzz_patch.rs index d353523..15e4967 100644 --- a/fuzz/fuzz_targets/fuzz_patch.rs +++ b/fuzz/fuzz_targets/fuzz_patch.rs @@ -26,6 +26,9 @@ fuzz_target!(|x: (Vec, Vec, u8)| { &to, "target/fuzz.file", context as usize, + false, + false, + 8, ); File::create("target/fuzz.file.original") .unwrap() From f2fd2127ed866222639019ac61298c843f778187 Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Tue, 19 Mar 2024 19:02:26 +0100 Subject: [PATCH 4/4] Politely ask clippy to not complain about too many arguments --- src/context_diff.rs | 1 + src/unified_diff.rs | 1 + 2 files changed, 2 insertions(+) diff --git
a/src/context_diff.rs b/src/context_diff.rs index 9f1db55..d9c61b8 100644 --- a/src/context_diff.rs +++ b/src/context_diff.rs @@ -265,6 +265,7 @@ fn make_diff( } #[must_use] +#[allow(clippy::too_many_arguments)] pub fn diff( expected: &[u8], expected_filename: &str, diff --git a/src/unified_diff.rs b/src/unified_diff.rs index 5af52e9..0d3ec38 100644 --- a/src/unified_diff.rs +++ b/src/unified_diff.rs @@ -236,6 +236,7 @@ fn make_diff( } #[must_use] +#[allow(clippy::too_many_arguments)] pub fn diff( expected: &[u8], expected_filename: &str,