diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index 9ad1c17..133946c 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -46,6 +46,7 @@ jobs: - { name: fuzz_ed, should_pass: true } - { name: fuzz_normal, should_pass: true } - { name: fuzz_patch, should_pass: true } + - { name: fuzz_side, should_pass: true } steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly @@ -56,7 +57,7 @@ jobs: shared-key: "cargo-fuzz-cache-key" cache-directories: "fuzz/target" - name: Restore Cached Corpus - uses: actions/cache/restore@v4 + uses: actions/cache/restore@v5 with: key: corpus-cache-${{ matrix.test-target.name }} path: | @@ -67,7 +68,7 @@ jobs: run: | cargo +nightly fuzz run ${{ matrix.test-target.name }} -- -max_total_time=${{ env.RUN_FOR }} -detect_leaks=0 - name: Save Corpus Cache - uses: actions/cache/save@v4 + uses: actions/cache/save@v5 with: key: corpus-cache-${{ matrix.test-target.name }} path: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4c1f4f8..3c59af5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,10 +1,12 @@ +# This file was autogenerated by dist: https://axodotdev.github.io/cargo-dist +# # Copyright 2022-2024, axodotdev # SPDX-License-Identifier: MIT or Apache-2.0 # # CI that: # # * checks for a Git Tag that looks like a release -# * builds artifacts with cargo-dist (archives, installers, hashes) +# * builds artifacts with dist (archives, installers, hashes) # * uploads those artifacts to temporary workflow zip # * on success, uploads the artifacts to a GitHub Release # @@ -12,9 +14,8 @@ # title/body based on your changelogs. name: Release - permissions: - contents: write + "contents": "write" # This task will run whenever you push a git tag that looks like a version # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. 
@@ -23,10 +24,10 @@ permissions: # must be a Cargo-style SemVer Version (must have at least major.minor.patch). # # If PACKAGE_NAME is specified, then the announcement will be for that -# package (erroring out if it doesn't have the given version or isn't cargo-dist-able). +# package (erroring out if it doesn't have the given version or isn't dist-able). # # If PACKAGE_NAME isn't specified, then the announcement will be for all -# (cargo-dist-able) packages in the workspace with that version (this mode is +# (dist-able) packages in the workspace with that version (this mode is # intended for workspaces with only one dist-able package, or with all dist-able # packages versioned/released in lockstep). # @@ -38,15 +39,15 @@ permissions: # If there's a prerelease-style suffix to the version, then the release(s) # will be marked as a prerelease. on: + pull_request: push: tags: - '**[0-9]+.[0-9]+.[0-9]+*' - pull_request: jobs: - # Run 'cargo dist plan' (or host) to determine what tasks we need to do + # Run 'dist plan' (or host) to determine what tasks we need to do plan: - runs-on: ubuntu-latest + runs-on: "ubuntu-22.04" outputs: val: ${{ steps.plan.outputs.manifest }} tag: ${{ !github.event.pull_request && github.ref_name || '' }} @@ -57,12 +58,18 @@ jobs: steps: - uses: actions/checkout@v4 with: + persist-credentials: false submodules: recursive - - name: Install cargo-dist + - name: Install dist # we specify bash to get pipefail; it guards against the `curl` command # failing. 
otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh" + - name: Cache dist + uses: actions/upload-artifact@v4 + with: + name: cargo-dist-cache + path: ~/.cargo/bin/dist # sure would be cool if github gave us proper conditionals... # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible # functionality based on whether this is a pull_request, and whether it's from a fork. @@ -70,8 +77,8 @@ jobs: # but also really annoying to build CI around when it needs secrets to work right.) - id: plan run: | - cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json - echo "cargo dist ran successfully" + dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json + echo "dist ran successfully" cat plan-dist-manifest.json echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" @@ -89,18 +96,19 @@ jobs: if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} strategy: fail-fast: false - # Target platforms/runners are computed by cargo-dist in create-release. + # Target platforms/runners are computed by dist in create-release. 
# Each member of the matrix has the following arguments: # # - runner: the github runner - # - dist-args: cli flags to pass to cargo dist - # - install-dist: expression to run to install cargo-dist on the runner + # - dist-args: cli flags to pass to dist + # - install-dist: expression to run to install dist on the runner # # Typically there will be: # - 1 "global" task that builds universal installers # - N "local" tasks that build each platform's binaries and platform-specific installers matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} runs-on: ${{ matrix.runner }} + container: ${{ matrix.container && matrix.container.image || null }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json @@ -110,12 +118,17 @@ jobs: git config --global core.longpaths true - uses: actions/checkout@v4 with: + persist-credentials: false submodules: recursive - - uses: swatinem/rust-cache@v2 - with: - key: ${{ join(matrix.targets, '-') }} - - name: Install cargo-dist - run: ${{ matrix.install_dist }} + - name: Install Rust non-interactively if not already installed + if: ${{ matrix.container }} + run: | + if ! 
command -v cargo > /dev/null 2>&1; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + fi + - name: Install dist + run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -129,8 +142,8 @@ jobs: - name: Build artifacts run: | # Actually do builds and make zips and whatnot - cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json - echo "cargo dist ran successfully" + dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json + echo "dist ran successfully" - id: cargo-dist name: Post-build # We force bash here just because github makes it really hard to get values up @@ -140,7 +153,7 @@ jobs: run: | # Parse out what we just built and upload it to scratch storage echo "paths<<EOF" >> "$GITHUB_OUTPUT" - jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" + dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" @@ -157,17 +170,21 @@ jobs: needs: - plan - build-local-artifacts - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json steps: - uses: actions/checkout@v4 with: + persist-credentials: false submodules: recursive - - name: Install cargo-dist - shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh" + - name: Install cached dist + uses: actions/download-artifact@v4 + with: + name: cargo-dist-cache + path: ~/.cargo/bin/ + - run: chmod +x ~/.cargo/bin/dist # Get all the local 
artifacts for the global tasks to use (for e.g. checksums) - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -178,8 +195,8 @@ jobs: - id: cargo-dist shell: bash run: | - cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json - echo "cargo dist ran successfully" + dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json + echo "dist ran successfully" # Parse out what we just built and upload it to scratch storage echo "paths<<EOF" >> "$GITHUB_OUTPUT" @@ -200,19 +217,24 @@ jobs: - plan - build-local-artifacts - build-global-artifacts - # Only run if we're "publishing", and only if local and global didn't fail (skipped is fine) - if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} + # Only run if we're "publishing", and only if plan, local and global didn't fail (skipped is fine) + if: ${{ always() && needs.plan.result == 'success' && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" outputs: val: ${{ steps.host.outputs.manifest }} steps: - uses: actions/checkout@v4 with: + persist-credentials: false submodules: recursive - - name: Install cargo-dist - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh" + - name: Install cached dist + uses: actions/download-artifact@v4 + with: + name: cargo-dist-cache + path: 
~/.cargo/bin/ + - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts uses: actions/download-artifact@v4 @@ -220,11 +242,10 @@ jobs: pattern: artifacts-* path: target/distrib/ merge-multiple: true - # This is a harmless no-op for GitHub Releases, hosting for that happens in "announce" - id: host shell: bash run: | - cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json + dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json echo "artifacts uploaded and released successfully" cat dist-manifest.json echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" @@ -234,8 +255,29 @@ jobs: # Overwrite the previous copy name: artifacts-dist-manifest path: dist-manifest.json + # Create a GitHub Release while uploading all files to it + - name: "Download GitHub Artifacts" + uses: actions/download-artifact@v4 + with: + pattern: artifacts-* + path: artifacts + merge-multiple: true + - name: Cleanup + run: | + # Remove the granular manifests + rm -f artifacts/*-dist-manifest.json + - name: Create GitHub Release + env: + PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}" + ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}" + ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}" + RELEASE_COMMIT: "${{ github.sha }}" + run: | + # Write and read notes from a file to avoid quoting breaking things + echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt + + gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/* - # Create a GitHub Release while uploading all files to it announce: needs: - plan @@ -244,28 +286,11 @@ jobs: # still allowing individual publish 
jobs to skip themselves (for prereleases). # "host" however must run to completion, no skipping allowed! if: ${{ always() && needs.host.result == 'success' }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 with: + persist-credentials: false submodules: recursive - - name: "Download GitHub Artifacts" - uses: actions/download-artifact@v4 - with: - pattern: artifacts-* - path: artifacts - merge-multiple: true - - name: Cleanup - run: | - # Remove the granular manifests - rm -f artifacts/*-dist-manifest.json - - name: Create GitHub Release - uses: ncipollo/release-action@v1 - with: - tag: ${{ needs.plan.outputs.tag }} - name: ${{ fromJson(needs.host.outputs.val).announcement_title }} - body: ${{ fromJson(needs.host.outputs.val).announcement_github_body }} - prerelease: ${{ fromJson(needs.host.outputs.val).announcement_is_prerelease }} - artifacts: "artifacts/*" diff --git a/Cargo.lock b/Cargo.lock index 29a8623..2ed2e14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -34,13 +28,12 @@ checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "assert_cmd" -version = "2.0.17" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66" +checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514" dependencies = [ "anstyle", "bstr", - "doc-comment", "libc", "predicates", "predicates-core", @@ -91,11 +84,10 @@ checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.40" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", @@ -123,7 +115,7 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "diffutils" -version = "0.4.2" +version = "0.5.0" dependencies = [ "assert_cmd", "chrono", @@ -137,12 +129,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "doc-comment" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" - [[package]] name = "errno" version = "0.3.10" @@ -205,9 +191,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "js-sys" @@ -220,15 +206,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.170" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "linux-raw-sys" -version = "0.9.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "log" @@ -323,9 +309,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -335,9 +321,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "722166aa0d7438abbaa4d5cc2c649dac844e8c56d82fb3d33e9c34b5cd268fc6" dependencies = [ "aho-corasick", "memchr", @@ -352,9 +338,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" -version = "1.0.0" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f8dcd64f141950290e45c99f7710ede1b600297c91818bb30b3667c0f45dc0" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags", "errno", @@ -405,9 +391,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.19.1" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom", @@ -430,9 +416,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] 
name = "unicode-width" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "wait-timeout" @@ -548,9 +534,9 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" [[package]] name = "windows-sys" diff --git a/Cargo.toml b/Cargo.toml index 6fa1a3c..29331c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "diffutils" -version = "0.4.2" +version = "0.5.0" edition = "2021" description = "A CLI app for generating diff files" license = "MIT OR Apache-2.0" @@ -28,20 +28,15 @@ assert_cmd = "2.0.14" predicates = "3.1.0" tempfile = "3.10.1" -# The profile that 'cargo dist' will build with +[profile.release] +lto = "thin" +codegen-units = 1 + +[profile.release-fast] +inherits = "release" +panic = "abort" + +# The profile that 'dist' will build with [profile.dist] inherits = "release" lto = "thin" - -# Config for 'cargo dist' -[workspace.metadata.dist] -# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.13.3" -# CI backends to support -ci = ["github"] -# The installers to generate for each app -installers = [] -# Target platforms to build apps for (Rust target-triple syntax) -targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] -# Publish jobs to run in CI -pr-run-mode = "plan" diff --git a/dist-workspace.toml b/dist-workspace.toml new file mode 100644 index 0000000..92c4095 --- 
/dev/null +++ b/dist-workspace.toml @@ -0,0 +1,13 @@ +[workspace] +members = ["cargo:."] + +# Config for 'dist' +[dist] +# The preferred dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.30.3" +# CI backends to support +ci = "github" +# The installers to generate for each app +installers = [] +# Target platforms to build apps for (Rust target-triple syntax) +targets = ["aarch64-apple-darwin", "aarch64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock new file mode 100644 index 0000000..a6e4746 --- /dev/null +++ b/fuzz/Cargo.lock @@ -0,0 +1,447 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bumpalo" +version = "3.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" + +[[package]] +name = "cc" +version = "1.2.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "diffutils" +version = "0.5.0" +dependencies = [ + "chrono", + "diff", + "itoa", + "regex", + "same-file", + "unicode-width", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.178" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" + 
+[[package]] +name = "libfuzzer-sys" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unified-diff-fuzz" +version = "0.0.0" +dependencies = [ + "diffutils", + "libfuzzer-sys", +] + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +dependencies = [ + 
"bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 8b0b521..39efd70 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -47,4 +47,8 @@ path = "fuzz_targets/fuzz_ed.rs" test = false doc = false - +[[bin]] +name = "fuzz_side" +path = "fuzz_targets/fuzz_side.rs" +test = false +doc = false \ No newline at end of file diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs new file mode 100644 index 0000000..8a69c07 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -0,0 +1,42 @@ +#![no_main] +#[macro_use] +extern crate libfuzzer_sys; + +use diffutilslib::side_diff; + +use std::fs::File; +use std::io::Write; +use diffutilslib::params::Params; + +fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { + let (original, new, /* width, tabsize, */ expand) = x; + + // if width == 0 || tabsize == 0 { + // return; + // } + + let params = Params { + // width, + // tabsize, + expand_tabs: expand, + ..Default::default() + }; + let mut 
output_buf = vec![]; + side_diff::diff(&original, &new, &mut output_buf, ¶ms); + File::create("target/fuzz.file.original") + .unwrap() + .write_all(&original) + .unwrap(); + File::create("target/fuzz.file.new") + .unwrap() + .write_all(&new) + .unwrap(); + File::create("target/fuzz.file") + .unwrap() + .write_all(&original) + .unwrap(); + File::create("target/fuzz.diff") + .unwrap() + .write_all(&output_buf) + .unwrap(); +}); \ No newline at end of file diff --git a/src/cmp.rs b/src/cmp.rs index c0fc397..3e1eda6 100644 --- a/src/cmp.rs +++ b/src/cmp.rs @@ -35,7 +35,7 @@ pub struct Params { #[inline] fn usage_string(executable: &str) -> String { - format!("Usage: {} ", executable) + format!("Usage: {executable} ") } #[cfg(not(target_os = "windows"))] @@ -75,8 +75,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, Err(_) => { return Err(format!( - "{}: invalid --ignore-initial value '{}'", - executable_str, skip_desc + "{executable_str}: invalid --ignore-initial value '{skip_desc}'" )) } }; @@ -91,20 +90,29 @@ pub fn parse_params>(mut opts: Peekable) -> Resu "M" => 1_048_576, "GB" => 1_000_000_000, "G" => 1_073_741_824, - "TB" => 1_000_000_000_000, - "T" => 1_099_511_627_776, - "PB" => 1_000_000_000_000_000, - "P" => 1_125_899_906_842_624, - "EB" => 1_000_000_000_000_000_000, - "E" => 1_152_921_504_606_846_976, + // This only generates a warning when compiling for target_pointer_width < 64 + #[allow(unused_variables)] + suffix @ ("TB" | "T" | "PB" | "P" | "EB" | "E") => { + #[cfg(target_pointer_width = "64")] + match suffix { + "TB" => 1_000_000_000_000, + "T" => 1_099_511_627_776, + "PB" => 1_000_000_000_000_000, + "P" => 1_125_899_906_842_624, + "EB" => 1_000_000_000_000_000_000, + "E" => 1_152_921_504_606_846_976, + _ => unreachable!(), + } + #[cfg(not(target_pointer_width = "64"))] + usize::MAX + } "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, "Z" => usize::MAX, // 
1_180_591_620_717_411_303_424, "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, _ => { return Err(format!( - "{}: invalid --ignore-initial value '{}'", - executable_str, skip_desc + "{executable_str}: invalid --ignore-initial value '{skip_desc}'" )); } }; @@ -170,8 +178,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, Err(_) => { return Err(format!( - "{}: invalid --bytes value '{}'", - executable_str, max_bytes + "{executable_str}: invalid --bytes value '{max_bytes}'" )) } }; @@ -210,7 +217,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu std::process::exit(0); } if param_str.starts_with('-') { - return Err(format!("Unknown option: {:?}", param)); + return Err(format!("Unknown option: {param:?}")); } if from.is_none() { from = Some(param); @@ -236,8 +243,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu if params.quiet && params.verbose { return Err(format!( - "{}: options -l and -s are incompatible", - executable_str + "{executable_str}: options -l and -s are incompatible" )); } @@ -494,12 +500,6 @@ pub fn main(opts: Peekable) -> ExitCode { } } -#[inline] -fn is_ascii_printable(byte: u8) -> bool { - let c = byte as char; - c.is_ascii() && !c.is_ascii_control() -} - #[inline] fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { *buf = [b' ', b' ', b'0']; @@ -519,32 +519,68 @@ fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { } #[inline] -fn format_byte(byte: u8) -> String { - let mut byte = byte; - let mut quoted = vec![]; - - if !is_ascii_printable(byte) { - if byte >= 128 { - quoted.push(b'M'); - quoted.push(b'-'); - byte -= 128; +fn write_visible_byte(output: &mut Vec, byte: u8) -> usize { + match byte { + // Control characters: ^@, ^A, ..., ^_ + 0..=31 => { + output.push(b'^'); + output.push(byte + 64); + 2 } - - if byte < 32 { - quoted.push(b'^'); - byte += 64; - } else if byte == 127 { - 
quoted.push(b'^'); - byte = b'?'; + // Printable ASCII (space through ~) + 32..=126 => { + output.push(byte); + 1 + } + // DEL: ^? + 127 => { + output.extend_from_slice(b"^?"); + 2 + } + // High bytes with control equivalents: M-^@, M-^A, ..., M-^_ + 128..=159 => { + output.push(b'M'); + output.push(b'-'); + output.push(b'^'); + output.push(byte - 64); + 4 + } + // High bytes: M-, M-!, ..., M-~ + 160..=254 => { + output.push(b'M'); + output.push(b'-'); + output.push(byte - 128); + 3 + } + // Byte 255: M-^? + 255 => { + output.extend_from_slice(b"M-^?"); + 4 } - assert!((byte as char).is_ascii()); } +} - quoted.push(byte); +/// Writes a byte in visible form with right-padding to 4 spaces. +#[inline] +fn write_visible_byte_padded(output: &mut Vec, byte: u8) { + const SPACES: &[u8] = b" "; + const WIDTH: usize = SPACES.len(); + + let display_width = write_visible_byte(output, byte); - // SAFETY: the checks and shifts we do above match what cat and GNU + // Add right-padding spaces + let padding = WIDTH.saturating_sub(display_width); + output.extend_from_slice(&SPACES[..padding]); +} + +/// Formats a byte as a visible string (for non-performance-critical path) +#[inline] +fn format_visible_byte(byte: u8) -> String { + let mut result = Vec::with_capacity(4); + write_visible_byte(&mut result, byte); + // SAFETY: the checks and shifts in write_visible_byte match what cat and GNU // cmp do to ensure characters fall inside the ascii range. 
- unsafe { String::from_utf8_unchecked(quoted) } + unsafe { String::from_utf8_unchecked(result) } } // This function has been optimized to not use the Rust fmt system, which @@ -582,14 +618,7 @@ fn format_verbose_difference( output.push(b' '); - let from_byte_str = format_byte(from_byte); - let from_byte_padding = 4 - from_byte_str.len(); - - output.extend_from_slice(from_byte_str.as_bytes()); - - for _ in 0..from_byte_padding { - output.push(b' ') - } + write_visible_byte_padded(output, from_byte); output.push(b' '); @@ -597,7 +626,7 @@ fn format_verbose_difference( output.push(b' '); - output.extend_from_slice(format_byte(to_byte).as_bytes()); + write_visible_byte(output, to_byte); output.push(b'\n'); } else { @@ -700,9 +729,9 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, print!( " is {:>3o} {:char_width$} {:>3o} {:char_width$}", from_byte, - format_byte(from_byte), + format_visible_byte(from_byte), to_byte, - format_byte(to_byte) + format_visible_byte(to_byte) ); } println!(); diff --git a/src/diff.rs b/src/diff.rs index f769a29..f4c0614 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -5,11 +5,11 @@ use crate::params::{parse_params, Format}; use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, unified_diff}; +use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; use std::env::ArgsOs; use std::ffi::OsString; use std::fs; -use std::io::{self, Read, Write}; +use std::io::{self, stdout, Read, Write}; use std::iter::Peekable; use std::process::{exit, ExitCode}; @@ -79,6 +79,10 @@ pub fn main(opts: Peekable) -> ExitCode { eprintln!("{error}"); exit(2); }), + Format::SideBySide => { + let mut output = stdout().lock(); + side_diff::diff(&from_content, &to_content, &mut output, ¶ms) + } }; if params.brief && !result.is_empty() { println!( diff --git a/src/lib.rs b/src/lib.rs index a20ac56..342b01c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod 
ed_diff; pub mod macros; pub mod normal_diff; pub mod params; +pub mod side_diff; pub mod unified_diff; pub mod utils; @@ -11,4 +12,5 @@ pub mod utils; pub use context_diff::diff as context_diff; pub use ed_diff::diff as ed_diff; pub use normal_diff::diff as normal_diff; +pub use side_diff::diff as side_by_side_diff; pub use unified_diff::diff as unified_diff; diff --git a/src/main.rs b/src/main.rs index 8194d00..b7c2712 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,7 @@ mod ed_diff; mod macros; mod normal_diff; mod params; +mod side_diff; mod unified_diff; mod utils; @@ -72,7 +73,7 @@ fn main() -> ExitCode { Some("diff") => diff::main(args), Some("cmp") => cmp::main(args), Some(name) => { - eprintln!("{}: utility not supported", name); + eprintln!("{name}: utility not supported"); ExitCode::from(2) } None => second_arg_error(exe_name), diff --git a/src/params.rs b/src/params.rs index 9b3abc4..7d7d4f8 100644 --- a/src/params.rs +++ b/src/params.rs @@ -11,6 +11,7 @@ pub enum Format { Unified, Context, Ed, + SideBySide, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -24,6 +25,7 @@ pub struct Params { pub brief: bool, pub expand_tabs: bool, pub tabsize: usize, + pub width: usize, } impl Default for Params { @@ -38,6 +40,7 @@ impl Default for Params { brief: false, expand_tabs: false, tabsize: 8, + width: 130, } } } @@ -57,6 +60,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu let mut format = None; let mut context = None; let tabsize_re = Regex::new(r"^--tabsize=(?\d+)$").unwrap(); + let width_re = Regex::new(r"--width=(?P\d+)$").unwrap(); while let Some(param) = opts.next() { let next_param = opts.peek(); if param == "--" { @@ -101,6 +105,34 @@ pub fn parse_params>(mut opts: Peekable) -> Resu format = Some(Format::Ed); continue; } + if param == "-y" || param == "--side-by-side" { + if format.is_some() && format != Some(Format::SideBySide) { + return Err("Conflicting output style option".to_string()); + } + format = Some(Format::SideBySide); + continue; + 
} + if width_re.is_match(param.to_string_lossy().as_ref()) { + let param = param.into_string().unwrap(); + let width_str: &str = width_re + .captures(param.as_str()) + .unwrap() + .name("long") + .unwrap() + .as_str(); + + params.width = match width_str.parse::() { + Ok(num) => { + if num == 0 { + return Err("invalid width «0»".to_string()); + } + + num + } + Err(_) => return Err(format!("invalid width «{width_str}»")), + }; + continue; + } if tabsize_re.is_match(param.to_string_lossy().as_ref()) { // Because param matches the regular expression, // it is safe to assume it is valid UTF-8. @@ -112,9 +144,16 @@ pub fn parse_params>(mut opts: Peekable) -> Resu .unwrap() .as_str(); params.tabsize = match tabsize_str.parse::() { - Ok(num) => num, + Ok(num) => { + if num == 0 { + return Err("invalid tabsize «0»".to_string()); + } + + num + } Err(_) => return Err(format!("invalid tabsize «{tabsize_str}»")), }; + continue; } match match_context_diff_params(¶m, next_param, format) { @@ -156,7 +195,7 @@ pub fn parse_params>(mut opts: Peekable) -> Resu Err(error) => return Err(error), } if param.to_string_lossy().starts_with('-') { - return Err(format!("Unknown option: {:?}", param)); + return Err(format!("Unknown option: {param:?}")); } if from.is_none() { from = Some(param); @@ -240,17 +279,15 @@ fn match_context_diff_params( context_count = Some(numvalue.as_str().parse::().unwrap()); } } - if param == "-C" && next_param.is_some() { - match next_param.unwrap().to_string_lossy().parse::() { - Ok(context_size) => { - context_count = Some(context_size); - next_param_consumed = true; - } - Err(_) => { - return Err(format!( - "invalid context length '{}'", - next_param.unwrap().to_string_lossy() - )) + if param == "-C" { + if let Some(p) = next_param { + let size_str = p.to_string_lossy(); + match size_str.parse::() { + Ok(context_size) => { + context_count = Some(context_size); + next_param_consumed = true; + } + Err(_) => return Err(format!("invalid context length 
'{size_str}'")), } } } @@ -286,17 +323,15 @@ fn match_unified_diff_params( context_count = Some(numvalue.as_str().parse::().unwrap()); } } - if param == "-U" && next_param.is_some() { - match next_param.unwrap().to_string_lossy().parse::() { - Ok(context_size) => { - context_count = Some(context_size); - next_param_consumed = true; - } - Err(_) => { - return Err(format!( - "invalid context length '{}'", - next_param.unwrap().to_string_lossy() - )) + if param == "-U" { + if let Some(p) = next_param { + let size_str = p.to_string_lossy(); + match size_str.parse::() { + Ok(context_size) => { + context_count = Some(context_size); + next_param_consumed = true; + } + Err(_) => return Err(format!("invalid context length '{size_str}'")), } } } @@ -704,11 +739,11 @@ mod tests { executable: os("diff"), from: os("foo"), to: os("bar"), - tabsize: 0, + tabsize: 1, ..Default::default() }), parse_params( - [os("diff"), os("--tabsize=0"), os("foo"), os("bar")] + [os("diff"), os("--tabsize=1"), os("foo"), os("bar")] .iter() .cloned() .peekable() diff --git a/src/side_diff.rs b/src/side_diff.rs new file mode 100644 index 0000000..56953d2 --- /dev/null +++ b/src/side_diff.rs @@ -0,0 +1,1263 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. 
+ +use core::cmp::{max, min}; +use diff::Result; +use std::{io::Write, vec}; +use unicode_width::UnicodeWidthStr; + +use crate::params::Params; + +const GUTTER_WIDTH_MIN: usize = 3; + +struct CharIter<'a> { + current: &'a [u8], +} + +struct Config { + sdiff_half_width: usize, + sdiff_column_two_offset: usize, + tab_size: usize, + expanded: bool, + separator_pos: usize, +} + +impl<'a> From<&'a [u8]> for CharIter<'a> { + fn from(value: &'a [u8]) -> Self { + CharIter { current: value } + } +} + +impl<'a> Iterator for CharIter<'a> { + // (bytes for the next char, visible width) + type Item = (&'a [u8], usize); + + fn next(&mut self) -> Option { + let max = self.current.len().min(4); + + // We reached the end. + if max == 0 { + return None; + } + + // Try to find the next utf-8 character, if present in the next 4 bytes. + let mut index = 1; + let mut view = &self.current[..index]; + let mut char = str::from_utf8(view); + while char.is_err() { + index += 1; + if index > max { + break; + } + view = &self.current[..index]; + char = str::from_utf8(view) + } + + match char { + Ok(c) => { + self.current = self + .current + .get(view.len()..) + .unwrap_or(&self.current[0..0]); + Some((view, UnicodeWidthStr::width(c))) + } + Err(_) => { + // We did not find an utf-8 char within the next 4 bytes, return the single byte. + self.current = &self.current[1..]; + Some((&view[..1], 1)) + } + } + } +} + +impl Config { + pub fn new(full_width: usize, tab_size: usize, expanded: bool) -> Self { + // diff uses this calculation to calculate the size of a half line + // based on the options passed (like -w, -t, etc.). 
It's actually + // pretty useless, because we (actually) don't have any size modifiers + // that can change this, however I just want to leave the calculate + // here, since it's not very clear and may cause some confusion + + let w = full_width as isize; + let t = tab_size as isize; + let t_plus_g = t + GUTTER_WIDTH_MIN as isize; + let unaligned_off = (w >> 1) + (t_plus_g >> 1) + (w & t_plus_g & 1); + let off = unaligned_off - unaligned_off % t; + let hw = max(0, min(off - GUTTER_WIDTH_MIN as isize, w - off)) as usize; + let c2o = if hw != 0 { off as usize } else { w as usize }; + + Self { + expanded, + sdiff_column_two_offset: c2o, + tab_size, + sdiff_half_width: hw, + separator_pos: ((hw + c2o - 1) >> 1), + } + } +} + +fn format_tabs_and_spaces( + from: usize, + to: usize, + config: &Config, + buf: &mut T, +) -> std::io::Result<()> { + let expanded = config.expanded; + let tab_size = config.tab_size; + let mut current = from; + + if current > to { + return Ok(()); + } + + if expanded { + while current < to { + buf.write_all(b" ")?; + current += 1; + } + return Ok(()); + } + + while current + (tab_size - current % tab_size) <= to { + let next_tab = current + (tab_size - current % tab_size); + buf.write_all(b"\t")?; + current = next_tab; + } + + while current < to { + buf.write_all(b" ")?; + current += 1; + } + + Ok(()) +} + +fn process_half_line( + s: &[u8], + max_width: usize, + is_right: bool, + white_space_gutter: bool, + config: &Config, + buf: &mut T, +) -> std::io::Result<()> { + if s.is_empty() { + if !is_right { + format_tabs_and_spaces( + 0, + max_width + + if white_space_gutter { + GUTTER_WIDTH_MIN + } else { + 1 + }, + config, + buf, + )?; + } + + return Ok(()); + } + + if max_width > config.sdiff_half_width { + return Ok(()); + } + + if max_width > config.sdiff_column_two_offset && !is_right { + return Ok(()); + } + + let expanded = config.expanded; + let tab_size = config.tab_size; + let sdiff_column_two_offset = config.sdiff_column_two_offset; + let 
mut current_width = 0; + let iter = CharIter::from(s); + + // the encoding will probably be compatible with utf8, so we can take advantage + // of that to get the size of the columns and iterate without breaking the encoding of anything. + // It seems like a good trade, since there is still a fallback in case it is not utf8. + // But I think it would be better if we used some lib that would allow us to handle this + // in the best way possible, in order to avoid overhead (currently 2 for loops are needed). + // There is a library called mcel (mcel.h) that is used in GNU diff, but the documentation + // about it is very scarce, nor is its use documented on the internet. In fact, from my + // research I didn't even find any information about it in the GNU lib's own documentation. + + for c in iter { + let (char, c_width) = c; + + if current_width + c_width > max_width { + break; + } + + match char { + b"\t" => { + if expanded && (current_width + tab_size - (current_width % tab_size)) <= max_width + { + let mut spaces = tab_size - (current_width % tab_size); + while spaces > 0 { + buf.write_all(b" ")?; + current_width += 1; + spaces -= 1; + } + } else if current_width + tab_size - (current_width % tab_size) <= max_width { + buf.write_all(b"\t")?; + current_width += tab_size - (current_width % tab_size); + } + } + b"\n" => { + break; + } + b"\r" => { + buf.write_all(b"\r")?; + format_tabs_and_spaces(0, sdiff_column_two_offset, config, buf)?; + current_width = 0; + } + b"\0" | b"\x07" | b"\x0C" | b"\x0B" => { + buf.write_all(char)?; + } + _ => { + buf.write_all(char)?; + current_width += c_width; + } + } + } + + // gnu sdiff do not tabulate the hole empty right line, instead, just keep the line empty + if !is_right { + // we always sum + 1 or + GUTTER_WIDTH_MIN cause we want to expand + // up to the third column of the gutter column if the gutter is gutter white space, + // otherwise we can expand to only the first column of the gutter middle column, cause + // the next 
is the sep char + format_tabs_and_spaces( + current_width, + max_width + + if white_space_gutter { + GUTTER_WIDTH_MIN + } else { + 1 + }, + config, + buf, + )?; + } + + Ok(()) +} + +fn push_output( + left_ln: &[u8], + right_ln: &[u8], + symbol: u8, + output: &mut T, + config: &Config, +) -> std::io::Result<()> { + if left_ln.is_empty() && right_ln.is_empty() { + writeln!(output)?; + return Ok(()); + } + + let white_space_gutter = symbol == b' '; + let half_width = config.sdiff_half_width; + let column_two_offset = config.sdiff_column_two_offset; + let separator_pos = config.separator_pos; + let put_new_line = true; // should be false when | is allowed + + // this involves a lot of the '|' mark, however, as it is not active, + // it is better to deactivate it as it introduces visual bug if + // the line is empty. + // if !left_ln.is_empty() { + // put_new_line = put_new_line || (left_ln.last() == Some(&b'\n')); + // } + // if !right_ln.is_empty() { + // put_new_line = put_new_line || (right_ln.last() == Some(&b'\n')); + // } + + process_half_line( + left_ln, + half_width, + false, + white_space_gutter, + config, + output, + )?; + if symbol != b' ' { + // the diff always want to put all tabs possible in the usable are, + // even in the middle space between the gutters if possible. 
+ + output.write_all(&[symbol])?; + if !right_ln.is_empty() { + format_tabs_and_spaces(separator_pos + 1, column_two_offset, config, output)?; + } + } + process_half_line( + right_ln, + half_width, + true, + white_space_gutter, + config, + output, + )?; + + if put_new_line { + writeln!(output)?; + } + + Ok(()) +} + +pub fn diff( + from_file: &[u8], + to_file: &[u8], + output: &mut T, + params: &Params, +) -> Vec { + // ^ The left file ^ The right file + + let mut left_lines: Vec<&[u8]> = from_file.split_inclusive(|&c| c == b'\n').collect(); + let mut right_lines: Vec<&[u8]> = to_file.split_inclusive(|&c| c == b'\n').collect(); + let config = Config::new(params.width, params.tabsize, params.expand_tabs); + + if left_lines.last() == Some(&&b""[..]) { + left_lines.pop(); + } + + if right_lines.last() == Some(&&b""[..]) { + right_lines.pop(); + } + + /* + DISCLAIMER: + Currently the diff engine does not produce results like the diff engine used in GNU diff, + so some results may be inaccurate. For example, the line difference marker "|", according + to the GNU documentation, appears when the same lines (only the actual line, although the + relative line may change the result, so occasionally '|' markers appear with the same lines) + are different but exist in both files. In the current solution the same result cannot be + obtained because the diff engine does not return Both if both exist but are different, + but instead returns a Left and a Right for each one, implying that two lines were added + and deleted. Furthermore, the GNU diff program apparently stores some internal state + (this internal state is just a note about how the diff engine works) about the lines. + For example, an added or removed line directly counts in the line query of the original + lines to be printed in the output. Because of this imbalance caused by additions and + deletions, the characters ( and ) are introduced. 
They basically represent lines without + context, which have lost their pair in the other file due to additions or deletions. Anyway, + my goal with this disclaimer is to warn that for some reason, whether it's the diff engine's + inability to determine and predict/precalculate the result of GNU's sdiff, with this software it's + not possible to reproduce results that are 100% faithful to GNU's, however, the basic premise + e of side diff of showing added and removed lines and creating edit scripts is totally possible. + More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of + the goals of this project : ) + */ + for result in diff::slice(&left_lines, &right_lines) { + match result { + Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(), + Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(), + Result::Both(left_ln, right_ln) => { + push_output(left_ln, right_ln, b' ', output, &config).unwrap() + } + } + } + + vec![] +} + +#[cfg(test)] +mod tests { + const DEF_TAB_SIZE: usize = 4; + + use super::*; + + mod format_tabs_and_spaces { + use super::*; + + const CONFIG_E_T: Config = Config { + sdiff_half_width: 60, + tab_size: DEF_TAB_SIZE, + expanded: true, + sdiff_column_two_offset: 0, + separator_pos: 0, + }; + + const CONFIG_E_F: Config = Config { + sdiff_half_width: 60, + tab_size: DEF_TAB_SIZE, + expanded: false, + sdiff_column_two_offset: 0, + separator_pos: 0, + }; + + #[test] + fn test_format_tabs_and_spaces_expanded_false() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 5, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ']); + } + + #[test] + fn test_format_tabs_and_spaces_expanded_true() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 5, &CONFIG_E_T, &mut buf).unwrap(); + assert_eq!(buf, vec![b' '; 5]); + } + + #[test] + fn test_format_tabs_and_spaces_from_greater_than_to() { + let mut buf = vec![]; + format_tabs_and_spaces(6, 5, 
&CONFIG_E_F, &mut buf).unwrap(); + assert!(buf.is_empty()); + } + + #[test] + fn test_format_from_non_zero_position() { + let mut buf = vec![]; + format_tabs_and_spaces(2, 7, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_multiple_full_tabs_needed() { + let mut buf = vec![]; + format_tabs_and_spaces(0, 12, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b'\t']); + } + + #[test] + fn test_uneven_tab_boundary_with_spaces() { + let mut buf = vec![]; + format_tabs_and_spaces(3, 10, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ']); + } + + #[test] + fn test_expanded_true_with_offset() { + let mut buf = vec![]; + format_tabs_and_spaces(3, 9, &CONFIG_E_T, &mut buf).unwrap(); + assert_eq!(buf, vec![b' '; 6]); + } + + #[test] + fn test_exact_tab_boundary_from_midpoint() { + let mut buf = vec![]; + format_tabs_and_spaces(4, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + + #[test] + fn test_mixed_tabs_and_spaces_edge_case() { + let mut buf = vec![]; + format_tabs_and_spaces(5, 9, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ']); + } + + #[test] + fn test_minimal_gap_with_tab() { + let mut buf = vec![]; + format_tabs_and_spaces(7, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + + #[test] + fn test_expanded_false_with_tab_at_end() { + let mut buf = vec![]; + format_tabs_and_spaces(6, 8, &CONFIG_E_F, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t']); + } + } + + mod process_half_line { + use super::*; + + fn create_test_config(expanded: bool, tab_size: usize) -> Config { + Config { + sdiff_half_width: 30, + sdiff_column_two_offset: 60, + tab_size, + expanded, + separator_pos: 15, + } + } + + #[test] + fn test_empty_line_left_expanded_false() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"", 10, false, false, &config, &mut 
buf).unwrap(); + assert_eq!(buf.len(), 5); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_tabs_unexpanded() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\tabc", 8, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'a', b'b', b'c', b'\t', b' ']); + } + + #[test] + fn test_utf8_multibyte() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "😉😉😉".as_bytes(); + process_half_line(s, 3, false, false, &config, &mut buf).unwrap(); + let mut r = vec![]; + r.write_all("😉\t".as_bytes()).unwrap(); + assert_eq!(buf, r) + } + + #[test] + fn test_newline_handling() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"abc\ndef", 5, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'a', b'b', b'c', b'\t', b' ', b' ']); + } + + #[test] + fn test_carriage_return() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\rxyz", 5, true, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.extend(vec![b'x', b'y', b'z']); + assert_eq!(buf, r); + } + + #[test] + fn test_exact_width_fit() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"abcd", 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 5); + assert_eq!(buf, b"abcd ".to_vec()); + } + + #[test] + fn test_non_utf8_bytes() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + // ISO-8859-1 + process_half_line( + &[0x63, 0x61, 0x66, 0xE9], + 5, + false, + false, + &config, + &mut buf, + ) + .unwrap(); + assert_eq!(&buf, &[0x63, 0x61, 0x66, 0xE9, b' ', b' ']); + assert!(String::from_utf8(buf).is_err()); + } + + #[test] + fn test_non_utf8_bytes_ignore_padding_bytes() { + let config = create_test_config(false, DEF_TAB_SIZE); 
+ let mut buf = vec![]; + + let utf32le_bytes = [ + 0x63, 0x00, 0x00, 0x00, // 'c' + 0x61, 0x00, 0x00, 0x00, // 'a' + 0x66, 0x00, 0x00, 0x00, // 'f' + 0xE9, 0x00, 0x00, 0x00, // 'é' + ]; + // utf8 little endiand 32 bits (or 4 bytes per char) + process_half_line(&utf32le_bytes, 6, false, false, &config, &mut buf).unwrap(); + let mut r = utf32le_bytes.to_vec(); + r.extend(vec![b' '; 3]); + assert_eq!(buf, r); + } + + #[test] + fn test_non_utf8_non_preserve_ascii_bytes_cut() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + + let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding + // ^ é char, start multi byte + process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter + } + + #[test] + fn test_right_line_padding() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"xyz", 5, true, true, &config, &mut buf).unwrap(); + assert_eq!(buf.len(), 3); + } + + #[test] + fn test_mixed_tabs_spaces() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + process_half_line(b"\t \t", 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b' ', b' ', b'\t', b' ', b' ', b' ']); + } + + #[test] + fn test_overflow_multibyte() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = "日本語".as_bytes(); + process_half_line(s, 5, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, "日本 ".as_bytes()); + } + + #[test] + fn test_white_space_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 3, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\t "); + } + + #[test] + fn test_expanded_true() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, 
false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc ") + } + + #[test] + fn test_expanded_true_with_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc ") + } + + #[test] + fn test_width0_chars() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc\0\x0B\x07\x0C"; + process_half_line(s, 4, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\0\x0B\x07\x0C\t ") + } + + #[test] + fn test_left_empty_white_space_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b""; + process_half_line(s, 9, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"\t\t\t"); + } + + #[test] + fn test_s_size_eq_max_width_p1() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abcdefghij"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abcdefghij "); + } + + #[test] + fn test_mixed_tabs_and_spaces_inversion() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" \t \t "); + } + + #[test] + fn test_expanded_with_tabs() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b" "); + } + + #[test] + fn test_expanded_with_tabs_and_space_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b" \t \t "; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b" "); + } + + #[test] + fn test_zero_width_unicode_chars() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = 
vec![]; + let s = "\u{200B}".as_bytes(); + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, "\u{200B}\t\t ".as_bytes()); + } + + #[test] + fn test_multiple_carriage_returns() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\r\r"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.push(b'\r'); + r.extend(vec![b'\t'; 15]); + r.extend(vec![b'\t'; 2]); + r.extend(vec![b' '; 3]); + assert_eq!(buf, r); + } + + #[test] + fn test_multiple_carriage_returns_is_right_true() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\r\r"; + process_half_line(s, 10, true, false, &config, &mut buf).unwrap(); + let mut r = vec![b'\r']; + r.extend(vec![b'\t'; 15]); + r.push(b'\r'); + r.extend(vec![b'\t'; 15]); + assert_eq!(buf, r); + } + + #[test] + fn test_mixed_invalid_utf8_with_valid() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc\xFF\xFEdef"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert!(String::from_utf8(s.to_vec()).is_err()); + assert_eq!(buf, b"abc\xFF\xFEdef "); + } + + #[test] + fn test_max_width_zero() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"foo bar"; + process_half_line(s, 0, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b' ']); + } + + #[test] + fn test_line_only_with_tabs() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\t\t\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, vec![b'\t', b'\t', b' ', b' ', b' ']) + } + + #[test] + fn test_tabs_expanded() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"\t\t\t"; + process_half_line(s, 12, false, false, &config, &mut buf).unwrap(); + 
assert_eq!(buf, b" ".repeat(13)); + } + + #[test] + fn test_mixed_tabs() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a\tb\tc "); + } + + #[test] + fn test_mixed_tabs_with_gutter() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a\tb\tc\t "); + } + + #[test] + fn test_mixed_tabs_expanded() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a b c "); + } + + #[test] + fn test_mixed_tabs_expanded_with_gutter() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 10, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b"a b c "); + } + + #[test] + fn test_break_if_invalid_max_width() { + let config = create_test_config(true, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"a\tb\tc\t"; + process_half_line(s, 61, false, true, &config, &mut buf).unwrap(); + assert_eq!(buf, b""); + assert_eq!(buf.len(), 0); + } + + #[test] + fn test_new_line() { + let config = create_test_config(false, DEF_TAB_SIZE); + let mut buf = vec![]; + let s = b"abc"; + process_half_line(s, 10, false, false, &config, &mut buf).unwrap(); + assert_eq!(buf, b"abc\t\t "); + } + } + + mod push_output { + // almost all behavior of the push_output was tested with tests on process_half_line + + use super::*; + + impl Default for Config { + fn default() -> Self { + Config::new(130, 8, false) + } + } + + fn create_test_config_def() -> Config { + Config::default() + } + + #[test] + fn test_left_empty_right_not_added() { + let config = create_test_config_def(); + let left_ln = b""; + let 
right_ln = b"bar"; + let symbol = b'>'; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"\t\t\t\t\t\t\t >\tbar\n"); + } + + #[test] + fn test_right_empty_left_not_del() { + let config = create_test_config_def(); + let left_ln = b"bar"; + let right_ln = b""; + let symbol = b'>'; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"bar\t\t\t\t\t\t\t >\n"); + } + + #[test] + fn test_both_empty() { + let config = create_test_config_def(); + let left_ln = b""; + let right_ln = b""; + let symbol = b' '; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf, b"\n"); + } + + #[test] + fn test_output_cut_with_maximization() { + let config = create_test_config_def(); + let left_ln = b"a".repeat(62); + let right_ln = b"a".repeat(62); + let symbol = b' '; + let mut buf = vec![]; + push_output(&left_ln[..], &right_ln[..], symbol, &mut buf, &config).unwrap(); + assert_eq!(buf.len(), 61 * 2 + 2); + assert_eq!(&buf[0..61], vec![b'a'; 61]); + assert_eq!(&buf[61..62], b"\t"); + let mut end = b"a".repeat(61); + end.push(b'\n'); + assert_eq!(&buf[62..], end); + } + + #[test] + fn test_both_lines_non_empty_with_space_symbol_max_tabs() { + let config = create_test_config_def(); + let left_ln = b"left"; + let right_ln = b"right"; + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "left\t\t\t\t\t\t\t\t"; + let expected_right = "right"; + assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); + } + + #[test] + fn test_non_space_symbol_with_padding() { + let config = create_test_config_def(); + let left_ln = b"data"; + let right_ln = b""; + let symbol = b'<'; // impossible case, just to use different symbol + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut 
buf, &config).unwrap(); + assert_eq!(buf, "data\t\t\t\t\t\t\t <\n".as_bytes()); + } + + #[test] + fn test_lines_exceeding_half_width() { + let config = create_test_config_def(); + let left_ln = vec![b'a'; 100]; + let left_ln = left_ln.as_slice(); + let right_ln = vec![b'b'; 100]; + let right_ln = right_ln.as_slice(); + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "a".repeat(61); + let expected_right = "b".repeat(61); + assert_eq!(buf.len(), 61 + 1 + 61 + 1); + assert_eq!(&buf[0..61], expected_left.as_bytes()); + assert_eq!(buf[61], b'\t'); + assert_eq!(&buf[62..123], expected_right.as_bytes()); + assert_eq!(&buf[123..], b"\n"); + } + + #[test] + fn test_tabs_in_lines_expanded() { + let mut config = create_test_config_def(); + config.expanded = true; + let left_ln = b"\tleft"; + let right_ln = b"\tright"; + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = " left".to_string() + &" ".repeat(61 - 12); + let expected_right = " right"; + assert_eq!( + buf, + format!("{}{}{}\n", expected_left, " ", expected_right).as_bytes() + ); + } + + #[test] + fn test_unicode_characters() { + let config = create_test_config_def(); + let left_ln = "áéíóú".as_bytes(); + let right_ln = "😀😃😄".as_bytes(); + let symbol = b' '; + let mut buf = vec![]; + push_output(left_ln, right_ln, symbol, &mut buf, &config).unwrap(); + let expected_left = "áéíóú\t\t\t\t\t\t\t\t"; + let expected_right = "😀😃😄"; + assert_eq!(buf, format!("{expected_left}{expected_right}\n").as_bytes()); + } + } + + mod diff { + /* + Probably this whole section should be refactored when the complete sdiff + arrives. I would say that these tests are more to document the + behavior of the engine than to actually test whether it is right, + because it is right, but right up to its limitations. 
+ */ + + use super::*; + + fn generate_params() -> Params { + Params { + tabsize: 8, + expand_tabs: false, + width: 130, + ..Default::default() + } + } + + fn contains_string(vec: &[u8], s: &str) -> usize { + let pattern = s.as_bytes(); + vec.windows(pattern.len()).filter(|s| s == &pattern).count() + } + + fn calc_lines(input: &Vec<u8>) -> usize { + let mut lines_counter = 0; + + for c in input { + if c == &b'\n' { + lines_counter += 1; + } + } + + lines_counter + } + + #[test] + fn test_equal_lines() { + let params = generate_params(); + let from_file = b"equal"; + let to_file = b"equal"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + assert_eq!(calc_lines(&output), 1); + assert!(!output.contains(&b'<')); + assert!(!output.contains(&b'>')); + assert_eq!(contains_string(&output, "equal"), 2) + } + + #[test] + fn test_different_lines() { + let params = generate_params(); + let from_file = b"eq"; + let to_file = b"ne"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + assert_eq!(calc_lines(&output), 2); + assert!(output.contains(&b'>')); + assert!(output.contains(&b'<')); + assert_eq!(contains_string(&output, "eq"), 1); + assert_eq!(contains_string(&output, "ne"), 1); + } + + #[test] + fn test_added_line() { + let params = generate_params(); + let from_file = b""; + let to_file = b"new line"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 1); + assert_eq!(contains_string(&output, ">"), 1); + assert_eq!(contains_string(&output, "new line"), 1); + } + + #[test] + fn test_removed_line() { + let params = generate_params(); + let from_file = b"old line"; + let to_file = b""; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, "old line"), 1); + } + + #[test] + fn test_multiple_changes() { + let params = 
generate_params(); + let from_file = b"line1\nline2\nline3"; + let to_file = b"line1\nmodified\nline4"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 5); + assert_eq!(contains_string(&output, "<"), 2); + assert_eq!(contains_string(&output, ">"), 2); + } + + #[test] + fn test_unicode_and_special_chars() { + let params = generate_params(); + let from_file = "á\t€".as_bytes(); + let to_file = "€\t😊".as_bytes(); + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert!(String::from_utf8_lossy(&output).contains("á")); + assert!(String::from_utf8_lossy(&output).contains("€")); + assert!(String::from_utf8_lossy(&output).contains("😊")); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_mixed_whitespace() { + let params = generate_params(); + let from_file = b" \tspaces"; + let to_file = b"\t\t tabs"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert!(output.contains(&b'<')); + assert!(output.contains(&b'>')); + assert!(String::from_utf8_lossy(&output).contains("spaces")); + assert!(String::from_utf8_lossy(&output).contains("tabs")); + } + + #[test] + fn test_empty_files() { + let params = generate_params(); + let from_file = b""; + let to_file = b""; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(output, vec![]); + } + + #[test] + fn test_partially_matching_lines() { + let params = generate_params(); + let from_file = b"match\nchange"; + let to_file = b"match\nupdated"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 3); + assert_eq!(contains_string(&output, "match"), 2); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_interleaved_add_remove() { + let params = generate_params(); + let from_file = 
b"A\nB\nC\nD"; + let to_file = b"B\nX\nD\nY"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "A"), 1); + assert_eq!(contains_string(&output, "X"), 1); + assert_eq!(contains_string(&output, "Y"), 1); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_swapped_lines() { + let params = generate_params(); + let from_file = b"1\n2\n3\n4"; + let to_file = b"4\n3\n2\n1"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_gap_between_changes() { + let params = generate_params(); + let from_file = b"Start\nKeep1\nRemove\nKeep2\nEnd"; + let to_file = b"Start\nNew1\nKeep1\nKeep2\nNew2\nEnd"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 7); + assert_eq!(contains_string(&output, "Remove"), 1); + assert_eq!(contains_string(&output, "New1"), 1); + assert_eq!(contains_string(&output, "New2"), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 2); + } + + #[test] + fn test_mixed_operations_complex() { + let params = generate_params(); + let from_file = b"Same\nOld1\nSameMid\nOld2\nSameEnd"; + let to_file = b"Same\nNew1\nSameMid\nNew2\nNew3\nSameEnd"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 8); + assert_eq!(contains_string(&output, "<"), 2); + assert_eq!(contains_string(&output, ">"), 3); + } + + #[test] + fn test_insert_remove_middle() { + let params = generate_params(); + let from_file = b"Header\nContent1\nFooter"; + let to_file = b"Header\nContent2\nFooter"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + 
assert_eq!(calc_lines(&output), 4); + assert_eq!(contains_string(&output, "Content1"), 1); + assert_eq!(contains_string(&output, "Content2"), 1); + assert_eq!(contains_string(&output, "<"), 1); + assert_eq!(contains_string(&output, ">"), 1); + } + + #[test] + fn test_multiple_adjacent_changes() { + let params = generate_params(); + let from_file = b"A\nB\nC\nD\nE"; + let to_file = b"A\nX\nY\nD\nZ"; + let mut output = vec![]; + diff(from_file, to_file, &mut output, &params); + + assert_eq!(calc_lines(&output), 8); + assert_eq!(contains_string(&output, "<"), 3); + assert_eq!(contains_string(&output, ">"), 3); + } + } + + mod config { + use super::*; + + fn create_config(full_width: usize, tab_size: usize, expanded: bool) -> Config { + Config::new(full_width, tab_size, expanded) + } + + #[test] + fn test_full_width_80_tab_4() { + let config = create_config(80, 4, false); + assert_eq!(config.sdiff_half_width, 37); + assert_eq!(config.sdiff_column_two_offset, 40); + assert_eq!(config.separator_pos, 38); + } + + #[test] + fn test_full_width_40_tab_8() { + let config = create_config(40, 8, true); + assert_eq!(config.sdiff_half_width, 16); + assert_eq!(config.sdiff_column_two_offset, 24); + assert_eq!(config.separator_pos, 19); // (16 +24 -1) /2 = 19.5 + } + + #[test] + fn test_full_width_30_tab_2() { + let config = create_config(30, 2, false); + assert_eq!(config.sdiff_half_width, 13); + assert_eq!(config.sdiff_column_two_offset, 16); + assert_eq!(config.separator_pos, 14); + } + + #[test] + fn test_small_width_10_tab_4() { + let config = create_config(10, 4, false); + assert_eq!(config.sdiff_half_width, 2); + assert_eq!(config.sdiff_column_two_offset, 8); + assert_eq!(config.separator_pos, 4); + } + + #[test] + fn test_minimal_width_3_tab_4() { + let config = create_config(3, 4, false); + assert_eq!(config.sdiff_half_width, 0); + assert_eq!(config.sdiff_column_two_offset, 3); + assert_eq!(config.separator_pos, 1); + } + + #[test] + fn test_odd_width_7_tab_3() { + let config 
= create_config(7, 3, false); + assert_eq!(config.sdiff_half_width, 1); + assert_eq!(config.sdiff_column_two_offset, 6); + assert_eq!(config.separator_pos, 3); + } + + #[test] + fn test_tab_size_larger_than_width() { + let config = create_config(5, 10, false); + assert_eq!(config.sdiff_half_width, 0); + assert_eq!(config.sdiff_column_two_offset, 5); + assert_eq!(config.separator_pos, 2); + } + } +} diff --git a/src/utils.rs b/src/utils.rs index 88b39ff..daca18d 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,9 +3,8 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. -use std::{ffi::OsString, io::Write}; - use regex::Regex; +use std::{ffi::OsString, io::Write}; use unicode_width::UnicodeWidthStr; /// Replace tabs by spaces in the input line. diff --git a/tests/integration.rs b/tests/integration.rs index cfbf529..b37d7e6 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE-* // files that was distributed with this source code. 
-use assert_cmd::cmd::Command; +use assert_cmd::cargo::cargo_bin_cmd; use predicates::prelude::*; use std::fs::File; #[cfg(not(windows))] @@ -17,14 +17,14 @@ mod common { #[test] fn unknown_param() -> Result<(), Box> { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("patch"); cmd.assert() .code(predicate::eq(2)) .failure() .stderr(predicate::eq("patch: utility not supported\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.assert() .code(predicate::eq(0)) .success() @@ -33,7 +33,7 @@ mod common { )); for subcmd in ["diff", "cmp"] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg(subcmd); cmd.arg("--foobar"); cmd.assert() @@ -58,7 +58,7 @@ mod common { let error_message = "The system cannot find the file specified."; for subcmd in ["diff", "cmp"] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg(subcmd); cmd.arg(&nopath).arg(file.path()); cmd.assert() @@ -69,7 +69,7 @@ mod common { &nopath.as_os_str().to_string_lossy() ))); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg(subcmd); cmd.arg(file.path()).arg(&nopath); cmd.assert() @@ -81,7 +81,7 @@ mod common { ))); } - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg(&nopath).arg(&nopath); cmd.assert().code(predicate::eq(2)).failure().stderr( @@ -105,7 +105,7 @@ mod diff { fn no_differences() -> Result<(), Box> { let file = NamedTempFile::new()?; for option in ["", "-u", "-c", "-e"] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); @@ -125,7 +125,7 @@ mod diff { let mut file1 = NamedTempFile::new()?; file1.write_all("foo\n".as_bytes())?; for option in ["", "-u", "-c", "-e"] { - 
let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); @@ -144,7 +144,7 @@ mod diff { let mut file2 = NamedTempFile::new()?; file2.write_all("foo\n".as_bytes())?; for option in ["", "-u", "-c", "-e"] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); @@ -169,7 +169,7 @@ mod diff { let mut file2 = NamedTempFile::new()?; file2.write_all("bar\n".as_bytes())?; for option in ["", "-u", "-c", "-e"] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); @@ -190,7 +190,7 @@ mod diff { let mut file2 = NamedTempFile::new()?; file2.write_all("bar\n".as_bytes())?; for option in ["", "-u", "-c", "-e"] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); if !option.is_empty() { cmd.arg(option); @@ -214,7 +214,7 @@ mod diff { file1.write_all("foo".as_bytes())?; let mut file2 = NamedTempFile::new()?; file2.write_all("bar".as_bytes())?; - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-e").arg(file1.path()).arg(file2.path()); cmd.assert() @@ -231,7 +231,7 @@ mod diff { let mut file2 = NamedTempFile::new()?; file2.write_all("bar\n".as_bytes())?; - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-u") .arg(file1.path()) @@ -248,7 +248,7 @@ mod diff { ) ); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-u") .arg("-") @@ -265,7 +265,7 @@ mod diff { ) ); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-u").arg("-").arg("-"); cmd.assert() @@ -275,7 
+275,7 @@ mod diff { #[cfg(unix)] { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-u") .arg(file1.path()) @@ -311,7 +311,7 @@ mod diff { let mut da = File::create(&da_path).unwrap(); da.write_all(b"da\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-u").arg(&directory).arg(&a_path); cmd.assert().code(predicate::eq(1)).failure(); @@ -326,7 +326,7 @@ mod diff { ) ); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("diff"); cmd.arg("-u").arg(&a_path).arg(&directory); cmd.assert().code(predicate::eq(1)).failure(); @@ -350,7 +350,7 @@ mod cmp { #[test] fn cmp_incompatible_params() -> Result<(), Box> { - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-l"); cmd.arg("-s"); @@ -373,7 +373,7 @@ mod cmp { let mut a = File::create(&a_path).unwrap(); a.write_all(b"a\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg(&a_path); cmd.write_stdin("a\n"); @@ -383,7 +383,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::str::is_empty()); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg(&a_path); @@ -409,7 +409,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(b"a\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); cmd.assert() @@ -432,7 +432,7 @@ mod cmp { let b_path = tmp_dir.path().join("b"); let _ = File::create(&b_path).unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); cmd.assert() @@ 
-456,7 +456,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(b"bcd\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); @@ -465,7 +465,7 @@ mod cmp { .failure() .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-b"); @@ -478,7 +478,7 @@ mod cmp { " differ: byte 1, line 1 is 141 a 142 b\n", )); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-l"); @@ -489,7 +489,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-l"); @@ -518,7 +518,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(b"abc\ndef\ng").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); @@ -528,7 +528,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-b"); @@ -541,7 +541,7 @@ mod cmp { " differ: byte 8, line 2 is 147 g 12 ^J\n", )); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-l"); @@ -553,7 +553,7 @@ mod cmp { .stderr(predicate::str::contains(" EOF on")) .stderr(predicate::str::ends_with(" after byte 8\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + 
let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-b"); @@ -581,7 +581,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(b"abcdefghijkl\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-l"); cmd.arg("-b"); @@ -594,7 +594,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::str::is_empty()); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-l"); cmd.arg("-b"); @@ -607,7 +607,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::eq("4 40 144 d\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-l"); cmd.arg("-b"); @@ -634,7 +634,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(b"###abc\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-i"); @@ -647,7 +647,7 @@ mod cmp { .stdout(predicate::str::is_empty()); // Positional skips should be ignored - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg("-i"); @@ -661,7 +661,7 @@ mod cmp { .stdout(predicate::str::is_empty()); // Single positional argument should only affect first file. 
- let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); @@ -672,7 +672,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.env("LC_ALL", "C"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); @@ -701,7 +701,7 @@ mod cmp { writeln!(b, "{}c", "b".repeat(1024)).unwrap(); b.flush().unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("--ignore-initial=1K"); cmd.arg(&a_path).arg(&b_path); @@ -726,7 +726,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(b"abcdefghijkl\n").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-l"); cmd.arg("-b"); @@ -739,7 +739,7 @@ mod cmp { .stderr(predicate::str::is_empty()) .stdout(predicate::str::is_empty()); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-b"); cmd.arg("-i"); @@ -772,7 +772,7 @@ mod cmp { let mut b = File::create(&b_path).unwrap(); b.write_all(&bytes).unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-l"); cmd.arg("-b"); @@ -817,7 +817,7 @@ mod cmp { let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap(); - let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin("diffutils")) + let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin!("diffutils")) .arg("cmp") .arg(&a_path) .arg(&b_path) @@ -825,12 +825,27 @@ mod cmp { .spawn() .unwrap(); - std::thread::sleep(std::time::Duration::from_millis(100)); - - assert_eq!(child.try_wait().unwrap().unwrap().code(), Some(1)); + // Bound the 
runtime to a very short time that still allows for some resource + // constraint to slow it down while also allowing very fast systems to exit as + // early as possible. + const MAX_TRIES: u8 = 50; + for tries in 0..=MAX_TRIES { + if tries == MAX_TRIES { + panic!("cmp took too long to run, /dev/null optimization probably not working") + } + match child.try_wait() { + Ok(Some(status)) => { + assert_eq!(status.code(), Some(1)); + break; + } + Ok(None) => (), + Err(e) => panic!("{e:#?}"), + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } // Two stdins should be equal - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg("-"); cmd.arg("-"); @@ -861,9 +876,10 @@ mod cmp { b.write_all(bytes).unwrap(); b.write_all(b"B").unwrap(); - let mut cmd = Command::cargo_bin("diffutils")?; + let mut cmd = cargo_bin_cmd!("diffutils"); cmd.arg("cmp"); cmd.arg(&a_path).arg(&b_path); + cmd.env("LC_ALL", "en_US"); cmd.assert() .code(predicate::eq(1)) .failure()