diff --git a/.cargo/config.toml b/.cargo/config.toml deleted file mode 100644 index a88db5f..0000000 --- a/.cargo/config.toml +++ /dev/null @@ -1,25 +0,0 @@ -[alias] -cov = "llvm-cov" -cov-lcov = "llvm-cov --lcov --output-path=./.coverage/lcov.info" -cov-html = "llvm-cov --html" -time = "build --timings --all-targets" - -[build] -rustflags = [ - "-D", - "warnings", - "-D", - "future-incompatible", - "-D", - "let-underscore", - "-D", - "nonstandard-style", - "-D", - "rust-2018-compatibility", - "-D", - "rust-2018-idioms", - "-D", - "rust-2021-compatibility", - "-D", - "unused", -] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 2ae8963..0000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -/.github/**/* @torrust/maintainers diff --git a/.github/actions-rs/grcov.yml b/.github/actions-rs/grcov.yml deleted file mode 100644 index 95a8ce9..0000000 --- a/.github/actions-rs/grcov.yml +++ /dev/null @@ -1,2 +0,0 @@ -output-type: lcov -output-path: ./lcov.info \ No newline at end of file diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml deleted file mode 100644 index becfbc1..0000000 --- a/.github/dependabot.yaml +++ /dev/null @@ -1,19 +0,0 @@ -version: 2 -updates: - - package-ecosystem: github-actions - directory: / - schedule: - interval: daily - target-branch: "develop" - labels: - - "Continuous Integration" - - "Dependencies" - - - package-ecosystem: cargo - directory: / - schedule: - interval: daily - target-branch: "develop" - labels: - - "Build | Project System" - - "Dependencies" diff --git a/.github/workflows/contract.yaml b/.github/workflows/contract.yaml deleted file mode 100644 index 77417fb..0000000 --- a/.github/workflows/contract.yaml +++ /dev/null @@ -1,58 +0,0 @@ -name: Contract - -on: - push: - pull_request: - -env: - CARGO_TERM_COLOR: always - -jobs: - contract: - name: Contract - runs-on: ubuntu-latest - - strategy: - matrix: - toolchain: [nightly, stable] - - steps: - - id: checkout - name: Checkout Repository - uses: actions/checkout@v4 - - - id: setup - name: Setup Toolchain - uses: dtolnay/rust-toolchain@stable - with: - toolchain: ${{ matrix.toolchain }} - components: llvm-tools-preview - - - id: cache - name: Enable Job Cache - uses: Swatinem/rust-cache@v2 - - - id: tools - name: Install Tools - uses: taiki-e/install-action@v2 - with: - tool: cargo-llvm-cov, cargo-nextest - - - id: pretty-test - name: Install pretty-test - run: cargo install cargo-pretty-test - - - id: contract - name: Run contract - run: | - cargo test --lib --bins - cargo pretty-test --lib --bins - - - id: summary - name: Generate contract Summary - run: | - echo "### Living Contract! 
:rocket:" >> $GITHUB_STEP_SUMMARY - cargo pretty-test --lib --bins --color=never >> $GITHUB_STEP_SUMMARY - echo '```console' >> $GITHUB_STEP_SUMMARY - echo "$OUTPUT" >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml deleted file mode 100644 index 49eef67..0000000 --- a/.github/workflows/coverage.yaml +++ /dev/null @@ -1,56 +0,0 @@ -name: Coverage - -on: - push: - branches: - - develop - -env: - CARGO_TERM_COLOR: always - -jobs: - coverage: - name: Generate and Upload Coverage Report - environment: coverage - runs-on: ubuntu-latest - env: - CARGO_INCREMENTAL: "0" - RUSTFLAGS: "-Cinstrument-coverage" - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Install LLVM tools - run: sudo apt-get update && sudo apt-get install -y llvm - - - id: setup - name: Setup Toolchain - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: nightly - components: llvm-tools-preview - - - id: cache - name: Enable Workflow Cache - uses: Swatinem/rust-cache@v2 - - - id: tools - name: Install Tools - uses: taiki-e/install-action@v2 - with: - tool: grcov,cargo-llvm-cov - - - id: coverage - name: Generate Coverage Report - run: | - cargo test - cargo llvm-cov --lcov --output-path=./lcov.info - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v5 - with: - verbose: true - token: ${{ secrets.CODECOV_TOKEN }} - files: ${{ github.workspace }}/lcov.info - fail_ci_if_error: true diff --git a/.github/workflows/generate_coverage_pr.yaml b/.github/workflows/generate_coverage_pr.yaml deleted file mode 100644 index 47ab9ac..0000000 --- a/.github/workflows/generate_coverage_pr.yaml +++ /dev/null @@ -1,91 +0,0 @@ -name: Generate Coverage Report (PR) - -on: - pull_request: - branches: - - develop - -env: - CARGO_TERM_COLOR: always - -jobs: - coverage: - name: Generate Coverage Report - environment: coverage - runs-on: ubuntu-latest - env: - CARGO_INCREMENTAL: "0" - RUSTFLAGS: "-Cinstrument-coverage" - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Install LLVM tools - run: sudo apt-get update && sudo apt-get install -y llvm - - - id: setup - name: Setup Toolchain - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: nightly - components: llvm-tools-preview - - - id: cache - name: Enable Workflow Cache - uses: Swatinem/rust-cache@v2 - - - id: tools - name: Install Tools - uses: taiki-e/install-action@v2 - with: - tool: grcov,cargo-llvm-cov - - - id: build_and_test - name: Build and test - run: | - echo "${{ secrets.CODECOV_TOKEN }}" - cargo clean - cargo build - cargo test -- --nocapture - - - id: coverage - name: Generate Coverage Report - run: | - cargo llvm-cov --lcov --output-path=./lcov.info - - - name: Store PR number and commit SHA - run: | - echo "Storing PR number ${{ github.event.number }}" - echo "${{ github.event.number }}" > pr_number.txt - - echo "Storing commit SHA ${{ github.event.pull_request.head.sha }}" - echo "${{ github.event.pull_request.head.sha }}" > commit_sha.txt - - # Workaround for https://github.com/orgs/community/discussions/25220 - # Triggered sub-workflow is not able to detect the original commit/PR which is available - # in this workflow. 
- - name: Store PR number - uses: actions/upload-artifact@v4 - with: - name: pr_number - path: pr_number.txt - - - name: Store commit SHA - uses: actions/upload-artifact@v4 - with: - name: commit_sha - path: commit_sha.txt - - # This stores the coverage report in artifacts. The actual upload to Codecov - # is executed by a different workflow `coverage-report.yml`. The reason for this - # split is because `on.pull_request` workflows don't have access to secrets. - - name: Store coverage report in artifacts - uses: actions/upload-artifact@v4 - with: - name: codecov_report - path: ./lcov.info - - - run: | - echo "The coverage report was stored in Github artifacts." - echo "It will be uploaded to Codecov using [coverage-report.yml] workflow shortly." diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yml similarity index 100% rename from .github/workflows/testing.yaml rename to .github/workflows/testing.yml diff --git a/.github/workflows/upload_coverage_pr.yaml b/.github/workflows/upload_coverage_pr.yaml deleted file mode 100644 index 31cc31a..0000000 --- a/.github/workflows/upload_coverage_pr.yaml +++ /dev/null @@ -1,119 +0,0 @@ -name: Upload Coverage Report (PR) - -on: - # This workflow is triggered after every successfull execution - # of `Generate Coverage Report` workflow. - workflow_run: - workflows: ["Generate Coverage Report (PR)"] - types: - - completed - -permissions: - actions: write - contents: write - issues: write - pull-requests: write - -jobs: - coverage: - name: Upload Coverage Report - environment: coverage - runs-on: ubuntu-latest - steps: - - name: "Download existing coverage report" - id: prepare_report - uses: actions/github-script@v7 - with: - script: | - var fs = require('fs'); - - // List artifacts of the workflow run that triggered this workflow - var artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: context.payload.workflow_run.id, - }); - - let codecovReport = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "codecov_report"; - }); - - if (codecovReport.length != 1) { - throw new Error("Unexpected number of {codecov_report} artifacts: " + codecovReport.length); - } - - var download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: codecovReport[0].id, - archive_format: 'zip', - }); - fs.writeFileSync('codecov_report.zip', Buffer.from(download.data)); - - let prNumber = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "pr_number"; - }); - - if (prNumber.length != 1) { - throw new Error("Unexpected number of {pr_number} artifacts: " + prNumber.length); - } - - var download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: prNumber[0].id, - archive_format: 'zip', - }); - fs.writeFileSync('pr_number.zip', Buffer.from(download.data)); - - let commitSha = artifacts.data.artifacts.filter((artifact) => { - return artifact.name == "commit_sha"; - }); - - if (commitSha.length != 1) { - throw new Error("Unexpected number of {commit_sha} artifacts: " + commitSha.length); - } - - var download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: commitSha[0].id, - archive_format: 'zip', - }); - fs.writeFileSync('commit_sha.zip', Buffer.from(download.data)); - - - id: parse_previous_artifacts - run: | - unzip codecov_report.zip - 
unzip pr_number.zip
-          unzip commit_sha.zip
-
-          echo "Detected PR is: $(<pr_number.txt)"
-          echo "Detected commit is: $(<commit_sha.txt)"
-
-          echo "override_pr=$(<pr_number.txt)" >> "$GITHUB_OUTPUT"
-          echo "override_commit=$(<commit_sha.txt)" >> "$GITHUB_OUTPUT"
-
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ steps.parse_previous_artifacts.outputs.override_commit || '' }}
-          path: repo_root
-
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v5
-        with:
-          verbose: true
-          token: ${{ secrets.CODECOV_TOKEN }}
-          files: ${{ github.workspace }}/lcov.info
-          fail_ci_if_error: true
-          # Manual overrides for these parameters are needed because automatic detection
-          # in codecov-action does not work for non-`pull_request` workflows.
-          # In `main` branch push, these default to empty strings since we want to run
-          # the analysis on HEAD.
-          override_commit: ${{ steps.parse_previous_artifacts.outputs.override_commit || '' }}
-          override_pr: ${{ steps.parse_previous_artifacts.outputs.override_pr || '' }}
-          working-directory: ${{ github.workspace }}/repo_root
-          # Location where coverage report files are searched for
-          directory: ${{ github.workspace }}
diff --git a/.gitignore b/.gitignore
index 4e96c2e..c009900 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
-/.coverage
 /target
 be2json
 output.json
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 2dafed3..caa48dd 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,14 +2,12 @@
   "[rust]": {
     "editor.formatOnSave": true
   },
-  "[ignore]": {
-    "rust-analyzer.cargo.extraEnv": {
-      "RUSTFLAGS": "-Z profile -C codegen-units=1 -C inline-threshold=0 -C link-dead-code -C overflow-checks=off -C panic=abort -Z panic_abort_tests",
-      "RUSTDOCFLAGS": "-Z profile -C codegen-units=1 -C inline-threshold=0 -C link-dead-code -C overflow-checks=off -C panic=abort -Z panic_abort_tests",
-      "CARGO_INCREMENTAL": "0",
-      "RUST_BACKTRACE": "1"
-    }
-  },
+  "[ignore]": { "rust-analyzer.cargo.extraEnv" : {
+    "RUSTFLAGS": "-Z profile -C codegen-units=1 -C inline-threshold=0 -C link-dead-code -C overflow-checks=off -C panic=abort -Z panic_abort_tests",
+    "RUSTDOCFLAGS": "-Z profile -C codegen-units=1 -C inline-threshold=0 -C link-dead-code -C overflow-checks=off -C panic=abort -Z panic_abort_tests",
+    "CARGO_INCREMENTAL": "0",
+    "RUST_BACKTRACE": "1"
+  }},
   "rust-analyzer.checkOnSave": true,
   "rust-analyzer.check.command": "clippy",
   "rust-analyzer.check.allTargets": true,
@@ -33,4 +31,5 @@
   "evenBetterToml.formatter.trailingNewline": true,
   "evenBetterToml.formatter.reorderKeys": true,
   "evenBetterToml.formatter.reorderArrays": true,
+
 }
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index e5efd97..c01d76d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,15 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] +version = 3 [[package]] name = "anstream" @@ -47,7 +38,7 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -57,7 +48,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -76,33 +67,6 @@ dependencies = [ "wait-timeout", ] -[[package]] -name = "autocfg" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - -[[package]] -name = "bencode2json" -version = "0.1.0" -dependencies = [ - "assert_cmd", - "clap", - "derive_more", - "hex", - "predicates", - "ringbuffer", - "serde_json", - "tempfile", - "thiserror", -] - -[[package]] -name = "bitflags" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" - [[package]] name = "bstr" version = "1.10.0" @@ -114,12 +78,6 @@ dependencies = [ "serde", ] -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - [[package]] name = "clap" version = "4.5.17" @@ -166,27 +124,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" -[[package]] -name = "derive_more" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" -dependencies = [ - "derive_more-impl", -] - -[[package]] -name = "derive_more-impl" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "unicode-xid", -] - [[package]] name = "difflib" version = "0.4.0" @@ -199,31 +136,6 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "fastrand" -version = "2.1.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" - -[[package]] -name = "float-cmp" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" -dependencies = [ - "num-traits", -] - [[package]] name = "heck" version = "0.5.0" @@ -242,51 +154,18 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - [[package]] name = "libc" version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "normalize-line-endings" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "once_cell" -version = "1.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" - [[package]] name = "predicates" version = "3.1.2" @@ -295,10 +174,7 @@ checksum = "7e9086cc7640c29a356d1a29fd134380bee9d8f79a17410aa76e7ad295f42c97" dependencies = [ "anstyle", "difflib", - "float-cmp", - "normalize-line-endings", "predicates-core", - "regex", ] [[package]] @@ -335,59 +211,11 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "regex" -version = "1.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - [[package]] name = "regex-automata" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" - -[[package]] -name = "ringbuffer" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df6368f71f205ff9c33c076d170dd56ebf68e8161c733c0caa07a7a5509ed53" - -[[package]] -name = "rustix" -version = "0.38.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.52.0", -] - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "serde" @@ -409,18 +237,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.128" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - [[package]] name = "strsim" version = "0.11.1" @@ -438,19 +254,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tempfile" -version = "3.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" -dependencies = [ - "cfg-if", - "fastrand", - "once_cell", - "rustix", - "windows-sys 0.59.0", -] - [[package]] name = "termtree" version = "0.4.1" @@ -458,23 +261,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] -name = "thiserror" -version = "1.0.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +name = "torrust-bencode2json" +version = "0.1.0" dependencies = [ - "proc-macro2", - "quote", - "syn", + "assert_cmd", + "clap", + "hex", ] [[package]] @@ -483,12 +275,6 @@ version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - [[package]] name = "utf8parse" version = "0.2.2" @@ -513,15 +299,6 @@ dependencies = [ "windows-targets", ] -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-targets" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index 7be1f14..b76c046 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,25 +1,11 @@ [package] -authors = ["Jose Celano ", "Cameron Garnham "] -description = "A Bencoded to JSON converter library and console app with no intermediary in-memory structure." -edition = "2021" -exclude = [".*", "cSpell.json", "rustfmt.toml"] -license = "LGPL-3.0" -name = "bencode2json" -repository = "https://github.com/torrust/bencode2json" +name = "torrust-bencode2json" version = "0.1.0" - -[workspace] -members = ["examples/*"] +edition = "2021" [dependencies] -clap = { version = "4.5", features = ["derive"] } -derive_more = { version = "1.0", features = ["display"] } +clap = { version = "4.5.17", features = ["derive"] } hex = "0.4" -ringbuffer = { version = "0.15.0", features = ["alloc"] } -serde_json = "1.0.128" -thiserror = "1.0.64" [dev-dependencies] assert_cmd = "2.0" -predicates = "3.1.2" -tempfile = "3.13.0" diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 0a04128..0000000 --- a/LICENSE +++ /dev/null @@ -1,165 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. 
- - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. 
If you use option 4d1, you must provide the Installation
- Information in the manner specified by section 6 of the GNU GPL
- for conveying Corresponding Source.)
-
- 5. Combined Libraries.
-
- You may place library facilities that are a work based on the
-Library side by side in a single library together with other library
-facilities that are not Applications and are not covered by this
-License, and convey such a combined library under terms of your
-choice, if you do both of the following:
-
- a) Accompany the combined library with a copy of the same work based
- on the Library, uncombined with any other library facilities,
- conveyed under the terms of this License.
-
- b) Give prominent notice with the combined library that part of it
- is a work based on the Library, and explaining where to find the
- accompanying uncombined form of the same work.
-
- 6. Revised Versions of the GNU Lesser General Public License.
-
- The Free Software Foundation may publish revised and/or new versions
-of the GNU Lesser General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Library as you received it specifies that a certain numbered version
-of the GNU Lesser General Public License "or any later version"
-applies to it, you have the option of following the terms and
-conditions either of that published version or of any later version
-published by the Free Software Foundation. If the Library as you
-received it does not specify a version number of the GNU Lesser
-General Public License, you may choose any version of the GNU Lesser
-General Public License ever published by the Free Software Foundation.
-
- If the Library as you received it specifies that a proxy can decide
-whether future versions of the GNU Lesser General Public License shall
-apply, that proxy's public statement of acceptance of any version is
-permanent authorization for you to choose that version for the
-Library.
diff --git a/NOTAS.txt b/NOTAS.txt
new file mode 100644
index 0000000..4877f57
--- /dev/null
+++ b/NOTAS.txt
@@ -0,0 +1,60 @@
+ // It looks like the C example uses a stack, but only to know
+ // what kind of structure is being parsed.
+ // It uses state conventions that indicate whether a list or a
+ // dictionary is being parsed.
+ // I can use an enum for the states. Use the same ones as the C program, but
+ // with names. In fact, I can write the same implementation.
+ // The nice thing about that stack is that it only needs a single byte for the enum.
+ // It really consumes very little memory.
+ // It is like the parser's state machine, where there are sub-states.
+ // In the end we could even draw the diagram. I think
+ // some kind of diagram was used in compilers to represent the
+ // grammar. What I am not sure about is what a token is.
+ // In the end the loop parses the tokens, but it also builds the grammar.
+ // In fact, the grammar is the other state machine.
+ // Actually, the language has a single element, the expression,
+ // which is recursive and ends in integer and string elements.
+ // Well, it is a language with a single element that can have 4 types.
+
+- I could count the number of items in the list instead of having two states; the state would hold a counter (a sketch of this idea follows).
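+ A minimal sketch of that counter-based state (the names and fields here are illustrative, not a final design):
+
+pub enum State {
+    Initial,
+    ParsingList { items_written: u64 },
+    ParsingDictionary { entries_written: u64 },
+}
+
+ // Before writing a new item, peek the top of the stack: if the counter
+ // is 0 write no separator, otherwise write ","; then increment the
+ // counter instead of swapping between First/Next states.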
+- If I do the data-provider thing for the tests, the structure can be a struct
+that can contain one case (title, bencode, expected json) or a vector of this enum,
+so that the cases can be nested (grouped as they are now).
+
+- State ->
+    StackValue
+    ParsingState
+
+pub enum State {
+    ParsingInteger,
+    ParsingString(ParsingString),
+    ParsingList(ParsingList),
+    ParsingDictionary(ParsingDictionary),
+}
+
+pub enum ParsingState {
+    Integer,
+    String(ParsingString),
+    List(ParsingList),
+    Dictionary(ParsingDictionary),
+}
+
+- Move the enums and the stack to a stack module.
+- Extract the duplicated "begin string" code, because I am going to introduce a lot of duplicates for the second key-value pair.
+- Also the actions that run after parsing the basic integer or string types, to update the state.
+- I could count the number of items in the list instead of having two states; the state would hold a counter.
+  I think the states can be simplified by removing the difference between the first and subsequent items in lists and dictionaries.
+  The parent element can hold a value on the stack with how many elements it has (items or key-value pairs).
+  When a new item starts being parsed, we check on the stack how many elements the parent has, to know whether it is the first one or not.
+  I did not do it this way because manipulating the top of the stack was a pain. But once it is refactored maybe it is not such a pain,
+  because we can ask whether the top of the stack is a list and it is empty, or have it return the number of elements if the top is a list, etc.
+
+ - Do a simple dictionary case first, in case it breaks the whole scheme. Then
+ I will continue with the list cases and more cases. But better to finish this last
+ type.
+- Continue with nested lists.
+- It would be nice to run the test battery against other implementations, such as the C one.
+  For that it would be good to have a list of cases (a data provider for the unit tests),
+  so the same list can be reused for the E2E test of the other program.
+
+
diff --git a/README.md b/README.md
index 5285fb7..50ccd0f 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,8 @@
-# Bencode2Json
-
-[![Testing](https://github.com/torrust/bencode2json/actions/workflows/testing.yaml/badge.svg)](https://github.com/torrust/bencode2json/actions/workflows/testing.yaml) [![codecov](https://codecov.io/gh/torrust/bencode2json/branch/develop/graph/badge.svg?token=G5IK5HV2EW)](https://codecov.io/gh/torrust/bencode2json)
+# Torrust Bencode2Json
 
 A lib and console command to convert from bencoded data to JSON format.
 
-Output is similar to: . When a bencoded string (byte string) contains only valid UTF-8 chars, those chars will print to the output. If the string contains non valid UTF-8 chars, them the string will be printed in hexadecimal. For example:
+Output is similar to: . When a bencoded string (byte string) contains only valid UTF-8 chars, the output will print those chars. If the string contains non valid UTF-8 chars, then the string will be printed in hexadecimal, as sketched below.
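A minimal sketch of this decision, mirroring what the (now removed) `src/generators/json.rs` generator did with `str::from_utf8`, `hex::encode`, and `serde_json::to_string`; the helper function name is illustrative, not part of the crate's API:

```rust
use core::str;

/// Renders a bencoded byte string as a JSON string: valid UTF-8 is kept
/// as-is, anything else is printed as hexadecimal digits.
fn byte_string_to_json(bytes: &[u8]) -> String {
    let printable = match str::from_utf8(bytes) {
        Ok(utf8) => utf8.to_owned(),  // valid UTF-8: print the chars
        Err(_) => hex::encode(bytes), // invalid UTF-8: print the bytes as hex
    };
    // serde_json escapes quotes and control characters for us.
    serde_json::to_string(&printable).expect("a String always serializes to JSON")
}
```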
For example:
 
 Bencoded string (with 2 bytes):
 
@@ -20,102 +18,30 @@ JSON string:
 
 More info: 
 
-## Console
+## Run
 
 Run the binary with input and output file:
 
 ```console
-cargo run -- -i ./tests/fixtures/sample.bencode -o output.json
+cargo run -- -i tests/sample.bencode -o output.json
 ```
 
-Run the binary with stdin and stdout (UTF-8):
+Run the binary with stdin and stdout:
 
 ```console
 echo "4:spam" | cargo run
-"spam"
+"spam"
 ```
 
-Run the binary with stdin and stdout (non UTF-8):
+Run the binary with stdin and stdout:
 
 ```console
 printf "d3:bar2:\xFF\xFEe" | cargo run
-{"bar":"fffe"}
-```
-
-```console
-printf "d2:\xFF\xFE3:bare" | cargo run
-{"fffe":"bar"}
+{"bar":"fffe"}
 ```
 
 > NOTICE: We need to escape the two bytes `FF` and `FE` with `\x` inside the string.
 
-More examples:
-
-```console
-cat ./tests/fixtures/sample.bencode | cargo run
-["spam"]
-```
-
-More examples with invalid Bencode:
-
-```console
-printf "i42" | cargo run
-Error: Unexpected end of input parsing integer; read context: input pos 3, latest input bytes dump: [105, 52, 50] (UTF-8 string: `i42`); write context: output pos 2, latest output bytes dump: [52, 50] (UTF-8 string: `42`)
-```
-
-```console
-printf "3:ab" | cargo run
-Error: Unexpected end of input parsing string value; read context: input pos 4, latest input bytes dump: [51, 58, 97, 98] (UTF-8 string: `3:ab`)
-```
-
-```console
-echo "i00e" | cargo run
-Error: Leading zeros in integers are not allowed, for example b'i00e'; read context: byte `48` (char: `0`), input pos 3, latest input bytes dump: [105, 48, 48] (UTF-8 string: `i00`)
-```
-
-Generating pretty JSON with [jq][jq]:
-
-```console
-echo "d3:foold3:bari42eeee" | cargo run | jq
-```
-
-```json
-{
-  "foo": [
-    {
-      "bar": 42
-    }
-  ]
-}
-```
-
-You can install the binary with:
-
-```console
-cargo install bencode2json
-```
-
-Or by using [cargo-binstall](https://github.com/cargo-bins/cargo-binstall):
-
-```console
-cargo binstall bencode2json
-```
-
-## Library
-
-You can install the library with:
-
-```console
-cargo add bencode2json
-```
-
-There two ways of using the library:
-
-- With high-level wrappers.
-- With the low-level generators.
-
-See [examples](./examples/).
-
 ## Test
 
 Run unit and integration tests:
 
@@ -124,7 +50,7 @@ Run unit and integration tests:
 cargo test
 ```
 
-We have included a copy of another C implementation ["be2json.c"](./contrib/be2json.c). You can execute it with the following:
+We have included a copy of another [C implementation "be2json.c"](./contrib/be2json.c). You can execute it with the following:
 
 ```console
 gcc ./contrib/be2json.c -o be2json
 chmod +x ./be2json
 echo "4:spam" | ./be2json
 ```
 
-You can generate the coverage report with:
-
-```console
-cargo cov
-```
-
-## Performance
-
-In terms of memory usage this implementation consumes at least the size of the
-biggest bencoded integer or string. The string and integer parsers keeps all the bytes in memory until
-it parses the whole value.
-
-The library also wraps the input and output streams in a [BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html)
- and [BufWriter](https://doc.rust-lang.org/std/io/struct.BufWriter.html) because it can be excessively inefficient to work directly with something that implements [Read](https://doc.rust-lang.org/std/io/trait.Read.html) or [Write](https://doc.rust-lang.org/std/io/trait.Write.html).
-
 ## TODO
 
-- [ ] Counter for number of items in a list for debugging and errors.
-- [ ] Fuzz testing: Generate random valid bencoded values.
-- [ ] Install tracing crate. Add verbose mode that enables debugging.
-- [ ] Option to check if the final JSON it's valid at the end of the process.
-- [ ] Benchmarking for this implementation and the original C implementation.
+- Counter for number of items in a list for debugging and errors.
+- Return errors with position.
+- Use Property-Based Testing. Generate random valid bencoded values.
+- Refactor: Use only one test with a data provider containing all cases.
+- Install tracing crate. Add verbose mode that enables debugging.
+- Option to store the JSON and check if it's valid at the end of the process.
 
 ## Alternatives
 
@@ -181,29 +93,3 @@ This implementation is basically a port to Rust from
-
-## Credits
-
-The idea of using `<hex>ff</hex>` for non UTF-8 string came from the
-[bencode online](https://github.com/Chocobo1/bencode_online) repo by [@Chocobo1](https://github.com/Chocobo1).
-
-We also want to thank [@da2ce7](https://github.com/da2ce7) for his feedback and review that has improved this project significantly.
-
-## License
-
-**Copyright (c) 2024 The Torrust Developers.**
-
-This program is free software: you can redistribute it and/or modify it under the terms of the [GNU Lesser General Public License][LGPL_3_0] as published by the [Free Software Foundation][FSF], version 3.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the [GNU Lesser General Public License][LGPL_3_0] for more details.
-
-You should have received a copy of the *GNU Lesser General Public License* along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-Some files include explicit copyright notices and/or license notices.
-
-### Legacy Exception
-
-For prosperity, versions of Bencode2Json that are older than five years are automatically granted the [MIT-0][MIT_0] license in addition to the existing [LGPL-3.0-only][LGPL_3_0] license.
-
-[LGPL_3_0]: ./LICENSE
-[MIT_0]: ./docs/licenses/LICENSE-MIT_0
-[FSF]: https://www.fsf.org/
-[jq]: https://jqlang.github.io/jq/
diff --git a/docs/licenses/LICENSE-MIT_0 b/docs/licenses/LICENSE-MIT_0
deleted file mode 100644
index fc06cc4..0000000
--- a/docs/licenses/LICENSE-MIT_0
+++ /dev/null
@@ -1,14 +0,0 @@
-MIT No Attribution
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this
-software and associated documentation files (the "Software"), to deal in the Software
-without restriction, including without limitation the rights to use, copy, modify,
-merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/examples/parser_file_in_file_out.rs b/examples/parser_file_in_file_out.rs
deleted file mode 100644
index 06acb13..0000000
--- a/examples/parser_file_in_file_out.rs
+++ /dev/null
@@ -1,68 +0,0 @@
-//! Run with:
-//!
-//! ```not_rust
-//! cargo run --example parser_file_in_file_out -- -i ./tests/fixtures/sample.bencode -o output.json
-//! ```
-//!
-//! It should create the `output.json` with this content: `["spam"]`.
-use std::{
-    fs::File,
-    io::{Read, Write},
-};
-
-use bencode2json::generators::json::Generator;
-use clap::{Arg, Command};
-
-fn main() {
-    let matches = Command::new("parser_file_in_file_out")
-        .version("0.1.0")
-        .author("Torrust Organization")
-        .about("Converts Bencode to JSON")
-        .arg(
-            Arg::new("input")
-                .short('i')
-                .long("input")
-                .help("Input file"),
-        )
-        .arg(
-            Arg::new("output")
-                .short('o')
-                .long("output")
-                .help("Output file"),
-        )
-        .get_matches();
-
-    // Handle input stream (file or stdin)
-    let input: Box<dyn Read> = if let Some(input_path) = matches.get_one::<String>("input") {
-        match File::open(input_path) {
-            Ok(file) => Box::new(file),
-            Err(e) => {
-                eprintln!("Error: {e}");
-                std::process::exit(1);
-            }
-        }
-    } else {
-        eprintln!("Error: missing input file path. Provide a file path with -i or --input");
-        std::process::exit(1);
-    };
-
-    // Handle output stream (file or stdout)
-    let mut output: Box<dyn Write> = if let Some(output_path) = matches.get_one::<String>("output")
-    {
-        match File::create(output_path) {
-            Ok(file) => Box::new(file),
-            Err(e) => {
-                eprintln!("Error: {e}");
-                std::process::exit(1);
-            }
-        }
-    } else {
-        eprintln!("Error: missing output file path. Provide a file path with -o or --output");
-        std::process::exit(1);
-    };
-
-    if let Err(e) = Generator::new(input).write_bytes(&mut output) {
-        eprintln!("Error: {e}");
-        std::process::exit(1);
-    }
-}
diff --git a/examples/parser_stdin_stdout.rs b/examples/parser_stdin_stdout.rs
deleted file mode 100644
index 01456cb..0000000
--- a/examples/parser_stdin_stdout.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-//! Run with:
-//!
-//! ```not_rust
-//! echo "4:spam" | cargo run --example parser_stdin_stdout
-//! ```
-//!
-//! It prints "spam".
-use std::io;
-
-use bencode2json::generators::json::Generator;
-
-fn main() {
-    let input = Box::new(io::stdin());
-    let mut output = Box::new(io::stdout());
-
-    if let Err(e) = Generator::new(input).write_bytes(&mut output) {
-        eprintln!("Error: {e}");
-        std::process::exit(1);
-    }
-}
diff --git a/examples/parser_string_in_string_out.rs b/examples/parser_string_in_string_out.rs
deleted file mode 100644
index 6c7bf7c..0000000
--- a/examples/parser_string_in_string_out.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-//! Run with:
-//!
-//! ```not_rust
-//! cargo run --example parser_string_in_string_out
-//! ```
-//!
-//! It prints "spam".
-use bencode2json::generators::json::Generator; - -fn main() { - let input = "4:spam".to_string(); - let mut output = String::new(); - - if let Err(e) = Generator::new(input.as_bytes()).write_str(&mut output) { - eprintln!("Error: {e}"); - std::process::exit(1); - } - - println!("{output}"); -} diff --git a/examples/parser_string_in_vec_out.rs b/examples/parser_string_in_vec_out.rs deleted file mode 100644 index 3d89dfd..0000000 --- a/examples/parser_string_in_vec_out.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Run with: -//! -//! ```not_rust -//! cargo run --example parser_string_in_vec_out -//! ``` -//! -//! It prints "spam". -use bencode2json::generators::json::Generator; - -fn main() { - let input = "4:spam".to_string(); - let mut output = Vec::new(); - - if let Err(e) = Generator::new(input.as_bytes()).write_bytes(&mut output) { - eprintln!("Error: {e}"); - std::process::exit(1); - } - - println!("{}", String::from_utf8_lossy(&output)); -} diff --git a/examples/parser_vec_in_string_out.rs b/examples/parser_vec_in_string_out.rs deleted file mode 100644 index 1388f51..0000000 --- a/examples/parser_vec_in_string_out.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Run with: -//! -//! ```not_rust -//! cargo run --example parser_vec_in_string_out -//! ``` -//! -//! It prints "spam". -use bencode2json::generators::json::Generator; - -fn main() { - let input = b"4:spam".to_vec(); - let mut output = String::new(); - - if let Err(e) = Generator::new(&input[..]).write_str(&mut output) { - eprintln!("Error: {e}"); - std::process::exit(1); - } - - println!("{output}"); -} diff --git a/examples/parser_vec_in_vec_out.rs b/examples/parser_vec_in_vec_out.rs deleted file mode 100644 index d678bff..0000000 --- a/examples/parser_vec_in_vec_out.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Run with: -//! -//! ```not_rust -//! cargo run --example parser_vec_in_vec_out -//! ``` -//! -//! It prints "spam". -use bencode2json::generators::json::Generator; - -fn main() { - let input = b"4:spam".to_vec(); - let mut output = Vec::new(); - - if let Err(e) = Generator::new(&input[..]).write_bytes(&mut output) { - eprintln!("Error: {e}"); - std::process::exit(1); - } - - println!("{}", String::from_utf8_lossy(&output)); -} diff --git a/examples/try_bencode_to_json.rs b/examples/try_bencode_to_json.rs deleted file mode 100644 index adf0323..0000000 --- a/examples/try_bencode_to_json.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! Run with: -//! -//! ```not_rust -//! cargo run --example try_bencode_to_json -//! 
``` -use bencode2json::try_bencode_to_json; - -fn main() { - let result = try_bencode_to_json(b"d4:spam4:eggse").unwrap(); - - assert_eq!( - result, - r#"{"spam":"eggs"}"# - ); -} diff --git a/output.json b/output.json new file mode 100644 index 0000000..ea381a5 --- /dev/null +++ b/output.json @@ -0,0 +1 @@ +["spam"] diff --git a/project-words.txt b/project-words.txt index a446faa..cf95d51 100644 --- a/project-words.txt +++ b/project-words.txt @@ -1,60 +1,13 @@ -aliced -alicedee alicee -aliceee alicei -alicelee -aliceli -bardee -bardei -baree -bareee -barle -barlee -barlei -barli bencoded -binstall bobe -bobee -Chocobo -codecov -ddei Deque -dlei -edee -eedee -eelee -eeli eggse -elee fdfc fffe fffefdfc -foodee -foodeee -foodeeee -fooe fooi -foold -foolee -fooli -ldedee -ldee -ldei -ldelee -ldeli -lled -lledee llee -llei -llelee llleee -ñandú -println -ringbuffer spame -spamee -tempdir -tempfile -thiserror diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index 2f581a3..0000000 --- a/src/error.rs +++ /dev/null @@ -1,278 +0,0 @@ -//! Parser errors. -use core::str; -use std::{ - fmt::{self}, - io, -}; - -use thiserror::Error; - -use crate::rw; - -use super::generators::BencodeType; - -/// Errors that can occur while parsing a bencoded value. -#[derive(Debug, Error)] -pub enum Error { - /// I/O error. - #[error("I/O error: {0}")] - Io(#[from] io::Error), - - /// R/W error. - #[error("R/W error: {0}")] - Rw(#[from] rw::error::Error), - - /// Read byte after peeking does match peeked byte. - /// - /// The main parser peeks one byte ahead to know what kind of bencoded value - /// is being parsed. If the byte read after peeking does not match the - /// peeked byte, it means the input is being consumed somewhere else. - #[error("Read byte after peeking does match peeked byte; {0}")] - ReadByteAfterPeekingDoesMatchPeekedByte(ReadContext), - - /// Unrecognized first byte for new bencoded value. - /// - /// The main parser peeks one byte ahead to know what kind of bencoded value - /// is being parsed. This error is raised when the peeked byte is not a - /// valid first byte for a bencoded value. - #[error("Unrecognized first byte for new bencoded value; {0}")] - UnrecognizedFirstBencodeValueByte(ReadContext), - - // Integers - /// Unexpected byte parsing integer. - /// - /// The main parser parses integers by reading bytes until it finds the - /// end of the integer. This error is raised when the byte read is not a - /// valid byte for an integer bencoded value. - #[error("Unexpected byte parsing integer; {0}")] - UnexpectedByteParsingInteger(ReadContext), - - /// Unexpected end of input parsing integer. - /// - /// The input ends before the integer ends. - #[error("Unexpected end of input parsing integer; {0}")] - UnexpectedEndOfInputParsingInteger(ReadContext), - - /// Leading zeros in integers are not allowed, for example b'i00e'. - #[error("Leading zeros in integers are not allowed, for example b'i00e'; {0}")] - LeadingZerosInIntegersNotAllowed(ReadContext), - - // Strings - /// Invalid string length byte, expected a digit. - /// - /// The string parser found an invalid byte for the string length. The - /// length can only be made of digits (0-9). - #[error("Invalid string length byte, expected a digit; {0}")] - InvalidStringLengthByte(ReadContext), - - /// Unexpected end of input parsing string length. - /// - /// The input ends before the string length ends. 
-    #[error("Unexpected end of input parsing string length; {0}")]
-    UnexpectedEndOfInputParsingStringLength(ReadContext),
-
-    /// Unexpected end of input parsing string value.
-    ///
-    /// The input ends before the string value ends.
-    #[error("Unexpected end of input parsing string value; {0}")]
-    UnexpectedEndOfInputParsingStringValue(ReadContext),
-
-    // Lists
-    /// Unexpected end of input parsing list. Expecting first list item or list end.
-    #[error(
-        "Unexpected end of input parsing list. Expecting first list item or list end; {0}; {1}"
-    )]
-    UnexpectedEndOfInputExpectingFirstListItemOrEnd(ReadContext, WriteContext),
-
-    /// Unexpected end of input parsing list. Expecting next list item.
-    #[error("Unexpected end of input parsing list. Expecting next list item; {0}; {1}")]
-    UnexpectedEndOfInputExpectingNextListItem(ReadContext, WriteContext),
-
-    // Dictionaries
-    /// Unexpected end of input parsing dictionary. Expecting first dictionary field or dictionary end.
-    #[error("Unexpected end of input parsing dictionary. Expecting first dictionary field or dictionary end; {0}; {1}")]
-    UnexpectedEndOfInputExpectingFirstDictFieldOrEnd(ReadContext, WriteContext),
-
-    /// Unexpected end of input parsing dictionary. Expecting dictionary field value.
-    #[error(
-        "Unexpected end of input parsing dictionary. Expecting dictionary field value; {0}; {1}"
-    )]
-    UnexpectedEndOfInputExpectingDictFieldValue(ReadContext, WriteContext),
-
-    /// Unexpected end of input parsing dictionary. Expecting dictionary field key or end.
-    #[error(
-        "Unexpected end of input parsing dictionary. Expecting dictionary field key or end; {0}; {1}"
-    )]
-    UnexpectedEndOfInputExpectingDictFieldKeyOrEnd(ReadContext, WriteContext),
-
-    /// Unexpected end of dictionary. Premature end of dictionary.
-    #[error("Unexpected end of dictionary. Premature end of dictionary; {0}; {1}")]
-    PrematureEndOfDict(ReadContext, WriteContext),
-
-    /// Expected string for dictionary field key.
-    #[error("Expected string for dictionary field key, but got: {0}, {1}")]
-    ExpectedStringForDictKeyGot(BencodeType, ReadContext, WriteContext),
-
-    // List and dictionaries
-    /// Unexpected end of list or dict. No matching start for the list or dict end.
-    #[error(
-        "Unexpected end of list or dict. No matching start for the list or dict end: {0}, {1}"
-    )]
-    NoMatchingStartForListOrDictEnd(ReadContext, WriteContext),
-}
-
-/// The reader context when the error occurred.
-#[derive(Debug)]
-pub struct ReadContext {
-    /// The read byte that caused the error if any.
-    pub byte: Option<u8>,
-
-    /// The position of the read byte that caused the error.
-    pub pos: u64,
-
-    /// The latest bytes read from input.
-    pub latest_bytes: Vec<u8>,
-}
-
-impl fmt::Display for ReadContext {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "read context:")?;
-
-        match self.byte {
-            None => {}
-            Some(byte) => write!(f, " byte `{}` (char: `{}`),", byte, byte as char)?,
-        }
-
-        write!(
-            f,
-            " input pos {}, latest input bytes dump: {:?}",
-            self.pos, self.latest_bytes
-        )?;
-
-        if let Ok(utf8_string) = str::from_utf8(&self.latest_bytes) {
-            write!(f, " (UTF-8 string: `{utf8_string}`)")?;
-        }
-
-        Ok(())
-    }
-}
-
-/// The writer context when the error occurred.
-#[derive(Debug)]
-pub struct WriteContext {
-    /// The written byte that caused the error if any.
-    pub byte: Option<u8>,
-
-    /// The position of the written byte that caused the error.
-    pub pos: u64,
-
-    /// The latest bytes written to the output.
-    pub latest_bytes: Vec<u8>,
-}
-
-impl fmt::Display for WriteContext {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "write context:")?;
-
-        match self.byte {
-            None => {}
-            Some(byte) => write!(f, " byte `{}` (char: `{}`),", byte, byte as char)?,
-        }
-
-        write!(
-            f,
-            " output pos {}, latest output bytes dump: {:?}",
-            self.pos, self.latest_bytes
-        )?;
-
-        if let Ok(utf8_string) = str::from_utf8(&self.latest_bytes) {
-            write!(f, " (UTF-8 string: `{utf8_string}`)")?;
-        }
-
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    mod for_read_context {
-        use crate::error::ReadContext;
-
-        #[test]
-        fn it_should_display_the_read_context() {
-            let read_context = ReadContext {
-                byte: Some(b'a'),
-                pos: 10,
-                latest_bytes: vec![b'a', b'b', b'c'],
-            };
-
-            assert_eq!( read_context.to_string(),"read context: byte `97` (char: `a`), input pos 10, latest input bytes dump: [97, 98, 99] (UTF-8 string: `abc`)");
-        }
-
-        #[test]
-        fn it_should_not_display_the_byte_if_it_is_none() {
-            let read_context = ReadContext {
-                byte: None,
-                pos: 10,
-                latest_bytes: vec![b'a', b'b', b'c'],
-            };
-
-            assert_eq!(read_context.to_string(), "read context: input pos 10, latest input bytes dump: [97, 98, 99] (UTF-8 string: `abc`)");
-        }
-
-        #[test]
-        fn it_should_not_display_the_latest_bytes_as_string_if_it_is_not_a_valid_string() {
-            let read_context = ReadContext {
-                byte: None,
-                pos: 10,
-                latest_bytes: vec![b'\xFF', b'\xFE'],
-            };
-
-            assert_eq!(
-                read_context.to_string(),
-                "read context: input pos 10, latest input bytes dump: [255, 254]"
-            );
-        }
-    }
-
-    mod for_write_context {
-        use crate::error::WriteContext;
-
-        #[test]
-        fn it_should_display_the_read_context() {
-            let read_context = WriteContext {
-                byte: Some(b'a'),
-                pos: 10,
-                latest_bytes: vec![b'a', b'b', b'c'],
-            };
-
-            assert_eq!( read_context.to_string(),"write context: byte `97` (char: `a`), output pos 10, latest output bytes dump: [97, 98, 99] (UTF-8 string: `abc`)");
-        }
-
-        #[test]
-        fn it_should_not_display_the_byte_if_it_is_none() {
-            let read_context = WriteContext {
-                byte: None,
-                pos: 10,
-                latest_bytes: vec![b'a', b'b', b'c'],
-            };
-
-            assert_eq!(read_context.to_string(), "write context: output pos 10, latest output bytes dump: [97, 98, 99] (UTF-8 string: `abc`)");
-        }
-
-        #[test]
-        fn it_should_not_display_the_latest_bytes_as_string_if_it_is_not_a_valid_string() {
-            let read_context = WriteContext {
-                byte: None,
-                pos: 10,
-                latest_bytes: vec![b'\xFF', b'\xFE'],
-            };
-
-            assert_eq!(
-                read_context.to_string(),
-                "write context: output pos 10, latest output bytes dump: [255, 254]"
-            );
-        }
-    }
-}
diff --git a/src/generators/json.rs b/src/generators/json.rs
deleted file mode 100644
index ccfa6af..0000000
--- a/src/generators/json.rs
+++ /dev/null
@@ -1,2077 +0,0 @@
-//! Json generator for bencoded data.
-use core::str;
-use std::{
-    fmt::Write as FmtWrite,
-    io::{Read, Write as IoWrite},
-};
-
-use super::{
-    stack::{Stack, State},
-    BencodeType,
-};
-use tokenizer::{BencodeToken, Tokenizer};
-
-use crate::{
-    error::{self, ReadContext, WriteContext},
-    rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer},
-    tokenizer,
-};
-
-pub struct Generator<R: Read> {
-    tokenizer: Tokenizer<R>,
-    num_processed_tokens: u64,
-    stack: Stack,
-}
-
-impl<R: Read> Generator<R> {
-    const JSON_ARRAY_BEGIN: u8 = b'[';
-    const JSON_ARRAY_ITEMS_SEPARATOR: u8 = b',';
-    const JSON_ARRAY_END: u8 = b']';
-
-    const JSON_OBJ_BEGIN: u8 = b'{';
-    const JSON_OBJ_FIELDS_SEPARATOR: u8 = b',';
-    const JSON_OBJ_FIELD_KEY_VALUE_SEPARATOR: u8 = b':';
-    const JSON_OBJ_END: u8 = b'}';
-
-    pub fn new(reader: R) -> Self {
-        Generator {
-            tokenizer: Tokenizer::new(reader),
-            num_processed_tokens: 1,
-            stack: Stack::default(),
-        }
-    }
-
-    /// It parses a bencoded value read from the input and writes the
-    /// corresponding JSON UTF-8 string value to the output.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if it can't read from the input or write to the
-    /// output.
-    ///
-    /// # Panics
-    ///
-    /// Will panic if it receives a byte that isn't a valid begin or end of a
-    /// bencoded type: integer, string, list or dictionary.
-    pub fn write_str<W: FmtWrite>(&mut self, writer: W) -> Result<(), error::Error> {
-        let mut writer = StringWriter::new(writer);
-        self.parse(&mut writer)
-    }
-
-    /// It parses a bencoded value read from the input and writes the
-    /// corresponding JSON UTF-8 string value as bytes to the output.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if it can't read from the input or write to the
-    /// output.
-    ///
-    /// # Panics
-    ///
-    /// Will panic if it receives a byte that isn't a valid begin or end of a
-    /// bencoded type: integer, string, list or dictionary.
-    pub fn write_bytes<W: IoWrite>(&mut self, writer: W) -> Result<(), error::Error> {
-        let mut writer = ByteWriter::new(writer);
-        self.parse(&mut writer)
-    }
-
-    /// It parses a bencoded value read from the input and writes the
-    /// corresponding JSON value to the output.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if:
-    ///
-    /// - It can't read from the input or write to the output.
-    /// - The input is invalid Bencode.
-    fn parse<W: Writer>(&mut self, writer: &mut W) -> Result<(), error::Error> {
-        while let Some(token) = self.tokenizer.next_token()? {
-            match token {
-                BencodeToken::Integer(integer_bytes) => {
-                    self.begin_bencoded_value(BencodeType::Integer, writer)?;
-                    // todo: add `write_bytes` to the writer.
-                    for byte in integer_bytes {
-                        writer.write_byte(byte)?;
-                    }
-                }
-                BencodeToken::String(string_bytes) => {
-                    self.begin_bencoded_value(BencodeType::String, writer)?;
-
-                    let json_string = match str::from_utf8(&string_bytes) {
-                        // The string only contains valid UTF-8 chars -> print it as it is.
-                        Ok(string) => string.to_owned(),
-                        // The string contains invalid UTF-8 chars -> print it as hex bytes.
-                        Err(_) => hex::encode(string_bytes),
-                    };
-
-                    writer.write_str(
-                        &serde_json::to_string(&json_string)
-                            .expect("Failed to serialize to JSON. This should not happen because non UTF-8 bencoded strings are serialized as hex bytes"),
-                    )?;
-                }
-                BencodeToken::BeginList => {
-                    self.begin_bencoded_value(BencodeType::List, writer)?;
-                    writer.write_byte(Self::JSON_ARRAY_BEGIN)?;
-                    self.stack.push(State::ExpectingFirstListItemOrEnd);
-                }
-                BencodeToken::BeginDict => {
-                    self.begin_bencoded_value(BencodeType::Dict, writer)?;
-                    writer.write_byte(Self::JSON_OBJ_BEGIN)?;
-                    self.stack.push(State::ExpectingFirstDictFieldOrEnd);
-                }
-                BencodeToken::EndListOrDict => {
-                    self.end_list_or_dict(writer)?;
-                }
-                BencodeToken::LineBreak => {
-                    // Ignore line breaks at the beginning, the end, or between values.
-                }
-            }
-
-            self.num_processed_tokens += 1;
-        }
-
-        self.check_bad_end_stack_state(writer)
-    }
-
-    /// It updates the stack state and prints the delimiters when needed.
-    ///
-    /// Called when the first byte of a bencoded value (integer, string, list
-    /// or dict) is received.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if the writer can't write to the output.
-    pub fn begin_bencoded_value<W: Writer>(
-        &mut self,
-        bencode_type: BencodeType,
-        writer: &mut W,
-    ) -> Result<(), error::Error> {
-        match self.stack.peek() {
-            State::Initial => {}
-            State::ExpectingFirstListItemOrEnd => {
-                self.stack.swap_top(State::ExpectingNextListItem);
-            }
-            State::ExpectingNextListItem => {
-                writer.write_byte(Self::JSON_ARRAY_ITEMS_SEPARATOR)?;
-            }
-            State::ExpectingFirstDictFieldOrEnd => {
-                if bencode_type != BencodeType::String {
-                    return Err(error::Error::ExpectedStringForDictKeyGot(
-                        bencode_type,
-                        ReadContext {
-                            byte: None,
-                            pos: self.tokenizer.input_byte_counter(),
-                            latest_bytes: self.tokenizer.captured_bytes(),
-                        },
-                        WriteContext {
-                            byte: None,
-                            pos: writer.output_byte_counter(),
-                            latest_bytes: writer.captured_bytes(),
-                        },
-                    ));
-                }
-
-                self.stack.swap_top(State::ExpectingDictFieldValue);
-            }
-            State::ExpectingDictFieldValue => {
-                writer.write_byte(Self::JSON_OBJ_FIELD_KEY_VALUE_SEPARATOR)?;
-
-                self.stack.swap_top(State::ExpectingDictFieldKeyOrEnd);
-            }
-            State::ExpectingDictFieldKeyOrEnd => {
-                if bencode_type != BencodeType::String {
-                    return Err(error::Error::ExpectedStringForDictKeyGot(
-                        bencode_type,
-                        ReadContext {
-                            byte: None,
-                            pos: self.tokenizer.input_byte_counter(),
-                            latest_bytes: self.tokenizer.captured_bytes(),
-                        },
-                        WriteContext {
-                            byte: None,
-                            pos: writer.output_byte_counter(),
-                            latest_bytes: writer.captured_bytes(),
-                        },
-                    ));
-                }
-
-                writer.write_byte(Self::JSON_OBJ_FIELDS_SEPARATOR)?;
-
-                self.stack.swap_top(State::ExpectingDictFieldValue);
-            }
-        }
-
-        Ok(())
-    }
-
-    /// It updates the stack state and prints the delimiters when needed.
-    ///
-    /// Called when the end-of-list or end-of-dictionary byte is received. The
-    /// end of integers or strings is processed while parsing them.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if the writer can't write to the output.
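-    ///
-    /// A sketch of the intended behaviour (hypothetical trace, using the
-    /// one-letter state names documented in `generators::stack`):
-    ///
-    /// ```text
-    /// input `li42ee`:
-    ///   l    -> push L, write `[`      (stack: [I, L])
-    ///   i42e -> swap to M, write `42`  (stack: [I, M])
-    ///   e    -> end_list_or_dict: write `]` and pop  (stack: [I])
-    /// ```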
-    pub fn end_list_or_dict<W: Writer>(&mut self, writer: &mut W) -> Result<(), error::Error> {
-        match self.stack.peek() {
-            State::ExpectingFirstListItemOrEnd | State::ExpectingNextListItem => {
-                writer.write_byte(Self::JSON_ARRAY_END)?;
-                self.stack.pop();
-            }
-            State::ExpectingFirstDictFieldOrEnd | State::ExpectingDictFieldKeyOrEnd => {
-                writer.write_byte(Self::JSON_OBJ_END)?;
-                self.stack.pop();
-            }
-            State::ExpectingDictFieldValue => {
-                return Err(error::Error::PrematureEndOfDict(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ))
-            }
-            State::Initial => {
-                return Err(error::Error::NoMatchingStartForListOrDictEnd(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ))
-            }
-        }
-
-        Ok(())
-    }
-
-    /// It checks if the stack state is correct at the end of the parsing.
-    ///
-    /// A bad final state can happen, for example, when bencoded values are not
-    /// finished.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if the stack state is not correct.
-    fn check_bad_end_stack_state<W: Writer>(&self, writer: &W) -> Result<(), error::Error> {
-        match self.stack.peek() {
-            State::Initial => Ok(()),
-            State::ExpectingFirstListItemOrEnd => Err(
-                error::Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ),
-            ),
-            State::ExpectingNextListItem => {
-                Err(error::Error::UnexpectedEndOfInputExpectingNextListItem(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ))
-            }
-            State::ExpectingFirstDictFieldOrEnd => Err(
-                error::Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ),
-            ),
-            State::ExpectingDictFieldValue => {
-                Err(error::Error::UnexpectedEndOfInputExpectingDictFieldValue(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ))
-            }
-            State::ExpectingDictFieldKeyOrEnd => Err(
-                error::Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd(
-                    ReadContext {
-                        byte: None,
-                        pos: self.tokenizer.input_byte_counter(),
-                        latest_bytes: self.tokenizer.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: None,
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ),
-            ),
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-
-    use std::io::{self, Read};
-
-    use crate::generators::json::Generator;
-
-    mod it_should_allow_writing {
-        use crate::generators::json::Generator;
-
-        #[test]
-        fn to_any_type_implementing_io_write_trait() {
-            let mut output = Vec::new();
-
-            let mut
parser = Generator::new(&b"i0e"[..]); - - parser - .write_bytes(&mut output) - .expect("Bencode to JSON conversion failed"); - - assert_eq!(output, vec!(b'0')); - } - - #[test] - fn writing_to_any_type_implementing_fmt_write_trait() { - let mut output = String::new(); - - let mut parser = Generator::new(&b"i0e"[..]); - - parser - .write_str(&mut output) - .expect("Bencode to JSON conversion failed"); - - assert_eq!(output, "0".to_string()); - } - } - - #[test] - fn it_should_allow_reading_from_an_empty_input() { - struct EmptyReader; - - impl Read for EmptyReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Err(io::Error::new( - io::ErrorKind::UnexpectedEof, - "Unexpected EOF", - )) - } - } - - let mut output = String::new(); - - let mut parser = Generator::new(EmptyReader); - - parser.write_str(&mut output).unwrap(); - - assert_eq!(output, ""); - } - - mod it_should_allow_special_bencode_cases { - - use crate::{generators::json::Generator, test::bencode_to_json_unchecked}; - - #[test] - fn an_empty_input() { - let mut output = String::new(); - - let mut parser = Generator::new(&b""[..]); - - parser - .write_str(&mut output) - .expect("Bencode to JSON conversion failed"); - - assert_eq!(output, String::new()); - } - - #[test] - fn line_breaks_at_the_beginning_of_the_input_stream() { - assert_eq!(bencode_to_json_unchecked(b"\ni0e"), "0".to_string()); - } - - #[test] - fn line_breaks_at_the_end_of_the_input_stream() { - assert_eq!(bencode_to_json_unchecked(b"i0e\n"), "0".to_string()); - } - - #[test] - fn line_breaks_between_bencoded_values() { - assert_eq!( - bencode_to_json_unchecked(b"li0e\ni1ee"), - "[0,1]".to_string() - ); - } - } - - mod it_should_fail { - use std::io::{self, Read}; - - use crate::{error::Error, generators::json::Generator, try_bencode_to_json}; - - #[test] - fn when_there_is_a_problem_reading_from_input() { - struct FaultyReader; - - impl Read for FaultyReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Err(io::Error::new( - io::ErrorKind::PermissionDenied, - "Permission denied", - )) - } - } - - let mut output = String::new(); - - let mut parser = Generator::new(FaultyReader); - - let result = parser.write_str(&mut output); - - assert!(matches!(result, Err(Error::Io(_)))); - } - - #[test] - fn when_it_cannot_recognized_the_fist_byte_of_a_new_bencoded_value() { - let invalid_bencoded_value = b"a"; - - let result = try_bencode_to_json(invalid_bencoded_value); - - assert!(matches!( - result, - Err(Error::UnrecognizedFirstBencodeValueByte { .. }) - )); - } - - #[test] - fn when_it_reaches_the_end_of_the_input_without_finishing_parsing_a_valid_bencoded_value() { - let integer_with_missing_end_byte = b"i42"; - - let result = try_bencode_to_json(integer_with_missing_end_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingInteger { .. 
}) - )); - } - } - - mod integers { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn zero() { - assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string()); - } - - #[test] - fn one_digit_integer() { - assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string()); - } - - #[test] - fn two_digits_integer() { - assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string()); - } - - #[test] - fn negative_integer() { - assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string()); - } - - #[test] - fn positive_integer_greater_than_i64_max() { - let big_positive_integer = i64::MAX.to_string() + "1"; - - let bencoded_big_positive_integer = format!("i{big_positive_integer}e"); - - assert_eq!( - bencode_to_json_unchecked(bencoded_big_positive_integer.as_bytes()), - big_positive_integer - ); - } - - #[test] - fn negative_integer_smaller_than_i64_min() { - let big_negative_integer = i64::MIN.to_string() + "1"; - - let bencoded_big_negative_integer = format!("i{big_negative_integer}e"); - - assert_eq!( - bencode_to_json_unchecked(bencoded_big_negative_integer.as_bytes()), - big_negative_integer - ); - } - - mod should_fail { - use crate::{error::Error, try_bencode_to_json}; - - #[test] - fn when_it_finds_an_invalid_byte() { - let int_with_invalid_byte = b"iae"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn with_duplicate_sign() { - let int_with_invalid_byte = b"i--42e"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - } - } - - mod strings { - use crate::{ - test::{bencode_to_json_unchecked, bencoded_string_with_repeated_byte}, - to_bencode, - }; - - #[test] - fn length_can_contain_leading_zeros() { - assert_eq!( - bencode_to_json_unchecked(b"00:"), - r#""""#.to_string() - ); - } - - #[test] - fn empty_string() { - assert_eq!( - bencode_to_json_unchecked(b"0:"), - r#""""#.to_string() - ); - } - - #[test] - fn utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:spam"), - r#""spam""#.to_string() - ); - } - - #[test] - fn non_utf8() { - assert_eq!( - bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"), - r#""fffefdfc""#.to_string() - ); - } - - #[test] - fn big_utf8_string() { - let big_string = "a".repeat(1_000_000); - - assert_eq!( - bencode_to_json_unchecked(&to_bencode(&big_string)), - format!(r#""{big_string}""#) - ); - } - - #[test] - fn big_non_utf8_string() { - let big_non_utf8_string = bencoded_string_with_repeated_byte(b'\xFF', 1_000_000); - - let expected = format!(r#""{}""#, "ff".repeat(1_000_000)); - - assert_eq!(bencode_to_json_unchecked(&big_non_utf8_string), expected); - } - - #[test] - fn ending_with_bencode_end_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); - } - - #[test] - fn containing_a_reserved_char() { - assert_eq!( - bencode_to_json_unchecked(b"1:i"), - r#""i""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:d"), - r#""d""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:l"), - r#""l""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:e"), - r#""e""#.to_string() - ); - } - - #[test] - fn containing_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"1:0"), - r#""0""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:1"), - r#""1""#.to_string() - ); - 
assert_eq!( - bencode_to_json_unchecked(b"1:2"), - r#""2""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:3"), - r#""3""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:4"), - r#""4""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:5"), - r#""5""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:6"), - r#""6""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:7"), - r#""7""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:8"), - r#""8""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked(b"1:9"), - r#""9""#.to_string() - ); - } - - mod should_escape_json { - use crate::{test::bencode_to_json_unchecked, to_bencode}; - - #[test] - fn containing_a_double_quote() { - assert_eq!( - bencode_to_json_unchecked("1:\"".as_bytes()), - r#""\"""#.to_string() - ); - } - - #[test] - fn containing_backslashes() { - assert_eq!( - bencode_to_json_unchecked("1:\\".as_bytes()), - r#""\\""#.to_string() - ); - } - - #[test] - fn containing_control_characters() { - assert_eq!( - bencode_to_json_unchecked("1:\n".as_bytes()), - r#""\n""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked("1:\r".as_bytes()), - r#""\r""#.to_string() - ); - assert_eq!( - bencode_to_json_unchecked("1:\t".as_bytes()), - r#""\t""#.to_string() - ); - } - - #[test] - fn containing_unicode_characters() { - assert_eq!( - bencode_to_json_unchecked(&to_bencode("ñandú")), - r#""ñandú""#.to_string() - ); - } - } - - mod it_should_fail_parsing_when { - use crate::{error::Error, try_bencode_to_json}; - - #[test] - fn it_reaches_the_end_of_the_input_parsing_the_string_length() { - let incomplete_string_length = b"4"; - - let result = try_bencode_to_json(incomplete_string_length); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingStringLength { .. }) - )); - } - - #[test] - fn it_reaches_the_end_of_the_input_parsing_the_string_value() { - let incomplete_string_value = b"4:123"; - - let result = try_bencode_to_json(incomplete_string_value); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingStringValue { .. }) - )); - } - - #[test] - fn it_receives_a_non_digit_byte_in_the_string_length() { - let incomplete_string_value = b"4a:1234"; - - let result = try_bencode_to_json(incomplete_string_value); - - assert!(matches!(result, Err(Error::InvalidStringLengthByte { .. 
}))); - } - } - } - - mod lists { - use crate::test::{ - bencode_to_json_unchecked, generate_n_nested_empty_bencoded_lists, - generate_n_nested_empty_json_arrays, - }; - - #[test] - fn empty_list() { - assert_eq!(bencode_to_json_unchecked(b"le"), "[]".to_string()); - } - - #[test] - fn one_nested_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"llee"), "[[]]".to_string()); - } - - #[test] - fn two_nested_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"llleee"), "[[[]]]".to_string()); - } - - #[test] - fn many_nested_empty_list() { - assert_eq!( - bencode_to_json_unchecked(&generate_n_nested_empty_bencoded_lists(100)), - generate_n_nested_empty_json_arrays(100) - ); - } - - mod with_one_item { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn integer() { - assert_eq!(bencode_to_json_unchecked(b"li42ee"), "[42]".to_string()); - } - - #[test] - fn utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l4:spame"), - r#"["spam"]"#.to_string() - ); - } - - #[test] - fn non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l4:\xFF\xFE\xFD\xFCe"), - r#"["fffefdfc"]"#.to_string() - ); - } - - mod of_type_list { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn two_nested_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"llee"), "[[]]".to_string()); - } - - #[test] - fn three_nested_empty_lists() { - assert_eq!(bencode_to_json_unchecked(b"llleee"), "[[[]]]".to_string()); - } - - #[test] - fn one_nested_list_which_contains_one_integer() { - assert_eq!(bencode_to_json_unchecked(b"lli42eee"), "[[42]]".to_string()); - } - - #[test] - fn one_nested_list_which_contains_two_integers() { - assert_eq!( - bencode_to_json_unchecked(b"lli42ei43eee"), - "[[42,43]]".to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_one_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ll4:spamee"), - r#"[["spam"]]"#.to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_two_utf_8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"ll5:alice3:bobee"), - r#"[["alice","bob"]]"#.to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_one_non_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ll4:\xFF\xFE\xFD\xFCee"), - r#"[["fffefdfc"]]"#.to_string() - ); - } - - #[test] - fn one_nested_list_which_contains_two_non_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ll2:\xFF\xFE2:\xFD\xFCee"), - r#"[["fffe","fdfc"]]"#.to_string() - ); - } - } - - mod of_type_dict { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn empty() { - assert_eq!(bencode_to_json_unchecked(b"ldee"), "[{}]".to_string()); - } - - #[test] - fn with_one_field() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:foo3:baree"), - r#"[{"foo":"bar"}]"#.to_string() - ); - } - - #[test] - fn with_two_fields() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:bar4:spam3:fooi42eee"), - r#"[{"bar":"spam","foo":42}]"# - .to_string() - ); - } - - #[test] - fn with_nested_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:foodeee"), - r#"[{"foo":{}}]"#.to_string() - ); - } - - #[test] - fn with_two_nested_empty_dicts() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:food3:foodeeee"), - r#"[{"foo":{"foo":{}}}]"#.to_string() - ); - } - - #[test] - fn with_nested_dict_with_one_field() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:food3:foo3:bareee"), - r#"[{"foo":{"foo":"bar"}}]"#.to_string() - ); - } - - #[test] - fn with_nested_dict_with_two_fields() { - assert_eq!( - 
bencode_to_json_unchecked(b"ld3:food3:foo3:bar3:fooi42eeee"), - r#"[{"foo":{"foo":"bar","foo":42}}]"#.to_string() - ); - } - } - } - - mod with_two_items_of_the_same_type { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn two_integers() { - assert_eq!( - bencode_to_json_unchecked(b"li42ei43ee"), - "[42,43]".to_string() - ); - } - - #[test] - fn two_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alice3:bobe"), - r#"["alice","bob"]"#.to_string() - ); - } - - #[test] - fn two_non_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"l2:\xFF\xFE2:\xFD\xFCe"), - r#"["fffe","fdfc"]"#.to_string() - ); - } - - #[test] - fn two_empty_lists() { - assert_eq!(bencode_to_json_unchecked(b"llelee"), r"[[],[]]".to_string()); - } - - #[test] - fn two_empty_dicts() { - assert_eq!(bencode_to_json_unchecked(b"ldedee"), r"[{},{}]".to_string()); - } - - #[test] - fn two_lists_with_one_item() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eeli42eee"), - r"[[42],[42]]".to_string() - ); - } - - #[test] - fn two_dicts_with_one_item() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eed3:fooi42eee"), - r#"[{"foo":42},{"foo":42}]"#.to_string() - ); - } - } - - mod with_two_items_of_different_types { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn integer_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"li42e5:alicee"), - r#"[42,"alice"]"#.to_string() - ); - } - - #[test] - fn integer_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"li42e2:\xFF\xFEe"), - r#"[42,"fffe"]"#.to_string() - ); - } - - #[test] - fn integer_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"li42elee"), - r"[42,[]]".to_string() - ); - } - - #[test] - fn integer_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"li42eli43eee"), - r"[42,[43]]".to_string() - ); - } - - #[test] - fn integer_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"li42edee"), - r"[42,{}]".to_string() - ); - } - - #[test] - fn integer_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"li42ed3:fooi42eee"), - r#"[42,{"foo":42}]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alicei42ee"), - r#"["alice",42]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alice1:\xFFe"), - r#"["alice","ff"]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alicelee"), - r#"["alice",[]]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"l5:aliceli42eee"), - r#"["alice",[42]]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l5:alicedee"), - r#"["alice",{}]"#.to_string() - ); - } - - #[test] - fn utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l5:aliced3:fooi42eee"), - r#"["alice",{"foo":42}]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFi42ee"), - r#"["ff",42]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFF3:fooe"), - r#"["ff","foo"]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFlee"), - r#"["ff",[]]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_list() { - assert_eq!( 
- bencode_to_json_unchecked(b"l1:\xFFli42eee"), - r#"["ff",[42]]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFdee"), - r#"["ff",{}]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"l1:\xFFd3:fooi42eee"), - r#"["ff",{"foo":42}]"#.to_string() - ); - } - - #[test] - fn empty_list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"llei42ee"), - r"[[],42]".to_string() - ); - } - - #[test] - fn empty_list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lle3:fooe"), - r#"[[],"foo"]"#.to_string() - ); - } - - #[test] - fn empty_list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lle1:\xFFe"), - r#"[[],"ff"]"#.to_string() - ); - } - - #[test] - fn empty_list_and_empty_dict() { - assert_eq!(bencode_to_json_unchecked(b"lledee"), r"[[],{}]".to_string()); - } - - #[test] - fn empty_list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"lled3:fooi42eee"), - r#"[[],{"foo":42}]"#.to_string() - ); - } - - #[test] - fn list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eei43ee"), - r"[[42],43]".to_string() - ); - } - - #[test] - fn list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lli42ee3:fooe"), - r#"[[42],"foo"]"#.to_string() - ); - } - - #[test] - fn list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lli42ee1:\xFFe"), - r#"[[42],"ff"]"#.to_string() - ); - } - - #[test] - fn list_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eedee"), - r"[[42],{}]".to_string() - ); - } - - #[test] - fn list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"lli42eed3:fooi43eee"), - r#"[[42],{"foo":43}]"#.to_string() - ); - } - - #[test] - fn empty_dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"ldei42ee"), - r"[{},42]".to_string() - ); - } - - #[test] - fn empty_dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lde3:fooe"), - r#"[{},"foo"]"#.to_string() - ); - } - - #[test] - fn empty_dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"lde1:\xFFe"), - r#"[{},"ff"]"#.to_string() - ); - } - - #[test] - fn empty_dict_and_empty_list() { - assert_eq!(bencode_to_json_unchecked(b"ldelee"), r"[{},[]]".to_string()); - } - - #[test] - fn empty_dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"ldeli42eee"), - r"[{},[42]]".to_string() - ); - } - - #[test] - fn dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eei43ee"), - r#"[{"foo":42},43]"#.to_string() - ); - } - - #[test] - fn dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42ee3:fooe"), - r#"[{"foo":42},"foo"]"#.to_string() - ); - } - - #[test] - fn dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42ee1:\xFFe"), - r#"[{"foo":42},"ff"]"#.to_string() - ); - } - - #[test] - fn dict_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eelee"), - r#"[{"foo":42},[]]"#.to_string() - ); - } - - #[test] - fn dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"ld3:fooi42eeli43eee"), - r#"[{"foo":42},[43]]"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_an_integer() { - assert_eq!( - bencode_to_json_unchecked(b"l2:\xFF\xFEi42ee"), - r#"["fffe",42]"#.to_string() - ); - } - } - - mod should_fail { - use crate::{error::Error, try_bencode_to_json}; - - #[test] - fn 
when_an_empty_list_does_not_have_the_matching_close_byte() { - let list_without_closing_list_byte = b"l"; - - let result = try_bencode_to_json(list_without_closing_list_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingFirstListItemOrEnd { .. }) - )); - } - - #[test] - fn when_a_list_does_not_have_the_matching_close_byte() { - let list_without_closing_list_byte = b"li42e"; - - let result = try_bencode_to_json(list_without_closing_list_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingNextListItem { .. }) - )); - } - - #[test] - fn when_it_receives_an_end_list_byte_without_the_matching_open_byte() { - let end_list_byte_without_start = b"e"; - - let result = try_bencode_to_json(end_list_byte_without_start); - - assert!(matches!( - result, - Err(Error::NoMatchingStartForListOrDictEnd { .. }) - )); - } - } - } - - mod dictionary { - use crate::test::{ - bencode_to_json_unchecked, generate_n_nested_empty_bencoded_dictionaries, - generate_n_nested_empty_json_objects, - }; - - #[test] - fn empty_dictionary() { - assert_eq!(bencode_to_json_unchecked(b"de"), "{}".to_string()); - } - - #[test] - fn one_nested_empty_dictionary() { - assert_eq!( - bencode_to_json_unchecked(b"d3:foodee"), - r#"{"foo":{}}"#.to_string() - ); - } - - #[test] - fn two_nested_empty_dictionaries() { - assert_eq!( - bencode_to_json_unchecked(b"d3:food3:foodeee"), - r#"{"foo":{"foo":{}}}"#.to_string() - ); - } - - #[test] - fn many_nested_empty_dictionaries() { - assert_eq!( - bencode_to_json_unchecked(&generate_n_nested_empty_bencoded_dictionaries(100)), - generate_n_nested_empty_json_objects(100) - ); - } - - mod with_a_key { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn starting_with_a_digit() { - assert_eq!( - bencode_to_json_unchecked(b"d4:1fooi42ee"), - r#"{"1foo":42}"#.to_string() - ); - } - - #[test] - fn which_is_not_a_utf_8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d2:\xFF\xFEi42ee"), - r#"{"fffe":42}"#.to_string() - ); - } - } - - mod with_one_field { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:fooi42ee"), - r#"{"foo":42}"#.to_string() - ); - } - - #[test] - fn utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar4:spame"), - r#"{"bar":"spam"}"#.to_string() - ); - } - - #[test] - fn non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar2:\xFF\xFEe"), - r#"{"bar":"fffe"}"#.to_string() - ); - } - - #[test] - fn empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barlee"), - r#"{"bar":[]}"#.to_string() - ); - } - - #[test] - fn empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bardee"), - r#"{"bar":{}}"#.to_string() - ); - } - } - - mod with_two_fields_of_the_same_type { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn two_integers() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:fooi43ee"), - r#"{"bar":42,"foo":43}"#.to_string() - ); - } - - #[test] - fn two_empty_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar0:3:foo0:e"), - r#"{"bar":"","foo":""}"#.to_string() - ); - } - - #[test] - fn two_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar4:spam3:foo5:alicee"), - r#"{"bar":"spam","foo":"alice"}"#.to_string() - ); - } - - #[test] - fn two_non_utf8_strings() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foo1:\xFEe"), - r#"{"bar":"ff","foo":"fe"}"#.to_string() - ); - } - - #[test] - fn two_empty_lists() { - assert_eq!( - 
bencode_to_json_unchecked(b"d3:barle3:foolee"), - r#"{"bar":[],"foo":[]}"#.to_string() - ); - } - - #[test] - fn two_empty_dicts() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foodee"), - r#"{"bar":{},"foo":{}}"#.to_string() - ); - } - - #[test] - fn two_lists() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:fooli43eee"), - r#"{"bar":[42],"foo":[43]}"#.to_string() - ); - } - - #[test] - fn two_dicts() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bardee3:food3:foodeee"), - r#"{"bar":{"bar":{}},"foo":{"foo":{}}}"# - .to_string() - ); - } - } - - mod with_two_fields_of_different_type { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn integer_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foo5:alicee"), - r#"{"bar":42,"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn integer_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foo1:\xFFe"), - r#"{"bar":42,"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn integer_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foolee"), - r#"{"bar":42,"foo":[]}"#.to_string() - ); - } - - #[test] - fn integer_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:fooli43eee"), - r#"{"bar":42,"foo":[43]}"#.to_string() - ); - } - - #[test] - fn integer_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:foodee"), - r#"{"bar":42,"foo":{}}"#.to_string() - ); - } - - #[test] - fn integer_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bari42e3:food3:fooi43eee"), - r#"{"bar":42,"foo":{"foo":43}}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:fooi43ee"), - r#"{"bar":"alice","foo":43}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:foo1:\xFFe"), - r#"{"bar":"alice","foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:foolee"), - r#"{"bar":"alice","foo":[]}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:fooli42eee"), - r#"{"bar":"alice","foo":[42]}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:foodee"), - r#"{"bar":"alice","foo":{}}"#.to_string() - ); - } - - #[test] - fn utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar5:alice3:food3:fooi42eee"), - r#"{"bar":"alice","foo":{"foo":42}}"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:fooi43ee"), - r#"{"bar":"ff","foo":43}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foo1:\xFFe"), - r#"{"bar":"ff","foo":"ff"}"#.to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foolee"), - r#"{"bar":"ff","foo":[]}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:fooli42eee"), - r#"{"bar":"ff","foo":[42]}"# - .to_string() - ); - } - - #[test] - fn non_utf8_string_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:foodee"), - r#"{"bar":"ff","foo":{}}"# - 
.to_string() - ); - } - - #[test] - fn non_utf8_string_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bar1:\xFF3:food3:fooi42eee"), - r#"{"bar":"ff","foo":{"foo":42}}"#.to_string() - ); - } - - #[test] - fn empty_list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:fooi42ee"), - r#"{"bar":[],"foo":42}"#.to_string() - ); - } - - #[test] - fn empty_list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foo5:alicee"), - r#"{"bar":[],"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn empty_list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foo1:\xFFe"), - r#"{"bar":[],"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn empty_list_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:foodee"), - r#"{"bar":[],"foo":{}}"#.to_string() - ); - } - - #[test] - fn empty_list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barle3:food3:foo5:aliceee"), - r#"{"bar":[],"foo":{"foo":"alice"}}"#.to_string() - ); - } - - #[test] - fn list_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:fooi42ee"), - r#"{"bar":[42],"foo":42}"#.to_string() - ); - } - - #[test] - fn list_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:foo5:alicee"), - r#"{"bar":[42],"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn list_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:foo1:\xFFe"), - r#"{"bar":[42],"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn list_and_empty_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:foodee"), - r#"{"bar":[42],"foo":{}}"#.to_string() - ); - } - - #[test] - fn list_and_dict() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barli42ee3:food3:foo5:aliceee"), - r#"{"bar":[42],"foo":{"foo":"alice"}}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:fooi42ee"), - r#"{"bar":{},"foo":42}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foo5:alicee"), - r#"{"bar":{},"foo":"alice"}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foo1:\xFFe"), - r#"{"bar":{},"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn empty_dict_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:foolee"), - r#"{"bar":{},"foo":[]}"#.to_string() - ); - } - - #[test] - fn empty_dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:barde3:fooli42eee"), - r#"{"bar":{},"foo":[42]}"#.to_string() - ); - } - - #[test] - fn dict_and_integer() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:fooi43ee"), - r#"{"bar":{"bar":42},"foo":43}"#.to_string() - ); - } - - #[test] - fn dict_and_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foo5:alicee"), - r#"{"bar":{"bar":42},"foo":"alice"}"# - .to_string() - ); - } - - #[test] - fn dict_and_non_utf8_string() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foo1:\xFFe"), - r#"{"bar":{"bar":42},"foo":"ff"}"# - .to_string() - ); - } - - #[test] - fn dict_and_empty_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:foolee"), - r#"{"bar":{"bar":42},"foo":[]}"#.to_string() - ); - } - - #[test] - fn dict_and_list() { - assert_eq!( - bencode_to_json_unchecked(b"d3:bard3:bari42ee3:fooli42eee"), - r#"{"bar":{"bar":42},"foo":[42]}"# 
- .to_string() - ); - } - } - - mod should_escape_json { - - mod in_field_keys { - - // Only one especial char is tested. The string parser contains - // other tests for the rest of the special chars that need to be - // escaped. - - use crate::test::bencode_to_json_unchecked; - - #[test] - fn containing_a_line_break_at_the_beginning_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d4:\nfoo3:bare".as_bytes()), - r#"{"\nfoo":"bar"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_in_the_middle_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d4:f\noo3:bare".as_bytes()), - r#"{"f\noo":"bar"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_at_the_end_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d4:foo\n3:bare".as_bytes()), - r#"{"foo\n":"bar"}"#.to_string() - ); - } - } - - mod in_field_values { - use crate::test::bencode_to_json_unchecked; - - #[test] - fn containing_a_line_break_at_the_beginning_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d3:foo4:\nbare".as_bytes()), - r#"{"foo":"\nbar"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_in_the_middle_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d3:foo4:ba\nre".as_bytes()), - r#"{"foo":"ba\nr"}"#.to_string() - ); - } - - #[test] - fn containing_a_line_break_at_the_end_of_the_string() { - assert_eq!( - bencode_to_json_unchecked("d3:foo4:bar\ne".as_bytes()), - r#"{"foo":"bar\n"}"#.to_string() - ); - } - } - } - - mod should_fail { - use crate::{error::Error, try_bencode_to_json}; - - #[test] - fn when_an_empty_dict_does_not_have_the_matching_close_byte() { - let dict_without_closing_dict_byte = b"d"; - - let result = try_bencode_to_json(dict_without_closing_dict_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingFirstDictFieldOrEnd { .. }) - )); - } - - #[test] - fn when_a_dict_field_does_not_have_the_value() { - let dict_without_closing_dict_byte = b"d3:foo"; - - let result = try_bencode_to_json(dict_without_closing_dict_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingDictFieldValue { .. }) - )); - } - - #[test] - fn when_a_dict_does_not_have_the_matching_close_byte() { - let dict_without_closing_dict_byte = b"d3:fooi42e"; - - let result = try_bencode_to_json(dict_without_closing_dict_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputExpectingDictFieldKeyOrEnd { .. }) - )); - } - - #[test] - fn when_it_receives_an_end_dict_byte_without_the_matching_open_byte() { - let end_dict_byte_without_start = b"e"; - - let result = try_bencode_to_json(end_dict_byte_without_start); - - assert!(matches!( - result, - Err(Error::NoMatchingStartForListOrDictEnd { .. }) - )); - } - - #[test] - fn when_it_receives_a_premature_end_dict_byte() { - let dict_with_missing_key_value = b"d3:fooe"; - - let result = try_bencode_to_json(dict_with_missing_key_value); - - assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); - } - - #[test] - fn when_the_first_field_value_is_empty() { - let dict_with_missing_key_value = b"d3:fooe"; - - let result = try_bencode_to_json(dict_with_missing_key_value); - - assert!(matches!(result, Err(Error::PrematureEndOfDict { .. }))); - } - - #[test] - fn when_the_second_field_value_is_empty() { - let dict_with_missing_key_value = b"d3:foo3:bar3:fooe"; - - let result = try_bencode_to_json(dict_with_missing_key_value); - - assert!(matches!(result, Err(Error::PrematureEndOfDict { .. 
-                })));
-            }
-
-            mod when_the_field_key_is_not_a_string_for_example {
-                use crate::error::Error;
-                use crate::generators::json::BencodeType;
-                use crate::try_bencode_to_json;
-
-                #[test]
-                fn when_the_key_in_the_first_dict_field_is_an_integer() {
-                    let field_with_integer_key = b"di42ei43ee";
-
-                    let result = try_bencode_to_json(field_with_integer_key);
-
-                    assert!(matches!(
-                        result,
-                        Err(Error::ExpectedStringForDictKeyGot(
-                            BencodeType::Integer,
-                            _,
-                            _
-                        ))
-                    ));
-                }
-
-                #[test]
-                fn when_the_key_in_the_second_dict_field_is_an_integer() {
-                    let field_with_integer_key = b"d3:foo3:bari42ei43ee";
-
-                    let result = try_bencode_to_json(field_with_integer_key);
-
-                    assert!(matches!(
-                        result,
-                        Err(Error::ExpectedStringForDictKeyGot(
-                            BencodeType::Integer,
-                            _,
-                            _
-                        ))
-                    ));
-                }
-
-                #[test]
-                fn when_the_key_in_the_first_dict_field_is_a_list() {
-                    let field_with_list_key = b"dlei42ee";
-
-                    let result = try_bencode_to_json(field_with_list_key);
-
-                    assert!(matches!(
-                        result,
-                        Err(Error::ExpectedStringForDictKeyGot(BencodeType::List, _, _))
-                    ));
-                }
-
-                #[test]
-                fn when_the_key_in_the_second_dict_field_is_a_list() {
-                    let field_with_list_key = b"d3:foo3:barlei42ee";
-
-                    let result = try_bencode_to_json(field_with_list_key);
-
-                    assert!(matches!(
-                        result,
-                        Err(Error::ExpectedStringForDictKeyGot(BencodeType::List, _, _))
-                    ));
-                }
-
-                #[test]
-                fn when_the_key_in_the_first_dict_field_is_a_dict() {
-                    let field_with_dict_key = b"ddei42ee";
-
-                    let result = try_bencode_to_json(field_with_dict_key);
-
-                    assert!(matches!(
-                        result,
-                        Err(Error::ExpectedStringForDictKeyGot(BencodeType::Dict, _, _))
-                    ));
-                }
-
-                #[test]
-                fn when_the_key_in_the_second_dict_field_is_a_dict() {
-                    let field_with_dict_key = b"d3:foo3:bardei42ee";
-
-                    let result = try_bencode_to_json(field_with_dict_key);
-
-                    assert!(matches!(
-                        result,
-                        Err(Error::ExpectedStringForDictKeyGot(BencodeType::Dict, _, _))
-                    ));
-                }
-            }
-        }
-    }
-}
diff --git a/src/generators/mod.rs b/src/generators/mod.rs
deleted file mode 100644
index c1911f0..0000000
--- a/src/generators/mod.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-pub mod json;
-pub mod stack;
-
-// todo: extract a trait for generators when we implement a new one.
-
-use derive_more::derive::Display;
-
-#[derive(Debug, PartialEq, Display)]
-pub enum BencodeType {
-    Integer,
-    String,
-    List,
-    Dict,
-}
diff --git a/src/generators/stack.rs b/src/generators/stack.rs
deleted file mode 100644
index 6bf6dec..0000000
--- a/src/generators/stack.rs
+++ /dev/null
@@ -1,257 +0,0 @@
-//! The stack used by the generators to keep track of the current parsing state.
-use std::fmt::Display;
-
-/// Stack containing states for nested Bencoded values.
-///
-/// The stack has an immutable initial state.
-///
-/// > NOTICE!: It's not allowed to pop or change the initial state.
-#[derive(Debug)]
-pub(crate) struct Stack {
-    /// The stack of states.
-    states: Vec<State>,
-}
-
-/// States while parsing lists or dictionaries.
-///
-/// There are no states for integers and strings because parsing them is a
-/// straightforward operation. We know when they finish and there is no
-/// recursion.
-///
-/// States are displayed with a short name using only one letter:
-///
-/// `I`, `L`, `M`, `D`, `E`, `F`
-///
-/// This comes from the original implementation in C.
-#[derive(Debug, PartialEq, Clone)]
-pub enum State {
-    /// The initial state.
-    ///
-    /// The short display name for the state is I.
-    Initial, // I
-
-    // States while parsing lists
-    /// Expecting the first list item or the end of the list.
-    /// The short display name for the state is L.
-    ExpectingFirstListItemOrEnd,
-
-    /// Expecting the next list item. The list contains at least one item.
-    /// The short display name for the state is M.
-    ExpectingNextListItem,
-
-    // States while parsing dictionaries
-    /// Expecting the first dict field or the end of the dict.
-    /// The short display name for the state is D.
-    ExpectingFirstDictFieldOrEnd,
-
-    /// Expecting the dict field value.
-    /// The short display name for the state is E.
-    ExpectingDictFieldValue,
-
-    /// Expecting the dict field key or the end of the dict.
-    /// The short display name for the state is F.
-    ExpectingDictFieldKeyOrEnd,
-}
-
-impl Display for State {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let output = match self {
-            State::Initial => "I",
-            State::ExpectingFirstListItemOrEnd => "L",
-            State::ExpectingNextListItem => "M",
-            State::ExpectingFirstDictFieldOrEnd => "D",
-            State::ExpectingDictFieldValue => "E",
-            State::ExpectingDictFieldKeyOrEnd => "F",
-        };
-        write!(f, "{output}")
-    }
-}
-
-impl Default for Stack {
-    fn default() -> Self {
-        let states = vec![State::Initial];
-        Self { states }
-    }
-}
-
-impl Display for Stack {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "[")?;
-        for (idx, state) in self.states.iter().enumerate() {
-            if idx > 0 {
-                write!(f, ", ")?;
-            }
-            write!(f, "{state}")?;
-        }
-        write!(f, "]")?;
-        Ok(())
-    }
-}
-
-impl Stack {
-    /// It adds a new state to the stack.
-    pub fn push(&mut self, state: State) {
-        self.states.push(state);
-    }
-
-    /// It removes the state at the top of the stack.
-    ///
-    /// It doesn't allow popping the initial state.
-    ///
-    /// # Panics
-    ///
-    /// Will panic if the stack top is the initial state.
-    pub fn pop(&mut self) {
-        self.guard_immutable_initial_state();
-        self.states.pop();
-    }
-
-    /// It swaps the stack top with the new state.
-    ///
-    /// It doesn't allow swapping the initial state.
-    ///
-    /// # Panics
-    ///
-    /// Will panic if the stack top is the initial state.
-    pub fn swap_top(&mut self, new_state: State) {
-        self.guard_immutable_initial_state();
-        self.states.pop();
-        self.push(new_state);
-    }
-
-    /// It returns the top element of the stack without consuming it.
-    ///
-    /// # Panics
-    ///
-    /// Will panic if the stack is empty. The stack is never empty because it's
-    /// not allowed to pop or change the initial state.
-    #[must_use]
-    pub fn peek(&self) -> State {
-        match self.states.last() {
-            Some(top) => top.clone(),
-            None => panic!("empty stack!"),
-        }
-    }
-
-    /// Prevents mutating the initial state.
-    fn guard_immutable_initial_state(&self) {
-        if let Some(top) = self.states.last() {
-            if *top != State::Initial {
-                return;
-            }
-        };
-
-        panic!("trying to mutate immutable initial state. It can't be popped or swapped!")
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    mod the_stack_state {
-        use crate::generators::stack::State;
-
-        #[test]
-        fn should_be_displayed_with_single_letter_abbreviations() {
-            assert_eq!(format!("{}", State::Initial), "I");
-            assert_eq!(format!("{}", State::ExpectingFirstListItemOrEnd), "L");
-            assert_eq!(format!("{}", State::ExpectingNextListItem), "M");
-            assert_eq!(format!("{}", State::ExpectingFirstDictFieldOrEnd), "D");
-            assert_eq!(format!("{}", State::ExpectingDictFieldValue), "E");
-            assert_eq!(format!("{}", State::ExpectingDictFieldKeyOrEnd), "F");
-        }
-    }
-
-    mod the_stack {
-        mod it_should {
-            use crate::generators::stack::{Stack, State};
-
-            #[test]
-            fn have_an_initial_state() {
-                assert_eq!(Stack::default().peek(), State::Initial);
-            }
-
-            #[test]
-            fn allow_peeking_the_top_element_without_consuming_it() {
-                let stack = Stack::default();
-
-                let _ = stack.peek();
-
-                assert_eq!(stack.peek(), State::Initial);
-            }
-
-            #[test]
-            #[should_panic(expected = "empty stack!")]
-            fn panic_peeking_the_top_element_if_the_stack_is_empty() {
-                let mut stack = Stack::default();
-
-                stack.states.clear();
-
-                let _ = stack.peek();
-            }
-
-            #[test]
-            fn allow_pushing_new_states() {
-                let mut stack = Stack::default();
-
-                stack.push(State::ExpectingDictFieldKeyOrEnd);
-
-                assert_eq!(stack.peek(), State::ExpectingDictFieldKeyOrEnd);
-            }
-
-            #[test]
-            fn allow_popping_the_current_top_state() {
-                let mut stack = Stack::default();
-
-                stack.push(State::ExpectingDictFieldKeyOrEnd);
-                stack.pop();
-
-                assert_eq!(stack.peek(), State::Initial);
-            }
-
-            #[test]
-            #[should_panic(expected = "trying to mutate")]
-            fn not_allow_popping_the_initial_state() {
-                Stack::default().pop();
-            }
-
-            #[test]
-            fn allow_swapping_the_top_state() {
-                let mut stack = Stack::default();
-
-                stack.push(State::ExpectingDictFieldKeyOrEnd);
-                stack.swap_top(State::ExpectingDictFieldValue);
-
-                assert_eq!(stack.peek(), State::ExpectingDictFieldValue);
-            }
-
-            #[test]
-            #[should_panic(expected = "trying to mutate")]
-            fn not_allow_swapping_the_initial_state() {
-                Stack::default().swap_top(State::Initial);
-            }
-
-            mod be_displayed_with_single_letter_abbreviations_for_states {
-
-                use crate::generators::stack::{Stack, State};
-
-                #[test]
-                fn with_the_initial_state() {
-                    let stack = Stack::default();
-
-                    assert_eq!(format!("{stack}"), "[I]");
-                }
-
-                #[test]
-                fn after_pushing_one_more_state() {
-                    let mut stack = Stack::default();
-
-                    stack.push(State::ExpectingDictFieldKeyOrEnd);
-
-                    assert_eq!(format!("{stack}"), "[I, F]");
-                }
-            }
-        }
-    }
-}
diff --git a/src/lib.rs b/src/lib.rs
index 9ca27cb..8cbec35 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,122 +1,1098 @@
-//! This lib contains functions to convert bencoded bytes into a JSON string.
-//!
-//! Bencode is a simple encoding format that is used to encode arbitrary
-//! data structures. It is commonly used in the context of torrent files,
-//! where the data structures are used to describe the contents of the torrent
-//! file.
-//!
-//! To learn more about bencode, you can refer to the following resources:
-//!
-//! -
-//! -
-//!
-//! This lib has high-level functions for common purposes that call the lower
-//! level parser. You can use the low-level parser if the high-level wrappers
-//! are not suitable for your needs.
-//!
-//! The most straightforward way to use this lib is to use the `try_bencode_to_json`
-//! function:
-//!
-//! ```rust
-//! use bencode2json::{try_bencode_to_json};
-//!
-//! let result = try_bencode_to_json(b"d4:spam4:eggse").unwrap();
-//!
-//! assert_eq!(result, r#"{"spam":"eggs"}"#);
-//! ```
-//!
-//! The primary goal of this lib is to provide a simple and easy-to-use API for
-//! converting bencoded data into JSON. It's also designed to be flexible and
-//! efficient, making it suitable for a wide range of use cases.
-//!
-//! A design requirement is to be able to parse bencoded data without building
-//! an in-memory representation of the whole bencoded data structure.
-//!
-//! > __NOTICE__: In the context of this lib, a parser is a function that takes
-//! > an input containing bencoded data and produces a JSON output (raw bytes
-//! > or a UTF-8 string).
-pub mod error;
-pub mod generators;
-pub mod rw;
-pub mod tokenizer;
-
-use error::Error;
-use generators::json::Generator;
-mod test;
-
-/// It converts bencoded bytes into a JSON string.
-///
-/// # Errors
-///
-/// Will return an error if the conversion fails.
-pub fn try_bencode_to_json(input_buffer: &[u8]) -> Result<String, Error> {
-    let mut output = String::new();
-
-    let mut parser = Generator::new(input_buffer);
-
-    match parser.write_str(&mut output) {
-        Ok(()) => Ok(output),
-        Err(err) => Err(err),
+use std::io::{self, Read};
+use std::str;
+
+#[derive(Debug, PartialEq)]
+pub enum Parsing {
+    Integer, // todo: add ParsingInteger
+    String(ParsingString),
+    List(ParsingList),
+    Dictionary(ParsingDictionary),
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParsingInteger {
+    Length,
+    Chars,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParsingString {
+    Length,
+    Chars,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParsingList {
+    FirstItem,
+    NextItem,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParsingDictionary {
+    Start,
+    FirstKeyValuePair(ParsingKeyValuePair),
+    NextKeyValuePair(ParsingKeyValuePair),
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParsingKeyValuePair {
+    Key,
+    Value,
+}
+
+pub struct BencodeParser<R: Read> {
+    pub debug: bool,
+    pub json: String,
+    pub iter: u64,
+    pub pos: u64,
+    reader: R,
+    stack: Vec<Parsing>,
+    string_parser: StringParser,
+    captured_input: Option<Vec<u8>>,
+}
+
+// todo: we don't have an integer parser because we simply print all bytes between
+// the start (`i`) and end (`e`) delimiters for integer values. However, what
+// should happen when the integer contains a byte that is not a digit? For
+// example: b"i12G345e".
+
+#[derive(Default, Debug)]
+struct StringParser {
+    // String length
+    bytes_for_string_length: Vec<u8>,
+    string_length: usize,
+
+    // String value bytes
+    string_bytes: Vec<u8>,
+    string_bytes_counter: usize,
+}
+
+impl StringParser {
+    fn new_string_starting_with(&mut self, byte: u8) {
+        self.new_string();
+        self.add_length_byte(byte);
+    }
+
+    fn new_string(&mut self) {
+        self.bytes_for_string_length = Vec::new();
+        self.string_length = 0;
+        self.string_bytes = Vec::new();
+        self.string_bytes_counter = 0;
+    }
+
+    fn add_length_byte(&mut self, byte: u8) {
+        // todo: should we fail here if the byte is not a digit (0..=9), or can
+        // we wait until we try to convert all the length bytes into a number?
+        self.bytes_for_string_length.push(byte);
+    }
+
+    fn add_byte(&mut self, byte: u8) {
+        // todo: return an error if we try to push a new byte but the end of the
+        // string has been reached.
+        self.string_bytes.push(byte);
+        self.string_bytes_counter += 1;
+    }
+
+    /// This function is called when we receive the ':' byte, which is the
+    /// delimiter marking the end of the bytes representing the string length.
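+    ///
+    /// A sketch of the expected behaviour (hypothetical test; it assumes the
+    /// private fields and methods of this struct):
+    ///
+    /// ```rust,ignore
+    /// let mut parser = StringParser::default();
+    /// parser.add_length_byte(b'4'); // the `4` in the input `4:spam`
+    /// parser.process_end_of_string_length();
+    /// assert_eq!(parser.string_length, 4);
+    /// ```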
+ fn process_end_of_string_length(&mut self) { + // todo: maybe we should simply fail when we receive a byte that is not a digit (0..9). + // This error cannot be understood by users because we first convert into a UTF-8 string + // and later into a number. + let length_str = str::from_utf8(&self.bytes_for_string_length) + .expect("invalid string length, non UTF-8 string length"); + + //println!("length_str: {length_str}"); + + self.string_length = length_str + .parse::() + .expect("invalid string length, non zero or positive integer"); + + //println!("string_length_number: {string_length}"); + } + + fn has_finished_capturing_bytes(&self) -> bool { + self.string_bytes_counter == self.string_length + } + + fn utf8(&self) -> String { + match str::from_utf8(&self.string_bytes) { + Ok(string) => { + // String only contains valid UTF-8 chars -> print it as it's + string.to_owned() + } + Err(_) => { + // String contains non valid UTF-8 chars -> print it as hex bytes + Self::bytes_to_hex(&self.string_bytes) + } + } + } + + fn json(&self) -> String { + format!("\"{}\"", self.utf8()) + } + + fn bytes_to_hex(data: &[u8]) -> String { + format!("{}", hex::encode(data)) } } -/// Helper to convert a string into a bencoded string. -#[must_use] -pub fn to_bencode(value: &str) -> Vec { - let bencoded_str = format!("{}:{}", value.len(), value); - bencoded_str.as_bytes().to_vec() +impl BencodeParser { + pub fn new(reader: R) -> Self { + BencodeParser { + debug: false, + reader, + stack: Vec::new(), + json: String::new(), + pos: 0, + iter: 1, + string_parser: StringParser::default(), + captured_input: Some(Vec::new()), + } + } + + /// todo + /// + /// # Errors + /// + /// + /// + /// # Panics + /// + /// Will panic if ... + #[allow(clippy::match_on_vec_items)] + #[allow(clippy::single_match)] + #[allow(clippy::too_many_lines)] + #[allow(clippy::match_same_arms)] + #[allow(clippy::single_match_else)] + pub fn parse(&mut self) -> io::Result<()> { + loop { + let byte = match self.read_byte() { + Ok(byte) => byte, + Err(ref err) if err.kind() == io::ErrorKind::UnexpectedEof => { + //println!("Reached the end of file."); + break; + } + Err(err) => return Err(err), + }; + + if self.debug { + println!("iter: {}", self.iter); + println!("pos: {}", self.pos); + println!("byte: {} ({})", byte, byte as char); + } + + match byte { + b'i' => { + match self.stack.last() { + Some(state) => { + match state { + Parsing::List(parsing_list) => match parsing_list { + ParsingList::FirstItem => { + self.stack.push(Parsing::Integer); + } + ParsingList::NextItem => { + self.stack.push(Parsing::Integer); + self.json.push(','); + } + }, + Parsing::Dictionary(parsing_dictionary) => { + match parsing_dictionary { + ParsingDictionary::Start => { + panic!("invalid byte 'i', expecting string for dictionary key"); + } + ParsingDictionary::FirstKeyValuePair( + first_key_value_pair, + ) => { + match first_key_value_pair { + ParsingKeyValuePair::Key => { + panic!("invalid byte 'i', dictionary key can't be an integer"); + } + ParsingKeyValuePair::Value => { + // First key value in the dictionary is an integer + self.stack.push(Parsing::Integer); + } + } + } + ParsingDictionary::NextKeyValuePair( + next_key_value_pair, + ) => { + match next_key_value_pair { + ParsingKeyValuePair::Key => { + panic!("invalid byte 'i', dictionary key can't be an integer"); + } + ParsingKeyValuePair::Value => { + // Next key value in the dictionary is an integer + self.stack.push(Parsing::Integer); + } + } + } + } + } + Parsing::Integer => { + panic!("invalid byte, 
parsing integer expected digit") + } + Parsing::String(parsing_string) => match parsing_string { + ParsingString::Length => { + panic!("unexpected byte 'i', parsing string length ") + } + ParsingString::Chars => { + self.process_string_value_byte(byte); + } + }, + } + } + None => { + self.stack.push(Parsing::Integer); + } + } + } + b'0'..=b'9' => { + match self.stack.last() { + Some(state) => match state { + Parsing::Integer => { + self.json.push(byte as char); + } + Parsing::String(parsing_string) => match parsing_string { + ParsingString::Length => { + // Add a digit for the string length + self.process_string_length_byte(byte); + } + ParsingString::Chars => { + // Add a byte for the string value + self.process_string_value_byte(byte); + } + }, + Parsing::List(parsing_list) => { + match parsing_list { + ParsingList::FirstItem => { + // First item in the list and it is a string + + self.string_parser.new_string_starting_with(byte); + + self.stack.push(Parsing::String(ParsingString::Length)); + } + ParsingList::NextItem => { + // Non first item in the list and it is a string + + self.string_parser.new_string_starting_with(byte); + + self.stack.push(Parsing::String(ParsingString::Length)); + + self.json.push(','); + } + } + } + Parsing::Dictionary(parsing_dictionary) => { + match parsing_dictionary { + ParsingDictionary::Start => { + // First key in the dictionary + + self.stack.push(Parsing::Dictionary( + ParsingDictionary::FirstKeyValuePair( + ParsingKeyValuePair::Key, + ), + )); + + self.string_parser.new_string_starting_with(byte); + + self.stack.push(Parsing::String(ParsingString::Length)); + } + ParsingDictionary::FirstKeyValuePair( + parsing_first_key_value_pair, + ) => { + match parsing_first_key_value_pair { + ParsingKeyValuePair::Key => { + todo!() + } + ParsingKeyValuePair::Value => { + // First key value in the dictionary and it's an string + + self.string_parser.new_string_starting_with(byte); + + self.stack + .push(Parsing::String(ParsingString::Length)); + } + } + } + ParsingDictionary::NextKeyValuePair( + parsing_next_key_value_pair, + ) => { + match parsing_next_key_value_pair { + ParsingKeyValuePair::Key => { + /*self.stack.push(State::ParsingDictionary( + ParsingDictionary::NextKeyValuePair( + ParsingKeyValuePair::Key, + ), + ));*/ + + self.string_parser.new_string_starting_with(byte); + + self.stack + .push(Parsing::String(ParsingString::Length)); + + self.json.push(','); + } + ParsingKeyValuePair::Value => { + // Next key value in the dictionary and it's an string + + self.string_parser.new_string_starting_with(byte); + + self.stack + .push(Parsing::String(ParsingString::Length)); + } + } + } + } + } + }, + None => { + // First byte in input and it is a string + self.stack.push(Parsing::String(ParsingString::Length)); + + self.string_parser.new_string_starting_with(byte); + } + }; + } + b':' => match self.stack.last() { + Some(state) => match state { + Parsing::String(parsing_string) => { + match parsing_string { + ParsingString::Length => { + // We reach the end of the string length + self.string_parser.process_end_of_string_length(); + + // We have finished parsing the string length + self.stack.pop(); + self.stack.push(Parsing::String(ParsingString::Chars)); + } + ParsingString::Chars => { + self.process_string_value_byte(byte); + } + } + } + _ => panic!("unexpected byte: ':', not parsing a string"), + }, + None => { + panic!("unexpected byte: ':', not parsing a string"); + } + }, + b'l' => match self.stack.last() { + Some(state) => match state { + 
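+                        // Editor's note: an illustrative trace (derived from the
+                        // state machine above; for documentation only) of the stack
+                        // and the JSON output while parsing the input "l4:spame":
+                        //
+                        //   'l'    -> stack: [List(FirstItem)]                  json: [
+                        //   '4'    -> stack: [List(FirstItem), String(Length)]
+                        //   ':'    -> stack: [List(FirstItem), String(Chars)]
+                        //   "spam" -> stack: [List(NextItem)]                   json: ["spam"
+                        //   'e'    -> stack: []                                 json: ["spam"]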
Parsing::List(parsing_list) => match parsing_list { + ParsingList::FirstItem => { + self.stack.push(Parsing::List(ParsingList::FirstItem)); + self.json.push('['); + } + ParsingList::NextItem => {} + }, + Parsing::Dictionary(parsing_dictionary) => match parsing_dictionary { + ParsingDictionary::Start => todo!(), + ParsingDictionary::FirstKeyValuePair(_) => todo!(), + ParsingDictionary::NextKeyValuePair(_) => todo!(), + }, + Parsing::Integer => {} + Parsing::String(parsing_string) => match parsing_string { + ParsingString::Length => { + panic!("unexpected byte: 'l', parsing string length") + } + ParsingString::Chars => { + self.process_string_value_byte(byte); + } + }, + }, + None => { + self.stack.push(Parsing::List(ParsingList::FirstItem)); + self.json.push('['); + } + }, + b'd' => match self.stack.last() { + Some(_) => todo!(), + None => { + self.stack + .push(Parsing::Dictionary(ParsingDictionary::Start)); + self.json.push('{'); + } + }, + b'e' => { + match self.stack.last() { + Some(state) => match state { + Parsing::List(_) => { + // We have finished parsing the list + self.stack.pop(); + self.json.push(']'); + } + Parsing::Dictionary(parsing_dictionary) => { + match parsing_dictionary { + ParsingDictionary::Start => { + // We have finished parsing the dictionary (empty dictionary) + self.stack.pop(); + self.json.push('}'); + } + ParsingDictionary::FirstKeyValuePair( + parsing_first_key_value_pair, + ) => { + match parsing_first_key_value_pair { + ParsingKeyValuePair::Key => todo!(), + ParsingKeyValuePair::Value => { + { + // We have finished parsing the dictionary (with one key/value pair) + self.stack.pop(); + + self.json.push('}'); + + self.stack.pop(); + } + } + } + } + ParsingDictionary::NextKeyValuePair( + parsing_next_key_value_pair, + ) => match parsing_next_key_value_pair { + ParsingKeyValuePair::Key => { + { + // We have finished parsing the dictionary (with one key/value pair) + self.stack.pop(); + + self.json.push('}'); + + self.stack.pop(); + } + } + ParsingKeyValuePair::Value => todo!(), + }, + } + } + Parsing::Integer => { + // We have finished parsing the integer + self.stack.pop(); + self.check_end_first_key_value_pair_in_dictionary(); + self.check_end_next_key_value_pair_in_dictionary(); + } + Parsing::String(parsing_string) => match parsing_string { + ParsingString::Length => { + panic!("unexpected byte: 'e', parsing string length") + } + ParsingString::Chars => { + self.process_string_value_byte(byte); + } + }, + }, + None => panic!("invalid byte, unexpected end byte `e`"), + } + + self.check_first_list_item(); + } + _ => match self.stack.last() { + Some(state) => match state { + Parsing::List(_) => {} + Parsing::Dictionary(_) => {} + Parsing::Integer => {} + Parsing::String(parsing_string) => match parsing_string { + ParsingString::Length => {} + ParsingString::Chars => { + self.process_string_value_byte(byte); + } + }, + }, + None => {} + }, + } + + if self.debug { + println!("stack: {:?}", self.stack); + //println!("string_parser: {:#?}", self.string_parser); + match &self.captured_input { + Some(captured_input) => match str::from_utf8(captured_input) { + Ok(string) => println!("input: {string}"), + Err(_) => println!("input: {captured_input:#?}"), + }, + None => {} + } + println!("output: {}", self.json); + println!(); + } + + self.iter += 1; + } + + // todo: if we exit the loop with a non empty stack, that's an error (incomplete bencode value). 
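+        //
+        // A sketch of that check (an editor's illustration, not implemented
+        // yet; it reuses `io::Error` because `parse` returns `io::Result<()>`):
+        //
+        //   if !self.stack.is_empty() {
+        //       return Err(io::Error::new(
+        //           io::ErrorKind::UnexpectedEof,
+        //           "unexpected end of input, bencode value is incomplete",
+        //       ));
+        //   }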
+ + Ok(()) + } + + fn process_string_length_byte(&mut self, byte: u8) { + self.string_parser.add_length_byte(byte); + } + + fn process_string_value_byte(&mut self, byte: u8) { + self.string_parser.add_byte(byte); + + if self.string_parser.has_finished_capturing_bytes() { + // We have finishing capturing the string bytes + + self.json.push_str(&self.string_parser.json()); + + // We have finished parsing the string + self.stack.pop(); + self.check_first_list_item(); + self.check_end_dictionary_key(); + } + } + + #[allow(clippy::single_match)] + fn check_first_list_item(&mut self) { + match self.stack.last() { + Some(state) => match state { + Parsing::List(parsing_list) => match parsing_list { + ParsingList::FirstItem => { + self.stack.pop(); + self.stack.push(Parsing::List(ParsingList::NextItem)); + } + ParsingList::NextItem => {} + }, + Parsing::Integer => {} + Parsing::String(_parsing_string) => {} + Parsing::Dictionary(_parsing_dictionary) => {} + }, + None => {} + } + } + + #[allow(clippy::single_match)] + #[allow(clippy::match_same_arms)] + fn check_end_dictionary_key(&mut self) { + match self.stack.last() { + Some(state) => match state { + Parsing::Integer => {} + Parsing::String(_) => {} + Parsing::List(_) => {} + Parsing::Dictionary(parsing_dictionary) => match parsing_dictionary { + ParsingDictionary::Start => {} + ParsingDictionary::FirstKeyValuePair(parsing_first_key_value_pair) => { + match parsing_first_key_value_pair { + ParsingKeyValuePair::Key => { + self.stack.pop(); + self.stack.push(Parsing::Dictionary( + ParsingDictionary::FirstKeyValuePair( + ParsingKeyValuePair::Value, + ), + )); + self.json.push(':'); + } + ParsingKeyValuePair::Value => {} + } + } + ParsingDictionary::NextKeyValuePair(parsing_next_key_value_pair) => { + match parsing_next_key_value_pair { + ParsingKeyValuePair::Key => { + self.stack.pop(); + self.stack.push(Parsing::Dictionary( + ParsingDictionary::NextKeyValuePair(ParsingKeyValuePair::Value), + )); + self.json.push(':'); + } + ParsingKeyValuePair::Value => {} + } + } + }, + }, + None => {} + } + } + + // todo: check end dictionary key/value first pair + + #[allow(clippy::single_match)] + #[allow(clippy::match_same_arms)] + fn check_end_first_key_value_pair_in_dictionary(&mut self) { + match self.stack.last() { + Some(state) => match state { + Parsing::Integer => {} + Parsing::String(_) => {} + Parsing::List(_) => {} + Parsing::Dictionary(parsing_dictionary) => match parsing_dictionary { + ParsingDictionary::Start => {} + ParsingDictionary::FirstKeyValuePair(parsing_first_key_value_pair) => { + match parsing_first_key_value_pair { + ParsingKeyValuePair::Key => {} + ParsingKeyValuePair::Value => { + self.stack.pop(); + self.stack.push(Parsing::Dictionary( + ParsingDictionary::NextKeyValuePair(ParsingKeyValuePair::Key), + )); + } + } + } + ParsingDictionary::NextKeyValuePair(parsing_next_key_value_pair) => { + match parsing_next_key_value_pair { + ParsingKeyValuePair::Key => {} + ParsingKeyValuePair::Value => { + self.stack.pop(); + /*self.stack.push(State::ParsingDictionary( + ParsingDictionary::NextKeyValuePair(ParsingKeyValuePair::Key), + ));*/ + } + } + } + }, + }, + None => {} + } + } + + #[allow(clippy::single_match)] + #[allow(clippy::match_same_arms)] + fn check_end_next_key_value_pair_in_dictionary(&mut self) { + match self.stack.last() { + Some(state) => match state { + Parsing::Integer => {} + Parsing::String(_) => {} + Parsing::List(_) => {} + Parsing::Dictionary(parsing_dictionary) => match parsing_dictionary { + ParsingDictionary::Start => 
{}
+                ParsingDictionary::FirstKeyValuePair(parsing_first_key_value_pair) => {
+                    match parsing_first_key_value_pair {
+                        ParsingKeyValuePair::Key => {}
+                        ParsingKeyValuePair::Value => {
+                            self.stack.pop();
+                            self.stack.push(Parsing::Dictionary(
+                                ParsingDictionary::NextKeyValuePair(ParsingKeyValuePair::Key),
+                            ));
+                        }
+                    }
+                }
+                ParsingDictionary::NextKeyValuePair(parsing_next_key_value_pair) => {
+                    match parsing_next_key_value_pair {
+                        ParsingKeyValuePair::Key => {}
+                        ParsingKeyValuePair::Value => {
+                            self.stack.pop();
+                            /*self.stack.push(State::ParsingDictionary(
+                                ParsingDictionary::NextKeyValuePair(ParsingKeyValuePair::Key),
+                            ));*/
+                        }
+                    }
+                }
+            },
+        },
+            None => {}
+        }
+    }
+
+    fn read_byte(&mut self) -> io::Result<u8> {
+        let mut byte = [0; 1];
+
+        self.reader.read_exact(&mut byte)?;
+
+        self.pos += 1;
+
+        let byte = byte[0];
+
+        if let Some(ref mut captured_input) = self.captured_input {
+            captured_input.push(byte);
+        }
+
+        Ok(byte)
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    mod converting_bencode_to_json {
-        use crate::try_bencode_to_json;
+    mod integers {
+        use crate::BencodeParser;
+
+        #[test]
+        fn integer() {
+            let data = b"i42e";
+            let mut parser = BencodeParser::new(&data[..]);
+            parser.parse().unwrap();
+            assert_eq!(parser.json, "42".to_string());
+        }
+
+        // todo: all encodings with a leading zero, such as i03e, are invalid, other
+        // than i0e, which of course corresponds to 0.
+    }
+
+    mod strings {
+        use crate::BencodeParser;
+
+        /* todo:
+            - Strings with size 0 (empty strings) are allowed: b"0:"
+            - String ending with a reserved char: 'i', 'l', 'd', ':', 'e'
+            - String ending with a digit
+        */
 
         #[test]
-        fn when_it_succeeds() {
-            let result = try_bencode_to_json(b"d4:spam4:eggse").unwrap();
+        fn utf8() {
+            let data = b"4:spam";
+
+            let mut parser = BencodeParser::new(&data[..]);
+            parser.parse().unwrap();
 
-            assert_eq!(
-                result,
-                r#"{"spam":"eggs"}"#
-            );
+            assert_eq!(parser.json, "\"spam\"".to_string());
         }
 
         #[test]
-        fn when_it_fails() {
-            let result = try_bencode_to_json(b"invalid bencode value");
+        fn non_utf8() {
+            let data = b"4:\xFF\xFE\xFD\xFC";
 
-            assert!(result.is_err());
+            let mut parser = BencodeParser::new(&data[..]);
+            parser.parse().unwrap();
+
+            assert_eq!(parser.json, "\"fffefdfc\"".to_string());
         }
+
+        /* todo:
+            - String containing special chars: 'i', ':', 'l', 'd', 'e'. The
+              bencoded string can contain reserved chars in bencode format.
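+
+            For instance, such a test could look like this (an editor's
+            sketch in the existing test style; not added yet):
+
+                let data = b"8:li42e:de"; // the value is made only of reserved chars
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+                assert_eq!(parser.json, "\"li42e:de\"".to_string());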
+        */
     }
 
-    mod converting_string_to_bencode {
-        use crate::to_bencode;
+    mod lists {
+        use crate::BencodeParser;
 
         #[test]
-        fn empty_string() {
-            assert_eq!(to_bencode(r""), b"0:");
+        fn empty_list() {
+            let data = b"le";
+
+            let mut parser = BencodeParser::new(&data[..]);
+            parser.parse().unwrap();
+
+            assert_eq!(parser.json, "[]".to_string());
         }
 
+        mod with_one_item {
+            use crate::BencodeParser;
+
+            #[test]
+            fn integer() {
+                let data = b"li42ee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[42]".to_string());
+            }
+
+            #[test]
+            fn utf8_string() {
+                // List with one UTF-8 string: l4:spame
+                // 1 2 3 4 5 6 7 8 (pos)
+                // l 4 : s p a m e (byte)
+                // 108 52 58 115 112 97 109 101 (byte decimal)
+
+                let data = b"l4:spame";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[\"spam\"]".to_string());
+            }
+
+            #[test]
+            fn non_utf8_string() {
+                // List with one non UTF-8 string: l4:\xFF\xFE\xFD\xFCe
+                // 1 2 3 4 5 6 7 8 (pos)
+                // l 4 : xFF xFE xFD xFC e (byte)
+                // 108 52 58 255 254 253 252 101 (byte decimal)
+
+                let data = b"l4:\xFF\xFE\xFD\xFCe";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[\"fffefdfc\"]".to_string());
+            }
+
+            mod of_type_list {
+                use crate::BencodeParser;
+
+                #[test]
+                fn nested_empty_list() {
+                    // List with one empty list: llee
+                    // 1 2 3 4 (pos)
+                    // l l e e (byte)
+                    // 108 108 101 101 (byte decimal)
+
+                    let data = b"llee";
+
+                    let mut parser = BencodeParser::new(&data[..]);
+                    parser.parse().unwrap();
+
+                    assert_eq!(parser.json, "[[]]".to_string());
+                }
+
+                #[test]
+                fn two_nested_empty_lists() {
+                    // List with two nested empty lists: llleee
+                    // 1 2 3 4 5 6 (pos)
+                    // l l l e e e (byte)
+                    // 108 108 108 101 101 101 (byte decimal)
+
+                    let data = b"llleee";
+
+                    let mut parser = BencodeParser::new(&data[..]);
+                    parser.parse().unwrap();
+
+                    assert_eq!(parser.json, "[[[]]]".to_string());
+                }
+
+                #[test]
+                fn nested_list_with_integer() {
+                    // List with a nested list that contains an integer: lli42eee
+                    // 1 2 3 4 5 6 7 8 (pos)
+                    // l l i 4 2 e e e (byte)
+                    // 108 108 105 52 50 101 101 101 (byte decimal)
+
+                    let data = b"lli42eee";
+
+                    let mut parser = BencodeParser::new(&data[..]);
+                    parser.parse().unwrap();
+
+                    assert_eq!(parser.json, "[[42]]".to_string());
+                }
+
+                /* todo:
+                    - Nested list with UTF-8 string
+                    - Nested list with non UTF-8 string
+
+                    - Two nested lists with one integer each
+                    - Two nested lists with one UTF-8 string each
+                    - Two nested lists with one non UTF-8 string each
+                */
+            }
+
+            /* todo:
+                - With one dictionary
+            */
+        }
+
+        mod with_two_items_of_the_same_type {
+            use crate::BencodeParser;
+
+            #[test]
+            fn two_integers() {
+                // List with two integers: li42ei43ee
+                // 1 2 3 4 5 6 7 8 9 10 (pos)
+                // l i 4 2 e i 4 3 e e (byte)
+                // 108 105 52 50 101 105 52 51 101 101 (byte decimal)
+
+                let data = b"li42ei43ee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[42,43]".to_string());
+            }
+
+            #[test]
+            fn two_utf8_strings() {
+                // List with two UTF-8 strings: l5:alice3:bobe
+                // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 (pos)
+                // l 5 : a l i c e 3 : b o b e (byte)
+                // 108 53 58 97 108 105 99 101 51 58 98 111 98 101 (byte decimal)
+
+                let data = b"l5:alice3:bobe";
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[\"alice\",\"bob\"]".to_string());
+            }
+
+            #[test]
+            fn two_non_utf8_strings() {
+                // List with two non UTF-8 strings: l2:\xFF\xFE2:\xFD\xFCe
+                // 1 2 3 4 5 6 7 8 9 10 (pos)
+                // l 2 : xFF xFE 2 : xFD xFC e (byte)
+                // 108 50 58 255 254 50 58 253 252 101 (byte decimal)
+
+                let data = b"l2:\xFF\xFE2:\xFD\xFCe";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(
+                    parser.json,
+                    "[\"fffe\",\"fdfc\"]".to_string()
+                );
+            }
+        }
+
+        mod with_two_items_of_different_types {
+            use crate::BencodeParser;
+
+            #[test]
+            fn integer_and_utf8_string() {
+                // List with an integer and a UTF-8 string: li42e5:alicee
+                // 1 2 3 4 5 6 7 8 9 10 11 12 13 (pos)
+                // l i 4 2 e 5 : a l i c e e (byte)
+                // 108 105 52 50 101 53 58 97 108 105 99 101 101 (byte decimal)
+
+                let data = b"li42e5:alicee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[42,\"alice\"]".to_string());
+            }
+
+            #[test]
+            fn integer_and_non_utf8_string() {
+                // List with an integer and a non UTF-8 string: li42e2:\xFF\xFEe
+                // 1 2 3 4 5 6 7 8 9 10 (pos)
+                // l i 4 2 e 2 : xFF xFE e (byte)
+                // 108 105 52 50 101 50 58 255 254 101 (byte decimal)
+
+                let data = b"li42e2:\xFF\xFEe";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[42,\"fffe\"]".to_string());
+            }
+
+            #[test]
+            fn utf8_string_and_integer() {
+                // List with a UTF-8 string and an integer: l5:alicei42ee
+                // 1 2 3 4 5 6 7 8 9 10 11 12 13 (pos)
+                // l 5 : a l i c e i 4 2 e e (byte)
+                // 108 53 58 97 108 105 99 101 105 52 50 101 101 (byte decimal)
+
+                let data = b"l5:alicei42ee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[\"alice\",42]".to_string());
+            }
+
+            #[test]
+            fn non_utf8_string_and_an_integer() {
+                // List with a non UTF-8 string and an integer: l2:\xFF\xFEi42ee
+                // 1 2 3 4 5 6 7 8 9 10 (pos)
+                // l 2 : xFF xFE i 4 2 e e (byte)
+                // 108 50 58 255 254 105 52 50 101 101 (byte decimal)
+
+                let data = b"l2:\xFF\xFEi42ee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "[\"fffe\",42]".to_string());
+            }
+
+            /* todo:
+                - Integer and list
+                - Integer and dictionary
+
+                - UTF-8 string and list
+                - UTF-8 string and dictionary
+
+                - Non UTF-8 string and list
+                - Non UTF-8 string and dictionary
+            */
+        }
+    }
+
+    mod dictionary {
+        use crate::BencodeParser;
+
+        // Note: Keys must be bencoded strings.
+
+        /* todo:
+
+            Valid cases:
+
+            - A key whose value starts with a digit.
+            - A key with a non UTF-8 value:
+                Bencode: d2:\xFF\xFEi42ee
+                JSON: {"fffe": 42}
+
+            Error cases:
+
+            - A dictionary key can't be an integer.
+            - A dictionary with one key/value pair, but only the key without the value.
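+
+            For the non UTF-8 key case above, a test could look like this (an
+            editor's sketch in the existing test style; the parser does not
+            support it yet):
+
+                let data = b"d2:\xFF\xFEi42ee";
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+                assert_eq!(parser.json, "{\"fffe\":42}".to_string());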
+        */
+
         #[test]
-        fn non_empty_string() {
-            assert_eq!(to_bencode(r"alice"), b"5:alice");
+        fn empty_dictionary() {
+            let data = b"de";
+
+            let mut parser = BencodeParser::new(&data[..]);
+            parser.parse().unwrap();
+
+            assert_eq!(parser.json, "{}".to_string());
         }
 
-        mod string_with_special_chars {
-            use crate::to_bencode;
+        mod with_one_key_of_type {
+            use crate::BencodeParser;
 
             #[test]
-            fn line_break() {
-                assert_eq!(to_bencode(r"alice\n"), b"7:alice\x5C\x6E");
+            fn integer() {
+                let data = b"d3:fooi42ee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "{\"foo\":42}".to_string());
             }
 
             #[test]
-            fn utf8_chars() {
-                let word = "ñandú";
+            fn utf8_string() {
+                let data = b"d3:bar4:spame";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "{\"bar\":\"spam\"}".to_string());
+            }
+
+            #[test]
+            fn non_utf8_string() {
+                let data = b"d3:bar2:\xFF\xFEe";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "{\"bar\":\"fffe\"}".to_string());
+            }
+        }
+
+        mod with_two_keys_of_the_same_type {
+            use crate::BencodeParser;
+
+            #[test]
+            fn two_integers() {
+                // Dictionary with two integers: d3:bari42e3:fooi43ee
+                // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 (pos)
+                // d 3 : b a r i 4 2 e 3 : f o o i 4 3 e e (byte)
+                // 100 51 58 98 97 114 105 52 50 101 51 58 102 111 111 105 52 51 101 101 (byte decimal)
+
+                let data = b"d3:bari42e3:fooi43ee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(parser.json, "{\"bar\":42,\"foo\":43}".to_string());
+            }
+
+            #[test]
+            #[ignore]
+            fn two_utf8_strings() {
+                // Dictionary with two UTF-8 strings: d3:bar4:spam3:foo5:alicee
+                // 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 (pos)
+                // d 3 : b a r 4 : s p a m 3 : f o o 5 : a l i c e e (byte)
+                // 100 51 58 98 97 114 52 58 115 112 97 109 51 58 102 111 111 53 58 97 108 105 99 101 101 (byte decimal)
+
+                let data = b"d3:bar4:spam3:foo5:alicee";
+
+                let mut parser = BencodeParser::new(&data[..]);
+                parser.parse().unwrap();
+
+                assert_eq!(
+                    parser.json,
+                    "{\"bar\":\"spam\",\"foo\":\"alice\"}".to_string()
+                );
+            }
+        }
     }
 }
diff --git a/src/main.rs b/src/main.rs
index e7e5b9a..f6036f3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,31 +1,12 @@
-//! Converts Bencode to JSON.
-//!
-//! Usage:
-//!
-//! Using stdin and stdout:
-//!
-//! ```text
-//! echo "i42e" | cargo run
-//! ```
-//!
-//! Using files:
-//!
-//! ```text
-//! cargo run -- -i ./tests/fixtures/sample.bencode -o output.json
-//! ```
-use bencode2json::generators::json::Generator;
 use clap::{Arg, Command};
 use std::fs::File;
 use std::io::{self, Read, Write};
+use torrust_bencode2json::BencodeParser;
 
-fn main() {
-    run();
-}
-
-fn run() {
-    let matches = Command::new("bencode2json")
+fn main() -> io::Result<()> {
+    let matches = Command::new("torrust-bencode2json")
         .version("0.1.0")
         .author("Torrust Organization")
         .about("Converts Bencode to JSON")
         .arg(
             Arg::new("input")
@@ -45,13 +26,7 @@
 
     // Handle input stream (file or stdin)
     let input: Box<dyn Read> = if let Some(input_path) = matches.get_one::<String>("input") {
-        match File::open(input_path) {
-            Ok(file) => Box::new(file),
-            Err(e) => {
-                eprintln!("Error: {e}");
-                std::process::exit(1);
-            }
-        }
+        Box::new(File::open(input_path)?)
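+        // Note: since `main` now returns `io::Result<()>`, a failed
+        // `File::open` propagates with `?` and the process exits with a
+        // non-zero status, replacing the previous `eprintln!` +
+        // `std::process::exit(1)` handling.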
 } else {
     Box::new(io::stdin())
 };
@@ -59,19 +34,17 @@
 
     // Handle output stream (file or stdout)
     let mut output: Box<dyn Write> = if let Some(output_path) = matches.get_one::<String>("output") {
-        match File::create(output_path) {
-            Ok(file) => Box::new(file),
-            Err(e) => {
-                eprintln!("Error: {e}");
-                std::process::exit(1);
-            }
-        }
+        Box::new(File::create(output_path)?)
     } else {
         Box::new(io::stdout())
     };
 
-    if let Err(e) = Generator::new(input).write_bytes(&mut output) {
-        eprintln!("Error: {e}");
-        std::process::exit(1);
-    }
+    let mut parser = BencodeParser::new(input);
+
+    parser.parse()?;
+
+    // Write the JSON value to the output
+    writeln!(output, "{}", parser.json)?;
+
+    Ok(())
 }
diff --git a/src/rw/byte_reader.rs b/src/rw/byte_reader.rs
deleted file mode 100644
index 9b1d885..0000000
--- a/src/rw/byte_reader.rs
+++ /dev/null
@@ -1,238 +0,0 @@
-//! A reader that reads bytes from an input.
-//!
-//! The input is any type that implements the `std::io::Read` trait.
-use std::io::BufReader;
-use std::io::Error;
-use std::io::Read;
-
-use ringbuffer::AllocRingBuffer;
-use ringbuffer::RingBuffer;
-
-/// A reader that reads bytes from an input.
-///
-/// It's a wrapper of a basic reader with extra functionality.
-pub struct ByteReader<R: Read> {
-    /// It's a buffered reader.
-    reader: BufReader<R>,
-
-    /// Number of bytes read from the input.
-    input_byte_counter: u64,
-
-    /// The peeked byte when we peek instead of reading.
-    peeked_byte: Option<u8>,
-
-    /// The last byte read from the input.
-    last_byte: Option<u8>,
-
-    /// A buffer to capture the latest bytes read from the input.
-    captured_bytes: AllocRingBuffer<u8>,
-}
-
-impl<R: Read> ByteReader<R> {
-    pub fn new(reader: R) -> Self {
-        Self {
-            reader: BufReader::new(reader),
-            input_byte_counter: 0,
-            peeked_byte: None,
-            last_byte: None,
-            captured_bytes: AllocRingBuffer::new(1024),
-        }
-    }
-
-    /// It reads one byte from the input.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if it can't read the byte from the input.
-    pub fn read_byte(&mut self) -> Result<u8, Error> {
-        if let Some(byte) = self.peeked_byte.take() {
-            return Ok(byte);
-        }
-
-        let mut byte = [0; 1];
-
-        self.reader.read_exact(&mut byte)?;
-
-        self.input_byte_counter += 1;
-
-        let byte = byte[0];
-
-        self.last_byte = Some(byte);
-        self.captured_bytes.push(byte);
-
-        Ok(byte)
-    }
-
-    /// Peeks at the next byte in the input without consuming it.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if it can't read the byte from the input.
-    pub fn peek_byte(&mut self) -> Result<u8, Error> {
-        let byte = if let Some(byte) = self.peeked_byte {
-            byte
-        } else {
-            let byte = self.read_byte()?;
-            self.peeked_byte = Some(byte);
-            byte
-        };
-
-        Ok(byte)
-    }
-
-    /// Returns the number of bytes that have been read from the input.
-    pub fn input_byte_counter(&self) -> u64 {
-        self.input_byte_counter
-    }
-
-    /// Returns a copy of the bytes that have been read from the input.
-    pub fn captured_bytes(&self) -> Vec<u8> {
-        self.captured_bytes.to_vec()
-    }
-
-    /// Returns the last byte that was read from the input.
- pub fn last_byte(&self) -> Option { - self.last_byte - } -} - -#[cfg(test)] -mod tests { - - mod for_reading { - use crate::rw::byte_reader::ByteReader; - - #[test] - fn it_should_read_one_byte_from_the_input_consuming_it() { - let input = vec![b'l', b'e']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.read_byte().unwrap(), b'l'); - assert_eq!(byte_reader.read_byte().unwrap(), b'e'); - } - - #[test] - fn it_should_fail_when_there_are_no_more_bytes_to_read() { - let input = vec![b'l', b'e']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.read_byte().unwrap(), b'l'); - assert_eq!(byte_reader.read_byte().unwrap(), b'e'); - assert!(byte_reader.read_byte().is_err()); - } - - #[test] - fn it_should_increase_the_input_byte_counter_by_one_when_reading_a_new_byte() { - let input = vec![b'l']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.read_byte().unwrap(), b'l'); - assert_eq!(byte_reader.input_byte_counter(), 1); - } - - #[test] - fn it_should_return_the_last_read_byte() { - let input = vec![b'l', b'e']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - byte_reader.read_byte().unwrap(); - byte_reader.read_byte().unwrap(); - - assert_eq!(byte_reader.last_byte(), Some(b'e')); - } - } - - mod for_peeking { - use crate::rw::byte_reader::ByteReader; - - #[test] - fn it_should_allow_peeking_one_byte_from_the_input_without_consuming_it() { - let input = vec![b'l']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); - } - - #[test] - fn when_reading_a_byte_it_should_use_a_peeked_one_if_there_is() { - let input = vec![b'l']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); - assert_eq!(byte_reader.read_byte().unwrap(), b'l'); - } - - #[test] - fn when_reading_a_byte_it_should_use_a_peeked_one_and_discard_it_after_using_it() { - let input = vec![b'l']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); // It peeks - assert_eq!(byte_reader.read_byte().unwrap(), b'l'); // It uses the previously peeked byte - assert!(byte_reader.peek_byte().is_err()); // There are no more bytes to peek - } - - #[test] - fn it_should_increase_the_input_byte_counter_the_first_time_it_peeks_a_new_byte() { - let input = vec![b'l']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); - assert_eq!(byte_reader.input_byte_counter(), 1); - } - - #[test] - fn it_should_not_increase_the_input_byte_counter_when_peeking_a_cached_peeked_byte() { - let input = vec![b'l']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - // It peeks the first time - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); - assert_eq!(byte_reader.input_byte_counter(), 1); - - // It peeks the second time - assert_eq!(byte_reader.peek_byte().unwrap(), b'l'); - assert_eq!(byte_reader.input_byte_counter(), 1); - } - } - - mod for_capturing { - use crate::rw::byte_reader::ByteReader; - - #[test] - fn it_should_capture_the_latest_read_byte() { - let input = vec![b'a']; - - let mut byte_reader = ByteReader::new(input.as_slice()); - - byte_reader.read_byte().unwrap(); - - assert_eq!(byte_reader.captured_bytes(), input); - } - - #[test] - fn it_should_capture_1024_bytes_at_the_most() { - let mut part1 = 
vec![b'a'; 1024]; - let part2 = vec![b'b'; 1024]; - part1.extend_from_slice(&part2); - - let mut byte_reader = ByteReader::new(part1.as_slice()); - - for _i in 1..=1024 * 2 { - byte_reader.read_byte().unwrap(); - } - - assert_eq!(byte_reader.captured_bytes(), part2); - } - } -} diff --git a/src/rw/byte_writer.rs b/src/rw/byte_writer.rs deleted file mode 100644 index c5ef312..0000000 --- a/src/rw/byte_writer.rs +++ /dev/null @@ -1,186 +0,0 @@ -//! A writer that writes bytes to an output. -//! -//! The output is any type that implements the `std::io::Write` trait. -use core::str; -use std::io::BufWriter; -use std::io::Write; - -use ringbuffer::AllocRingBuffer; -use ringbuffer::RingBuffer; - -use super::{error::Error, writer::Writer}; - -/// A writer that writes to an output implementing `std::io::Write`. -/// -/// It's wrapper of a basic writer with extra functionality. -pub struct ByteWriter { - /// It's a buffered writer. - writer: BufWriter, - - /// Number of bytes written to the output. - output_byte_counter: u64, - - /// The last byte written to the output. - last_byte: Option, - - /// A buffer to capture the latest bytes written to the output. - captured_bytes: AllocRingBuffer, -} - -impl ByteWriter { - pub fn new(writer: W) -> Self { - Self { - output_byte_counter: 0, - writer: BufWriter::new(writer), - last_byte: None, - captured_bytes: AllocRingBuffer::new(1024), - } - } - - /// Returns the number of bytes that have been written to the output. - pub fn output_byte_counter(&self) -> u64 { - self.output_byte_counter - } - - /// Returns a copy of the bytes that have been written to the output. - pub fn captured_bytes(&self) -> Vec { - self.captured_bytes.to_vec() - } - - /// Returns the last byte that was written to the output. - pub fn last_byte(&self) -> Option { - self.last_byte - } -} - -impl Writer for ByteWriter { - fn write_byte(&mut self, byte: u8) -> Result<(), Error> { - let bytes = [byte]; - - self.writer.write_all(&bytes)?; - - self.output_byte_counter += 1; - - self.last_byte = Some(byte); - - self.captured_bytes.push(byte); - - Ok(()) - } - - fn write_str(&mut self, value: &str) -> Result<(), Error> { - for byte in value.bytes() { - self.write_byte(byte)?; - } - - Ok(()) - } - - fn output_byte_counter(&self) -> u64 { - self.output_byte_counter - } - - fn captured_bytes(&self) -> Vec { - self.captured_bytes() - } -} - -#[cfg(test)] -mod tests { - - mod for_writing { - use crate::rw::{byte_writer::ByteWriter, writer::Writer}; - - #[test] - fn it_should_write_one_byte_to_the_output() { - let mut output = Vec::new(); - - let mut byte_writer = ByteWriter::new(&mut output); - - byte_writer.write_byte(b'l').unwrap(); - - drop(byte_writer); - - assert_eq!(output, vec![b'l']); - } - - #[test] - fn it_should_increase_the_output_byte_counter_by_one_after_writing_a_new_byte() { - let mut output = Vec::new(); - - let mut byte_writer = ByteWriter::new(&mut output); - - byte_writer.write_byte(b'l').unwrap(); - - assert_eq!(byte_writer.output_byte_counter(), 1); - } - - #[test] - fn it_should_write_strings_bytes_to_the_output() { - let mut output = Vec::new(); - - let mut byte_writer = ByteWriter::new(&mut output); - - byte_writer.write_str("l").unwrap(); - - drop(byte_writer); - - assert_eq!(output, vec![b'l']); - } - - #[test] - fn it_should_increase_the_output_byte_counter_by_the_string_len_after_writing_a_string() { - let mut output = Vec::new(); - - let mut byte_writer = ByteWriter::new(&mut output); - - byte_writer.write_str("le").unwrap(); - - 
assert_eq!(byte_writer.output_byte_counter(), 2); - } - } - - mod for_capturing { - - use crate::rw::{byte_writer::ByteWriter, writer::Writer}; - - #[test] - fn it_should_return_the_last_written_byte() { - let mut output = Vec::new(); - - let mut byte_writer = ByteWriter::new(&mut output); - - byte_writer.write_byte(b'l').unwrap(); - - assert_eq!(byte_writer.last_byte(), Some(b'l')); - } - - #[test] - fn it_should_capture_the_latest_written_bytes() { - let mut output = Vec::new(); - - let mut byte_writer = ByteWriter::new(&mut output); - - byte_writer.write_byte(b'l').unwrap(); - - assert_eq!(byte_writer.captured_bytes(), vec![b'l']); - } - - #[test] - fn it_should_capture_1024_bytes_at_the_most() { - let mut output = Vec::new(); - - let mut data = vec![b'a'; 1024]; - let last_kilobyte = vec![b'b'; 1024]; - data.extend_from_slice(&last_kilobyte); - - let mut byte_writer = ByteWriter::new(&mut output); - - for byte in data { - byte_writer.write_byte(byte).unwrap(); - } - - assert_eq!(byte_writer.captured_bytes(), last_kilobyte); - } - } -} diff --git a/src/rw/error.rs b/src/rw/error.rs deleted file mode 100644 index 6d521ca..0000000 --- a/src/rw/error.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Custom error type for both I/O and formatting strings errors. -use std::{fmt, io}; -use thiserror::Error; - -/// Custom error type for both I/O and formatting errors. -#[derive(Debug, Error)] -pub enum Error { - #[error("I/O error: {0}")] - Io(#[from] io::Error), - - #[error("Formatting error: {0}")] - Fmt(#[from] fmt::Error), -} diff --git a/src/rw/mod.rs b/src/rw/mod.rs deleted file mode 100644 index 0bbe4fd..0000000 --- a/src/rw/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -//! Read and write operations. -pub mod byte_reader; -pub mod byte_writer; -pub mod error; -pub mod string_writer; -pub mod writer; diff --git a/src/rw/string_writer.rs b/src/rw/string_writer.rs deleted file mode 100644 index f2fcf70..0000000 --- a/src/rw/string_writer.rs +++ /dev/null @@ -1,197 +0,0 @@ -//! A writer that writes to an output implementing `std::fmt::Write`. -use core::str; -use std::fmt::Write; - -use ringbuffer::{AllocRingBuffer, RingBuffer}; - -use super::{error::Error, writer::Writer}; - -/// A writer that writes to an output implementing `std::fmt::Write`. -/// -/// It's wrapper of a basic writer with extra functionality. -pub struct StringWriter { - /// A `std::fmt::Write` writer. - writer: W, - - /// Number of bytes written to the output. - output_byte_counter: u64, - - /// The last byte written to the output. - last_char: Option, - - /// A buffer to capture the latest bytes written to the output. - captured_chars: AllocRingBuffer, -} - -impl StringWriter { - pub fn new(writer: W) -> Self { - Self { - writer, - output_byte_counter: 0, - - last_char: None, - captured_chars: AllocRingBuffer::new(1024), - } - } - - /// Returns the number of bytes that have been written to the output. - pub fn output_byte_counter(&self) -> u64 { - self.output_byte_counter - } - - /// Returns a copy of the bytes that have been written to the output. - pub fn captured_chars(&self) -> Vec { - self.captured_chars.to_vec() - } - - /// Returns the last byte that was written to the output. 
- pub fn last_byte(&self) -> Option { - self.last_char - } -} - -impl Writer for StringWriter { - fn write_byte(&mut self, byte: u8) -> Result<(), Error> { - let c = byte as char; - - self.writer.write_char(c)?; - - self.output_byte_counter += 1; - - self.last_char = Some(c); - - self.captured_chars.push(c); - - Ok(()) - } - - fn write_str(&mut self, value: &str) -> Result<(), Error> { - self.writer.write_str(value)?; - - self.output_byte_counter += value.len() as u64; - - if let Some(last_char) = value.chars().last() { - self.last_char = Some(last_char); - } - - for c in value.chars() { - self.captured_chars.push(c); - } - - Ok(()) - } - - fn output_byte_counter(&self) -> u64 { - self.output_byte_counter - } - - fn captured_bytes(&self) -> Vec { - self.captured_chars() - .into_iter() - .flat_map(|ch| { - let mut buf = [0; 4]; - ch.encode_utf8(&mut buf).as_bytes().to_vec() - }) - .collect() - } -} - -#[cfg(test)] -mod tests { - - mod for_writing { - use crate::rw::{string_writer::StringWriter, writer::Writer}; - - #[test] - fn it_should_write_one_byte_to_the_output() { - let mut output = String::new(); - - let mut string_writer = StringWriter::new(&mut output); - - string_writer.write_byte(b'l').unwrap(); - - drop(string_writer); - - assert_eq!(output, "l"); - } - - #[test] - fn it_should_increase_the_output_byte_counter_by_one_after_writing_a_new_byte() { - let mut output = String::new(); - - let mut string_writer = StringWriter::new(&mut output); - - string_writer.write_byte(b'l').unwrap(); - - assert_eq!(string_writer.output_byte_counter(), 1); - } - - #[test] - fn it_should_write_strings_to_the_output() { - let mut output = String::new(); - - let mut string_writer = StringWriter::new(&mut output); - - string_writer.write_str("le").unwrap(); - - drop(string_writer); - - assert_eq!(output, "le"); - } - - #[test] - fn it_should_increase_the_output_byte_counter_by_the_string_len_after_writing_a_string() { - let mut output = String::new(); - - let mut string_writer = StringWriter::new(&mut output); - - string_writer.write_str("le").unwrap(); - - assert_eq!(string_writer.output_byte_counter(), 2); - } - } - - mod for_capturing { - - use crate::rw::{string_writer::StringWriter, writer::Writer}; - - #[test] - fn it_should_return_the_last_written_char() { - let mut output = String::new(); - - let mut string_writer = StringWriter::new(&mut output); - - string_writer.write_byte(b'l').unwrap(); - - assert_eq!(string_writer.last_byte(), Some('l')); - } - - #[test] - fn it_should_capture_the_latest_written_char() { - let mut output = String::new(); - - let mut string_writer = StringWriter::new(&mut output); - - string_writer.write_byte(b'l').unwrap(); - - assert_eq!(string_writer.captured_chars(), vec!['l']); - } - - #[test] - fn it_should_capture_1024_chars_at_the_most() { - let mut output = String::new(); - - let mut data = vec!['a'; 1024]; - let latest_104_chars = vec!['b'; 1024]; - data.extend_from_slice(&latest_104_chars); - - let mut string_writer = StringWriter::new(&mut output); - - for c in data { - string_writer.write_str(&c.to_string()).unwrap(); - } - - assert_eq!(string_writer.captured_chars(), latest_104_chars); - } - } -} diff --git a/src/rw/writer.rs b/src/rw/writer.rs deleted file mode 100644 index ebd9eb1..0000000 --- a/src/rw/writer.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! This module contains the `Writer` trait. 
- -/* code-review: - - The function `write_byte` only writes: - - - Bytes used in integers: - - Digits: '0','1','2','3','4','5','6','7','8','9' - - 'e', '-' - - JSON reservers chars: '[', ',', ']', '{', ',', ':', '}' defined as constants. - - It could be refactored to be more restrictive. However, in the future we also - want to print Bencoded strings as bytes streams, without trying to convert - them into UTF-8 strings. -*/ - -use super::error::Error; - -pub trait Writer { - /// It writes one byte to the output. - /// - /// # Errors - /// - /// Will return an error if it can't write the byte. - fn write_byte(&mut self, byte: u8) -> Result<(), Error>; - - /// It writes a string to the output. - /// - /// # Errors - /// - /// Will return an error if it can't write the string. - fn write_str(&mut self, value: &str) -> Result<(), Error>; - - /// It return the number of bytes that have been written to the output. - fn output_byte_counter(&self) -> u64; - - /// It returns a copy of the latest bytes that have been written to the - /// output. - fn captured_bytes(&self) -> Vec; -} diff --git a/src/test.rs b/src/test.rs deleted file mode 100644 index fef1001..0000000 --- a/src/test.rs +++ /dev/null @@ -1,143 +0,0 @@ -//! Helpers for testing. - -/// It converts bencoded bytes into a JSON string. -/// -/// # Panics -/// -/// Will panic if the conversion fails. -#[cfg(test)] -#[must_use] -pub(crate) fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String { - use crate::generators::json::Generator; - - let mut output = String::new(); - - let mut parser = Generator::new(input_buffer); - - parser - .write_str(&mut output) - .expect("Bencode to JSON conversion failed"); - - output -} - -/// Generates a vector of bytes representing `n` nested empty Bencode -/// lists. -/// -/// This function is a helper for generating a Bencode representation -/// of a JSON array with `n` nested empty arrays. It repeats the Bencode -/// list opener `l` character `n` times, and the Bencode list closer -/// `e` character `n` times, to create the desired nested structure. -/// -/// # Examples -/// -/// ```rust -/// let nested_bencode = generate_n_nested_empty_bencoded_lists(2); -/// assert_eq!(nested_bencode, b"llee"); -/// ``` -#[cfg(test)] -pub(crate) fn generate_n_nested_empty_bencoded_lists(n: usize) -> Vec { - let mut bencode_value = vec![b'l'; n]; - bencode_value.extend_from_slice(&vec![b'e'; n]); - bencode_value -} - -/// Generates a JSON array with `n` nested empty arrays. -/// -/// This function is a helper for generating a JSON array with a -/// specific number of nested empty arrays. It repeats the opening `[` -/// character `n` times, and the closing `]` character `n` times, to -/// create the desired nested structure. -/// -/// # Examples -/// -/// ```rust -/// let nested_json = generate_n_nested_empty_json_arrays(2); -/// assert_eq!(nested_json, "[[]]"); -/// ``` -#[cfg(test)] -pub(crate) fn generate_n_nested_empty_json_arrays(n: usize) -> String { - "[".repeat(n) + &"]".repeat(n) -} - -#[cfg(test)] -/// Generates a vector of bytes representing `n` nested empty Bencode -/// dictionaries. -/// -/// This function is a helper for generating a Bencode representation -/// of a JSON object with `n` nested empty objects. It repeats the Bencode -/// dictionary opener `d` character, the field key `3:foo`, and the Bencode -/// dictionary closer `e` character `n` times to create the desired nested -/// structure. 
-/// -/// # Examples -/// -/// ```rust -/// let nested_bencode = generate_n_nested_empty_bencoded_dictionaries(2); -/// assert_eq!(nested_bencode, b"d3:food3:foodeee"); -/// `````` -pub(crate) fn generate_n_nested_empty_bencoded_dictionaries(n: usize) -> Vec { - if n == 0 { - return b"de".to_vec(); - } - - let mut dict = vec![b'd']; // Dictionary start - dict.extend_from_slice(b"3:foo"); // Field key - dict.extend_from_slice(&generate_n_nested_empty_bencoded_dictionaries(n - 1)); - dict.extend_from_slice(b"e"); // Dictionary end - - dict -} - -#[cfg(test)] -/// Generates a JSON object with `n` nested empty objects. -/// -/// This function is a helper for generating a JSON object with a -/// specific number of nested empty objects. It repeats the opening `{` -/// character `n` times, and the closing `}` character `n` times, to -/// create the desired nested structure. -/// -/// # Examples -/// -/// ```rust -/// let nested_json = generate_n_nested_empty_json_objects(2); -/// assert_eq!(nested_json, r#"{"foo":{"foo":{}}}"#.to_string()); -/// ` -pub(crate) fn generate_n_nested_empty_json_objects(n: usize) -> String { - if n == 0 { - return "{}".to_string(); - } - - let mut object = "{".to_string(); - object.push_str(r#""foo":"#); - object.push_str(&generate_n_nested_empty_json_objects(n - 1)); - object.push('}'); - - object -} - -#[cfg(test)] -/// Generates a bencoded string with a repeated byte. -/// -/// This function creates a bencoded string where the string value consists of a -/// repeated byte. -/// -/// # Arguments -/// -/// * `byte` - The byte to repeat in the string value. -/// * `n` - The number of times to repeat the byte. -/// -/// # Returns -/// -/// A `Vec` containing the bencoded string. -pub(crate) fn bencoded_string_with_repeated_byte(byte: u8, n: usize) -> Vec { - let string_length = n.to_string().into_bytes(); - let string_value = vec![byte; n]; - - let mut bencoded_string = Vec::new(); - bencoded_string.extend_from_slice(&string_length); - bencoded_string.push(b':'); // Length/value separator - bencoded_string.extend_from_slice(&string_value); - - bencoded_string -} diff --git a/src/tokenizer/integer.rs b/src/tokenizer/integer.rs deleted file mode 100644 index 5722bcc..0000000 --- a/src/tokenizer/integer.rs +++ /dev/null @@ -1,310 +0,0 @@ -//! Bencoded integer parser. -//! -//! It reads bencoded bytes from the input and writes JSON bytes to the output. -use std::io::{self, Read}; - -use crate::rw::byte_reader::ByteReader; - -use super::{ - error::{Error, ReadContext}, - BENCODE_END_INTEGER, -}; - -/// The current state parsing the integer. -#[derive(PartialEq)] -#[allow(clippy::enum_variant_names)] -enum StateExpecting { - Start, // S - DigitOrSign, // DoS - DigitAfterSign, // DaS - DigitOrEnd, // DoE -} - -/// It parses an integer bencoded value. -/// -/// # Errors -/// -/// Will return an error if it can't read from the input or write to the -/// output. -/// -/// # Panics -/// -/// Will panic if we reach the end of the input without completing the integer -/// (without reaching the end of the integer `e`). 
-pub fn parse(reader: &mut ByteReader) -> Result, Error> { - let mut state = StateExpecting::Start; - let mut first_digit_is_zero = false; - let mut value = vec![]; - - loop { - let byte = next_byte(reader)?; - - let char = byte as char; - - state = match state { - StateExpecting::Start => { - // Discard the 'i' byte - StateExpecting::DigitOrSign - } - StateExpecting::DigitOrSign => { - if char == '-' { - value.push(byte); - - StateExpecting::DigitAfterSign - } else if char.is_ascii_digit() { - value.push(byte); - - if char == '0' { - first_digit_is_zero = true; - } - - StateExpecting::DigitOrEnd - } else { - return Err(Error::UnexpectedByteParsingInteger(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - } - StateExpecting::DigitAfterSign => { - if char.is_ascii_digit() { - value.push(byte); - - if char == '0' { - first_digit_is_zero = true; - } - - StateExpecting::DigitOrEnd - } else { - return Err(Error::UnexpectedByteParsingInteger(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - } - StateExpecting::DigitOrEnd => { - if char.is_ascii_digit() { - value.push(byte); - - if char == '0' && first_digit_is_zero { - return Err(Error::LeadingZerosInIntegersNotAllowed(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - - StateExpecting::DigitOrEnd - } else if byte == BENCODE_END_INTEGER { - return Ok(value); - } else { - return Err(Error::UnexpectedByteParsingInteger(ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - } - }; - } -} - -/// It reads the next byte from the input. -/// -/// # Errors -/// -/// Will return an error if the end of input was reached. 
-fn next_byte(reader: &mut ByteReader) -> Result { - match reader.read_byte() { - Ok(byte) => Ok(byte), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(Error::UnexpectedEndOfInputParsingInteger(ReadContext { - byte: None, - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - })); - } - Err(err.into()) - } - } -} - -#[cfg(test)] -mod tests { - use crate::{error::Error, rw::byte_reader::ByteReader}; - - use super::parse; - - fn bencode_to_json_unchecked(input_buffer: &[u8]) -> Vec { - parse_bencode(input_buffer).expect("Bencode to JSON conversion failed") - } - - fn try_bencode_to_json(input_buffer: &[u8]) -> Result, Error> { - parse_bencode(input_buffer) - } - - fn parse_bencode(input_buffer: &[u8]) -> Result, Error> { - let mut reader = ByteReader::new(input_buffer); - - parse(&mut reader) - } - - mod for_helpers { - use crate::tokenizer::integer::tests::try_bencode_to_json; - - #[test] - fn bencode_to_json_wrapper_succeeds() { - assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".as_bytes()); - } - - #[test] - fn bencode_to_json_wrapper_fails() { - assert!(try_bencode_to_json(b"i").is_err()); - } - } - - #[test] - fn zero() { - assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".as_bytes()); - } - - #[test] - fn one_digit_integer() { - assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".as_bytes()); - } - - #[test] - fn two_digits_integer() { - assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".as_bytes()); - } - - #[test] - fn negative_integer() { - assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".as_bytes()); - } - - mod it_should_fail { - use std::io::{self, Read}; - - use crate::{ - error::Error, - rw::byte_reader::ByteReader, - tokenizer::integer::{parse, tests::try_bencode_to_json}, - }; - - #[test] - fn when_it_cannot_read_more_bytes_from_input() { - let unfinished_int = b"i42"; - - let result = try_bencode_to_json(unfinished_int); - - assert!(matches!( - result, - Err(Error::UnexpectedEndOfInputParsingInteger { .. }) - )); - } - - #[test] - fn when_it_finds_an_invalid_byte() { - let int_with_invalid_byte = b"iae"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn when_it_finds_leading_zeros() { - // Leading zeros are not allowed.Only the zero integer can start with zero. - - let int_with_invalid_byte = b"i00e"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::LeadingZerosInIntegersNotAllowed { .. }) - )); - } - - #[test] - fn when_it_finds_leading_zeros_in_a_negative_integer() { - // Leading zeros are not allowed.Only the zero integer can start with zero. - - let int_with_invalid_byte = b"i-00e"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::LeadingZerosInIntegersNotAllowed { .. }) - )); - } - - mod when_it_receives_a_unexpected_byte { - use crate::{error::Error, tokenizer::integer::tests::try_bencode_to_json}; - - #[test] - fn while_expecting_a_digit_or_sign() { - let int_with_invalid_byte = b"ia"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - - #[test] - fn while_expecting_digit_after_the_sign() { - let int_with_invalid_byte = b"i-a"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. 
}) - )); - } - - #[test] - fn while_expecting_digit_or_end() { - let int_with_invalid_byte = b"i-1a"; - - let result = try_bencode_to_json(int_with_invalid_byte); - - assert!(matches!( - result, - Err(Error::UnexpectedByteParsingInteger { .. }) - )); - } - } - - #[test] - fn when_it_receives_a_non_eof_io_error() { - struct FaultyReader; - - impl Read for FaultyReader { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Err(io::Error::new( - io::ErrorKind::PermissionDenied, - "Permission denied", - )) - } - } - - let mut reader = ByteReader::new(FaultyReader); - - let result = parse(&mut reader); - - assert!(matches!(result, Err(Error::Io(_)))); - } - } -} diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs deleted file mode 100644 index cd13a45..0000000 --- a/src/tokenizer/mod.rs +++ /dev/null @@ -1,153 +0,0 @@ -//! Bencode tokenizer. Given an input stream, it returns a stream of tokens. -pub mod integer; -pub mod string; - -use std::io::{self, Read}; - -use super::error::{self, ReadContext}; - -use crate::rw::byte_reader::ByteReader; - -// todo: Implement trait Iterator for tokenizer. - -// Bencoded reserved bytes -const BENCODE_BEGIN_INTEGER: u8 = b'i'; -pub const BENCODE_END_INTEGER: u8 = b'e'; -const BENCODE_BEGIN_LIST: u8 = b'l'; -const BENCODE_BEGIN_DICT: u8 = b'd'; -const BENCODE_END_LIST_OR_DICT: u8 = b'e'; - -#[derive(Debug, PartialEq)] -pub enum BencodeToken { - Integer(Vec), - String(Vec), - BeginList, - BeginDict, - EndListOrDict, - LineBreak, -} - -pub struct Tokenizer { - byte_reader: ByteReader, -} - -impl Tokenizer { - pub fn new(reader: R) -> Self { - Tokenizer { - byte_reader: ByteReader::new(reader), - } - } - - /// It parses the next bencoded token from input. - /// - /// # Errors - /// - /// Will return an error if: - /// - /// - It can't read from the input. - pub fn next_token(&mut self) -> Result, error::Error> { - match Self::peek_byte(&mut self.byte_reader)? { - Some(peeked_byte) => { - match peeked_byte { - BENCODE_BEGIN_INTEGER => { - let value = integer::parse(&mut self.byte_reader)?; - Ok(Some(BencodeToken::Integer(value))) - } - b'0'..=b'9' => { - let value = string::parse(&mut self.byte_reader)?; - Ok(Some(BencodeToken::String(value))) - } - BENCODE_BEGIN_LIST => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; - Ok(Some(BencodeToken::BeginList)) - } - BENCODE_BEGIN_DICT => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; - Ok(Some(BencodeToken::BeginDict)) - } - BENCODE_END_LIST_OR_DICT => { - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; - Ok(Some(BencodeToken::EndListOrDict)) - } - b'\n' => { - // todo: we should not return any token and continue to the next token. - // Ignore line breaks at the beginning, the end, or between values - let _byte = Self::read_peeked_byte(peeked_byte, &mut self.byte_reader)?; - Ok(Some(BencodeToken::LineBreak)) - } - _ => Err(error::Error::UnrecognizedFirstBencodeValueByte( - ReadContext { - byte: Some(peeked_byte), - pos: self.byte_reader.input_byte_counter(), - latest_bytes: self.byte_reader.captured_bytes(), - }, - )), - } - } - None => Ok(None), - } - } - - /// It reads the next byte from the input consuming it. It returns `None` if - /// the input has ended. - /// - /// # Errors - /// - /// Will return and errors if: - /// - /// - It can't read from the input. - /// - The byte read is not the expected one (the previously peeked byte). 
- fn read_peeked_byte( - peeked_byte: u8, - reader: &mut ByteReader, - ) -> Result, error::Error> { - match reader.read_byte() { - Ok(byte) => { - if byte == peeked_byte { - return Ok(Some(byte)); - } - Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte( - ReadContext { - byte: Some(byte), - pos: reader.input_byte_counter(), - latest_bytes: reader.captured_bytes(), - }, - )) - } - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - - /// It peeks the next byte from the input without consuming it. It returns - /// `None` if the input has ended. - /// - /// # Errors - /// - /// Will return and errors if it can't read from the input. - fn peek_byte(reader: &mut ByteReader) -> Result, error::Error> { - match reader.peek_byte() { - Ok(byte) => Ok(Some(byte)), - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Ok(None); - } - Err(err.into()) - } - } - } - - /// Returns the number of bytes that have been read from the input. - pub fn input_byte_counter(&self) -> u64 { - self.byte_reader.input_byte_counter() - } - - /// Returns a copy of the bytes that have been read from the input. - pub fn captured_bytes(&self) -> Vec { - self.byte_reader.captured_bytes() - } -} diff --git a/src/tokenizer/string.rs b/src/tokenizer/string.rs deleted file mode 100644 index 5d23c95..0000000 --- a/src/tokenizer/string.rs +++ /dev/null @@ -1,463 +0,0 @@ -//! Bencoded string parser. -//! -//! It reads bencoded bytes from the input and writes JSON bytes to the output. -use std::io::{self, Read}; - -use crate::rw::byte_reader::ByteReader; - -/* todo: Optimize UTF-8 conversion. Try to convert to string partially and stop - converting if we reach a point when input is not valid UTF-8 anymore. This - way we don't consume more memory and we can print the bytes directly to the - output from that point on. -*/ - -use core::str; - -use super::error::{Error, ReadContext}; - -/// It parses a string bencoded value. -/// -/// # Errors -/// -/// Will return an error if it can't read from the input. -/// -/// # Panics -/// -/// Will panic if we reach the end of the input without completing the string. -pub fn parse(reader: &mut ByteReader) -> Result, Error> { - let mut string_parser = StringParser::default(); - string_parser.parse(reader) -} - -/// Strings bencode format have two parts: `length:value`. -/// -/// - Length is a sequence of bytes (only digits 0..9). -/// - Value is an arbitrary sequence of bytes (not only valid UTF-8). -#[derive(Default, Debug)] -#[allow(clippy::module_name_repetitions)] -struct StringParser { - /// The final parsed string. - parsed_value: String, -} - -impl StringParser { - fn parse(&mut self, reader: &mut ByteReader) -> Result, Error> { - let mut length = Length::default(); - - length.parse(reader)?; - - let mut value = Value::new(length.number); - - let value_bytes = value.parse(reader)?; - - self.parsed_value = value.utf8(); - - Ok(value_bytes) - } -} - -#[derive(Default, Debug)] -struct Length { - /// A list of parsed bytes. It's only for debugging. - bytes: Vec, - - /// The parsed length at the current read digit. - number: usize, -} - -impl Length { - const END_OF_STRING_LENGTH_BYTE: u8 = b':'; - - fn parse(&mut self, reader: &mut ByteReader) -> Result<(), Error> { - loop { - let byte = Self::next_byte(reader)?; - - match byte { - Self::END_OF_STRING_LENGTH_BYTE => { - break; - } - _ => { - self.add_byte(byte, reader)?; - } - } - } - - Ok(()) - } - - /// It reads the next byte from the input. 
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if the end of input was reached.
-    fn next_byte<R: Read>(reader: &mut ByteReader<R>) -> Result<u8, Error> {
-        match reader.read_byte() {
-            Ok(byte) => Ok(byte),
-            Err(err) => {
-                if err.kind() == io::ErrorKind::UnexpectedEof {
-                    return Err(Error::UnexpectedEndOfInputParsingStringLength(
-                        ReadContext {
-                            byte: None,
-                            pos: reader.input_byte_counter(),
-                            latest_bytes: reader.captured_bytes(),
-                        },
-                    ));
-                }
-                Err(err.into())
-            }
-        }
-    }
-
-    /// It adds a new byte (digit) to the string length.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if the byte is not a digit (0..9).
-    fn add_byte<R: Read>(&mut self, byte: u8, reader: &mut ByteReader<R>) -> Result<(), Error> {
-        if !byte.is_ascii_digit() {
-            return Err(Error::InvalidStringLengthByte(ReadContext {
-                byte: Some(byte),
-                pos: reader.input_byte_counter(),
-                latest_bytes: reader.captured_bytes(),
-            }));
-        }
-
-        self.bytes.push(byte);
-
-        self.add_digit_to_length(Self::byte_to_digit(byte));
-
-        Ok(())
-    }
-
-    /// It converts a byte containing an ASCII digit into a `usize` number.
-    fn byte_to_digit(byte: u8) -> usize {
-        (byte - b'0') as usize
-    }
-
-    /// It adds the new digit to the number.
-    fn add_digit_to_length(&mut self, digit: usize) {
-        self.number = (self.number * 10) + digit;
-    }
-}
-
-#[derive(Debug)]
-struct Value {
-    length: usize,
-    bytes: Vec<u8>,
-    bytes_counter: usize,
-}
-
-impl Value {
-    fn new(length: usize) -> Self {
-        Self {
-            length,
-            bytes: vec![],
-            bytes_counter: 0,
-        }
-    }
-
-    fn parse<R: Read>(&mut self, reader: &mut ByteReader<R>) -> Result<Vec<u8>, Error> {
-        for _i in 1..=self.length {
-            self.add_byte(Self::next_byte(reader)?);
-        }
-
-        Ok(self.bytes.clone())
-    }
-
-    /// It reads the next byte from the input.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if the end of input was reached.
-    fn next_byte<R: Read>(reader: &mut ByteReader<R>) -> Result<u8, Error> {
-        match reader.read_byte() {
-            Ok(byte) => Ok(byte),
-            Err(err) => {
-                if err.kind() == io::ErrorKind::UnexpectedEof {
-                    return Err(Error::UnexpectedEndOfInputParsingStringValue(ReadContext {
-                        byte: None,
-                        pos: reader.input_byte_counter(),
-                        latest_bytes: reader.captured_bytes(),
-                    }));
-                }
-                Err(err.into())
-            }
-        }
-    }
-
-    fn add_byte(&mut self, byte: u8) {
-        self.bytes.push(byte);
-        self.bytes_counter += 1;
-    }
-
-    fn utf8(&self) -> String {
-        match str::from_utf8(&self.bytes) {
-            Ok(string) => {
-                // The string only contains valid UTF-8 chars -> print it as is
-                string.to_owned()
-            }
-            Err(_) => {
-                // The string contains invalid UTF-8 chars -> print it as hex bytes
-                Self::bytes_to_hex(&self.bytes)
-            }
-        }
-    }
-
-    fn bytes_to_hex(data: &[u8]) -> String {
-        hex::encode(data)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{error::Error, rw::byte_reader::ByteReader};
-
-    use super::parse;
-
-    fn bencode_to_json_unchecked(input_buffer: &[u8]) -> Vec<u8> {
-        parse_bencode(input_buffer).expect("Bencode to JSON conversion failed")
-    }
-
-    fn try_bencode_to_json(input_buffer: &[u8]) -> Result<Vec<u8>, Error> {
-        parse_bencode(input_buffer)
-    }
-
-    fn parse_bencode(input_buffer: &[u8]) -> Result<Vec<u8>, Error> {
-        let mut reader = ByteReader::new(input_buffer);
-        parse(&mut reader)
-    }
-
-    mod for_helpers {
-        use crate::tokenizer::string::tests::try_bencode_to_json;
-
-        #[test]
-        fn bencode_to_json_wrapper_succeeds() {
-            assert_eq!(try_bencode_to_json(b"4:spam").unwrap(), r"spam".as_bytes());
-        }
-
-        #[test]
-        fn bencode_to_json_wrapper_fails() {
-            assert!(try_bencode_to_json(b"4:").is_err());
-        }
-    }
-
-    #[test]
-    fn length_can_contain_leading_zeros() {
-        assert_eq!(bencode_to_json_unchecked(b"00:"), r"".as_bytes());
-    }
-
-    #[test]
-    fn empty_string() {
-        assert_eq!(bencode_to_json_unchecked(b"0:"), r"".as_bytes());
-    }
-
-    #[test]
-    fn string_with_tags() {
-        assert_eq!(
-            bencode_to_json_unchecked(b"8:"),
-            r"".as_bytes()
-        );
-    }
-
-    #[test]
-    fn utf8() {
-        assert_eq!(bencode_to_json_unchecked(b"4:spam"), r"spam".as_bytes());
-    }
-
-    #[test]
-    fn non_utf8() {
-        assert_eq!(
-            bencode_to_json_unchecked(b"4:\xFF\xFE\xFD\xFC"),
-            vec![0xFF, 0xFE, 0xFD, 0xFC]
-        );
-    }
-
-    #[test]
-    fn ending_with_bencode_end_char() {
-        assert_eq!(bencode_to_json_unchecked(b"1:e"), r"e".as_bytes());
-    }
-
-    #[test]
-    fn containing_a_reserved_char() {
-        assert_eq!(bencode_to_json_unchecked(b"1:i"), r"i".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:l"), r"l".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:d"), r"d".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:l"), r"l".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:e"), r"e".as_bytes());
-    }
-
-    #[test]
-    fn containing_a_digit() {
-        assert_eq!(bencode_to_json_unchecked(b"1:0"), r"0".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:1"), r"1".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:2"), r"2".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:3"), r"3".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:4"), r"4".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:5"), r"5".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:6"), r"6".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:7"), r"7".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:8"), r"8".as_bytes());
-        assert_eq!(bencode_to_json_unchecked(b"1:9"), r"9".as_bytes());
-    }
-
-    mod should_escape_json {
-        use crate::{test::bencode_to_json_unchecked, to_bencode};
-
-        #[test]
-        fn containing_a_double_quote() {
-            assert_eq!(
-                bencode_to_json_unchecked("1:\"".as_bytes()),
-                r#""\"""#.to_string()
-            );
-        }
-
-        #[test]
-        fn containing_backslashes() {
-            assert_eq!(
-                bencode_to_json_unchecked("1:\\".as_bytes()),
-                r#""\\""#.to_string()
-            );
-        }
-
-        #[test]
-        fn containing_control_characters() {
-            assert_eq!(
-                bencode_to_json_unchecked("1:\n".as_bytes()),
-                r#""\n""#.to_string()
-            );
-            assert_eq!(
-                bencode_to_json_unchecked("1:\r".as_bytes()),
-                r#""\r""#.to_string()
-            );
-            assert_eq!(
-                bencode_to_json_unchecked("1:\t".as_bytes()),
-                r#""\t""#.to_string()
-            );
-        }
-
-        #[test]
-        fn containing_unicode_characters() {
-            assert_eq!(
-                bencode_to_json_unchecked(&to_bencode("ñandú")),
-                r#""ñandú""#.to_string()
-            );
-        }
-
-        #[test]
-        fn containing_non_unicode_characters() {
-            assert_eq!(
-                bencode_to_json_unchecked(&[b'4', b':', 0x80, 0xFF, 0x00, 0xAB]),
-                r#""80ff00ab""#.to_string()
-            );
-        }
-    }
-
-    mod it_should_fail_parsing_when {
-        use std::io::{self, Read};
-
-        use crate::{
-            error::Error,
-            rw::byte_reader::ByteReader,
-            tokenizer::string::{parse, tests::try_bencode_to_json},
-        };
-
-        #[test]
-        fn it_reaches_the_end_of_the_input_parsing_the_string_length() {
-            let incomplete_string_length = b"4";
-
-            let result = try_bencode_to_json(incomplete_string_length);
-
-            assert!(matches!(
-                result,
-                Err(Error::UnexpectedEndOfInputParsingStringLength { .. })
-            ));
-        }
-
-        #[test]
-        fn it_reaches_the_end_of_the_input_parsing_the_string_value() {
-            let incomplete_string_value = b"4:123";
-
-            let result = try_bencode_to_json(incomplete_string_value);
-
-            assert!(matches!(
-                result,
-                Err(Error::UnexpectedEndOfInputParsingStringValue { .. })
-            ));
-        }
-
-        #[test]
-        fn it_receives_a_non_digit_byte_in_the_string_length() {
-            let incomplete_string_value = b"4a:1234";
-
-            let result = try_bencode_to_json(incomplete_string_value);
-
-            assert!(matches!(result, Err(Error::InvalidStringLengthByte { .. })));
-        }
-
-        /// Fake reader that fails after reading a certain number of bytes
-        struct FaultyReader {
-            /// The bytes the reader will return
-            bytes: Vec<u8>,
-
-            /// The position in the bytes vector where the reader will fail
-            fail_in_pos: usize,
-
-            /// The current number of bytes read
-            counter: usize,
-        }
-
-        impl FaultyReader {
-            fn new(bytes: Vec<u8>, fail_in_pos: usize) -> Self {
-                Self {
-                    bytes,
-                    fail_in_pos,
-                    counter: 0,
-                }
-            }
-        }
-
-        impl Read for FaultyReader {
-            fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-                // Fail exactly at the position set by `fail_in_pos`
-                if self.counter >= self.fail_in_pos {
-                    return Err(io::Error::new(
-                        io::ErrorKind::PermissionDenied,
-                        "Permission denied",
-                    ));
-                }
-
-                // Check if we have any bytes left to read
-                if self.counter >= self.bytes.len() {
-                    return Ok(0); // No more bytes to read (EOF)
-                }
-
-                // Write one byte at a time to the buffer
-                buf[0] = self.bytes[self.counter];
-
-                // Increment the counter to reflect one byte read
-                self.counter += 1;
-
-                // Return that we read exactly 1 byte
-                Ok(1)
-            }
-        }
-
-        #[test]
-        fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_length() {
-            let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 1));
-
-            let result = parse(&mut reader);
-
-            assert!(matches!(result, Err(Error::Io(_))));
-        }
-
-        #[test]
-        fn it_cannot_read_more_bytes_without_finishing_parsing_the_string_value() {
-            let mut reader = ByteReader::new(FaultyReader::new(b"4:spam".to_vec(), 3));
-
-            let result = parse(&mut reader);
-
-            assert!(matches!(result, Err(Error::Io(_))));
-        }
-    }
-}
diff --git a/tests/fixtures/ubuntu-23.04-desktop-amd64.iso.torrent b/tests/fixtures/ubuntu-23.04-desktop-amd64.iso.torrent
new file mode 100644
index 0000000..f5a8ec2
Binary files /dev/null and b/tests/fixtures/ubuntu-23.04-desktop-amd64.iso.torrent differ
diff --git a/tests/integration.rs b/tests/integration.rs
index a72a2a8..4a76589 100644
--- a/tests/integration.rs
+++ b/tests/integration.rs
@@ -1,97 +1,24 @@
-mod it_should {
-
-    use assert_cmd::Command;
-    use predicates::prelude::*;
-    use std::fs;
-    use tempfile::tempdir;
-
-    #[test]
-    fn read_from_stdin_and_write_to_stdout() {
-        let mut cmd = Command::cargo_bin("bencode2json").unwrap();
-        cmd.write_stdin("4:spam")
-            .assert()
-            .success()
-            .stdout(r#""spam""#);
-    }
-
-    #[test]
-    fn read_from_a_file_and_write_to_a_file() {
-        let temp_dir = tempdir().unwrap();
-
-        let output_file = temp_dir.path().join("output.json");
-
-        let mut cmd = Command::cargo_bin("bencode2json").unwrap();
-
-        cmd.arg("-i")
-            .arg("tests/fixtures/sample.bencode")
-            .arg("-o")
-            .arg(output_file.to_str().unwrap())
-            .assert()
-            .success();
-
-        let output_content = fs::read_to_string(output_file).expect("Failed to read output file");
-
-        assert_eq!(output_content.trim(), r#"["spam"]"#);
-    }
-
-    #[test]
-    fn create_the_output_file_if_it_does_not_exist() {
-        let temp_dir = tempdir().unwrap();
-
-        let output_file = temp_dir.path().join("new_file.json");
-
-        let mut cmd = Command::cargo_bin("bencode2json").unwrap();
-
-        cmd.arg("-i")
-            .arg("tests/fixtures/sample.bencode")
-            .arg("-o")
-            .arg(output_file.to_str().unwrap())
-            .assert()
-            .success();
-
-        let output_content = fs::read_to_string(output_file).expect("Failed to read output file");
-
-        assert_eq!(output_content.trim(), r#"["spam"]"#);
-    }
-
-    #[test]
-    fn fail_when_the_bencoded_input_is_invalid() {
-        let mut cmd = Command::cargo_bin("bencode2json").unwrap();
-        cmd.write_stdin("a")
-            .assert()
-            .failure()
-            .stderr(predicate::str::contains("Error: Unrecognized first"));
-    }
-
-    #[test]
-    fn fail_reading_from_non_existing_file() {
-        let temp_dir = tempdir().unwrap();
-
-        let output_file = temp_dir.path().join("output.json");
-
-        let mut cmd = Command::cargo_bin("bencode2json").unwrap();
-
-        cmd.arg("-i")
-            .arg("non_existing_file.bencode")
-            .arg("-o")
-            .arg(output_file.to_str().unwrap())
-            .assert()
-            .failure();
-    }
-
-    #[test]
-    fn fail_creating_the_output_file_if_the_dir_does_not_exist() {
-        let temp_dir = tempdir().unwrap();
-
-        let output_file = temp_dir.path().join("non_existing_dir/new_file.json");
-
-        let mut cmd = Command::cargo_bin("bencode2json").unwrap();
+use assert_cmd::Command;
+
+#[test]
+fn test_from_file() {
+    let mut cmd = Command::cargo_bin("torrust-bencode2json").unwrap();
+    cmd.arg("-i")
+        .arg("tests/fixtures/sample.bencode")
+        .arg("-o")
+        .arg("output.json")
+        .assert()
+        .success();
+
+    // todo: check contents
+    // Read the file. It should contain: ["spam"]
+}
 
-        cmd.arg("-i")
-            .arg("tests/fixtures/sample.bencode")
-            .arg("-o")
-            .arg(output_file.to_str().unwrap())
-            .assert()
-            .failure();
-    }
-}
+#[test]
+fn test_stdin_stdout() {
+    let mut cmd = Command::cargo_bin("torrust-bencode2json").unwrap();
+    cmd.write_stdin("4:spam")
+        .assert()
+        .success()
+        .stdout("\"spam\"\n");
+}
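Note for readers of this diff: the deleted src/tokenizer/mod.rs exposed a pull-style API where `next_token()` returns `Ok(None)` at a clean end of input. A minimal sketch of how a caller might have driven it, assuming it sits next to the deleted module so `Tokenizer`, `BencodeToken`, and `error::Error` resolve; the function name `dump_tokens` is hypothetical, not code from the repository:

use std::io::Read;

// Hypothetical driver for the removed tokenizer: pull tokens until the input
// is exhausted (`next_token` yields `Ok(None)` on a clean end of input).
fn dump_tokens<R: Read>(reader: R) -> Result<(), error::Error> {
    let mut tokenizer = Tokenizer::new(reader);

    while let Some(token) = tokenizer.next_token()? {
        match token {
            BencodeToken::Integer(bytes) => println!("integer: {bytes:?}"),
            BencodeToken::String(bytes) => println!("string: {bytes:?}"),
            BencodeToken::BeginList => println!("begin list"),
            BencodeToken::BeginDict => println!("begin dict"),
            BencodeToken::EndListOrDict => println!("end list or dict"),
            BencodeToken::LineBreak => {} // emitted for b'\n'; callers skip it
        }
    }

    Ok(())
}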
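The deleted src/tokenizer/string.rs documents the string rule (`length:value`) and its two quirks: the length accumulates digit by digit (`number = number * 10 + digit`, which is why leading zeros are legal), and a value that is not valid UTF-8 falls back to a hex dump, mirroring `Value::utf8`. A self-contained sketch of that rule on a byte slice; `parse_bencoded_string` is a hypothetical helper, not repository code:

// Sketch of the `length:value` rule: accumulate decimal digits until ':',
// then take exactly `length` raw bytes. Valid UTF-8 is kept as text; anything
// else becomes a lowercase hex dump, like the deleted `bytes_to_hex`.
fn parse_bencoded_string(input: &[u8]) -> Option<(String, &[u8])> {
    let colon = input.iter().position(|&b| b == b':')?;

    // Every byte before ':' must be an ASCII digit (0..9).
    let mut length: usize = 0;
    for &byte in &input[..colon] {
        if !byte.is_ascii_digit() {
            return None;
        }
        length = length * 10 + usize::from(byte - b'0');
    }

    // Take exactly `length` bytes after the ':'; fail if the input is short.
    let value = input.get(colon + 1..colon + 1 + length)?;
    let rest = &input[colon + 1 + length..];

    let text = match std::str::from_utf8(value) {
        Ok(utf8) => utf8.to_owned(),
        Err(_) => value.iter().map(|b| format!("{b:02x}")).collect(),
    };

    Some((text, rest))
}

// Example: parse_bencoded_string(b"4:spam") == Some(("spam".to_owned(), &b""[..]))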
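Finally, the rewritten tests/integration.rs leaves a `// todo: check contents` behind, while the deleted `read_from_a_file_and_write_to_a_file` test already showed how to verify the output. A possible follow-up sketch, assuming `tempfile` remains a dev-dependency as in the old test module:

use assert_cmd::Command;
use std::fs;
use tempfile::tempdir;

// Hypothetical replacement for `test_from_file` that resolves the todo by
// writing to a temporary file and asserting on its contents, as the deleted
// test did. The expected output ["spam"] comes from the old assertion.
#[test]
fn test_from_file_checks_output() {
    let temp_dir = tempdir().unwrap();
    let output_file = temp_dir.path().join("output.json");

    let mut cmd = Command::cargo_bin("torrust-bencode2json").unwrap();
    cmd.arg("-i")
        .arg("tests/fixtures/sample.bencode")
        .arg("-o")
        .arg(output_file.to_str().unwrap())
        .assert()
        .success();

    let output_content = fs::read_to_string(output_file).expect("Failed to read output file");

    assert_eq!(output_content.trim(), r#"["spam"]"#);
}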