diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index ad371798fd..c08369020c 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -10,5 +10,5 @@ contact_links: url: https://robotics.stackexchange.com/questions/ask about: Get help from the robotics community - name: "💬 Live chat" - url: https://foxglove.dev/slack - about: Join the discussion in our Slack community + url: https://foxglove.dev/chat + about: Join the discussion in our Discord community diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8e9eff44b6..bb6d09b145 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: spellcheck: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -26,7 +26,7 @@ jobs: conformance-lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -41,7 +41,7 @@ jobs: conformance-cpp: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -61,7 +61,7 @@ jobs: conformance-go: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -80,7 +80,7 @@ jobs: conformance-python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -99,7 +99,7 @@ jobs: conformance-typescript: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -114,7 +114,7 @@ jobs: conformance-kaitai-struct: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -129,7 +129,7 @@ jobs: conformance-swift: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -148,7 +148,7 @@ jobs: conformance-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -160,7 +160,8 @@ jobs: with: toolchain: stable default: true - - run: cd rust && cargo build --example=conformance_reader + - run: cargo build --example=conformance_reader --example=conformance_reader_async --features=tokio + working-directory: rust - run: yarn install --immutable - run: yarn test:conformance:generate-inputs --verify - run: yarn test:conformance --runner rust- @@ -171,7 +172,7 @@ jobs: run: working-directory: cpp steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - uses: actions/cache@v4 @@ -190,7 +191,7 @@ jobs: run: working-directory: cpp steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - uses: actions/cache@v4 @@ -212,7 +213,7 @@ jobs: id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -269,7 +270,7 @@ jobs: typescript-examples: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - run: corepack enable @@ -299,7 +300,7 @@ jobs: permissions: id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - uses: actions/setup-python@v5 @@ -380,14 +381,14 @@ jobs: run: working-directory: go steps: - - uses: actions/checkout@v3 + - uses: 
actions/checkout@v4 with: lfs: true - uses: actions/setup-go@v5 with: go-version-file: go/go.work - name: install golangci-lint - run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.56.2 + run: go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.59.1 - run: make lint - run: make test - name: Check library version @@ -438,7 +439,7 @@ jobs: env: ${{ matrix.env }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: "0" - run: git fetch --depth=1 origin +refs/tags/*:refs/tags/* @@ -465,7 +466,7 @@ jobs: swift: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - uses: swift-actions/setup-swift@v2 @@ -482,7 +483,7 @@ jobs: run: working-directory: rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - uses: actions-rs/toolchain@v1 @@ -490,13 +491,19 @@ jobs: toolchain: stable default: true components: "rustfmt, clippy" + - run: rustup target add wasm32-unknown-unknown - run: cargo fmt --all -- --check - run: cargo clippy -- --no-deps - run: cargo clippy --no-default-features -- --no-deps - run: cargo clippy --no-default-features --features lz4 -- --no-deps - run: cargo clippy --no-default-features --features zstd -- --no-deps - - run: cargo build - - run: cargo test + - run: cargo clippy --no-default-features --features tokio -- --no-deps + - run: cargo clippy --no-default-features --features tokio,lz4 -- --no-deps + - run: cargo clippy --no-default-features --features tokio,zstd -- --no-deps + - run: cargo build --all-features + - run: cargo test --all-features + - run: cargo build --all-features --target wasm32-unknown-unknown + - run: cargo check --all-features --target wasm32-unknown-unknown - name: "publish to crates.io" if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/releases/rust/v') run: cargo publish --token ${{ secrets.RUST_CRATES_IO_TOKEN }} diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index e732b0c377..97e4043abb 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -3,8 +3,6 @@ name: Website on: push: branches: [main] - # deploy the website for every new MCAP CLI release, so the version gets populated in CLI installation docs - tags: ["releases/mcap-cli/*"] pull_request: branches: ["*"] @@ -12,12 +10,9 @@ jobs: docs-home: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: lfs: true - # https://github.com/actions/checkout/issues/701 - causes tags to be fetched, which - # are needed to determine the latest MCAP CLI release - fetch-depth: 0 - run: corepack enable - uses: actions/setup-node@v4 @@ -42,7 +37,7 @@ jobs: docs-cpp: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: make -C cpp ci-docs @@ -57,7 +52,7 @@ jobs: docs-python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: @@ -78,7 +73,7 @@ jobs: docs-swift: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: swift-actions/setup-swift@v2 with: @@ -117,7 +112,7 @@ jobs: deployments: write steps: # need checkout so that cloudflare can detect git commit - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 diff --git a/.vscode/settings.json b/.vscode/settings.json index 92e718ef0d..05b6fab2c7 100644 --- a/.vscode/settings.json +++ 
b/.vscode/settings.json @@ -44,5 +44,11 @@ // https://github.com/microsoft/vscode-cpptools/issues/722 "C_Cpp.autoAddFileAssociations": false, - "C_Cpp.default.cppStandard": "c++17" + "C_Cpp.default.cppStandard": "c++17", + "[go]": { + "editor.defaultFormatter": "golang.go" + }, + "[rust]": { + "editor.defaultFormatter": "rust-lang.rust-analyzer" + } } diff --git a/README.md b/README.md index ba8760c7a6..f59379e2e8 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Download the latest `mcap-cli` version from the [releases page](https://github.c ## License -[MIT License](/LICENSE). Contributors are required to accept the [Contributor License Agreement](https://github.com/foxglove/cla). +[MIT License](/LICENSE). ## Release process @@ -93,3 +93,25 @@ Tag a release matching the version number `releases/swift/vX.Y.Z` 1. Update the version in rust/Cargo.toml 2. Tag a release matching the version number `releases/rust/vX.Y.Z` + +## Citations + +If you use MCAP in your research, please cite it in your work. Our suggested +citation format is: + +``` +@software{MCAP, + title = {MCAP: serialization-agnostic log container file format}, + author = {{Foxglove Developers}}, + url = {https://mcap.dev}, + version = {your version}, + date = {your date of access}, + year = {2024}, + publisher = {{Foxglove Technologies}}, + note = {Available from https://github.com/foxglove/mcap} +} +``` + +Please replace the version and date fields with the version of the software you +used, and the date you obtained it. Citing MCAP will help spread awareness of +the project and strengthen the ecosystem. diff --git a/cpp/README.md b/cpp/README.md index fb09b4dbb3..a986e72cad 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -86,9 +86,18 @@ following dependencies: If your project does not need `lz4` or `zstd` support, you can optionally disable these by defining `MCAP_COMPRESSION_NO_LZ4` or `MCAP_COMPRESSION_NO_ZSTD` respectively. +### Conan + To simplify installation of dependencies, the [Conan](https://conan.io/) package manager can be used with the included [conanfile.py](https://github.com/foxglove/mcap/blob/main/cpp/mcap/conanfile.py). + +### CMake + +For using MCAP with CMake, the third-party [olympus-robotics/mcap_builder](https://github.com/olympus-robotics/mcap_builder) repository provides a helpful wrapper. + +### Alternatives + If you use an alternative approach, such as CMake's FetchContent or directly vendoring the dependencies, make sure you use versions equal or greater than the versions listed above. 
diff --git a/cpp/bench/conanfile.py b/cpp/bench/conanfile.py index 57c7862ee2..ffc1efff3f 100644 --- a/cpp/bench/conanfile.py +++ b/cpp/bench/conanfile.py @@ -4,7 +4,7 @@ class McapBenchmarksConan(ConanFile): settings = "os", "compiler", "build_type", "arch" generators = "cmake" - requires = "benchmark/1.7.0", "mcap/1.4.0" + requires = "benchmark/1.7.0", "mcap/1.4.1" def build(self): cmake = CMake(self) diff --git a/cpp/build-docs.sh b/cpp/build-docs.sh index b93d09141b..f2e94fc348 100755 --- a/cpp/build-docs.sh +++ b/cpp/build-docs.sh @@ -4,7 +4,7 @@ set -e conan config init -conan editable add ./mcap mcap/1.4.0 +conan editable add ./mcap mcap/1.4.1 conan install docs --install-folder docs/build/Release \ -s compiler.cppstd=17 -s build_type=Release --build missing diff --git a/cpp/build.sh b/cpp/build.sh index 77ec913e08..9831bd1e1b 100755 --- a/cpp/build.sh +++ b/cpp/build.sh @@ -4,7 +4,7 @@ set -e conan config init -conan editable add ./mcap mcap/1.4.0 +conan editable add ./mcap mcap/1.4.1 conan install test --install-folder test/build/Debug \ -s compiler.cppstd=17 -s build_type=Debug --build missing diff --git a/cpp/docs/conanfile.py b/cpp/docs/conanfile.py index 8545b8e193..d1645bcff8 100644 --- a/cpp/docs/conanfile.py +++ b/cpp/docs/conanfile.py @@ -4,7 +4,7 @@ class McapDocsConan(ConanFile): settings = "os", "compiler", "build_type", "arch" generators = "cmake" - requires = "mcap/1.4.0" + requires = "mcap/1.4.1" def build(self): cmake = CMake(self) diff --git a/cpp/examples/conanfile.py b/cpp/examples/conanfile.py index 0e8845d226..b3f682916d 100644 --- a/cpp/examples/conanfile.py +++ b/cpp/examples/conanfile.py @@ -5,7 +5,7 @@ class McapExamplesConan(ConanFile): settings = "os", "compiler", "build_type", "arch" generators = "cmake" requires = [ - "mcap/1.4.0", + "mcap/1.4.1", "protobuf/3.21.1", "nlohmann_json/3.10.5", "catch2/2.13.8", diff --git a/cpp/examples/protobuf/dynamic_reader.cpp b/cpp/examples/protobuf/dynamic_reader.cpp index 5c38547765..2476415d1a 100644 --- a/cpp/examples/protobuf/dynamic_reader.cpp +++ b/cpp/examples/protobuf/dynamic_reader.cpp @@ -8,6 +8,7 @@ #include #include "mcap/reader.hpp" +#include #include namespace gp = google::protobuf; @@ -82,7 +83,7 @@ int main(int argc, char** argv) { return 1; } } - gp::Message* message = protoFactory.GetPrototype(descriptor)->New(); + auto message = std::unique_ptr(protoFactory.GetPrototype(descriptor)->New()); if (!message->ParseFromArray(it->message.data, static_cast(it->message.dataSize))) { std::cerr << "failed to parse message using included foxglove.PointCloud schema" << std::endl; reader.close(); diff --git a/cpp/examples/protobuf/writer.cpp b/cpp/examples/protobuf/writer.cpp index a3c28a2481..645814c54c 100644 --- a/cpp/examples/protobuf/writer.cpp +++ b/cpp/examples/protobuf/writer.cpp @@ -156,7 +156,6 @@ int main(int argc, char** argv) { if (!res.ok()) { std::cerr << "Failed to write message: " << res.message << "\n"; writer.terminate(); - writer.close(); std::ignore = std::remove(outputFilename); return 1; } diff --git a/cpp/mcap/conanfile.py b/cpp/mcap/conanfile.py index dc9bc27158..0c9d7b8278 100644 --- a/cpp/mcap/conanfile.py +++ b/cpp/mcap/conanfile.py @@ -3,7 +3,7 @@ class McapConan(ConanFile): name = "mcap" - version = "1.4.0" + version = "1.4.1" url = "https://github.com/foxglove/mcap" homepage = "https://github.com/foxglove/mcap" description = "A C++ implementation of the MCAP file format" diff --git a/cpp/mcap/include/mcap/internal.hpp b/cpp/mcap/include/mcap/internal.hpp index 4faedd0b27..69b1dd9d43 
100644 --- a/cpp/mcap/include/mcap/internal.hpp +++ b/cpp/mcap/include/mcap/internal.hpp @@ -138,7 +138,11 @@ inline Status ParseByteArray(const std::byte* data, uint64_t maxSize, ByteArray* return Status(StatusCode::InvalidRecord, msg); } output->resize(size); - std::memcpy(output->data(), data + 4, size); + // output->data() may return nullptr if 'output' is empty, but memcpy() does not accept nullptr. + // 'output' will be empty only if the 'size' is equal to 0. + if (size > 0) { + std::memcpy(output->data(), data + 4, size); + } return StatusCode::Success; } diff --git a/cpp/mcap/include/mcap/reader.hpp b/cpp/mcap/include/mcap/reader.hpp index 907f9450e9..4bdefc65c6 100644 --- a/cpp/mcap/include/mcap/reader.hpp +++ b/cpp/mcap/include/mcap/reader.hpp @@ -493,8 +493,6 @@ class MCAP_PUBLIC McapReader final { std::unordered_map channels_; ByteOffset dataStart_ = 0; ByteOffset dataEnd_ = EndOffset; - Timestamp startTime_ = 0; - Timestamp endTime_ = 0; bool parsedSummary_ = false; void reset_(); diff --git a/cpp/mcap/include/mcap/reader.inl b/cpp/mcap/include/mcap/reader.inl index 8f3ab8b815..ec1343de9a 100644 --- a/cpp/mcap/include/mcap/reader.inl +++ b/cpp/mcap/include/mcap/reader.inl @@ -363,8 +363,6 @@ void McapReader::reset_() { channels_.clear(); dataStart_ = 0; dataEnd_ = EndOffset; - startTime_ = 0; - endTime_ = 0; parsedSummary_ = false; } diff --git a/cpp/mcap/include/mcap/types.hpp b/cpp/mcap/include/mcap/types.hpp index 555a020ace..16059dff6d 100644 --- a/cpp/mcap/include/mcap/types.hpp +++ b/cpp/mcap/include/mcap/types.hpp @@ -13,7 +13,7 @@ namespace mcap { -#define MCAP_LIBRARY_VERSION "1.4.0" +#define MCAP_LIBRARY_VERSION "1.4.1" using SchemaId = uint16_t; using ChannelId = uint16_t; diff --git a/cpp/mcap/include/mcap/writer.inl b/cpp/mcap/include/mcap/writer.inl index a99333aa2f..08465500a8 100644 --- a/cpp/mcap/include/mcap/writer.inl +++ b/cpp/mcap/include/mcap/writer.inl @@ -473,7 +473,6 @@ void McapWriter::terminate() { zstdChunk_.reset(); #endif - channels_.clear(); attachmentIndex_.clear(); metadataIndex_.clear(); chunkIndex_.clear(); diff --git a/cpp/test/conanfile.py b/cpp/test/conanfile.py index 7cddbc2707..683014746a 100644 --- a/cpp/test/conanfile.py +++ b/cpp/test/conanfile.py @@ -4,7 +4,7 @@ class McapTestConan(ConanFile): settings = "os", "compiler", "build_type", "arch" generators = "cmake" - requires = "catch2/2.13.8", "mcap/1.4.0", "nlohmann_json/3.10.5" + requires = "catch2/2.13.8", "mcap/1.4.1", "nlohmann_json/3.10.5" def build(self): cmake = CMake(self) diff --git a/go/.golangci.yaml b/go/.golangci.yaml index d79ccf5330..c5c1d92dc3 100644 --- a/go/.golangci.yaml +++ b/go/.golangci.yaml @@ -4,20 +4,16 @@ run: linters: disable-all: true enable: - - deadcode - errcheck - gosimple - govet - ineffassign - staticcheck - - structcheck - typecheck - unused - - varcheck - gocritic - godot - gofmt - - ifshort - misspell - prealloc - sqlclosecheck diff --git a/go/cli/mcap/cmd/cat.go b/go/cli/mcap/cmd/cat.go index 5cc687d028..97f0d3ad12 100644 --- a/go/cli/mcap/cmd/cat.go +++ b/go/cli/mcap/cmd/cat.go @@ -201,12 +201,12 @@ func printMessages( ) error { msg := &bytes.Buffer{} msgReader := &bytes.Reader{} - buf := make([]byte, 1024*1024) + message := mcap.Message{Data: make([]byte, 0, 1024*1024)} transcoders := make(map[uint16]*ros1msg.JSONTranscoder) descriptors := make(map[uint16]protoreflect.MessageDescriptor) jsonWriter := newJSONOutputWriter(w) for { - schema, channel, message, err := it.Next(buf) + schema, channel, _, err := it.NextInto(&message) if err != nil { if 
errors.Is(err, io.EOF) { break diff --git a/go/cli/mcap/cmd/doctor.go b/go/cli/mcap/cmd/doctor.go index c9681b389a..4b21d4c912 100644 --- a/go/cli/mcap/cmd/doctor.go +++ b/go/cli/mcap/cmd/doctor.go @@ -9,6 +9,7 @@ import ( "io" "math" "os" + "reflect" "github.com/fatih/color" "github.com/foxglove/mcap/go/cli/mcap/utils" @@ -26,27 +27,33 @@ var ( type mcapDoctor struct { reader io.ReadSeeker - channels map[uint16]*mcap.Channel - schemas map[uint16]*mcap.Schema + channelsInDataSection map[uint16]*mcap.Channel + schemasInDataSection map[uint16]*mcap.Schema + channelsReferencedInChunksByOffset map[uint64][]uint16 + channelIDsInSummarySection map[uint16]bool + schemaIDsInSummarySection map[uint16]bool // Map from chunk offset to chunk index chunkIndexes map[uint64]*mcap.ChunkIndex + inSummarySection bool + messageCount uint64 minLogTime uint64 maxLogTime uint64 statistics *mcap.Statistics - errorCount uint32 + diagnosis Diagnosis } func (doctor *mcapDoctor) warn(format string, v ...any) { color.Yellow(format, v...) + doctor.diagnosis.Warnings = append(doctor.diagnosis.Warnings, fmt.Sprintf(format, v...)) } func (doctor *mcapDoctor) error(format string, v ...any) { color.Red(format, v...) - doctor.errorCount++ + doctor.diagnosis.Errors = append(doctor.diagnosis.Errors, fmt.Sprintf(format, v...)) } func (doctor *mcapDoctor) fatal(v ...any) { @@ -61,7 +68,101 @@ func (doctor *mcapDoctor) fatalf(format string, v ...any) { os.Exit(1) } -func (doctor *mcapDoctor) examineChunk(chunk *mcap.Chunk) { +func (doctor *mcapDoctor) examineSchema(schema *mcap.Schema) { + if schema.Encoding == "" { + if len(schema.Data) == 0 { + doctor.warn("Schema with ID: %d, Name: %q has empty Encoding and Data fields", schema.ID, schema.Name) + } else { + doctor.error("Schema with ID: %d has empty Encoding but Data contains: %q", schema.ID, string(schema.Data)) + } + } + + if schema.ID == 0 { + doctor.error("Schema.ID 0 is reserved. 
Do not make Schema records with ID 0.") + } + previous := doctor.schemasInDataSection[schema.ID] + if previous != nil { + if schema.Name != previous.Name { + doctor.error("Two schema records with same ID %d but different names (%q != %q)", + schema.ID, + schema.Name, + previous.Name, + ) + } + if schema.Encoding != previous.Encoding { + doctor.error("Two schema records with same ID %d but different encodings (%q != %q)", + schema.ID, + schema.Encoding, + previous.Encoding, + ) + } + if !bytes.Equal(schema.Data, previous.Data) { + doctor.error("Two schema records with different data present with same ID %d", schema.ID) + } + } + if doctor.inSummarySection { + if previous == nil { + doctor.error("Schema with id %d in summary section does not exist in data section", schema.ID) + } + doctor.schemaIDsInSummarySection[schema.ID] = true + } else { + if previous != nil { + doctor.warn("Duplicate schema records in data section with ID %d", schema.ID) + } + doctor.schemasInDataSection[schema.ID] = schema + } +} + +func (doctor *mcapDoctor) examineChannel(channel *mcap.Channel) { + previous := doctor.channelsInDataSection[channel.ID] + if previous != nil { + if channel.SchemaID != previous.SchemaID { + doctor.error("Two channel records with same ID %d but different schema IDs (%d != %d)", + channel.ID, + channel.SchemaID, + previous.SchemaID, + ) + } + if channel.Topic != previous.Topic { + doctor.error("Two channel records with same ID %d but different topics (%q != %q)", + channel.ID, + channel.Topic, + previous.Topic, + ) + } + if channel.MessageEncoding != previous.MessageEncoding { + doctor.error("Two channel records with same ID %d but different message encodings (%q != %q)", + channel.ID, + channel.MessageEncoding, + previous.MessageEncoding, + ) + } + if !reflect.DeepEqual(channel.Metadata, previous.Metadata) { + doctor.error("Two channel records with different metadata present with same ID %d", + channel.ID) + } + } + if doctor.inSummarySection { + if previous == nil { + doctor.error("Channel with ID %d in summary section does not exist in data section", channel.ID) + } + doctor.channelIDsInSummarySection[channel.ID] = true + } else { + if previous != nil { + doctor.warn("Duplicate channel records in data section with ID %d", channel.ID) + } + doctor.channelsInDataSection[channel.ID] = channel + } + + if channel.SchemaID != 0 { + if _, ok := doctor.schemasInDataSection[channel.SchemaID]; !ok { + doctor.error("Encountered Channel (%d) with unknown Schema (%d)", channel.ID, channel.SchemaID) + } + } +} + +func (doctor *mcapDoctor) examineChunk(chunk *mcap.Chunk, startOffset uint64) { + referencedChannels := make(map[uint16]bool) compressionFormat := mcap.CompressionFormat(chunk.Compression) var uncompressedBytes []byte @@ -90,7 +191,7 @@ func (doctor *mcapDoctor) examineChunk(chunk *mcap.Chunk) { return } default: - doctor.error("Unsupported compression format: %s", chunk.Compression) + doctor.error("Unsupported compression format: %q", chunk.Compression) return } @@ -115,7 +216,7 @@ func (doctor *mcapDoctor) examineChunk(chunk *mcap.Chunk) { EmitChunks: true, }) if err != nil { - doctor.error("Failed to make lexer for chunk bytes", err) + doctor.error("Failed to make lexer for chunk bytes: %s", err) return } defer lexer.Close() @@ -144,47 +245,29 @@ func (doctor *mcapDoctor) examineChunk(chunk *mcap.Chunk) { case mcap.TokenSchema: schema, err := mcap.ParseSchema(data) if err != nil { - doctor.error("Failed to parse schema:", err) + doctor.error("Failed to parse schema: %s", err) } - - if 
schema.Encoding == "" { - if len(schema.Data) == 0 { - doctor.warn("Schema with ID: %d, Name: %s has empty Encoding and Data fields", schema.ID, schema.Name) - } else { - doctor.error("Schema with ID: %d has empty Encoding but Data contains: %s", schema.ID, string(schema.Data)) - } - } - - if schema.ID == 0 { - doctor.error("Schema.ID 0 is reserved. Do not make Schema records with ID 0.") - } - - doctor.schemas[schema.ID] = schema + doctor.examineSchema(schema) case mcap.TokenChannel: channel, err := mcap.ParseChannel(data) if err != nil { doctor.error("Error parsing Channel: %s", err) } - - doctor.channels[channel.ID] = channel - if channel.SchemaID != 0 { - if _, ok := doctor.schemas[channel.SchemaID]; !ok { - doctor.error("Encountered Channel (%d) with unknown Schema (%d)", channel.ID, channel.SchemaID) - } - } + doctor.examineChannel(channel) case mcap.TokenMessage: message, err := mcap.ParseMessage(data) if err != nil { doctor.error("Error parsing Message: %s", err) } + referencedChannels[message.ChannelID] = true - channel := doctor.channels[message.ChannelID] + channel := doctor.channelsInDataSection[message.ChannelID] if channel == nil { - doctor.error("Got a Message record for channel: %d before a channel info.", message.ChannelID) + doctor.error("Got a Message record for channel: %d before a channel record.", message.ChannelID) } if message.LogTime < doctor.maxLogTime { - errStr := fmt.Sprintf("Message.log_time %d on %s is less than the latest log time %d", + errStr := fmt.Sprintf("Message.log_time %d on %q is less than the latest log time %d", message.LogTime, channel.Topic, doctor.maxLogTime) if strictMessageOrder { doctor.error(errStr) @@ -237,9 +320,19 @@ func (doctor *mcapDoctor) examineChunk(chunk *mcap.Chunk) { doctor.maxLogTime = maxLogTime } } + asArray := make([]uint16, 0, len(referencedChannels)) + for id := range referencedChannels { + asArray = append(asArray, id) + } + doctor.channelsReferencedInChunksByOffset[startOffset] = asArray +} + +type Diagnosis struct { + Errors []string + Warnings []string } -func (doctor *mcapDoctor) Examine() error { +func (doctor *mcapDoctor) Examine() Diagnosis { lexer, err := mcap.NewLexer(doctor.reader, &mcap.LexerOptions{ SkipMagic: false, ValidateChunkCRCs: true, @@ -286,61 +379,37 @@ func (doctor *mcapDoctor) Examine() error { } if header.Profile != "" && header.Profile != "ros1" && header.Profile != "ros2" { - doctor.warn(`Header.profile field "%s" is not a well-known profile.`, header.Profile) + doctor.warn(`Header.profile field %q is not a well-known profile.`, header.Profile) } case mcap.TokenFooter: footer, err = mcap.ParseFooter(data) if err != nil { - doctor.error("Failed to parse footer:", err) + doctor.error("Failed to parse footer: %s", err) } case mcap.TokenSchema: schema, err := mcap.ParseSchema(data) if err != nil { - doctor.error("Failed to parse schema:", err) + doctor.error("Failed to parse schema: %s", err) } - - if schema.Encoding == "" { - if len(schema.Data) == 0 { - doctor.warn("Schema with ID: %d, Name: %s has empty Encoding and Data fields", schema.ID, schema.Name) - } else { - doctor.error("Schema with ID: %d has empty Encoding but Data contains: %s", schema.ID, string(schema.Data)) - } - } - - if schema.ID == 0 { - doctor.error("Schema.ID 0 is reserved. 
Do not make Schema records with ID 0.") - } - - doctor.schemas[schema.ID] = schema + doctor.examineSchema(schema) case mcap.TokenChannel: channel, err := mcap.ParseChannel(data) if err != nil { doctor.error("Error parsing Channel: %s", err) } - - doctor.channels[channel.ID] = channel - - if channel.SchemaID != 0 { - if _, ok := doctor.schemas[channel.SchemaID]; !ok { - doctor.error( - "Encountered Channel (%d) with unknown Schema (%d)", - channel.ID, - channel.SchemaID, - ) - } - } + doctor.examineChannel(channel) case mcap.TokenMessage: message, err := mcap.ParseMessage(data) if err != nil { doctor.error("Error parsing Message: %s", err) } messageOutsideChunk = true - channel := doctor.channels[message.ChannelID] + channel := doctor.channelsInDataSection[message.ChannelID] if channel == nil { doctor.error("Got a Message record for channel: %d before a channel info.", message.ChannelID) } if message.LogTime < lastMessageTime { - doctor.error("Message.log_time %d on %s is less than the previous message record time %d", + doctor.error("Message.log_time %d on %q is less than the previous message record time %d", message.LogTime, channel.Topic, lastMessageTime) } lastMessageTime = message.LogTime @@ -359,11 +428,17 @@ func (doctor *mcapDoctor) Examine() error { if err != nil { doctor.error("Error parsing Message: %s", err) } - doctor.examineChunk(chunk) + pos, err := doctor.reader.Seek(0, io.SeekCurrent) + if err != nil { + // cannot continue if seek fails + doctor.fatalf("failed to determine read cursor: %s", err) + } + chunkStartOffset := uint64(pos - int64(len(data)) - 9) + doctor.examineChunk(chunk, chunkStartOffset) case mcap.TokenMessageIndex: _, err := mcap.ParseMessageIndex(data) if err != nil { - doctor.error("Failed to parse message index:", err) + doctor.error("Failed to parse message index: %s", err) } if messageOutsideChunk { doctor.warn("Message index in file has message records outside chunks. Indexed readers will miss these messages.") @@ -371,24 +446,24 @@ func (doctor *mcapDoctor) Examine() error { case mcap.TokenChunkIndex: chunkIndex, err := mcap.ParseChunkIndex(data) if err != nil { - doctor.error("Failed to parse chunk index:", err) + doctor.error("Failed to parse chunk index: %s", err) } if messageOutsideChunk { doctor.warn("Message index in file has message records outside chunks. 
Indexed readers will miss these messages.") } if _, ok := doctor.chunkIndexes[chunkIndex.ChunkStartOffset]; ok { - doctor.error("Multiple chunk indexes found for chunk at offset", chunkIndex.ChunkStartOffset) + doctor.error("Multiple chunk indexes found for chunk at offset %d", chunkIndex.ChunkStartOffset) } doctor.chunkIndexes[chunkIndex.ChunkStartOffset] = chunkIndex case mcap.TokenAttachmentIndex: _, err := mcap.ParseAttachmentIndex(data) if err != nil { - doctor.error("Failed to parse attachment index:", err) + doctor.error("Failed to parse attachment index: %s", err) } case mcap.TokenStatistics: statistics, err := mcap.ParseStatistics(data) if err != nil { - doctor.error("Failed to parse statistics:", err) + doctor.error("Failed to parse statistics: %s", err) } if doctor.statistics != nil { doctor.error("File contains multiple Statistics records") @@ -397,23 +472,24 @@ func (doctor *mcapDoctor) Examine() error { case mcap.TokenMetadata: _, err := mcap.ParseMetadata(data) if err != nil { - doctor.error("Failed to parse metadata:", err) + doctor.error("Failed to parse metadata: %s", err) } case mcap.TokenMetadataIndex: _, err := mcap.ParseMetadataIndex(data) if err != nil { - doctor.error("Failed to parse metadata index:", err) + doctor.error("Failed to parse metadata index: %s", err) } case mcap.TokenSummaryOffset: _, err := mcap.ParseSummaryOffset(data) if err != nil { - doctor.error("Failed to parse summary offset:", err) + doctor.error("Failed to parse summary offset: %s", err) } case mcap.TokenDataEnd: dataEnd, err = mcap.ParseDataEnd(data) if err != nil { - doctor.error("Failed to parse data end:", err) + doctor.error("Failed to parse data end: %s", err) } + doctor.inSummarySection = true case mcap.TokenError: // this is the value of the tokenType when there is an error // from the lexer, which we caught at the top. 
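The chunk handling above derives each chunk's start offset from the read cursor: once the lexer hands back a chunk payload, the cursor sits at the end of the record, and an MCAP record is framed as a 1-byte opcode plus an 8-byte little-endian content length ahead of its payload, hence `pos - int64(len(data)) - 9`. A minimal sketch of that arithmetic, using a hypothetical helper name (`recordStart`) that is not part of the CLI:

```go
package main

import "fmt"

// recordStart recomputes where an MCAP record begins, given the reader's
// position after the record payload has been consumed. Records are framed as
// a 1-byte opcode followed by an 8-byte little-endian payload length, so the
// record starts 9 bytes before its payload does.
func recordStart(cursorAfterPayload int64, payloadLen int) uint64 {
	const framing = 9 // 1-byte opcode + 8-byte uint64 length prefix
	return uint64(cursorAfterPayload - int64(payloadLen) - framing)
}

func main() {
	// A chunk whose 63-byte payload leaves the cursor at byte 100 must have
	// started at byte 28. That computed offset is what the summary
	// cross-checks in the next hunk key on, matching ChunkIndex.ChunkStartOffset.
	fmt.Println(recordStart(100, 63)) // prints 28
}
```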
@@ -422,9 +498,32 @@ func (doctor *mcapDoctor) Examine() error { } for chunkOffset, chunkIndex := range doctor.chunkIndexes { + channelsReferenced := doctor.channelsReferencedInChunksByOffset[chunkOffset] + for _, id := range channelsReferenced { + if present := doctor.channelIDsInSummarySection[id]; !present { + doctor.error( + "Indexed chunk at offset %d contains messages referencing channel (%d) not duplicated in summary section", + chunkOffset, + id, + ) + } + channel := doctor.channelsInDataSection[id] + if channel == nil { + // message with unknown channel, this is checked when that message is scanned + continue + } + if present := doctor.schemaIDsInSummarySection[channel.SchemaID]; !present { + doctor.error( + "Indexed chunk at offset %d contains messages referencing schema (%d) not duplicated in summary section", + chunkOffset, + channel.SchemaID, + ) + } + } + _, err := doctor.reader.Seek(int64(chunkOffset), io.SeekStart) if err != nil { - die("failed to seek to chunk offset: %s", err) + doctor.fatalf("failed to seek to chunk offset: %s", err) } tokenType, data, err := lexer.Next(msg) if err != nil { @@ -475,7 +574,7 @@ func (doctor *mcapDoctor) Examine() error { } if chunk.Compression != chunkIndex.Compression.String() { doctor.error( - "Chunk at offset %d has compression %s, but its chunk index has compression %s", + "Chunk at offset %d has compression %q, but its chunk index has compression %q", chunkOffset, chunk.Compression, chunkIndex.Compression, @@ -483,7 +582,7 @@ func (doctor *mcapDoctor) Examine() error { } if uint64(len(chunk.Records)) != chunkIndex.CompressedSize { doctor.error( - "Chunk at offset %d has data length %d, but its chunk index has compressed size %s", + "Chunk at offset %d has data length %d, but its chunk index has compressed size %d", chunkOffset, len(chunk.Records), chunkIndex.CompressedSize, @@ -524,19 +623,19 @@ func (doctor *mcapDoctor) Examine() error { ) } } - if doctor.errorCount == 0 { - return nil - } - return fmt.Errorf("encountered %d errors", doctor.errorCount) + return doctor.diagnosis } func newMcapDoctor(reader io.ReadSeeker) *mcapDoctor { return &mcapDoctor{ - reader: reader, - channels: make(map[uint16]*mcap.Channel), - schemas: make(map[uint16]*mcap.Schema), - chunkIndexes: make(map[uint64]*mcap.ChunkIndex), - minLogTime: math.MaxUint64, + reader: reader, + channelsInDataSection: make(map[uint16]*mcap.Channel), + channelsReferencedInChunksByOffset: make(map[uint64][]uint16), + channelIDsInSummarySection: make(map[uint16]bool), + schemaIDsInSummarySection: make(map[uint16]bool), + schemasInDataSection: make(map[uint16]*mcap.Schema), + chunkIndexes: make(map[uint64]*mcap.ChunkIndex), + minLogTime: math.MaxUint64, } } @@ -555,7 +654,11 @@ func main(_ *cobra.Command, args []string) { if verbose { fmt.Printf("Examining %s\n", args[0]) } - return doctor.Examine() + diagnosis := doctor.Examine() + if len(diagnosis.Errors) > 0 { + return fmt.Errorf("encountered %d errors", len(diagnosis.Errors)) + } + return nil }) if err != nil { die("Doctor command failed: %s", err) diff --git a/go/cli/mcap/cmd/doctor_test.go b/go/cli/mcap/cmd/doctor_test.go index f191f6e1dc..3b6b47c629 100644 --- a/go/cli/mcap/cmd/doctor_test.go +++ b/go/cli/mcap/cmd/doctor_test.go @@ -2,9 +2,11 @@ package cmd import ( "bytes" + "os" "testing" "github.com/foxglove/mcap/go/mcap" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -29,6 +31,36 @@ func TestNoErrorOnMessagelessChunks(t *testing.T) { rs := bytes.NewReader(buf.Bytes()) doctor := 
newMcapDoctor(rs) - err = doctor.Examine() + diagnosis := doctor.Examine() + assert.Empty(t, diagnosis.Errors) +} + +func TestRequiresDuplicatedSchemasForIndexedMessages(t *testing.T) { + rs, err := os.Open("../../../../tests/conformance/data/OneMessage/OneMessage-ch-chx-pad.mcap") require.NoError(t, err) + defer func() { + require.NoError(t, rs.Close()) + }() + doctor := newMcapDoctor(rs) + diagnosis := doctor.Examine() + assert.Len(t, diagnosis.Errors, 2) + assert.Equal(t, + "Indexed chunk at offset 28 contains messages referencing channel (1) not duplicated in summary section", + diagnosis.Errors[0], + ) + assert.Equal(t, + "Indexed chunk at offset 28 contains messages referencing schema (1) not duplicated in summary section", + diagnosis.Errors[1], + ) +} + +func TestPassesIndexedMessagesWithRepeatedSchemas(t *testing.T) { + rs, err := os.Open("../../../../tests/conformance/data/OneMessage/OneMessage-ch-chx-pad-rch-rsh.mcap") + require.NoError(t, err) + defer func() { + require.NoError(t, rs.Close()) + }() + doctor := newMcapDoctor(rs) + diagnosis := doctor.Examine() + assert.Empty(t, diagnosis.Errors) } diff --git a/go/cli/mcap/cmd/info.go b/go/cli/mcap/cmd/info.go index be50601a0d..6c0171eb48 100644 --- a/go/cli/mcap/cmd/info.go +++ b/go/cli/mcap/cmd/info.go @@ -1,6 +1,7 @@ package cmd import ( + "bufio" "bytes" "context" "fmt" @@ -13,6 +14,7 @@ import ( "github.com/foxglove/mcap/go/cli/mcap/utils" "github.com/foxglove/mcap/go/mcap" + "github.com/olekukonko/tablewriter" "github.com/spf13/cobra" ) @@ -89,7 +91,9 @@ func printInfo(w io.Writer, info *mcap.Info) error { header = addRow(header, "end:", "%s", decimalTime(endtime)) } } - utils.FormatTable(buf, header) + if err := printSummaryRows(buf, header); err != nil { + return err + } if len(info.ChunkIndexes) > 0 { compressionFormatStats := make(map[mcap.CompressionFormat]struct { count int @@ -166,12 +170,15 @@ func printInfo(w io.Writer, info *mcap.Info) error { } rows = append(rows, row) } - utils.FormatTable(buf, rows) - fmt.Fprintf(buf, "channels: %d\n", len(chanIDs)) + if err := printSummaryRows(buf, rows); err != nil { + return err + } if info.Statistics != nil { + fmt.Fprintf(buf, "channels: %d\n", info.Statistics.ChannelCount) fmt.Fprintf(buf, "attachments: %d\n", info.Statistics.AttachmentCount) fmt.Fprintf(buf, "metadata: %d\n", info.Statistics.MetadataCount) } else { + fmt.Fprintf(buf, "channels: %d\n", len(chanIDs)) fmt.Fprintf(buf, "attachments: unknown\n") fmt.Fprintf(buf, "metadata: unknown\n") } @@ -179,6 +186,26 @@ func printInfo(w io.Writer, info *mcap.Info) error { return err } +// Similar to utils.FormatTable, but optimized for 'expanded' display of nested data. +func printSummaryRows(w io.Writer, rows [][]string) error { + buf := &bytes.Buffer{} + tw := tablewriter.NewWriter(buf) + tw.SetBorder(false) + tw.SetAutoWrapText(false) + tw.SetAlignment(tablewriter.ALIGN_LEFT) + tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT) + tw.SetColumnSeparator("") + tw.AppendBulk(rows) + tw.Render() + // This tablewriter puts a leading space on the lines for some reason, so + // remove it. 
+ scanner := bufio.NewScanner(buf) + for scanner.Scan() { + fmt.Fprintln(w, strings.TrimLeft(scanner.Text(), " ")) + } + return scanner.Err() +} + var infoCmd = &cobra.Command{ Use: "info", Short: "Report statistics about an MCAP file", diff --git a/go/cli/mcap/cmd/merge.go b/go/cli/mcap/cmd/merge.go index 5142ecd6f9..2c3bf2ef28 100644 --- a/go/cli/mcap/cmd/merge.go +++ b/go/cli/mcap/cmd/merge.go @@ -300,7 +300,7 @@ func (m *mcapMerger) mergeInputs(w io.Writer, inputs []namedReader) error { } for inputID, iterator := range iterators { inputName := inputs[inputID].name - schema, channel, message, err := iterator.Next(nil) + schema, channel, message, err := iterator.NextInto(nil) if err != nil { if errors.Is(err, io.EOF) { // the file may be an empty mcap. if so, just ignore it. @@ -336,7 +336,7 @@ func (m *mcapMerger) mergeInputs(w io.Writer, inputs []namedReader) error { // Pull the next message off the iterator, to replace the one just // popped from the queue. Before pushing this message, it must be // renumbered and the related channels/schemas may need to be inserted. - newSchema, newChannel, newMessage, err := iterators[msg.InputID].Next(nil) + newSchema, newChannel, newMessage, err := iterators[msg.InputID].NextInto(nil) if err != nil { if errors.Is(err, io.EOF) { // if the iterator is empty, skip this read. No further messages diff --git a/go/cli/mcap/cmd/metadata.go b/go/cli/mcap/cmd/metadata.go index f46a547183..b3475b6323 100644 --- a/go/cli/mcap/cmd/metadata.go +++ b/go/cli/mcap/cmd/metadata.go @@ -134,7 +134,7 @@ var addMetadataCmd = &cobra.Command{ var getMetadataCmd = &cobra.Command{ Use: "metadata", - Short: "get metadata by name", + Short: "Get metadata by name", Run: func(_ *cobra.Command, args []string) { ctx := context.Background() if len(args) != 1 { @@ -214,7 +214,7 @@ func init() { } getCmd.AddCommand(getMetadataCmd) - getMetadataCmd.PersistentFlags().StringVarP(&getMetadataName, "name", "n", "", "name of metadata record to create") + getMetadataCmd.PersistentFlags().StringVarP(&getMetadataName, "name", "n", "", "name of metadata record to get") err = getMetadataCmd.MarkPersistentFlagRequired("name") if err != nil { die("failed to mark --name flag as required: %s", err) diff --git a/go/cli/mcap/cmd/sort.go b/go/cli/mcap/cmd/sort.go index 5687abb939..e7dacfda2e 100644 --- a/go/cli/mcap/cmd/sort.go +++ b/go/cli/mcap/cmd/sort.go @@ -45,7 +45,7 @@ func fileHasNoMessages(r io.ReadSeeker) (bool, error) { if err != nil { return false, err } - _, _, _, err = it.Next(nil) + _, _, _, err = it.NextInto(nil) if err != nil { if errors.Is(err, io.EOF) { return true, nil @@ -123,9 +123,10 @@ func sortFile(w io.Writer, r io.ReadSeeker) error { return fmt.Errorf("failed to read messages: %w", err) } schemas := make(map[uint16]*mcap.Schema) - channels := make(map[uint16]*mcap.Schema) + channels := make(map[uint16]*mcap.Channel) + message := mcap.Message{} for { - schema, channel, message, err := it.Next(nil) + schema, channel, _, err := it.NextInto(&message) if err != nil { if errors.Is(err, io.EOF) { break @@ -137,6 +138,7 @@ func sortFile(w io.Writer, r io.ReadSeeker) error { if err != nil { return fmt.Errorf("failed to write schema: %w", err) } + schemas[schema.ID] = schema } } if _, ok := channels[channel.ID]; !ok { @@ -144,8 +146,9 @@ func sortFile(w io.Writer, r io.ReadSeeker) error { if err != nil { return fmt.Errorf("failed to write channel: %w", err) } + channels[channel.ID] = channel } - err = writer.WriteMessage(message) + err = writer.WriteMessage(&message) if err != nil { 
return fmt.Errorf("failed to write message: %w", err) } diff --git a/go/cli/mcap/cmd/sort_test.go b/go/cli/mcap/cmd/sort_test.go index cb3e750397..3530726771 100644 --- a/go/cli/mcap/cmd/sort_test.go +++ b/go/cli/mcap/cmd/sort_test.go @@ -2,6 +2,8 @@ package cmd import ( "bytes" + "errors" + "io" "testing" "github.com/foxglove/mcap/go/mcap" @@ -62,14 +64,32 @@ func TestSortFile(t *testing.T) { w := &bytes.Buffer{} require.NoError(t, sortFile(w, reader)) - // verify it is now sorted - r, err := mcap.NewReader(bytes.NewReader(w.Bytes())) + lexer, err := mcap.NewLexer(bytes.NewReader(w.Bytes())) require.NoError(t, err) - - it, err := r.Messages(mcap.UsingIndex(false)) - require.NoError(t, err) - - _, _, msg, err := it.Next(nil) - require.NoError(t, err) - assert.Equal(t, 25, int(msg.LogTime)) + var schemaCount, channelCount, messageCount int + var lastMessageTime uint64 +top: + for { + token, record, err := lexer.Next(nil) + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + switch token { + case mcap.TokenMessage: + messageCount++ + message, err := mcap.ParseMessage(record) + require.NoError(t, err) + require.GreaterOrEqual(t, message.LogTime, lastMessageTime) + lastMessageTime = message.LogTime + case mcap.TokenSchema: + schemaCount++ + case mcap.TokenChannel: + channelCount++ + case mcap.TokenDataEnd: + break top + } + } + assert.Equal(t, 1, schemaCount, "incorrect schema count") + assert.Equal(t, 2, channelCount, "incorrect channel count") } diff --git a/go/cli/mcap/go.sum b/go/cli/mcap/go.sum index f3afefde15..fc12da2ecb 100644 --- a/go/cli/mcap/go.sum +++ b/go/cli/mcap/go.sum @@ -100,8 +100,6 @@ github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go. github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= -github.com/foxglove/go-rosbag v0.0.5 h1:UCkYdiBGagpgAql8cNO0d8wX8QZMfGUte0+8aWB2MM4= -github.com/foxglove/go-rosbag v0.0.5/go.mod h1:Kz3doYZfPO6OIawx4tFm9MU9COkuzcYaI963psJeLrA= github.com/foxglove/go-rosbag v0.0.6 h1:LcWr1LqdS1NxWO4+mbPfo7d1jpL3gybqRmX1abD8eAw= github.com/foxglove/go-rosbag v0.0.6/go.mod h1:Kz3doYZfPO6OIawx4tFm9MU9COkuzcYaI963psJeLrA= github.com/foxglove/mcap/go/mcap v0.4.0 h1:jsDZZ6qmMKa174EE8Tw0hxeMUdgjz8emTlN8+6FEnXE= @@ -250,8 +248,6 @@ github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3v github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.0.2 h1:+jQXlF3scKIcSEKkdHzXhCTDLPFi5r1wnK6yPS+49Gw= github.com/pelletier/go-toml/v2 v2.0.2/go.mod h1:MovirKjgVRESsAvNZlAjtFwV867yGuwRkXbG66OzopI= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -288,8 +284,6 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify 
v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.4.0 h1:yAzM1+SmVcz5R4tXGsNMu1jUl2aOJXoiWUCEwwnGrvs= diff --git a/go/cli/mcap/utils/utils.go b/go/cli/mcap/utils/utils.go index 0af2ad0ca3..be9853223c 100644 --- a/go/cli/mcap/utils/utils.go +++ b/go/cli/mcap/utils/utils.go @@ -1,7 +1,6 @@ package utils import ( - "bufio" "bytes" "context" "encoding/json" @@ -9,7 +8,6 @@ import ( "io" "os" "regexp" - "strings" "time" "cloud.google.com/go/storage" @@ -107,21 +105,17 @@ func WithReader(ctx context.Context, filename string, f func(remote bool, rs io. } func FormatTable(w io.Writer, rows [][]string) { - buf := &bytes.Buffer{} - tw := tablewriter.NewWriter(buf) + tw := tablewriter.NewWriter(w) tw.SetBorder(false) tw.SetAutoWrapText(false) tw.SetAlignment(tablewriter.ALIGN_LEFT) tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT) tw.SetColumnSeparator("") + tw.SetTablePadding("\t") + tw.SetNoWhiteSpace(true) + tw.AppendBulk(rows) tw.Render() - // This tablewriter puts a leading space on the lines for some reason, so - // remove it. - scanner := bufio.NewScanner(buf) - for scanner.Scan() { - fmt.Fprintln(w, strings.TrimLeft(scanner.Text(), " ")) - } } func Keys[T any](m map[string]T) []string { diff --git a/go/conformance/test-read-conformance/go.mod b/go/conformance/test-read-conformance/go.mod index bafb30161b..c26a531003 100644 --- a/go/conformance/test-read-conformance/go.mod +++ b/go/conformance/test-read-conformance/go.mod @@ -2,13 +2,17 @@ module github.com/foxglove/mcap/go/conformance/test-read-conformance go 1.18 -require github.com/foxglove/mcap/go/mcap v0.0.0-20220328132551-ffb9c0b0ebdc +require ( + github.com/foxglove/mcap/go/mcap v0.0.0-20220328132551-ffb9c0b0ebdc + github.com/stretchr/testify v1.9.0 +) require ( - github.com/davecgh/go-spew v1.1.0 // indirect - github.com/klauspost/compress v1.14.1 // indirect - github.com/pierrec/lz4/v4 v4.1.12 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/kr/pretty v0.3.0 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/stretchr/testify v1.7.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/conformance/test-read-conformance/go.sum b/go/conformance/test-read-conformance/go.sum index 9620dd0980..5224437c70 100644 --- a/go/conformance/test-read-conformance/go.sum +++ b/go/conformance/test-read-conformance/go.sum @@ -1,15 +1,13 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/klauspost/compress v1.14.1 h1:hLQYb23E8/fO+1u53d02A97a8UnsddcvYzq4ERRU4ds= -github.com/klauspost/compress v1.14.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/davecgh/go-spew 
v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/foxglove/mcap/go/mcap v0.0.0-20220328132551-ffb9c0b0ebdc h1:v4dm5b/Z412h6gPY8kwixeVtFRiixK4KIY7yV90p1T4= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/go/conformance/test-read-conformance/main.go b/go/conformance/test-read-conformance/main.go index 9bb44ccf02..fcbfb77360 100644 --- a/go/conformance/test-read-conformance/main.go +++ b/go/conformance/test-read-conformance/main.go @@ -354,7 +354,7 @@ func readIndexed(w io.Writer, filepath string) error { knownChannelIDs := make(map[uint16]bool) for { - schema, channel, message, err := it.Next(nil) + schema, channel, message, err := it.NextInto(nil) if errors.Is(err, io.EOF) { break } diff --git a/go/conformance/test-write-conformance/go.mod b/go/conformance/test-write-conformance/go.mod index 12d48f373e..81200a7908 100644 --- a/go/conformance/test-write-conformance/go.mod +++ b/go/conformance/test-write-conformance/go.mod @@ -4,13 +4,15 @@ go 1.18 require ( github.com/foxglove/mcap/go/mcap v0.0.0-20220328132551-ffb9c0b0ebdc - github.com/stretchr/testify v1.7.0 + github.com/stretchr/testify v1.9.0 ) require ( - github.com/davecgh/go-spew v1.1.0 // indirect - github.com/klauspost/compress v1.14.1 // indirect - github.com/pierrec/lz4/v4 v4.1.12 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/kr/pretty v0.3.0 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/conformance/test-write-conformance/go.sum b/go/conformance/test-write-conformance/go.sum index 9620dd0980..5224437c70 100644 --- a/go/conformance/test-write-conformance/go.sum +++ b/go/conformance/test-write-conformance/go.sum @@ -1,15 +1,13 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 
-github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/klauspost/compress v1.14.1 h1:hLQYb23E8/fO+1u53d02A97a8UnsddcvYzq4ERRU4ds= -github.com/klauspost/compress v1.14.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/foxglove/mcap/go/mcap v0.0.0-20220328132551-ffb9c0b0ebdc h1:v4dm5b/Z412h6gPY8kwixeVtFRiixK4KIY7yV90p1T4= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/go/go.work b/go/go.work index d4baeee12a..7326675cdf 100644 --- a/go/go.work +++ b/go/go.work @@ -1,4 +1,4 @@ -go 1.22 +go 1.22.5 use ( ./cli/mcap diff --git a/go/mcap/go.mod b/go/mcap/go.mod index 0d508af31e..d4ad84fc17 100644 --- a/go/mcap/go.mod +++ b/go/mcap/go.mod @@ -3,13 +3,15 @@ module github.com/foxglove/mcap/go/mcap go 1.18 require ( - github.com/klauspost/compress v1.15.12 - github.com/pierrec/lz4/v4 v4.1.12 - github.com/stretchr/testify v1.7.0 + github.com/klauspost/compress v1.16.7 + github.com/pierrec/lz4/v4 v4.1.21 + github.com/stretchr/testify v1.9.0 ) require ( - github.com/davecgh/go-spew v1.1.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/kr/pretty v0.3.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/mcap/go.sum b/go/mcap/go.sum index 6097f86018..9b52a5a7c1 100644 --- a/go/mcap/go.sum +++ b/go/mcap/go.sum @@ -1,17 +1,12 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/klauspost/compress v1.14.1 h1:hLQYb23E8/fO+1u53d02A97a8UnsddcvYzq4ERRU4ds= 
-github.com/klauspost/compress v1.14.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM= -github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= -github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/go/mcap/indexed_message_iterator.go b/go/mcap/indexed_message_iterator.go index f6dcc11746..c5a7176623 100644 --- a/go/mcap/indexed_message_iterator.go +++ b/go/mcap/indexed_message_iterator.go @@ -4,53 +4,98 @@ import ( "bufio" "bytes" "encoding/binary" + "errors" "fmt" "io" + "math/bits" + "slices" + "sort" + + "math" "github.com/klauspost/compress/zstd" "github.com/pierrec/lz4/v4" ) +var ErrBadOffset = errors.New("invalid offset") + const ( chunkBufferGrowthMultiple = 1.2 ) -// indexedMessageIterator is an iterator over an indexed mcap read seeker (as -// seeking is required). It makes reads in alternation from the index data -// section, the message index at the end of a chunk, and the chunk's contents. +type chunkSlot struct { + buf []byte + unreadMessages uint64 +} + +type messageIndexWithChunkSlot struct { + timestamp uint64 + offset uint64 + chunkSlotIndex int +} + +// indexedMessageIterator is an iterator over an indexed mcap io.ReadSeeker (as +// seeking is required). It reads index information from the MCAP summary section first, then +// seeks to chunk records in the data section. +// +// This iterator reads in order by maintaining two ordered queues, one for chunk indexes and one +// for message indexes. On every call to NextInto(), the front element of both queues is checked and +// the earlier is used. 
When a chunk index is first, the chunk is decompressed, indexed, the +// new message indexes are pushed onto the message index queue and sorted. +// When a message index is first, that message is copied out of the decompressed chunk and yielded +// to the caller. type indexedMessageIterator struct { lexer *Lexer rs io.ReadSeeker topics map[string]bool start uint64 end uint64 + order ReadOrder - channels map[uint16]*Channel - schemas map[uint16]*Schema + channels slicemap[Channel] + schemas slicemap[Schema] statistics *Statistics chunkIndexes []*ChunkIndex attachmentIndexes []*AttachmentIndex metadataIndexes []*MetadataIndex footer *Footer + fileSize int64 - indexHeap rangeIndexHeap + curChunkIndex int + messageIndexes []messageIndexWithChunkSlot + curMessageIndex int + chunkSlots []chunkSlot zstdDecoder *zstd.Decoder lz4Reader *lz4.Reader hasReadSummarySection bool - compressedChunkAndMessageIndex []byte - metadataCallback func(*Metadata) error + recordBuf []byte + metadataCallback func(*Metadata) error +} + +func (it *indexedMessageIterator) seekTo(offset uint64) error { + if offset > uint64(math.MaxInt64) { + return fmt.Errorf("%w: %d > int64 max", ErrBadOffset, offset) + } + signedOffset := int64(offset) + if signedOffset >= it.fileSize { + return fmt.Errorf("%w: %d past file end %d", ErrBadOffset, offset, it.fileSize) + } + _, err := it.rs.Seek(signedOffset, io.SeekStart) + return err } // parseIndexSection parses the index section of the file and populates the // related fields of the structure. It must be called prior to any of the other // access methods. func (it *indexedMessageIterator) parseSummarySection() error { - _, err := it.rs.Seek(-8-4-8-8, io.SeekEnd) // magic, plus 20 bytes footer + const footerStartOffsetFromEnd = 8 + 4 + 8 + 8 // magic, plus 20 bytes footer + footerStartPos, err := it.rs.Seek(-footerStartOffsetFromEnd, io.SeekEnd) if err != nil { return err } + it.fileSize = footerStartPos + footerStartOffsetFromEnd buf := make([]byte, 8+20) _, err = io.ReadFull(it.rs, buf) if err != nil { @@ -58,7 +103,7 @@ func (it *indexedMessageIterator) parseSummarySection() error { } magic := buf[20:] if !bytes.Equal(magic, Magic) { - return fmt.Errorf("not an MCAP file") + return &ErrBadMagic{location: magicLocationEnd, actual: magic} } footer, err := ParseFooter(buf[:20]) if err != nil { @@ -71,9 +116,9 @@ func (it *indexedMessageIterator) parseSummarySection() error { it.hasReadSummarySection = true return nil } - _, err = it.rs.Seek(int64(footer.SummaryStart), io.SeekStart) + err = it.seekTo(footer.SummaryStart) if err != nil { - return fmt.Errorf("failed to seek to summary start") + return fmt.Errorf("failed to seek to summary start: %w", err) } lexer, err := NewLexer(bufio.NewReader(it.rs), &LexerOptions{ @@ -95,14 +140,14 @@ func (it *indexedMessageIterator) parseSummarySection() error { if err != nil { return fmt.Errorf("failed to parse schema: %w", err) } - it.schemas[schema.ID] = schema + it.schemas.Set(schema.ID, schema) case TokenChannel: channelInfo, err := ParseChannel(record) if err != nil { return fmt.Errorf("failed to parse channel info: %w", err) } if len(it.topics) == 0 || it.topics[channelInfo.Topic] { - it.channels[channelInfo.ID] = channelInfo + it.channels.Set(channelInfo.ID, channelInfo) } case TokenAttachmentIndex: idx, err := ParseAttachmentIndex(record) @@ -121,20 +166,9 @@ func (it *indexedMessageIterator) parseSummarySection() error { if err != nil { return fmt.Errorf("failed to parse attachment index: %w", err) } - it.chunkIndexes = 
append(it.chunkIndexes, idx) // if the chunk overlaps with the requested parameters, load it - for _, channel := range it.channels { - if idx.MessageIndexOffsets[channel.ID] > 0 { - if (it.end == 0 && it.start == 0) || (idx.MessageStartTime < it.end && idx.MessageEndTime >= it.start) { - rangeIndex := rangeIndex{ - chunkIndex: idx, - } - if err := it.indexHeap.HeapPush(rangeIndex); err != nil { - return err - } - } - break - } + if (it.end == 0 && it.start == 0) || (idx.MessageStartTime < it.end && idx.MessageEndTime >= it.start) { + it.chunkIndexes = append(it.chunkIndexes, idx) } case TokenStatistics: stats, err := ParseStatistics(record) @@ -143,36 +177,79 @@ func (it *indexedMessageIterator) parseSummarySection() error { } it.statistics = stats case TokenFooter: + // sort chunk indexes in the order that they will need to be loaded, depending on the specified + // read order. + switch it.order { + case FileOrder: + sort.Slice(it.chunkIndexes, func(i, j int) bool { + return it.chunkIndexes[i].ChunkStartOffset < it.chunkIndexes[j].ChunkStartOffset + }) + case LogTimeOrder: + sort.Slice(it.chunkIndexes, func(i, j int) bool { + if it.chunkIndexes[i].MessageStartTime == it.chunkIndexes[j].MessageStartTime { + return it.chunkIndexes[i].ChunkStartOffset < it.chunkIndexes[j].ChunkStartOffset + } + return it.chunkIndexes[i].MessageStartTime < it.chunkIndexes[j].MessageStartTime + }) + case ReverseLogTimeOrder: + sort.Slice(it.chunkIndexes, func(i, j int) bool { + if it.chunkIndexes[i].MessageEndTime == it.chunkIndexes[j].MessageEndTime { + return it.chunkIndexes[i].ChunkStartOffset > it.chunkIndexes[j].ChunkStartOffset + } + return it.chunkIndexes[i].MessageEndTime > it.chunkIndexes[j].MessageEndTime + }) + } it.hasReadSummarySection = true return nil } } } +// loadChunk seeks to and decompresses a chunk into a chunk slot, then populates it.messageIndexes +// with the offsets of messages in that chunk. 
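+// Chunk slots are reused: loadChunk picks the first slot whose unreadMessages count has dropped
+// to zero and appends a new slot only when none is free, so the number of decompressed chunk
+// buffers grows only to the maximum number of chunks that simultaneously hold unread messages.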
func (it *indexedMessageIterator) loadChunk(chunkIndex *ChunkIndex) error { - _, err := it.rs.Seek(int64(chunkIndex.ChunkStartOffset), io.SeekStart) + err := it.seekTo(chunkIndex.ChunkStartOffset) if err != nil { return err } - compressedChunkLength := chunkIndex.ChunkLength + chunkIndex.MessageIndexLength - if len(it.compressedChunkAndMessageIndex) < int(compressedChunkLength) { - newSize := int(float64(compressedChunkLength) * chunkBufferGrowthMultiple) - it.compressedChunkAndMessageIndex = make([]byte, newSize) + compressedChunkLength := chunkIndex.ChunkLength + if uint64(cap(it.recordBuf)) < compressedChunkLength { + newCapacity := int(float64(compressedChunkLength) * chunkBufferGrowthMultiple) + it.recordBuf = make([]byte, compressedChunkLength, newCapacity) + } else { + it.recordBuf = it.recordBuf[:compressedChunkLength] } - _, err = io.ReadFull(it.rs, it.compressedChunkAndMessageIndex[:compressedChunkLength]) + _, err = io.ReadFull(it.rs, it.recordBuf) if err != nil { return fmt.Errorf("failed to read chunk data: %w", err) } - parsedChunk, err := ParseChunk(it.compressedChunkAndMessageIndex[9:chunkIndex.ChunkLength]) + parsedChunk, err := ParseChunk(it.recordBuf[9:]) if err != nil { return fmt.Errorf("failed to parse chunk: %w", err) } // decompress the chunk data - var chunkData []byte + chunkSlotIndex := -1 + for i, chunkSlot := range it.chunkSlots { + if chunkSlot.unreadMessages == 0 { + chunkSlotIndex = i + break + } + } + if chunkSlotIndex == -1 { + it.chunkSlots = append(it.chunkSlots, chunkSlot{}) + chunkSlotIndex = len(it.chunkSlots) - 1 + } + chunkSlot := &it.chunkSlots[chunkSlotIndex] + bufSize := parsedChunk.UncompressedSize + if uint64(cap(chunkSlot.buf)) < bufSize { + chunkSlot.buf = make([]byte, bufSize) + } else { + chunkSlot.buf = chunkSlot.buf[:bufSize] + } switch CompressionFormat(parsedChunk.Compression) { case CompressionNone: - chunkData = parsedChunk.Records + copy(chunkSlot.buf, parsedChunk.Records) case CompressionZSTD: if it.zstdDecoder == nil { it.zstdDecoder, err = zstd.NewReader(nil) @@ -180,8 +257,7 @@ func (it *indexedMessageIterator) loadChunk(chunkIndex *ChunkIndex) error { return fmt.Errorf("failed to instantiate zstd decoder: %w", err) } } - chunkData = make([]byte, 0, parsedChunk.UncompressedSize) - chunkData, err = it.zstdDecoder.DecodeAll(parsedChunk.Records, chunkData) + chunkSlot.buf, err = it.zstdDecoder.DecodeAll(parsedChunk.Records, chunkSlot.buf[:0]) if err != nil { return fmt.Errorf("failed to decode chunk data: %w", err) } @@ -191,83 +267,144 @@ func (it *indexedMessageIterator) loadChunk(chunkIndex *ChunkIndex) error { } else { it.lz4Reader.Reset(bytes.NewReader(parsedChunk.Records)) } - chunkData = make([]byte, parsedChunk.UncompressedSize) - _, err = io.ReadFull(it.lz4Reader, chunkData) + _, err = io.ReadFull(it.lz4Reader, chunkSlot.buf) if err != nil { return fmt.Errorf("failed to decompress lz4 chunk: %w", err) } default: return fmt.Errorf("unsupported compression %s", parsedChunk.Compression) } - // use the message index to find the messages we want from the chunk - messageIndexSection := it.compressedChunkAndMessageIndex[chunkIndex.ChunkLength:compressedChunkLength] - var recordLen uint64 - offset := 0 - for offset < len(messageIndexSection) { - if op := OpCode(messageIndexSection[offset]); op != OpMessageIndex { - return fmt.Errorf("unexpected token %s in message index section", op) - } - offset++ - recordLen, offset, err = getUint64(messageIndexSection, offset) - if err != nil { - return fmt.Errorf("failed to get message index 
record length: %w", err) + // produce message indexes for the newly decompressed chunk data. + var maxLogTime uint64 + // if there are no message indexes outstanding, truncate now. + if it.curMessageIndex == len(it.messageIndexes) { + it.curMessageIndex = 0 + it.messageIndexes = it.messageIndexes[:0] + } + sortingRequired := it.curMessageIndex != 0 + startIdx := len(it.messageIndexes) + for offset := uint64(0); offset < bufSize; { + if bufSize < offset+1+8 { + return fmt.Errorf("expected another record in chunk, but left with %d bytes", bufSize-offset) } - messageIndex, err := ParseMessageIndex(messageIndexSection[offset : uint64(offset)+recordLen]) - if err != nil { - return fmt.Errorf("failed to parse message index: %w", err) + opcodeAndLengthBuf := chunkSlot.buf[offset : offset+1+8] + op := OpCode(opcodeAndLengthBuf[0]) + recordLen := binary.LittleEndian.Uint64(opcodeAndLengthBuf[1:]) + recordStart := offset + 1 + 8 + recordEnd, overflow := checkedAdd(recordStart, recordLen) + if overflow { + return fmt.Errorf("record length extends past uint64 range: start: %d, len: %d", recordStart, recordLen) } - offset += int(recordLen) - // skip message indexes for channels we don't need - if _, ok := it.channels[messageIndex.ChannelID]; !ok { - continue + if bufSize < recordEnd { + return fmt.Errorf( + "%s record in chunk has length %d bytes but only %d remaining in chunk", + op, recordLen, bufSize-recordStart) } - // push any message index entries in the requested time range to the heap to read. - for i := range messageIndex.Records { - timestamp := messageIndex.Records[i].Timestamp - if timestamp >= it.start && timestamp < it.end { - if err := it.indexHeap.HeapPush(rangeIndex{ - chunkIndex: chunkIndex, - messageIndexEntry: &messageIndex.Records[i], - buf: chunkData, - }); err != nil { - return err + recordContent := chunkSlot.buf[recordStart:recordEnd] + if op == OpMessage { + msg := Message{} + if err := msg.PopulateFrom(recordContent, false); err != nil { + return fmt.Errorf("could not parse message in chunk: %w", err) + } + if it.channels.Get(msg.ChannelID) != nil { + if msg.LogTime >= it.start && msg.LogTime < it.end { + it.messageIndexes = append(it.messageIndexes, messageIndexWithChunkSlot{ + timestamp: msg.LogTime, + offset: offset, + chunkSlotIndex: chunkSlotIndex, + }) + if msg.LogTime < maxLogTime { + sortingRequired = true + } else { + maxLogTime = msg.LogTime + } + chunkSlot.unreadMessages++ } } } + offset = recordEnd } + unreadMessageIndexes := it.messageIndexes[it.curMessageIndex:] + switch it.order { + case FileOrder: + // message indexes are already in file order, no sorting needed + case LogTimeOrder: + if sortingRequired { + // We stable-sort to ensure that if messages in different chunks have the + // same timestamp, the one from the earlier-loaded chunk is returned first. The offset + // field of the message index is not comparable between indexes of different chunks. + sort.SliceStable(unreadMessageIndexes, func(i, j int) bool { + return unreadMessageIndexes[i].timestamp < unreadMessageIndexes[j].timestamp + }) + } + case ReverseLogTimeOrder: + // assume message indexes will always be mostly-in-order, so reversing the newly-added + // indexes will put them mostly into reverse order, which speeds up sorting. + // If the chunk is in order, no sorting is needed after reversing. 
+ slices.Reverse(it.messageIndexes[startIdx:]) + if sortingRequired { + sort.SliceStable(unreadMessageIndexes, func(i, j int) bool { + return unreadMessageIndexes[i].timestamp > unreadMessageIndexes[j].timestamp + }) + } + } + // if there is more dead space at the front than there is live, remove the dead space by + // copying the live data to the front and truncating. + if len(unreadMessageIndexes) < it.curMessageIndex { + copy(it.messageIndexes[:len(unreadMessageIndexes)], unreadMessageIndexes) + it.messageIndexes = it.messageIndexes[:len(unreadMessageIndexes)] + it.curMessageIndex = 0 + } + return nil } -func readRecord(r io.Reader) (OpCode, []byte, error) { - buf := make([]byte, 9) +func readRecord(r io.Reader, buf []byte) (OpCode, []byte, error) { + if cap(buf) < 9 { + buf = make([]byte, 9) + } else { + buf = buf[:9] + } _, err := io.ReadFull(r, buf) if err != nil { return 0, nil, fmt.Errorf("failed to read record header: %w", err) } opcode := OpCode(buf[0]) recordLen := binary.LittleEndian.Uint64(buf[1:]) - record := make([]byte, recordLen) - _, err = io.ReadFull(r, record) + if uint64(cap(buf)) < recordLen { + buf = make([]byte, recordLen) + } else { + buf = buf[:recordLen] + } + _, err = io.ReadFull(r, buf) if err != nil { return 0, nil, fmt.Errorf("failed to read record: %w", err) } - return opcode, record, nil + return opcode, buf, nil } -func (it *indexedMessageIterator) Next(_ []byte) (*Schema, *Channel, *Message, error) { +// NextInto yields the next message from the iterator, writing the result into the provided Message +// struct. The msg.Data buffer will be reused if it has enough capacity. If `msg` is nil, a new +// Message will be allocated. +func (it *indexedMessageIterator) NextInto(msg *Message) (*Schema, *Channel, *Message, error) { + if msg == nil { + msg = &Message{} + } if !it.hasReadSummarySection { - err := it.parseSummarySection() - if err != nil { + if err := it.parseSummarySection(); err != nil { return nil, nil, nil, err } // take care of the metadata here if it.metadataCallback != nil { for _, idx := range it.metadataIndexes { - _, err = it.rs.Seek(int64(idx.Offset), io.SeekStart) + err := it.seekTo(idx.Offset) if err != nil { return nil, nil, nil, fmt.Errorf("failed to seek to metadata: %w", err) } - opcode, data, err := readRecord(it.rs) + opcode, data, err := readRecord(it.rs, it.recordBuf) + if cap(data) > cap(it.recordBuf) { + it.recordBuf = data + } if err != nil { return nil, nil, nil, fmt.Errorf("failed to read metadata record: %w", err) } @@ -285,29 +422,71 @@ func (it *indexedMessageIterator) Next(_ []byte) (*Schema, *Channel, *Message, e } } } - - for it.indexHeap.Len() > 0 { - ri, err := it.indexHeap.HeapPop() - if err != nil { - return nil, nil, nil, err - } - if ri.messageIndexEntry == nil { - err := it.loadChunk(ri.chunkIndex) - if err != nil { + for { + // if there are no indexed messages to yield, load a chunk + if it.curMessageIndex >= len(it.messageIndexes) { + // if there are no more chunks, iteration ends + if it.curChunkIndex >= len(it.chunkIndexes) { + return nil, nil, nil, io.EOF + } + chunkIndex := it.chunkIndexes[it.curChunkIndex] + if err := it.loadChunk(chunkIndex); err != nil { return nil, nil, nil, err } + it.curChunkIndex++ continue } - chunkOffset := ri.messageIndexEntry.Offset - length := binary.LittleEndian.Uint64(ri.buf[chunkOffset+1:]) - messageData := ri.buf[chunkOffset+1+8 : chunkOffset+1+8+length] - message, err := ParseMessage(messageData) - if err != nil { + // if there are more chunks left, check if the next one 
should be loaded before yielding another + // message + if it.curChunkIndex < len(it.chunkIndexes) { + chunkIndex := it.chunkIndexes[it.curChunkIndex] + messageIndex := it.messageIndexes[it.curMessageIndex] + if (it.order == LogTimeOrder && chunkIndex.MessageStartTime < messageIndex.timestamp) || + (it.order == ReverseLogTimeOrder && chunkIndex.MessageEndTime > messageIndex.timestamp) { + if err := it.loadChunk(chunkIndex); err != nil { + return nil, nil, nil, err + } + it.curChunkIndex++ + continue + } + } + // yield the next message + messageIndex := it.messageIndexes[it.curMessageIndex] + chunkSlot := &it.chunkSlots[messageIndex.chunkSlotIndex] + messageDataStart, overflow := checkedAdd(messageIndex.offset, 1+8) + if overflow { + return nil, nil, nil, fmt.Errorf("message offset in chunk too close to uint64 max: %d", messageIndex.offset) + } + length := binary.LittleEndian.Uint64(chunkSlot.buf[messageIndex.offset+1:]) + messageDataEnd, overflow := checkedAdd(messageDataStart, length) + if overflow { + return nil, nil, nil, fmt.Errorf("message record length extends past uint64 range: %d", length) + } + messageData := chunkSlot.buf[messageDataStart:messageDataEnd] + if err := msg.PopulateFrom(messageData, true); err != nil { return nil, nil, nil, err } - channel := it.channels[message.ChannelID] - schema := it.schemas[channel.SchemaID] - return schema, channel, message, nil + chunkSlot.unreadMessages-- + it.curMessageIndex++ + channel := it.channels.Get(msg.ChannelID) + if channel == nil { + return nil, nil, nil, fmt.Errorf("message with unrecognized channel ID %d", msg.ChannelID) + } + schema := it.schemas.Get(channel.SchemaID) + if schema == nil && channel.SchemaID != 0 { + return nil, nil, nil, fmt.Errorf("channel %d with unrecognized schema ID %d", msg.ChannelID, channel.SchemaID) + } + return schema, channel, msg, nil } - return nil, nil, nil, io.EOF +} + +func (it *indexedMessageIterator) Next(buf []byte) (*Schema, *Channel, *Message, error) { + msg := &Message{Data: buf} + return it.NextInto(msg) +} + +// returns the sum of two uint64s, with a boolean indicating if the sum overflowed. +func checkedAdd(a, b uint64) (uint64, bool) { + res, carry := bits.Add64(a, b, 0) + return res, carry != 0 } diff --git a/go/mcap/lexer.go b/go/mcap/lexer.go index fe10c665fa..9c245194ce 100644 --- a/go/mcap/lexer.go +++ b/go/mcap/lexer.go @@ -265,7 +265,7 @@ func (l *Lexer) Next(p []byte) (TokenType, []byte, error) { continue } - if recordLen > uint64(len(p)) { + if recordLen > uint64(cap(p)) { p, err = makeSafe(recordLen) if err != nil { return TokenError, nil, fmt.Errorf("failed to allocate %d bytes for %s token: %w", recordLen, opcode, err) diff --git a/go/mcap/parse.go b/go/mcap/parse.go index 992b35eaf7..0619db6648 100644 --- a/go/mcap/parse.go +++ b/go/mcap/parse.go @@ -99,32 +99,44 @@ func ParseChannel(buf []byte) (*Channel, error) { }, nil } -// ParseMessage parses a message record. -func ParseMessage(buf []byte) (*Message, error) { +// PopulateFrom populates the fields of a Message struct from the message data slice. 
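+// When copyData is true the payload is copied into m.Data, reusing its existing capacity where
+// possible; when copyData is false, m.Data aliases buf and is only valid until the underlying
+// buffer is reused.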
+func (m *Message) PopulateFrom(buf []byte, copyData bool) error { channelID, offset, err := getUint16(buf, 0) if err != nil { - return nil, fmt.Errorf("failed to read channel ID: %w", err) + return fmt.Errorf("failed to read channel ID: %w", err) } sequence, offset, err := getUint32(buf, offset) if err != nil { - return nil, fmt.Errorf("failed to read sequence: %w", err) + return fmt.Errorf("failed to read sequence: %w", err) } logTime, offset, err := getUint64(buf, offset) if err != nil { - return nil, fmt.Errorf("failed to read record time: %w", err) + return fmt.Errorf("failed to read record time: %w", err) } publishTime, offset, err := getUint64(buf, offset) if err != nil { - return nil, fmt.Errorf("failed to read publish time: %w", err) + return fmt.Errorf("failed to read publish time: %w", err) } data := buf[offset:] - return &Message{ - ChannelID: channelID, - Sequence: sequence, - LogTime: logTime, - PublishTime: publishTime, - Data: data, - }, nil + m.ChannelID = channelID + m.Sequence = sequence + m.LogTime = logTime + m.PublishTime = publishTime + if copyData { + m.Data = append(m.Data[:0], data...) + } else { + m.Data = data + } + return nil +} + +// ParseMessage parses a message record. +func ParseMessage(buf []byte) (*Message, error) { + msg := &Message{} + if err := msg.PopulateFrom(buf, false); err != nil { + return nil, err + } + return msg, nil } // ParseChunk parses a chunk record. diff --git a/go/mcap/range_index_heap.go b/go/mcap/range_index_heap.go deleted file mode 100644 index 284ecaf084..0000000000 --- a/go/mcap/range_index_heap.go +++ /dev/null @@ -1,107 +0,0 @@ -package mcap - -import ( - "container/heap" - "fmt" -) - -// rangeIndex refers to either a chunk (via the ChunkIndex, with other fields nil) -// or to a message in a chunk, in which case all fields are set. -type rangeIndex struct { - chunkIndex *ChunkIndex - messageIndexEntry *MessageIndexEntry - buf []uint8 // if messageIndexEntry is not nil, `buf` should point to the underlying chunk. -} - -// heap of rangeIndex entries, where the entries are sorted by their log time. -type rangeIndexHeap struct { - indices []rangeIndex - order ReadOrder - lastErr error -} - -// key returns the comparison key used for elements in this heap. -func (h rangeIndexHeap) timestamp(i int) uint64 { - ri := h.indices[i] - if ri.messageIndexEntry == nil { - if h.order == ReverseLogTimeOrder { - return ri.chunkIndex.MessageEndTime - } - return ri.chunkIndex.MessageStartTime - } - return ri.messageIndexEntry.Timestamp -} - -func (h *rangeIndexHeap) filePositionLess(i, j int) bool { - a := h.indices[i] - b := h.indices[j] - - // if comparing two chunks, whichever chunk comes earlier wins. - // if comparing messages in two different chunks, the message in the earlier chunk wins. - // if comparing a message in one chunk to another chunk, whichever chunk is earlier wins. - if a.chunkIndex.ChunkStartOffset != b.chunkIndex.ChunkStartOffset { - return a.chunkIndex.ChunkStartOffset < b.chunkIndex.ChunkStartOffset - } - // If comparing two messages in the same chunk, the earlier message in the chunk wins. - if a.messageIndexEntry != nil && b.messageIndexEntry != nil { - return a.messageIndexEntry.Offset < b.messageIndexEntry.Offset - } - // If we came this far, we're comparing a message in a chunk against the same chunk! - // this is a problem, because when the chunk reaches the top of the heap it will be expanded, - // and the same message will be pushed into the heap twice. 
- h.lastErr = fmt.Errorf("detected duplicate data: a: %v, b: %v", a, b) - return false -} - -// Required for sort.Interface. -func (h rangeIndexHeap) Len() int { return len(h.indices) } -func (h rangeIndexHeap) Swap(i, j int) { h.indices[i], h.indices[j] = h.indices[j], h.indices[i] } - -// Push is required by `heap.Interface`. Note that this is not the same as `heap.Push`! -// expected behavior by `heap` is: "add x as element len()". -func (h *rangeIndexHeap) Push(x interface{}) { - h.indices = append(h.indices, x.(rangeIndex)) -} - -// Pop is required by `heap.Interface`. Note that this is not the same as `heap.Pop`! -// expected behavior by `heap` is: "remove and return element Len() - 1". -func (h *rangeIndexHeap) Pop() interface{} { - old := h.indices - n := len(old) - x := old[n-1] - h.indices = old[0 : n-1] - return x -} - -// Less is required by `heap.Interface`. -func (h *rangeIndexHeap) Less(i, j int) bool { - switch h.order { - case FileOrder: - return h.filePositionLess(i, j) - case LogTimeOrder: - if h.timestamp(i) == h.timestamp(j) { - return h.filePositionLess(i, j) - } - return h.timestamp(i) < h.timestamp(j) - case ReverseLogTimeOrder: - if h.timestamp(i) == h.timestamp(j) { - return h.filePositionLess(j, i) - } - return h.timestamp(i) > h.timestamp(j) - } - h.lastErr = fmt.Errorf("ReadOrder case not handled: %v", h.order) - return false -} - -func (h *rangeIndexHeap) HeapPush(ri rangeIndex) error { - heap.Push(h, ri) - return h.lastErr -} - -func (h *rangeIndexHeap) HeapPop() (*rangeIndex, error) { - result := heap.Pop(h).(rangeIndex) - if h.lastErr != nil { - return nil, h.lastErr - } - return &result, nil -} diff --git a/go/mcap/range_index_heap_test.go b/go/mcap/range_index_heap_test.go deleted file mode 100644 index ef8044f059..0000000000 --- a/go/mcap/range_index_heap_test.go +++ /dev/null @@ -1,89 +0,0 @@ -package mcap - -import ( - "reflect" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var rangeIndexHeapTestItems = []rangeIndex{ - { - chunkIndex: &ChunkIndex{ - ChunkStartOffset: 1, - MessageStartTime: 100, - MessageEndTime: 300, - }, - }, - { - chunkIndex: &ChunkIndex{ - ChunkStartOffset: 2, - MessageStartTime: 200, - MessageEndTime: 400, - }, - messageIndexEntry: &MessageIndexEntry{Offset: 3, Timestamp: 200}, - }, - { - chunkIndex: &ChunkIndex{ - ChunkStartOffset: 2, - MessageStartTime: 200, - MessageEndTime: 400, - }, - messageIndexEntry: &MessageIndexEntry{Offset: 2, Timestamp: 250}, - }, - { - chunkIndex: &ChunkIndex{ - ChunkStartOffset: 3, - MessageStartTime: 300, - MessageEndTime: 400, - }, - }, -} - -func TestMessageOrdering(t *testing.T) { - cases := []struct { - assertion string - order ReadOrder - expectedIndexOrder []int - }{ - { - assertion: "read time order forwards", - order: LogTimeOrder, - expectedIndexOrder: []int{0, 1, 2, 3}, - }, - { - assertion: "read time order backwards", - order: ReverseLogTimeOrder, - expectedIndexOrder: []int{3, 0, 2, 1}, - }, - { - assertion: "read file order", - order: FileOrder, - expectedIndexOrder: []int{0, 2, 1, 3}, - }, - } - for _, c := range cases { - t.Run(c.assertion, func(t *testing.T) { - h := &rangeIndexHeap{order: c.order} - for _, item := range rangeIndexHeapTestItems { - require.NoError(t, h.HeapPush(item)) - } - assert.Len(t, rangeIndexHeapTestItems, h.Len()) - i := 0 - for h.Len() > 0 { - poppedItem, err := h.HeapPop() - require.NoError(t, err) - found := false - for index, item := range rangeIndexHeapTestItems { - if reflect.DeepEqual(item, 
*poppedItem) { - assert.Equal(t, c.expectedIndexOrder[i], index) - found = true - } - } - assert.True(t, found) - i++ - } - }) - } -} diff --git a/go/mcap/reader.go b/go/mcap/reader.go index c6566ef385..d013c85b4e 100644 --- a/go/mcap/reader.go +++ b/go/mcap/reader.go @@ -65,7 +65,14 @@ type Reader struct { } type MessageIterator interface { + // Deprecated: use NextInto to avoid repeatedly heap-allocating Message structs while iterating. Next([]byte) (*Schema, *Channel, *Message, error) + // NextInto returns the next message from the MCAP. If the returned error is io.EOF, + // this signals the end of the MCAP. + // If `msg` is not nil, NextInto will populate it with new data and + // return the same pointer, re-using or resizing `msg.Data` as needed. + // If `msg` is nil, NextInto will allocate and return a new Message on the heap. + NextInto(msg *Message) (*Schema, *Channel, *Message, error) } func Range(it MessageIterator, f func(*Schema, *Channel, *Message) error) error { @@ -93,8 +100,6 @@ func (r *Reader) unindexedIterator(opts *ReadOptions) *unindexedMessageIterator r.l.emitChunks = false return &unindexedMessageIterator{ lexer: r.l, - channels: make(map[uint16]*Channel), - schemas: make(map[uint16]*Schema), topics: topicMap, start: opts.StartNanos, end: opts.EndNanos, @@ -114,12 +119,10 @@ func (r *Reader) indexedMessageIterator( return &indexedMessageIterator{ lexer: r.l, rs: r.rs, - channels: make(map[uint16]*Channel), - schemas: make(map[uint16]*Schema), topics: topicMap, start: opts.StartNanos, end: opts.EndNanos, - indexHeap: rangeIndexHeap{order: opts.Order}, + order: opts.Order, metadataCallback: opts.MetadataCallback, } } @@ -192,11 +195,11 @@ func (r *Reader) Info() (*Info, error) { } info := &Info{ Statistics: it.statistics, - Channels: it.channels, + Channels: it.channels.ToMap(), ChunkIndexes: it.chunkIndexes, AttachmentIndexes: it.attachmentIndexes, MetadataIndexes: it.metadataIndexes, - Schemas: it.schemas, + Schemas: it.schemas.ToMap(), Footer: it.footer, Header: r.header, } diff --git a/go/mcap/reader_test.go b/go/mcap/reader_test.go index 45d540fa50..e1322f16a7 100644 --- a/go/mcap/reader_test.go +++ b/go/mcap/reader_test.go @@ -2,12 +2,15 @@ package mcap import ( "bytes" + "crypto/rand" + "encoding/binary" "errors" "fmt" "io" "math" "os" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -72,6 +75,7 @@ func TestIndexedReaderBreaksTiesOnChunkOffset(t *testing.T) { if errors.Is(err, io.EOF) { break } + require.NoError(t, err) assert.Equal(t, expectedTopics[i], channel.Topic) } } @@ -754,29 +758,15 @@ func TestReadingMessageOrderWithOverlappingChunks(t *testing.T) { }) require.NoError(t, err) require.NoError(t, writer.WriteHeader(&Header{})) - require.NoError(t, writer.WriteSchema(&Schema{ - ID: 1, - Name: "", - Encoding: "", - Data: []byte{}, - })) - require.NoError(t, writer.WriteChannel(&Channel{ - ID: 0, - Topic: "", - SchemaID: 0, - MessageEncoding: "", - Metadata: map[string]string{ - "": "", - }, - })) + require.NoError(t, writer.WriteSchema(&Schema{ID: 1})) + require.NoError(t, writer.WriteChannel(&Channel{ID: 0})) msgCount := 0 addMsg := func(timestamp uint64) { require.NoError(t, writer.WriteMessage(&Message{ ChannelID: 0, - Sequence: 0, LogTime: timestamp, PublishTime: timestamp, - Data: []byte{'h', 'e', 'l', 'l', 'o'}, + Data: []byte("hello"), })) msgCount++ } @@ -831,7 +821,7 @@ func TestReadingMessageOrderWithOverlappingChunks(t *testing.T) { // check that timestamps monotonically decrease from the returned 
iterator for i := 0; i < msgCount; i++ { - _, _, msg, err := reverseIt.Next(nil) + _, _, msg, err := reverseIt.NextInto(nil) require.NoError(t, err) if i != 0 { assert.Less(t, msg.LogTime, lastSeenTimestamp) @@ -843,6 +833,89 @@ func TestReadingMessageOrderWithOverlappingChunks(t *testing.T) { require.ErrorIs(t, io.EOF, err) } +func TestOrderStableWithEquivalentTimestamps(t *testing.T) { + buf := &bytes.Buffer{} + // write an MCAP with two chunks, where in each chunk all messages have ascending timestamps, + // but their timestamp ranges overlap. + writer, err := NewWriter(buf, &WriterOptions{ + Chunked: true, + ChunkSize: 200, + Compression: CompressionLZ4, + }) + require.NoError(t, err) + require.NoError(t, writer.WriteHeader(&Header{})) + require.NoError(t, writer.WriteSchema(&Schema{ID: 1})) + require.NoError(t, writer.WriteChannel(&Channel{ID: 0, Topic: "a"})) + require.NoError(t, writer.WriteChannel(&Channel{ID: 1, Topic: "b"})) + var msgCount uint64 + msgData := make([]byte, 8) + for len(writer.ChunkIndexes) < 3 { + binary.LittleEndian.PutUint64(msgData, msgCount) + require.NoError(t, writer.WriteMessage(&Message{ + ChannelID: uint16(msgCount % 2), + LogTime: msgCount % 2, + PublishTime: msgCount % 2, + Data: msgData, + })) + msgCount++ + } + require.NoError(t, writer.Close()) + + reader, err := NewReader(bytes.NewReader(buf.Bytes())) + require.NoError(t, err) + + it, err := reader.Messages( + UsingIndex(true), + InOrder(LogTimeOrder), + ) + require.NoError(t, err) + var lastMessageNumber uint64 + var numRead uint64 + for { + _, _, msg, err := it.NextInto(nil) + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + if msg.ChannelID != 0 { + continue + } + assert.Equal(t, uint64(0), msg.LogTime) + msgNumber := binary.LittleEndian.Uint64(msg.Data) + if numRead != 0 { + assert.Greater(t, msgNumber, lastMessageNumber) + } + lastMessageNumber = msgNumber + numRead++ + } + assert.Equal(t, msgCount/2, numRead) + + reverseIt, err := reader.Messages( + UsingIndex(true), + InOrder(ReverseLogTimeOrder), + ) + require.NoError(t, err) + lastMessageNumber = 0 + numRead = 0 + for { + _, _, msg, err := reverseIt.NextInto(nil) + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + if msg.ChannelID != 0 { + continue + } + assert.Equal(t, uint64(0), msg.LogTime) + msgNumber := binary.LittleEndian.Uint64(msg.Data) + if numRead != 0 { + assert.Less(t, msgNumber, lastMessageNumber) + } + lastMessageNumber = msgNumber + numRead++ + } +} + func TestReadingBigTimestamps(t *testing.T) { buf := &bytes.Buffer{} w, err := NewWriter(buf, &WriterOptions{ @@ -881,3 +954,199 @@ func TestReadingBigTimestamps(t *testing.T) { assert.Equal(t, 1, count) }) } + +func BenchmarkReader(b *testing.B) { + inputParameters := []struct { + name string + outOfOrderWithinChunks bool + chunksOverlap bool + }{ + { + name: "msgs_in_order", + }, + { + name: "jitter_in_chunk", + outOfOrderWithinChunks: true, + }, + { + name: "chunks_overlap", + outOfOrderWithinChunks: true, + chunksOverlap: true, + }, + } + for _, inputCfg := range inputParameters { + b.Run(inputCfg.name, func(b *testing.B) { + b.StopTimer() + buf := &bytes.Buffer{} + writer, err := NewWriter(buf, &WriterOptions{ + Chunked: true, + Compression: CompressionZSTD, + }) + require.NoError(b, err) + messageCount := uint64(4000000) + require.NoError(b, writer.WriteHeader(&Header{})) + require.NoError(b, writer.WriteSchema(&Schema{ID: 1, Name: "empty", Encoding: "none"})) + channelCount := 200 + for i := 0; i < channelCount; i++ { + 
require.NoError(b, writer.WriteChannel(&Channel{ + ID: uint16(i), + SchemaID: 1, + Topic: "/chat", + MessageEncoding: "none", + })) + } + contentBuf := make([]byte, 32) + lastChunkMax := uint64(0) + thisChunkMax := uint64(0) + for i := uint64(0); i < messageCount; i++ { + channelID := uint16(i % uint64(channelCount)) + _, err := rand.Read(contentBuf) + require.NoError(b, err) + timestamp := i + if inputCfg.outOfOrderWithinChunks { + timestamp += (2 * (10 - (i % 10))) + if !inputCfg.chunksOverlap { + if timestamp < lastChunkMax { + timestamp = lastChunkMax + } + } + } + if timestamp > thisChunkMax { + thisChunkMax = timestamp + } + chunkCount := len(writer.ChunkIndexes) + require.NoError(b, writer.WriteMessage(&Message{ + ChannelID: channelID, + Sequence: uint32(i), + LogTime: timestamp, + PublishTime: timestamp, + Data: contentBuf, + })) + if len(writer.ChunkIndexes) != chunkCount { + lastChunkMax = thisChunkMax + } + } + require.NoError(b, writer.Close()) + b.StartTimer() + readerConfigs := []struct { + name string + order ReadOrder + useIndex bool + }{ + { + name: "no_index", + order: FileOrder, + useIndex: false, + }, + { + name: "file_order", + order: FileOrder, + useIndex: true, + }, + { + name: "time_order", + order: LogTimeOrder, + useIndex: true, + }, + { + name: "rev_order", + order: ReverseLogTimeOrder, + useIndex: true, + }, + } + for _, cfg := range readerConfigs { + b.Run(cfg.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + s := time.Now() + reader, err := NewReader(bytes.NewReader(buf.Bytes())) + require.NoError(b, err) + it, err := reader.Messages(UsingIndex(cfg.useIndex), InOrder(cfg.order)) + require.NoError(b, err) + readMessages := uint64(0) + msgBytes := uint64(0) + msg := Message{} + var lastErr error + orderErrors := 0 + var lastSeenTimestamp uint64 + for { + _, _, msg, err := it.NextInto(&msg) + if err != nil { + lastErr = err + break + } + if cfg.order == LogTimeOrder && msg.LogTime < lastSeenTimestamp { + orderErrors++ + } + if cfg.order == ReverseLogTimeOrder && msg.LogTime > lastSeenTimestamp && readMessages != 0 { + orderErrors++ + } + lastSeenTimestamp = msg.LogTime + readMessages++ + msgBytes += uint64(len(msg.Data)) + } + require.ErrorIs(b, lastErr, io.EOF) + require.Equal(b, messageCount, readMessages) + require.Equal(b, 0, orderErrors) + + b.ReportMetric(float64(messageCount)/time.Since(s).Seconds(), "msg/s") + b.ReportMetric(float64(msgBytes)/(time.Since(s).Seconds()*1024*1024), "MB/s") + } + }) + } + b.Run("bare_lexer", func(b *testing.B) { + for i := 0; i < b.N; i++ { + s := time.Now() + lexer, err := NewLexer(bytes.NewReader(buf.Bytes())) + require.NoError(b, err) + readMessages := uint64(0) + msgBytes := uint64(0) + var p []byte + var lastErr error + for { + token, record, err := lexer.Next(p) + if err != nil { + lastErr = err + break + } + if cap(record) > cap(p) { + p = record + } + if token == TokenMessage { + readMessages++ + msgBytes += uint64(len(record) - 22) + } + } + require.ErrorIs(b, lastErr, io.EOF) + require.Equal(b, messageCount, readMessages) + b.ReportMetric(float64(messageCount)/time.Since(s).Seconds(), "msg/s") + b.ReportMetric(float64(msgBytes)/(time.Since(s).Seconds()*1024*1024), "MB/s") + } + }) + }) + } +} + +func TestFooterOffsetErrorDetected(t *testing.T) { + buf := &bytes.Buffer{} + writer, err := NewWriter(buf, &WriterOptions{ + Chunked: true, + ChunkSize: 1024, + Compression: "", + }) + require.NoError(t, err) + require.NoError(t, writer.WriteHeader(&Header{})) + require.NoError(t, writer.WriteChannel(&Channel{ID: 
1})) + require.NoError(t, writer.WriteMessage(&Message{ChannelID: 1})) + require.NoError(t, writer.Close()) + + // break the footer summary offset field. This is 8 + 8 + 4 + 8 bytes from end of file. + mcapBytes := buf.Bytes() + end := len(mcapBytes) + binary.LittleEndian.PutUint64(mcapBytes[end-8-8-4-8:], 999999999) + + reader, err := NewReader(bytes.NewReader(mcapBytes)) + require.NoError(t, err) + + _, err = reader.Info() + require.ErrorIs(t, err, ErrBadOffset) +} diff --git a/go/mcap/slicemap.go b/go/mcap/slicemap.go new file mode 100644 index 0000000000..d8eb99b706 --- /dev/null +++ b/go/mcap/slicemap.go @@ -0,0 +1,44 @@ +package mcap + +import "math" + +// slicemap is an arraymap implementation with uint16 keys. This is useful for associating a set of +// Schema or Channel records with their IDs. +type slicemap[T any] struct { + items []*T +} + +func (s *slicemap[T]) Set(key uint16, val *T) { + if int(key) >= len(s.items) { + // extend the len() of s.items up to key + 1 + toAdd := int(key) + 1 - len(s.items) + // let append decide how much to expand the capacity of the slice + s.items = append(s.items, make([]*T, toAdd)...) + } + s.items[key] = val +} + +func (s *slicemap[T]) Get(key uint16) *T { + if int(key) >= len(s.items) { + return nil + } + return s.items[key] +} + +func (s *slicemap[T]) Slice() []*T { + return s.items +} + +func (s *slicemap[T]) ToMap() map[uint16]*T { + out := make(map[uint16]*T) + for key, val := range s.items { + if key > math.MaxUint16 { + break + } + if val == nil { + continue + } + out[uint16(key)] = val + } + return out +} diff --git a/go/mcap/slicemap_test.go b/go/mcap/slicemap_test.go new file mode 100644 index 0000000000..e599e499f3 --- /dev/null +++ b/go/mcap/slicemap_test.go @@ -0,0 +1,33 @@ +package mcap + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSlicemapLength(t *testing.T) { + var s slicemap[string] + val := "hello" + assert.Empty(t, s.Slice()) + + // setting the first value expands the slice enough to fit it + s.Set(0, &val) + assert.Equal(t, &val, s.Get(0)) + assert.Len(t, s.Slice(), 1) + + // setting another higher expands the slice enough to fit it + s.Set(5, &val) + assert.Equal(t, &val, s.Get(5)) + assert.Len(t, s.Slice(), 6) + + // setting a value <= len does not expand the slice + s.Set(1, &val) + assert.Equal(t, &val, s.Get(1)) + assert.Len(t, s.Slice(), 6) + + // getting a value > len does not expand the slice + var nilString *string + assert.Equal(t, nilString, s.Get(10)) + assert.Len(t, s.Slice(), 6) +} diff --git a/go/mcap/unindexed_message_iterator.go b/go/mcap/unindexed_message_iterator.go index bce77cc935..5796c8c359 100644 --- a/go/mcap/unindexed_message_iterator.go +++ b/go/mcap/unindexed_message_iterator.go @@ -6,56 +6,67 @@ import ( type unindexedMessageIterator struct { lexer *Lexer - schemas map[uint16]*Schema - channels map[uint16]*Channel + schemas slicemap[Schema] + channels slicemap[Channel] topics map[string]bool start uint64 end uint64 + recordBuf []byte + metadataCallback func(*Metadata) error } func (it *unindexedMessageIterator) Next(p []byte) (*Schema, *Channel, *Message, error) { + msg := &Message{Data: p} + return it.NextInto(msg) +} + +func (it *unindexedMessageIterator) NextInto(msg *Message) (*Schema, *Channel, *Message, error) { + if msg == nil { + msg = &Message{} + } for { - tokenType, record, err := it.lexer.Next(p) + tokenType, record, err := it.lexer.Next(it.recordBuf) if err != nil { return nil, nil, nil, err } + if cap(record) > cap(it.recordBuf) { + it.recordBuf = 
record + } switch tokenType { case TokenSchema: schema, err := ParseSchema(record) if err != nil { return nil, nil, nil, fmt.Errorf("failed to parse schema: %w", err) } - if _, ok := it.schemas[schema.ID]; !ok { - it.schemas[schema.ID] = schema - } + it.schemas.Set(schema.ID, schema) case TokenChannel: channelInfo, err := ParseChannel(record) if err != nil { return nil, nil, nil, fmt.Errorf("failed to parse channel info: %w", err) } - if _, ok := it.channels[channelInfo.ID]; !ok { - if len(it.topics) == 0 || it.topics[channelInfo.Topic] { - it.channels[channelInfo.ID] = channelInfo - } + if len(it.topics) == 0 || it.topics[channelInfo.Topic] { + it.channels.Set(channelInfo.ID, channelInfo) } case TokenMessage: - message, err := ParseMessage(record) - if err != nil { + if err := msg.PopulateFrom(record, true); err != nil { return nil, nil, nil, err } - if _, ok := it.channels[message.ChannelID]; !ok { + channel := it.channels.Get(msg.ChannelID) + if channel == nil { // skip messages on channels we don't know about. Note that if // an unindexed reader encounters a message it would be // interested in, but has not yet encountered the corresponding // channel ID, it has no option but to skip. continue } - if message.LogTime >= it.start && message.LogTime < it.end { - channel := it.channels[message.ChannelID] - schema := it.schemas[channel.SchemaID] - return schema, channel, message, nil + if msg.LogTime >= it.start && msg.LogTime < it.end { + schema := it.schemas.Get(channel.SchemaID) + if schema == nil && channel.SchemaID != 0 { + return nil, nil, nil, fmt.Errorf("channel %d with unrecognized schema ID %d", msg.ChannelID, channel.SchemaID) + } + return schema, channel, msg, nil } case TokenMetadata: if it.metadataCallback != nil { diff --git a/go/mcap/version.go b/go/mcap/version.go index 04a419096e..21e0c3e3ea 100644 --- a/go/mcap/version.go +++ b/go/mcap/version.go @@ -1,4 +1,4 @@ package mcap // Version of the MCAP library. -var Version = "v1.2.0" +var Version = "v1.6.0" diff --git a/go/mcap/writer.go b/go/mcap/writer.go index 681d73630d..6a0e71d5a4 100644 --- a/go/mcap/writer.go +++ b/go/mcap/writer.go @@ -38,7 +38,6 @@ type Writer struct { w *writeSizer buf []byte msg []byte - chunk []byte uncompressed *bytes.Buffer compressed *bytes.Buffer compressedWriter *countingCRCWriter @@ -434,7 +433,9 @@ func (w *Writer) flushActiveChunk() error { crc := w.compressedWriter.CRC() compressedlen := w.compressed.Len() uncompressedlen := w.compressedWriter.Size() - msglen := 8 + 8 + 8 + 4 + 4 + len(w.opts.Compression) + 8 + compressedlen + // the "top fields" are all fields of the chunk record except for the compressed records. + topFieldsLen := 8 + 8 + 8 + 4 + 4 + len(w.opts.Compression) + 8 + msglen := topFieldsLen + compressedlen chunkStartOffset := w.w.Size() var start, end uint64 if w.currentChunkMessageCount != 0 { @@ -445,24 +446,25 @@ func (w *Writer) flushActiveChunk() error { // when writing a chunk, we don't go through writerecord to avoid needing to // materialize the compressed data again. Instead, write the leading bytes // then copy from the compressed data buffer. 
- recordlen := 1 + 8 + msglen - if len(w.chunk) < recordlen { - w.chunk = make([]byte, recordlen*2) - } - offset, err := putByte(w.chunk, byte(OpChunk)) + recordHeaderLen := 1 + 8 + topFieldsLen + w.ensureSized(recordHeaderLen) + offset, err := putByte(w.msg, byte(OpChunk)) if err != nil { return err } - offset += putUint64(w.chunk[offset:], uint64(msglen)) - offset += putUint64(w.chunk[offset:], start) - offset += putUint64(w.chunk[offset:], end) - offset += putUint64(w.chunk[offset:], uint64(uncompressedlen)) - offset += putUint32(w.chunk[offset:], crc) - offset += putPrefixedString(w.chunk[offset:], string(w.opts.Compression)) - offset += putUint64(w.chunk[offset:], uint64(w.compressed.Len())) - offset += copy(w.chunk[offset:recordlen], w.compressed.Bytes()) - _, err = w.w.Write(w.chunk[:offset]) + offset += putUint64(w.msg[offset:], uint64(msglen)) + offset += putUint64(w.msg[offset:], start) + offset += putUint64(w.msg[offset:], end) + offset += putUint64(w.msg[offset:], uint64(uncompressedlen)) + offset += putUint32(w.msg[offset:], crc) + offset += putPrefixedString(w.msg[offset:], string(w.opts.Compression)) + offset += putUint64(w.msg[offset:], uint64(w.compressed.Len())) + _, err = w.w.Write(w.msg[:offset]) + if err != nil { + return err + } + _, err = w.w.Write(w.compressed.Bytes()) if err != nil { return err } diff --git a/go/mcap/writer_test.go b/go/mcap/writer_test.go index 59e5e9303b..f562271159 100644 --- a/go/mcap/writer_test.go +++ b/go/mcap/writer_test.go @@ -741,3 +741,95 @@ func TestBYOCompressor(t *testing.T) { assertReadable(t, bytes.NewReader(buf.Bytes())) assert.Positive(t, blockCount) } + +func BenchmarkManyWriterAllocs(b *testing.B) { + cases := []struct { + assertion string + chunkSize int + messageCount int + channelCount int + }{ + { + "big chunks many messages", + 8 * 1024 * 1024, + 2e6, + 100, + }, + { + "small chunks many messages", + 8 * 1024, + 2e6, + 100, + }, + { + "many channels", + 4 * 1024 * 1024, + 2e6, + 55000, + }, + } + + stringData := "hello, world!" 
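+ // the same schema, channel, and message structs are reused across every writer and message below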
+ messageData := []byte("hello, world") + schema := Schema{ + Name: stringData, + Encoding: "ros1msg", + Data: messageData, + } + channel := Channel{ + Topic: stringData, + MessageEncoding: "msg", + Metadata: map[string]string{ + "": "", + }, + } + message := Message{ + Sequence: 0, + Data: messageData, + } + writers := make([]*Writer, 100) + for _, c := range cases { + b.ResetTimer() + b.Run(c.assertion, func(b *testing.B) { + for n := 0; n < b.N; n++ { + t0 := time.Now() + for i := 0; i < len(writers); i++ { + writer, err := NewWriter(io.Discard, &WriterOptions{ + ChunkSize: int64(c.chunkSize), + Chunked: true, + }) + require.NoError(b, err) + require.NoError(b, writer.WriteHeader(&Header{ + Profile: "ros1", + Library: "foo", + })) + for j := 0; j < c.channelCount; j++ { + schema.ID = uint16(j + 1) + require.NoError(b, writer.WriteSchema(&schema)) + channel.SchemaID = uint16(j + 1) + channel.ID = uint16(j) + require.NoError(b, writer.WriteChannel(&channel)) + } + writers[i] = writer + } + channelID := 0 + messageCount := 0 + for messageCount < c.messageCount { + writerIdx := messageCount % len(writers) + message.ChannelID = uint16(channelID) + message.LogTime = uint64(messageCount) + message.PublishTime = uint64(messageCount) + require.NoError(b, writers[writerIdx].WriteMessage(&message)) + messageCount++ + channelID++ + channelID %= c.channelCount + } + for _, writer := range writers { + require.NoError(b, writer.Close()) + } + elapsed := time.Since(t0) + b.ReportMetric(float64(c.messageCount)/elapsed.Seconds(), "messages/sec") + } + }) + } +} diff --git a/go/ros/go.mod b/go/ros/go.mod index 342b997784..d259c05b10 100644 --- a/go/ros/go.mod +++ b/go/ros/go.mod @@ -3,15 +3,18 @@ module github.com/foxglove/mcap/go/ros go 1.18 require ( + github.com/foxglove/go-rosbag v0.0.6 github.com/foxglove/mcap/go/mcap v0.4.0 - github.com/mattn/go-sqlite3 v1.14.11 - github.com/pierrec/lz4/v4 v4.1.17 - github.com/stretchr/testify v1.8.1 + github.com/mattn/go-sqlite3 v1.14.14 + github.com/pierrec/lz4/v4 v4.1.21 + github.com/stretchr/testify v1.9.0 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect - github.com/klauspost/compress v1.15.15 // indirect + github.com/klauspost/compress v1.16.7 // indirect + github.com/kr/pretty v0.3.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/ros/go.sum b/go/ros/go.sum index 8e2341bcef..ed6e789d23 100644 --- a/go/ros/go.sum +++ b/go/ros/go.sum @@ -1,37 +1,18 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/foxglove/mcap/go/mcap v0.0.0-20220316142927-cc81709134cd h1:IG8HSe6kkuB4TyoRhp1XYTrjYvy0iUGJXAzfhkJ5By8= -github.com/foxglove/mcap/go/mcap v0.0.0-20220316142927-cc81709134cd/go.mod h1:gQrB8PzccHW69xedSZ0uVDQVgDd3h1qX+otbS6fjSkE= +github.com/foxglove/go-rosbag v0.0.6 h1:LcWr1LqdS1NxWO4+mbPfo7d1jpL3gybqRmX1abD8eAw= github.com/foxglove/mcap/go/mcap v0.4.0 h1:jsDZZ6qmMKa174EE8Tw0hxeMUdgjz8emTlN8+6FEnXE= github.com/foxglove/mcap/go/mcap v0.4.0/go.mod h1:3UsmtxZGHWURgxEgQh3t0cGfyPyLoCGsa/gtS/Y6UPM= -github.com/klauspost/compress v1.14.1 h1:hLQYb23E8/fO+1u53d02A97a8UnsddcvYzq4ERRU4ds= -github.com/klauspost/compress v1.14.1/go.mod 
h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM= -github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= -github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= -github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= -github.com/mattn/go-sqlite3 v1.14.11 h1:gt+cp9c0XGqe9S/wAHTL3n/7MqY+siPWgWJgqdsFrzQ= -github.com/mattn/go-sqlite3 v1.14.11/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= -github.com/pierrec/lz4/v4 v4.1.12 h1:44l88ehTZAUGW4VlO1QC4zkilL99M6Y9MXNwEs0uzP8= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pierrec/lz4/v4 v4.1.17 h1:kV4Ip+/hUBC+8T6+2EgburRtkE9ef4nbY3f4dFhGjMc= -github.com/pierrec/lz4/v4 v4.1.17/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/go/ros/ros2db3_to_mcap_test.go b/go/ros/ros2db3_to_mcap_test.go index 8dd02c3a06..603e1a156e 100644 --- a/go/ros/ros2db3_to_mcap_test.go +++ b/go/ros/ros2db3_to_mcap_test.go @@ -67,7 +67,7 @@ func TestDB3MCAPConversion(t *testing.T) { it, err := reader.Messages(mcap.WithTopics([]string{c.expectedTopic})) require.NoError(t, err) for { - schema, channel, message, err := 
it.Next(nil) + schema, channel, message, err := it.NextInto(nil) if err != nil { if errors.Is(err, io.EOF) { break diff --git a/python/mcap-protobuf-support/README.md b/python/mcap-protobuf-support/README.md index 9d89c0fa8b..9fd4a1a7c6 100644 --- a/python/mcap-protobuf-support/README.md +++ b/python/mcap-protobuf-support/README.md @@ -35,4 +35,4 @@ pipenv run python point_cloud_example.py output.mcap ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, share feedback, and stay up to date on what our team is working on. +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. diff --git a/python/mcap-ros1-support/README.md b/python/mcap-ros1-support/README.md index 48088f23e5..a7202c5b01 100644 --- a/python/mcap-ros1-support/README.md +++ b/python/mcap-ros1-support/README.md @@ -36,5 +36,5 @@ ros_writer.finish() ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. diff --git a/python/mcap-ros2-support/README.md b/python/mcap-ros2-support/README.md index 28d11a2d25..2184acd90a 100644 --- a/python/mcap-ros2-support/README.md +++ b/python/mcap-ros2-support/README.md @@ -22,5 +22,5 @@ for msg in read_ros2_messages("my_data.mcap"): ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. diff --git a/python/mcap/mcap/__init__.py b/python/mcap/mcap/__init__.py index a82b376d2d..c68196d1cb 100644 --- a/python/mcap/mcap/__init__.py +++ b/python/mcap/mcap/__init__.py @@ -1 +1 @@ -__version__ = "1.1.1" +__version__ = "1.2.0" diff --git a/python/mcap/mcap/exceptions.py b/python/mcap/mcap/exceptions.py index e51bad2d22..024acfd7a5 100644 --- a/python/mcap/mcap/exceptions.py +++ b/python/mcap/mcap/exceptions.py @@ -1,9 +1,14 @@ +from typing import Any + +from mcap.opcode import Opcode + + class McapError(Exception): pass class InvalidMagic(McapError): - def __init__(self, bad_magic): + def __init__(self, bad_magic: Any): super().__init__(f"not a valid MCAP file, invalid magic: {bad_magic}") @@ -13,3 +18,16 @@ class DecoderNotFoundError(McapError): class EndOfFile(McapError): pass + + +class RecordLengthLimitExceeded(McapError): + def __init__(self, opcode: int, length: int, limit: int): + opcode_name = f"unknown (opcode {opcode})" + try: + opcode_name = Opcode(opcode).name + except ValueError: + # unknown opcode will trigger a ValueError + pass + super().__init__( + f"{opcode_name} record has length {length} that exceeds limit {limit}", + ) diff --git a/python/mcap/mcap/reader.py b/python/mcap/mcap/reader.py index f30c906201..96a47d8337 100644 --- a/python/mcap/mcap/reader.py +++ b/python/mcap/mcap/reader.py @@ -240,6 +240,11 @@ class SeekingReader(McapReader): :param decoder_factories: An iterable of :py:class:`~mcap.decoder.DecoderFactory` instances which can provide decoding functionality to :py:meth:`~mcap.reader.McapReader.iter_decoded_messages`. + :param record_size_limit: An upper bound to the size of MCAP records that this reader will + attempt to load in bytes, defaulting to 4 GiB. If this reader encounters a record with a + greater length, it will throw an :py:class:`~mcap.exceptions.RecordLengthLimitExceeded` + error. 
Setting to ``None`` removes the limit, but can allow corrupted MCAP files to trigger + a `MemoryError` exception. """ def __init__( @@ -247,12 +252,14 @@ def __init__( stream: IO[bytes], validate_crcs: bool = False, decoder_factories: Iterable[DecoderFactory] = (), + record_size_limit: Optional[int] = 4 * 2**30, ): super().__init__(decoder_factories=decoder_factories) read_magic(ReadDataStream(stream, calculate_crc=False)) self._stream = stream self._validate_crcs = validate_crcs self._summary: Optional[Summary] = None + self._record_size_limit = record_size_limit def iter_messages( self, @@ -323,7 +330,13 @@ def iter_messages( def get_header(self) -> Header: """Reads the Header record from the beginning of the MCAP file.""" self._stream.seek(0) - header = next(StreamReader(self._stream, skip_magic=False).records) + header = next( + StreamReader( + self._stream, + skip_magic=False, + record_size_limit=self._record_size_limit, + ).records + ) if not isinstance(header, Header): raise McapError( f"expected header at beginning of MCAP file, found {type(header)}" @@ -335,7 +348,13 @@ def get_summary(self) -> Optional[Summary]: if self._summary is not None: return self._summary self._stream.seek(-(FOOTER_SIZE + MAGIC_SIZE), io.SEEK_END) - footer = next(StreamReader(self._stream, skip_magic=True).records) + footer = next( + StreamReader( + self._stream, + skip_magic=True, + record_size_limit=self._record_size_limit, + ).records + ) if not isinstance(footer, Footer): raise McapError( f"expected footer at end of MCAP file, found {type(footer)}" @@ -344,7 +363,9 @@ def get_summary(self) -> Optional[Summary]: return None self._stream.seek(footer.summary_start, io.SEEK_SET) self._summary = _read_summary_from_stream_reader( - StreamReader(self._stream, skip_magic=True) + StreamReader( + self._stream, skip_magic=True, record_size_limit=self._record_size_limit + ) ) return self._summary @@ -358,7 +379,13 @@ def iter_attachments(self) -> Iterator[Attachment]: return for attachment_index in summary.attachment_indexes: self._stream.seek(attachment_index.offset) - record = next(StreamReader(self._stream, skip_magic=True).records) + record = next( + StreamReader( + self._stream, + skip_magic=True, + record_size_limit=self._record_size_limit, + ).records + ) if isinstance(record, Attachment): yield record else: @@ -374,7 +401,13 @@ def iter_metadata(self) -> Iterator[Metadata]: return for metadata_index in summary.metadata_indexes: self._stream.seek(metadata_index.offset) - record = next(StreamReader(self._stream, skip_magic=True).records) + record = next( + StreamReader( + self._stream, + skip_magic=True, + record_size_limit=self._record_size_limit, + ).records + ) if isinstance(record, Metadata): yield record else: @@ -389,6 +422,11 @@ class NonSeekingReader(McapReader): :param decoder_factories: An iterable of :py:class:`~mcap.decoder.DecoderFactory` instances which can provide decoding functionality to :py:meth:`~mcap.reader.McapReader.iter_decoded_messages`. + :param record_size_limit: An upper bound to the size of MCAP records that this reader will + attempt to load in bytes, defaulting to 4 GiB. If this reader encounters a record with a + greater length, it will throw an :py:class:`~mcap.exceptions.RecordLengthLimitExceeded` + error. Setting to ``None`` removes the limit, but can allow corrupted MCAP files to trigger + a `MemoryError` exception. 
""" def __init__( @@ -396,9 +434,14 @@ def __init__( stream: IO[bytes], validate_crcs: bool = False, decoder_factories: Iterable[DecoderFactory] = (), + record_size_limit: Optional[int] = 4 * 2**30, ): super().__init__(decoder_factories=decoder_factories) - self._stream_reader = StreamReader(stream, validate_crcs=validate_crcs) + self._stream_reader = StreamReader( + stream, + validate_crcs=validate_crcs, + record_size_limit=record_size_limit, + ) self._schemas: Dict[int, Schema] = {} self._channels: Dict[int, Channel] = {} self._spent: bool = False diff --git a/python/mcap/mcap/stream_reader.py b/python/mcap/mcap/stream_reader.py index eed23d9ef7..a519202434 100644 --- a/python/mcap/mcap/stream_reader.py +++ b/python/mcap/mcap/stream_reader.py @@ -7,7 +7,7 @@ import zstandard from .data_stream import ReadDataStream -from .exceptions import InvalidMagic +from .exceptions import InvalidMagic, RecordLengthLimitExceeded from .opcode import Opcode from .records import ( Attachment, @@ -98,8 +98,47 @@ def read_magic(stream: ReadDataStream) -> bool: class StreamReader: """ Reads MCAP data sequentially from an input stream. + + :param input: a file-like object for reading the source data from. + :param skip_magic: if ``True``, will not expect MCAP magic at start or end of stream. + :param emit_chunks: if ``True``, will return Chunk records directly and do not parse out the + records inside. + :param validate_crcs: if ``True``, will validate chunk and data section CRC values. + :param record_size_limit: An upper bound to the size of MCAP records that this reader will + attempt to load in bytes, defaulting to 4 GiB. If this reader encounters a record with a + greater length, it will throw an :py:class:`~mcap.exceptions.RecordLengthLimitExceeded` + error. Setting to ``None`` removes the limit, but can allow corrupted MCAP files to trigger + a `MemoryError` exception. """ + def __init__( + self, + input: Union[str, BytesIO, RawIOBase, BufferedReader, IO[bytes]], + skip_magic: bool = False, + emit_chunks: bool = False, + validate_crcs: bool = False, + record_size_limit: Optional[int] = (4 * 2**30), # 4 Gib + ): + """ + input: The input stream from which to read records. 
+ """ + if isinstance(input, str): + self._stream = ReadDataStream( + open(input, "rb"), calculate_crc=validate_crcs + ) + elif isinstance(input, RawIOBase): + self._stream = ReadDataStream( + BufferedReader(input), calculate_crc=validate_crcs + ) + else: + self._stream = ReadDataStream(input, calculate_crc=validate_crcs) + self._footer: Optional[Footer] = None + self._skip_magic: bool = skip_magic + self._emit_chunks: bool = emit_chunks + self._validate_crcs: bool = validate_crcs + self._calculated_data_section_crc = None + self._record_size_limit = record_size_limit + @property def records(self) -> Iterator[McapRecord]: """ @@ -116,6 +155,8 @@ def records(self) -> Iterator[McapRecord]: checksum_before_read = self._stream.checksum() opcode = self._stream.read1() length = self._stream.read8() + if self._record_size_limit is not None and length > self._record_size_limit: + raise RecordLengthLimitExceeded(opcode, length, self._record_size_limit) count = self._stream.count record = self._read_record(opcode, length) if ( @@ -143,32 +184,6 @@ def records(self) -> Iterator[McapRecord]: self._footer = record read_magic(self._stream) - def __init__( - self, - input: Union[str, BytesIO, RawIOBase, BufferedReader, IO[bytes]], - skip_magic: bool = False, - emit_chunks: bool = False, - validate_crcs: bool = False, - ): - """ - input: The input stream from which to read records. - """ - if isinstance(input, str): - self._stream = ReadDataStream( - open(input, "rb"), calculate_crc=validate_crcs - ) - elif isinstance(input, RawIOBase): - self._stream = ReadDataStream( - BufferedReader(input), calculate_crc=validate_crcs - ) - else: - self._stream = ReadDataStream(input, calculate_crc=validate_crcs) - self._footer: Optional[Footer] = None - self._skip_magic: bool = skip_magic - self._emit_chunks: bool = emit_chunks - self._validate_crcs: bool = validate_crcs - self._calculated_data_section_crc = None - def _read_record(self, opcode: int, length: int) -> Optional[McapRecord]: if opcode == Opcode.ATTACHMENT: return Attachment.read(self._stream) diff --git a/python/mcap/mcap/writer.py b/python/mcap/mcap/writer.py index e1ff466a46..0383454e2c 100644 --- a/python/mcap/mcap/writer.py +++ b/python/mcap/mcap/writer.py @@ -169,9 +169,10 @@ def add_message( :param channel_id: The id of the channel to which the message should be added. :param sequence: Optional message counter assigned by publisher. - :param log_time: Time at which the message was recorded. - :param publish_time: Time at which the message was published. If not available, must be set - to the log time. + :param log_time: Time at which the message was recorded as nanoseconds since a + user-understood epoch (i.e unix epoch, robot boot time, etc.). + :param publish_time: Time at which the message was published as nanoseconds since a + user-understood epoch (i.e unix epoch, robot boot time, etc.). :param data: Message data, to be decoded according to the schema of the channel. 
""" message = Message( diff --git a/python/mcap/tests/test_reader.py b/python/mcap/tests/test_reader.py index 1b4e9654d5..d34423f335 100644 --- a/python/mcap/tests/test_reader.py +++ b/python/mcap/tests/test_reader.py @@ -1,15 +1,22 @@ """tests for the McapReader implementations.""" +# cspell:words getbuffer import json import os +from io import BytesIO from pathlib import Path from typing import IO, Any, Optional, Tuple, Type, Union import pytest from mcap.decoder import DecoderFactory -from mcap.exceptions import DecoderNotFoundError, InvalidMagic +from mcap.exceptions import ( + DecoderNotFoundError, + InvalidMagic, + RecordLengthLimitExceeded, +) from mcap.reader import McapReader, NonSeekingReader, SeekingReader, make_reader from mcap.records import Channel, Message, Schema +from mcap.stream_reader import StreamReader from mcap.writer import IndexType, Writer DEMO_MCAP = ( @@ -212,7 +219,7 @@ def write_no_summary_mcap(filepath: Path): writer.add_attachment(10, 10, "my_attach", "text", b"some data") writer.add_metadata("my_meta", {"foo": "bar"}) foo_channel = writer.register_channel("/foo", "json", 0) - for i in range(200): + for _ in range(200): writer.add_message(foo_channel, 10, json.dumps({"a": 0}).encode("utf8"), 10) writer.finish() @@ -252,3 +259,68 @@ def test_detect_invalid_initial_magic(tmpdir: Path): with open(filepath, "rb") as f: with pytest.raises(InvalidMagic): NonSeekingReader(f).get_header() + + +def test_record_size_limit(): + # create a simple small MCAP + write_stream = BytesIO() + writer = Writer(write_stream) + writer.start("profile", "library") + writer.finish() + + # default stream reader can read it + stream_reader = StreamReader( + BytesIO(write_stream.getbuffer()), record_size_limit=100 + ) + records = [r for r in stream_reader.records] + assert len(records) == 10 + + # can cause "large" records to raise an error by setting a low limit + stream_reader = StreamReader( + BytesIO(write_stream.getbuffer()), record_size_limit=10 + ) + with pytest.raises( + RecordLengthLimitExceeded, + match="HEADER record has length 22 that exceeds limit 10", + ): + next(stream_reader.records) + + # default seeking reader can read it + seeking_reader = SeekingReader( + BytesIO(write_stream.getbuffer()), record_size_limit=100 + ) + seeking_reader.get_header() + seeking_reader.get_summary() + assert len([m for m in seeking_reader.iter_messages()]) == 0 + + # can cause "large" records to raise an error by setting a low limit + seeking_reader = SeekingReader( + BytesIO(write_stream.getbuffer()), record_size_limit=10 + ) + with pytest.raises( + RecordLengthLimitExceeded, + match="HEADER record has length 22 that exceeds limit 10", + ): + seeking_reader.get_header() + + with pytest.raises( + RecordLengthLimitExceeded, + match="FOOTER record has length 20 that exceeds limit 10", + ): + seeking_reader.get_summary() + + # default non-seeking reader can read it + non_seeking_reader = NonSeekingReader( + BytesIO(write_stream.getbuffer()), record_size_limit=100 + ) + non_seeking_reader.get_header() + + # can cause "large" records to raise an error by setting a low limit + non_seeking_reader = NonSeekingReader( + BytesIO(write_stream.getbuffer()), record_size_limit=10 + ) + with pytest.raises( + RecordLengthLimitExceeded, + match="HEADER record has length 22 that exceeds limit 10", + ): + non_seeking_reader.get_header() diff --git a/rust/Cargo.toml b/rust/Cargo.toml index d194adf369..0cd54d80ed 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -7,7 +7,7 @@ categories = [ 
"science::robotics", "compression" ] repository = "https://github.com/foxglove/mcap" documentation = "https://docs.rs/mcap" readme = "README.md" -version = "0.9.0" +version = "0.10.0" edition = "2021" license = "MIT" @@ -22,7 +22,9 @@ log = "0.4" num_cpus = "1.13" paste = "1.0" thiserror = "1.0" -lz4_flex = { version = "0.11.1", optional = true } +lz4 = { version = "1.27", optional = true } +async-compression = { version = "*", features = ["tokio"], optional = true } +tokio = { version = "1", features = ["io-util"] , optional = true } [target.'cfg(target_arch = "wasm32")'.dependencies] zstd = { version = "0.11", features = ["wasm"], optional = true } @@ -32,14 +34,16 @@ zstd = { version = "0.11", features = ["zstdmt"], optional = true } [features] default = ["zstd", "lz4"] -zstd = ["dep:zstd"] -lz4 = ["dep:lz4_flex"] +zstd = ["dep:zstd", "async-compression/zstd"] +lz4 = ["dep:lz4"] +tokio = ["dep:async-compression", "dep:tokio"] [dev-dependencies] anyhow = "1" atty = "0.2" camino = "1.0" clap = { version = "3.2", features = ["derive"]} +criterion = { version = "0.5.1", features = ["async_tokio"] } itertools = "0.10" memmap = "0.7" rayon = "1.5" @@ -47,3 +51,17 @@ serde = { version = "1.0.145", features = ["derive"] } serde_json = "1" simplelog = "0.12" tempfile = "3.3" +tokio = { version = "1", features = ["io-util", "macros", "rt", "fs"] } + +[[bench]] +name = "reader" +harness = false + +[profile.bench] +opt-level = 3 +debug = true +lto = true + +[[example]] +name = "conformance_reader_async" +required-features = ["tokio"] diff --git a/rust/benches/reader.rs b/rust/benches/reader.rs new file mode 100644 index 0000000000..d234ccf6d3 --- /dev/null +++ b/rust/benches/reader.rs @@ -0,0 +1,91 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use mcap::{Channel, Message, MessageStream, Schema}; +use std::borrow::Cow; +use std::io::Cursor; +use std::sync::Arc; +use std::time::Duration; + +fn create_test_mcap(n: usize, compression: Option) -> Vec { + let mut buffer = Vec::new(); + { + let mut writer = mcap::WriteOptions::new() + .compression(compression) + .profile("fooey") + .create(Cursor::new(&mut buffer)) + .unwrap(); + // Mock message data to align with reader benchmarks in ts + const MESSAGE_DATA: &[u8] = &[42; 10]; + + let schema = Arc::new(Schema { + name: "TestSchema".to_string(), + encoding: "raw".to_string(), + data: Cow::Borrowed(b"{}"), + }); + + let channel = Arc::new(Channel { + topic: "test_topic".to_string(), + message_encoding: "raw".to_string(), + metadata: Default::default(), + schema: Some(schema), + }); + + for i in 0..n { + let message = Message { + channel: channel.clone(), + sequence: i as u32, + log_time: i as u64, + publish_time: i as u64, + data: Cow::Borrowed(&MESSAGE_DATA), + }; + writer.write(&message).unwrap(); + } + + writer.finish().unwrap(); + } + buffer +} + +fn bench_read_messages(c: &mut Criterion) { + const N: usize = 1_000_000; + let mcap_data_uncompressed = create_test_mcap(N, None); + let mcap_data_lz4 = create_test_mcap(N, Some(mcap::Compression::Lz4)); + let mcap_data_zstd = create_test_mcap(N, Some(mcap::Compression::Zstd)); + let mut group = c.benchmark_group("mcap_read"); + group.throughput(criterion::Throughput::Elements(N as u64)); + + group.bench_function("MessageStream_1M_uncompressed", |b| { + b.iter(|| { + let stream = MessageStream::new(&mcap_data_uncompressed).unwrap(); + for message in stream { + std::hint::black_box(message.unwrap()); + } + }); + }); + + group.bench_function("MessageStream_1M_lz4", |b| { + b.iter(|| { + let 
stream = MessageStream::new(&mcap_data_lz4).unwrap(); + for message in stream { + std::hint::black_box(message.unwrap()); + } + }); + }); + + group.bench_function("MessageStream_1M_zstd", |b| { + b.iter(|| { + let stream = MessageStream::new(&mcap_data_zstd).unwrap(); + for message in stream { + std::hint::black_box(message.unwrap()); + } + }); + }); + + group.finish(); +} + +criterion_group! { + name = benches; + config = Criterion::default().warm_up_time(Duration::from_secs(1)).sample_size(10); + targets = bench_read_messages +} +criterion_main!(benches); diff --git a/rust/examples/common/serialization.rs b/rust/examples/common/serialization.rs new file mode 100644 index 0000000000..73c1c7f9e4 --- /dev/null +++ b/rust/examples/common/serialization.rs @@ -0,0 +1,133 @@ +use mcap::records::Record; + +use std::collections::BTreeMap; + +use serde_json::{json, Value}; + +// We don't want to force Serde on users just for the sake of the conformance tests. +// (In what context would you want to serialize individual records of a MCAP?) +// Stamp out and stringify them ourselves: + +fn get_type(rec: &Record<'_>) -> &'static str { + match rec { + Record::Header(_) => "Header", + Record::Footer(_) => "Footer", + Record::Schema { .. } => "Schema", + Record::Channel(_) => "Channel", + Record::Message { .. } => "Message", + Record::Chunk { .. } => "Chunk", + Record::MessageIndex(_) => "MessageIndex", + Record::ChunkIndex(_) => "ChunkIndex", + Record::Attachment { .. } => "Attachment", + Record::AttachmentIndex(_) => "AttachmentIndex", + Record::Statistics(_) => "Statistics", + Record::Metadata(_) => "Metadata", + Record::MetadataIndex(_) => "MetadataIndex", + Record::SummaryOffset(_) => "SummaryOffset", + Record::DataEnd(_) => "DataEnd", + Record::Unknown { opcode, .. } => { + panic!("Unknown record in conformance test: (op {opcode})") + } + } +} + +fn get_fields(rec: &Record<'_>) -> Value { + fn b2s(bytes: &[u8]) -> Vec { + bytes.iter().map(|b| b.to_string()).collect() + } + fn m2s(map: &BTreeMap) -> BTreeMap { + map.iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect() + } + + match rec { + Record::Header(h) => json!([["library", h.library], ["profile", h.profile]]), + Record::Footer(f) => json!([ + ["summary_crc", f.summary_crc.to_string()], + ["summary_offset_start", f.summary_offset_start.to_string()], + ["summary_start", f.summary_start.to_string()] + ]), + Record::Schema { header, data } => json!([ + ["data", b2s(data)], + ["encoding", header.encoding], + ["id", header.id.to_string()], + ["name", header.name] + ]), + Record::Channel(c) => json!([ + ["id", c.id.to_string()], + ["message_encoding", c.message_encoding], + ["metadata", c.metadata], + ["schema_id", c.schema_id.to_string()], + ["topic", c.topic] + ]), + Record::Message { header, data } => json!([ + ["channel_id", header.channel_id.to_string()], + ["data", b2s(data)], + ["log_time", header.log_time.to_string()], + ["publish_time", header.publish_time.to_string()], + ["sequence", header.sequence.to_string()] + ]), + Record::Chunk { .. 
} => unreachable!("Chunks are flattened"), + Record::MessageIndex(_) => unreachable!("MessageIndexes are skipped"), + Record::ChunkIndex(i) => json!([ + ["chunk_length", i.chunk_length.to_string()], + ["chunk_start_offset", i.chunk_start_offset.to_string()], + ["compressed_size", i.compressed_size.to_string()], + ["compression", i.compression], + ["message_end_time", i.message_end_time.to_string()], + ["message_index_length", i.message_index_length.to_string()], + ["message_index_offsets", m2s(&i.message_index_offsets)], + ["message_start_time", i.message_start_time.to_string()], + ["uncompressed_size", i.uncompressed_size.to_string()] + ]), + Record::Attachment { header, data } => json!([ + ["create_time", header.create_time.to_string()], + ["data", b2s(data)], + ["log_time", header.log_time.to_string()], + ["media_type", header.media_type], + ["name", header.name] + ]), + Record::AttachmentIndex(i) => json!([ + ["create_time", i.create_time.to_string()], + ["data_size", i.data_size.to_string()], + ["length", i.length.to_string()], + ["log_time", i.log_time.to_string()], + ["media_type", i.media_type], + ["name", i.name], + ["offset", i.offset.to_string()] + ]), + Record::Statistics(s) => json!([ + ["attachment_count", s.attachment_count.to_string()], + ["channel_count", s.channel_count.to_string()], + ["channel_message_counts", m2s(&s.channel_message_counts)], + ["chunk_count", s.chunk_count.to_string()], + ["message_count", s.message_count.to_string()], + ["message_end_time", s.message_end_time.to_string()], + ["message_start_time", s.message_start_time.to_string()], + ["metadata_count", s.metadata_count.to_string()], + ["schema_count", s.schema_count.to_string()] + ]), + Record::Metadata(m) => json!([["metadata", m.metadata], ["name", m.name]]), + Record::MetadataIndex(i) => json!([ + ["length", i.length.to_string()], + ["name", i.name], + ["offset", i.offset.to_string()] + ]), + Record::SummaryOffset(s) => json!([ + ["group_length", s.group_length.to_string()], + ["group_opcode", s.group_opcode.to_string()], + ["group_start", s.group_start.to_string()] + ]), + Record::DataEnd(d) => json!([["data_section_crc", d.data_section_crc.to_string()]]), + Record::Unknown { opcode, .. } => { + panic!("Unknown record in conformance test: (op {opcode})") + } + } +} + +pub fn as_json(view: &Record<'_>) -> Value { + let typename = get_type(view); + let fields = get_fields(view); + json!({"type": typename, "fields": fields}) +} diff --git a/rust/examples/conformance_reader.rs b/rust/examples/conformance_reader.rs index 941ee9386e..3bfa97ae19 100644 --- a/rust/examples/conformance_reader.rs +++ b/rust/examples/conformance_reader.rs @@ -1,136 +1,11 @@ -use mcap::records::Record; - -use std::{collections::BTreeMap, env, process}; +#[path = "common/serialization.rs"] +mod serialization; use serde_json::{json, Value}; -// We don't want to force Serde on users just for the sake of the conformance tests. -// (In what context would you want to serialize individual records of a MCAP?) -// Stamp out and stringify them ourselves: - -fn get_type(rec: &Record<'_>) -> &'static str { - match rec { - Record::Header(_) => "Header", - Record::Footer(_) => "Footer", - Record::Schema { .. } => "Schema", - Record::Channel(_) => "Channel", - Record::Message { .. } => "Message", - Record::Chunk { .. } => "Chunk", - Record::MessageIndex(_) => "MessageIndex", - Record::ChunkIndex(_) => "ChunkIndex", - Record::Attachment { .. 
} => "Attachment", - Record::AttachmentIndex(_) => "AttachmentIndex", - Record::Statistics(_) => "Statistics", - Record::Metadata(_) => "Metadata", - Record::MetadataIndex(_) => "MetadataIndex", - Record::SummaryOffset(_) => "SummaryOffset", - Record::DataEnd(_) => "DataEnd", - Record::Unknown { opcode, .. } => { - panic!("Unknown record in conformance test: (op {opcode})") - } - } -} - -fn get_fields(rec: &Record<'_>) -> Value { - fn b2s(bytes: &[u8]) -> Vec { - bytes.iter().map(|b| b.to_string()).collect() - } - fn m2s(map: &BTreeMap) -> BTreeMap { - map.iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect() - } - - match rec { - Record::Header(h) => json!([["library", h.library], ["profile", h.profile]]), - Record::Footer(f) => json!([ - ["summary_crc", f.summary_crc.to_string()], - ["summary_offset_start", f.summary_offset_start.to_string()], - ["summary_start", f.summary_start.to_string()] - ]), - Record::Schema { header, data } => json!([ - ["data", b2s(data)], - ["encoding", header.encoding], - ["id", header.id.to_string()], - ["name", header.name] - ]), - Record::Channel(c) => json!([ - ["id", c.id.to_string()], - ["message_encoding", c.message_encoding], - ["metadata", c.metadata], - ["schema_id", c.schema_id.to_string()], - ["topic", c.topic] - ]), - Record::Message { header, data } => json!([ - ["channel_id", header.channel_id.to_string()], - ["data", b2s(data)], - ["log_time", header.log_time.to_string()], - ["publish_time", header.publish_time.to_string()], - ["sequence", header.sequence.to_string()] - ]), - Record::Chunk { .. } => unreachable!("Chunks are flattened"), - Record::MessageIndex(_) => unreachable!("MessageIndexes are skipped"), - Record::ChunkIndex(i) => json!([ - ["chunk_length", i.chunk_length.to_string()], - ["chunk_start_offset", i.chunk_start_offset.to_string()], - ["compressed_size", i.compressed_size.to_string()], - ["compression", i.compression], - ["message_end_time", i.message_end_time.to_string()], - ["message_index_length", i.message_index_length.to_string()], - ["message_index_offsets", m2s(&i.message_index_offsets)], - ["message_start_time", i.message_start_time.to_string()], - ["uncompressed_size", i.uncompressed_size.to_string()] - ]), - Record::Attachment { header, data } => json!([ - ["create_time", header.create_time.to_string()], - ["data", b2s(data)], - ["log_time", header.log_time.to_string()], - ["media_type", header.media_type], - ["name", header.name] - ]), - Record::AttachmentIndex(i) => json!([ - ["create_time", i.create_time.to_string()], - ["data_size", i.data_size.to_string()], - ["length", i.length.to_string()], - ["log_time", i.log_time.to_string()], - ["media_type", i.media_type], - ["name", i.name], - ["offset", i.offset.to_string()] - ]), - Record::Statistics(s) => json!([ - ["attachment_count", s.attachment_count.to_string()], - ["channel_count", s.channel_count.to_string()], - ["channel_message_counts", m2s(&s.channel_message_counts)], - ["chunk_count", s.chunk_count.to_string()], - ["message_count", s.message_count.to_string()], - ["message_end_time", s.message_end_time.to_string()], - ["message_start_time", s.message_start_time.to_string()], - ["metadata_count", s.metadata_count.to_string()], - ["schema_count", s.schema_count.to_string()] - ]), - Record::Metadata(m) => json!([["metadata", m.metadata], ["name", m.name]]), - Record::MetadataIndex(i) => json!([ - ["length", i.length.to_string()], - ["name", i.name], - ["offset", i.offset.to_string()] - ]), - Record::SummaryOffset(s) => json!([ - ["group_length", 
s.group_length.to_string()], - ["group_opcode", s.group_opcode.to_string()], - ["group_start", s.group_start.to_string()] - ]), - Record::DataEnd(d) => json!([["data_section_crc", d.data_section_crc.to_string()]]), - Record::Unknown { opcode, .. } => { - panic!("Unknown record in conformance test: (op {opcode})") - } - } -} - -fn as_json(view: &Record<'_>) -> Value { - let typename = get_type(view); - let fields = get_fields(view); - json!({"type": typename, "fields": fields}) -} +use mcap::records::Record; +use std::env; +use std::process; pub fn main() { let args: Vec = env::args().collect(); @@ -143,7 +18,7 @@ pub fn main() { for rec in mcap::read::ChunkFlattener::new(&file).expect("Couldn't read file") { let r = rec.expect("failed to read next record"); if !matches!(r, Record::MessageIndex(_)) { - json_records.push(as_json(&r)); + json_records.push(serialization::as_json(&r)); } } let out = json!({ "records": json_records }); diff --git a/rust/examples/conformance_reader_async.rs b/rust/examples/conformance_reader_async.rs new file mode 100644 index 0000000000..ba8dfb94a8 --- /dev/null +++ b/rust/examples/conformance_reader_async.rs @@ -0,0 +1,34 @@ +#[path = "common/serialization.rs"] +mod serialization; + +use serde_json::{json, Value}; + +use serialization::as_json; +use std::env; +use std::process; +use tokio::fs::File; + +use tokio; + +#[tokio::main(flavor = "current_thread")] +async fn main() { + let args: Vec = env::args().collect(); + if args.len() < 2 { + eprintln!("Please supply an MCAP file as argument"); + process::exit(1); + } + let file = File::open(&args[1]).await.expect("couldn't open file"); + let mut reader = mcap::tokio::RecordReader::new(file); + + let mut json_records: Vec = vec![]; + let mut buf: Vec = Vec::new(); + while let Some(opcode) = reader.next_record(&mut buf).await { + let opcode = opcode.expect("failed to read next record"); + if opcode != mcap::records::op::MESSAGE_INDEX { + let parsed = mcap::parse_record(opcode, &buf[..]).expect("failed to parse record"); + json_records.push(as_json(&parsed)); + } + } + let out = json!({ "records": json_records }); + print!("{}", serde_json::to_string_pretty(&out).unwrap()); +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 9b2eb1b1c7..bdb1f389d9 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -75,6 +75,8 @@ pub mod read; pub mod records; +#[cfg(feature = "tokio")] +pub mod tokio; pub mod write; mod io_utils; @@ -119,6 +121,8 @@ pub enum McapError { UnexpectedEof, #[error("Chunk ended in the middle of a record")] UnexpectedEoc, + #[error("Record with opcode {opcode:02X} has length {len}, need at least {expected} to parse")] + RecordTooShort { opcode: u8, len: u64, expected: u64 }, #[error("Message {0} referenced unknown channel {1}")] UnknownChannel(u32, u16), #[error("Channel `{0}` referenced unknown schema {1}")] @@ -196,5 +200,5 @@ pub struct Attachment<'a> { pub data: Cow<'a, [u8]>, } -pub use read::{MessageStream, Summary}; +pub use read::{parse_record, MessageStream, Summary}; pub use write::{WriteOptions, Writer}; diff --git a/rust/src/read.rs b/rust/src/read.rs index 414c062b21..94d8d53607 100644 --- a/rust/src/read.rs +++ b/rust/src/read.rs @@ -136,15 +136,16 @@ fn read_record_from_slice<'a>(buf: &mut &'a [u8]) -> McapResult McapResult> { +/// Given a records' opcode and data, parse into a Record. The resulting Record will contain +/// borrowed slices from `body`. +pub fn parse_record(op: u8, body: &[u8]) -> McapResult> { macro_rules! 
record { ($b:ident) => {{ let mut cur = Cursor::new($b); @@ -278,7 +279,7 @@ impl<'a> ChunkReader<'a> { #[cfg(feature = "lz4")] "lz4" => ChunkDecompressor::Compressed(Some(CountingCrcReader::new(Box::new( - lz4_flex::frame::FrameDecoder::new(data), + lz4::Decoder::new(data)?, )))), #[cfg(not(feature = "lz4"))] @@ -368,7 +369,7 @@ fn read_record_from_chunk_stream<'a, R: Read>(r: &mut R) -> McapResult { - let mut record = Vec::new(); + let mut record = Vec::with_capacity(len as usize); r.take(len).read_to_end(&mut record)?; if len as usize != record.len() { return Err(McapError::UnexpectedEoc); @@ -396,7 +397,7 @@ fn read_record_from_chunk_stream<'a, R: Read>(r: &mut R) -> McapResult { - let mut record = Vec::new(); + let mut record = Vec::with_capacity(len as usize); r.take(len).read_to_end(&mut record)?; if len as usize != record.len() { return Err(McapError::UnexpectedEoc); @@ -421,14 +422,14 @@ fn read_record_from_chunk_stream<'a, R: Read>(r: &mut R) -> McapResult RawMessageStream<'a> { } pub struct RawMessage<'a> { - header: records::MessageHeader, - data: Cow<'a, [u8]>, + pub header: records::MessageHeader, + pub data: Cow<'a, [u8]>, } impl<'a> Iterator for RawMessageStream<'a> { diff --git a/rust/src/records.rs b/rust/src/records.rs index eeb842c495..0ac5d7f72a 100644 --- a/rust/src/records.rs +++ b/rust/src/records.rs @@ -99,6 +99,44 @@ impl Record<'_> { Record::Unknown { opcode, .. } => *opcode, } } + + /// Moves this value into a fully-owned variant with no borrows. This should be free for + /// already-owned values. + pub fn into_owned(self) -> Record<'static> { + match self { + Record::Header(header) => Record::Header(header), + Record::Footer(footer) => Record::Footer(footer), + Record::Schema { header, data } => Record::Schema { + header, + data: Cow::Owned(data.into_owned()), + }, + Record::Channel(channel) => Record::Channel(channel), + Record::Message { header, data } => Record::Message { + header, + data: Cow::Owned(data.into_owned()), + }, + Record::Chunk { header, data } => Record::Chunk { + header, + data: Cow::Owned(data.into_owned()), + }, + Record::MessageIndex(index) => Record::MessageIndex(index), + Record::ChunkIndex(index) => Record::ChunkIndex(index), + Record::Attachment { header, data } => Record::Attachment { + header, + data: Cow::Owned(data.into_owned()), + }, + Record::AttachmentIndex(index) => Record::AttachmentIndex(index), + Record::Statistics(statistics) => Record::Statistics(statistics), + Record::Metadata(metadata) => Record::Metadata(metadata), + Record::MetadataIndex(index) => Record::MetadataIndex(index), + Record::SummaryOffset(offset) => Record::SummaryOffset(offset), + Record::DataEnd(end) => Record::DataEnd(end), + Record::Unknown { opcode, data } => Record::Unknown { + opcode, + data: Cow::Owned(data.into_owned()), + }, + } + } } #[binrw] diff --git a/rust/src/tokio.rs b/rust/src/tokio.rs new file mode 100644 index 0000000000..cf3d68ebd5 --- /dev/null +++ b/rust/src/tokio.rs @@ -0,0 +1,8 @@ +//! 
Read MCAP data from a stream asynchronously +#[cfg(feature = "lz4")] +mod lz4; +pub mod read; +mod read_exact_or_zero; + +pub use read::{RecordReader, RecordReaderOptions}; +use read_exact_or_zero::read_exact_or_zero; diff --git a/rust/src/tokio/lz4.rs b/rust/src/tokio/lz4.rs new file mode 100644 index 0000000000..05a0f65a9e --- /dev/null +++ b/rust/src/tokio/lz4.rs @@ -0,0 +1,146 @@ +use std::io::{Error, ErrorKind, Result}; +use std::pin::{pin, Pin}; +use std::ptr; +use std::task::{Context, Poll}; + +use lz4::liblz4::{ + check_error, LZ4FDecompressionContext, LZ4F_createDecompressionContext, LZ4F_decompress, + LZ4F_freeDecompressionContext, LZ4F_VERSION, +}; +use tokio::io::{AsyncRead, ReadBuf}; + +const BUFFER_SIZE: usize = 32 * 1024; + +#[derive(Debug)] +struct DecoderContext { + c: LZ4FDecompressionContext, +} + +// An adaptation of the [`lz4::Decoder`] [`std::io::Read`] impl, but for [`tokio::io::AsyncRead`]. +// Code below is adapted from the [lz4](https://github.com/10XGenomics/lz4-rs) crate source. +#[derive(Debug)] +pub struct Lz4Decoder { + c: DecoderContext, + r: R, + input_buf: Box<[u8]>, + unread_input_start: usize, + unread_input_end: usize, + next: usize, +} + +impl Lz4Decoder { + /// Creates a new decoder which reads its input from the given + /// input stream. The input stream can be re-acquired by calling + /// `finish()` + pub fn new(r: R) -> Result> { + Ok(Lz4Decoder { + r, + c: DecoderContext::new()?, + input_buf: vec![0; BUFFER_SIZE].into_boxed_slice(), + unread_input_start: BUFFER_SIZE, + unread_input_end: BUFFER_SIZE, + // Minimal LZ4 stream size + next: 11, + }) + } + + pub fn finish(self) -> (R, Result<()>) { + ( + self.r, + match self.next { + 0 => Ok(()), + _ => Err(Error::new( + ErrorKind::Interrupted, + "Finish called before end of compressed stream", + )), + }, + ) + } +} + +impl AsyncRead for Lz4Decoder { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + output_buf: &mut ReadBuf<'_>, + ) -> Poll> { + // Thre's nothing left to read. + if self.next == 0 || output_buf.remaining() == 0 { + return Poll::Ready(Ok(())); + } + let mut written_len: usize = 0; + let this = self.get_mut(); + while written_len == 0 { + // this reader buffers input data until it has enough to present to the lz4 frame decoder. + // if there's nothing unread, request more data from the reader. + if this.unread_input_start >= this.unread_input_end { + // request a full BUFFER_SIZE or the amount requested by the lz4 frame decoder, + // whichever is less. + let need = std::cmp::min(BUFFER_SIZE, this.next); + // try reading more input data. If it's not ready, return and try again later. + // NOTE: we don't need to save this stack frame as a future and re-enter it later + // because the only frame-local state `written_len` has not been modified and can be + // discarded. + { + let mut input_buf = ReadBuf::new(&mut this.input_buf[..need]); + let result = pin!(&mut this.r).poll_read(cx, &mut input_buf); + match result { + Poll::Pending => return result, + Poll::Ready(Err(_)) => return result, + _ => {} + }; + this.unread_input_start = 0; + this.unread_input_end = input_buf.filled().len(); + this.next -= this.unread_input_end; + } + // The read succeeded. If zero bytes were read, we're at the end of the stream. + if this.unread_input_end == 0 { + return Poll::Ready(Ok(())); + } + } + // feed bytes from our input buffer into the compressor, writing into the output + // buffer until either the output buffer is full or the input buffer is consumed. 
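+            // NOTE: per the LZ4 frame API, LZ4F_decompress returns a hint of how many more
+            // compressed bytes it expects for the next call (0 once the frame is complete);
+            // `next` carries that hint back to the refill branch above, which uses it to size
+            // the next read from the underlying reader.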
+ while (written_len < output_buf.remaining()) + && (this.unread_input_start < this.unread_input_end) + { + let mut src_size = this.unread_input_end - this.unread_input_start; + let mut dst_size = output_buf.remaining() - written_len; + let prev_filled = output_buf.filled().len(); + let len = check_error(unsafe { + LZ4F_decompress( + this.c.c, + output_buf.initialize_unfilled().as_mut_ptr(), + &mut dst_size, + this.input_buf[this.unread_input_start..].as_ptr(), + &mut src_size, + ptr::null(), + ) + })?; + this.unread_input_start += src_size; + written_len += dst_size; + output_buf.set_filled(prev_filled + written_len); + if len == 0 { + this.next = 0; + return Poll::Ready(Ok(())); + } else if this.next < len { + this.next = len; + } + } + } + Poll::Ready(Ok(())) + } +} + +impl DecoderContext { + fn new() -> Result { + let mut context = LZ4FDecompressionContext(ptr::null_mut()); + check_error(unsafe { LZ4F_createDecompressionContext(&mut context, LZ4F_VERSION) })?; + Ok(DecoderContext { c: context }) + } +} + +impl Drop for DecoderContext { + fn drop(&mut self) { + unsafe { LZ4F_freeDecompressionContext(self.c) }; + } +} diff --git a/rust/src/tokio/read.rs b/rust/src/tokio/read.rs new file mode 100644 index 0000000000..50a1384250 --- /dev/null +++ b/rust/src/tokio/read.rs @@ -0,0 +1,434 @@ +use std::pin::{pin, Pin}; +use std::task::{Context, Poll}; + +#[cfg(feature = "zstd")] +use async_compression::tokio::bufread::ZstdDecoder; +use binrw::BinReaderExt; +use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf, Take}; + +#[cfg(feature = "lz4")] +use crate::tokio::lz4::Lz4Decoder; +use crate::tokio::read_exact_or_zero; +use crate::{records, McapError, McapResult, MAGIC}; + +enum ReaderState { + Base(R), + UncompressedChunk(Take), + #[cfg(feature = "zstd")] + ZstdChunk(ZstdDecoder>>), + #[cfg(feature = "lz4")] + Lz4Chunk(Lz4Decoder>), + Empty, +} + +impl AsyncRead for ReaderState +where + R: AsyncRead + std::marker::Unpin, +{ + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + match self.get_mut() { + ReaderState::Base(r) => pin!(r).poll_read(cx, buf), + ReaderState::UncompressedChunk(r) => pin!(r).poll_read(cx, buf), + #[cfg(feature = "zstd")] + ReaderState::ZstdChunk(r) => pin!(r).poll_read(cx, buf), + #[cfg(feature = "lz4")] + ReaderState::Lz4Chunk(r) => pin!(r).poll_read(cx, buf), + ReaderState::Empty => { + panic!("invariant: reader is only set to empty while swapping with another valid variant") + } + } + } +} +impl ReaderState +where + R: AsyncRead, +{ + pub fn into_inner(self) -> McapResult { + match self { + ReaderState::Base(reader) => Ok(reader), + ReaderState::UncompressedChunk(take) => Ok(take.into_inner()), + #[cfg(feature = "zstd")] + ReaderState::ZstdChunk(decoder) => Ok(decoder.into_inner().into_inner().into_inner()), + #[cfg(feature = "lz4")] + ReaderState::Lz4Chunk(decoder) => { + let (output, result) = decoder.finish(); + result?; + Ok(output.into_inner()) + } + ReaderState::Empty => { + panic!("invariant: reader is only set to empty while swapping with another valid variant") + } + } + } +} + +/// Reads an MCAP file record-by-record, writing the raw record data into a caller-provided Vec. 
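+///
+/// By default the reader descends into chunk records, decompressing them and yielding the
+/// records found inside; set [`RecordReaderOptions::emit_chunks`] to receive whole chunk
+/// records instead.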
+/// ```no_run +/// use std::fs; +/// +/// use tokio::fs::File; +/// +/// async fn read_it() { +/// let file = File::open("in.mcap").await.expect("couldn't open file"); +/// let mut record_buf: Vec = Vec::new(); +/// let mut reader = mcap::tokio::RecordReader::new(file); +/// while let Some(result) = reader.next_record(&mut record_buf).await { +/// let opcode = result.expect("couldn't read next record"); +/// let raw_record = mcap::parse_record(opcode, &record_buf[..]).expect("couldn't parse"); +/// // do something with the record... +/// } +/// } +/// ``` +pub struct RecordReader { + reader: ReaderState, + options: RecordReaderOptions, + start_magic_seen: bool, + footer_seen: bool, + to_discard_after_chunk: usize, + scratch: Box<[u8]>, +} + +#[derive(Default, Clone)] +pub struct RecordReaderOptions { + /// If true, the reader will not expect the MCAP magic at the start of the stream. + pub skip_start_magic: bool, + /// If true, the reader will not expect the MCAP magic at the end of the stream. + pub skip_end_magic: bool, + /// If true, the reader will yield entire chunk records. Otherwise, the reader will decompress + /// and read into the chunk, yielding the records inside. + pub emit_chunks: bool, +} + +enum Cmd { + YieldRecord(u8), + EnterChunk { + header: records::ChunkHeader, + len: u64, + }, + ExitChunk, + Stop, +} + +impl RecordReader +where + R: AsyncRead + std::marker::Unpin, +{ + pub fn new(reader: R) -> Self { + Self::new_with_options(reader, &RecordReaderOptions::default()) + } + + pub fn new_with_options(reader: R, options: &RecordReaderOptions) -> Self { + Self { + reader: ReaderState::Base(reader), + options: options.clone(), + start_magic_seen: false, + footer_seen: false, + to_discard_after_chunk: 0, + scratch: vec![0; 1024].into_boxed_slice(), + } + } + + pub fn into_inner(self) -> McapResult { + self.reader.into_inner() + } + + /// Reads the next record from the input stream and copies the raw content into `data`. + /// Returns the record's opcode as a result. 
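+    ///
+    /// Returns `None` once the end of the stream is reached, either after the trailing MCAP
+    /// magic or at a clean EOF when `skip_end_magic` is set.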
+ pub async fn next_record(&mut self, data: &mut Vec) -> Option> { + loop { + let cmd = match self.next_record_inner(data).await { + Ok(cmd) => cmd, + Err(err) => return Some(Err(err)), + }; + match cmd { + Cmd::Stop => return None, + Cmd::YieldRecord(opcode) => return Some(Ok(opcode)), + Cmd::EnterChunk { header, len } => { + let mut reader_state = ReaderState::Empty; + std::mem::swap(&mut reader_state, &mut self.reader); + match header.compression.as_str() { + #[cfg(feature = "zstd")] + "zstd" => { + let reader = match reader_state.into_inner() { + Ok(reader) => reader, + Err(err) => return Some(Err(err)), + }; + self.reader = ReaderState::ZstdChunk(ZstdDecoder::new( + tokio::io::BufReader::new(reader.take(header.compressed_size)), + )); + } + #[cfg(feature = "lz4")] + "lz4" => { + let reader = match reader_state.into_inner() { + Ok(reader) => reader, + Err(err) => return Some(Err(err)), + }; + let decoder = match Lz4Decoder::new(reader.take(header.compressed_size)) + { + Ok(decoder) => decoder, + Err(err) => return Some(Err(err.into())), + }; + self.reader = ReaderState::Lz4Chunk(decoder); + } + "" => { + let reader = match reader_state.into_inner() { + Ok(reader) => reader, + Err(err) => return Some(Err(err)), + }; + self.reader = + ReaderState::UncompressedChunk(reader.take(header.compressed_size)); + } + _ => { + std::mem::swap(&mut reader_state, &mut self.reader); + return Some(Err(McapError::UnsupportedCompression( + header.compression.clone(), + ))); + } + } + self.to_discard_after_chunk = len as usize + - (40 + header.compression.len() + header.compressed_size as usize); + } + Cmd::ExitChunk => { + let mut reader_state = ReaderState::Empty; + std::mem::swap(&mut reader_state, &mut self.reader); + self.reader = ReaderState::Base(match reader_state.into_inner() { + Ok(reader) => reader, + Err(err) => return Some(Err(err)), + }); + while self.to_discard_after_chunk > 0 { + let to_read = if self.to_discard_after_chunk > self.scratch.len() { + self.scratch.len() + } else { + self.to_discard_after_chunk + }; + match self.reader.read(&mut self.scratch[..to_read]).await { + Ok(n) => self.to_discard_after_chunk -= n, + Err(err) => return Some(Err(err.into())), + }; + } + } + }; + } + } + + async fn next_record_inner(&mut self, data: &mut Vec) -> McapResult { + if let ReaderState::Base(reader) = &mut self.reader { + if !self.start_magic_seen && !self.options.skip_start_magic { + reader.read_exact(&mut self.scratch[..MAGIC.len()]).await?; + if &self.scratch[..MAGIC.len()] != MAGIC { + return Err(McapError::BadMagic); + } + self.start_magic_seen = true; + } + if self.footer_seen && !self.options.skip_end_magic { + reader.read_exact(&mut self.scratch[..MAGIC.len()]).await?; + if &self.scratch[..MAGIC.len()] != MAGIC { + return Err(McapError::BadMagic); + } + return Ok(Cmd::Stop); + } + let readlen = read_exact_or_zero(reader, &mut self.scratch[..9]).await?; + if readlen == 0 { + if self.options.skip_end_magic { + return Ok(Cmd::Stop); + } else { + return Err(McapError::UnexpectedEof); + } + } + let opcode = self.scratch[0]; + if opcode == records::op::FOOTER { + self.footer_seen = true; + } + let record_len = u64::from_le_bytes(self.scratch[1..9].try_into().unwrap()); + if opcode == records::op::CHUNK && !self.options.emit_chunks { + let header = read_chunk_header(reader, data, record_len).await?; + return Ok(Cmd::EnterChunk { + header, + len: record_len, + }); + } + data.resize(record_len as usize, 0); + reader.read_exact(&mut data[..]).await?; + Ok(Cmd::YieldRecord(opcode)) + } else { + let 
len = read_exact_or_zero(&mut self.reader, &mut self.scratch[..9]).await?; + if len == 0 { + return Ok(Cmd::ExitChunk); + } + let opcode = self.scratch[0]; + let record_len = u64::from_le_bytes(self.scratch[1..9].try_into().unwrap()); + data.resize(record_len as usize, 0); + self.reader.read_exact(&mut data[..]).await?; + Ok(Cmd::YieldRecord(opcode)) + } + } +} + +async fn read_chunk_header( + reader: &mut R, + scratch: &mut Vec, + record_len: u64, +) -> McapResult { + let mut header = records::ChunkHeader { + message_start_time: 0, + message_end_time: 0, + uncompressed_size: 0, + uncompressed_crc: 0, + compression: String::new(), + compressed_size: 0, + }; + if record_len < 40 { + return Err(McapError::RecordTooShort { + opcode: records::op::CHUNK, + len: record_len, + expected: 40, + }); + } + scratch.resize(32, 0); + reader.read_exact(&mut scratch[..]).await?; + let compression_len: u32 = { + let mut cursor = std::io::Cursor::new(&scratch); + header.message_start_time = cursor.read_le()?; + header.message_end_time = cursor.read_le()?; + header.uncompressed_size = cursor.read_le()?; + header.uncompressed_crc = cursor.read_le()?; + cursor.read_le()? + }; + scratch.resize(compression_len as usize, 0); + if record_len < (40 + compression_len) as u64 { + return Err(McapError::RecordTooShort { + opcode: records::op::CHUNK, + len: record_len, + expected: (40 + compression_len) as u64, + }); + } + reader.read_exact(&mut scratch[..]).await?; + header.compression = match std::str::from_utf8(&scratch[..]) { + Ok(val) => val.to_owned(), + Err(err) => { + return Err(McapError::Parse(binrw::error::Error::Custom { + pos: 32, + err: Box::new(err), + })); + } + }; + scratch.resize(8, 0); + reader.read_exact(&mut scratch[..]).await?; + header.compressed_size = u64::from_le_bytes(scratch[..].try_into().unwrap()); + let available = record_len - (32 + compression_len as u64 + 8); + if available < header.compressed_size { + return Err(McapError::BadChunkLength { + header: header.compressed_size, + available, + }); + } + Ok(header) +} + +#[cfg(test)] +mod tests { + use crate::read::parse_record; + use std::collections::BTreeMap; + + use super::*; + #[tokio::test] + async fn test_record_reader() -> Result<(), McapError> { + for compression in [ + None, + #[cfg(feature = "zstd")] + Some(crate::Compression::Zstd), + #[cfg(feature = "lz4")] + Some(crate::Compression::Lz4), + ] { + let mut buf = std::io::Cursor::new(Vec::new()); + { + let mut writer = crate::WriteOptions::new() + .compression(compression) + .create(&mut buf)?; + let channel = std::sync::Arc::new(crate::Channel { + topic: "chat".to_owned(), + schema: None, + message_encoding: "json".to_owned(), + metadata: BTreeMap::new(), + }); + writer.add_channel(&channel)?; + writer.write(&crate::Message { + channel, + sequence: 0, + log_time: 0, + publish_time: 0, + data: (&[0, 1, 2]).into(), + })?; + writer.finish()?; + } + let mut reader = RecordReader::new(std::io::Cursor::new(buf.into_inner())); + let mut record = Vec::new(); + let mut opcodes: Vec = Vec::new(); + while let Some(opcode) = reader.next_record(&mut record).await { + let opcode = opcode?; + opcodes.push(opcode); + parse_record(opcode, &record)?; + } + assert_eq!( + opcodes.as_slice(), + [ + records::op::HEADER, + records::op::CHANNEL, + records::op::MESSAGE, + records::op::MESSAGE_INDEX, + records::op::DATA_END, + records::op::CHANNEL, + records::op::CHUNK_INDEX, + records::op::STATISTICS, + records::op::SUMMARY_OFFSET, + records::op::SUMMARY_OFFSET, + records::op::SUMMARY_OFFSET, + 
records::op::FOOTER, + ], + "reads opcodes from MCAP compressed with {:?}", + compression + ); + } + Ok(()) + } + #[cfg(feature = "lz4")] + #[tokio::test] + async fn test_lz4_decompression() -> Result<(), McapError> { + let mut buf = std::io::Cursor::new(Vec::new()); + { + let mut writer = crate::WriteOptions::new() + .compression(Some(crate::Compression::Lz4)) + .create(&mut buf)?; + let channel = std::sync::Arc::new(crate::Channel { + topic: "chat".to_owned(), + schema: None, + message_encoding: "json".to_owned(), + metadata: BTreeMap::new(), + }); + let data: Vec = vec![0; 1024]; + writer.add_channel(&channel)?; + for n in 0..10000 { + { + writer.write(&crate::Message { + channel: channel.clone(), + log_time: n, + publish_time: n, + sequence: n as u32, + data: std::borrow::Cow::Owned(data.clone()), + })?; + } + } + writer.finish()?; + } + let mut reader = RecordReader::new(std::io::Cursor::new(buf.into_inner())); + let mut record = Vec::new(); + while let Some(opcode) = reader.next_record(&mut record).await { + parse_record(opcode?, &record)?; + } + Ok(()) + } +} diff --git a/rust/src/tokio/read_exact_or_zero.rs b/rust/src/tokio/read_exact_or_zero.rs new file mode 100644 index 0000000000..cc4eb902d5 --- /dev/null +++ b/rust/src/tokio/read_exact_or_zero.rs @@ -0,0 +1,103 @@ +use tokio::io::{AsyncRead, AsyncReadExt}; + +/// read up to `buf.len()` bytes from `r` into `buf`. This repeatedly calls read() on `r` until +/// either the buffer is full or EOF is reached. If either 0 or buf.len() bytes were read before +/// EOF, Ok(n) is returned. If EOF is reached after 0 bytes but before buf.len(), Err(UnexpectedEOF) +/// is returned. +/// This is useful for cases where we expect either to read either a whole MCAP record or EOF. +pub(crate) async fn read_exact_or_zero( + r: &mut R, + buf: &mut [u8], +) -> Result { + let mut pos: usize = 0; + loop { + let readlen = r.read(&mut buf[pos..]).await?; + if readlen == 0 { + if pos != 0 { + return Err(std::io::ErrorKind::UnexpectedEof.into()); + } else { + return Ok(0); + } + } + pos += readlen; + if pos == buf.len() { + return Ok(pos); + } + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use std::cmp::min; + + struct ZeroReader { + remaining: usize, + max_read_len: usize, + } + + impl AsyncRead for ZeroReader { + fn poll_read( + mut self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> std::task::Poll> { + let max_read_len = self.as_ref().max_read_len; + let remaining = self.as_ref().remaining; + if remaining == 0 { + return std::task::Poll::Ready(Ok(())); + } + let to_fill = min(min(remaining, buf.remaining()), max_read_len); + buf.initialize_unfilled_to(to_fill).fill(0); + buf.set_filled(to_fill); + self.as_mut().remaining -= to_fill; + return std::task::Poll::Ready(Ok(())); + } + } + #[tokio::test] + async fn test_full_read_is_not_error() { + let mut r = ZeroReader { + remaining: 10, + max_read_len: 10, + }; + let mut buf: Vec = vec![1; 10]; + let result = read_exact_or_zero(&mut r, &mut buf).await; + assert_eq!(result.ok(), Some(10)); + assert_eq!(&buf[..], &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + } + + #[tokio::test] + async fn test_eof_is_not_error() { + let mut r = ZeroReader { + remaining: 0, + max_read_len: 10, + }; + let mut buf: Vec = vec![1; 10]; + let result = read_exact_or_zero(&mut r, &mut buf).await; + assert_eq!(result.ok(), Some(0)); + assert_eq!(&buf[..], &[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + } + #[tokio::test] + async fn test_repeated_read_calls() { + let mut r = ZeroReader { + 
remaining: 10, + max_read_len: 4, + }; + let mut buf: Vec = vec![1; 10]; + let result = read_exact_or_zero(&mut r, &mut buf).await; + assert_eq!(result.ok(), Some(10)); + assert_eq!(&buf[..], &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + } + #[tokio::test] + async fn test_partial_read_is_error() { + let mut r = ZeroReader { + remaining: 4, + max_read_len: 2, + }; + let mut buf: Vec = vec![1; 10]; + let result = read_exact_or_zero(&mut r, &mut buf).await; + assert!(!result.is_ok()); + assert_eq!(&buf[..], &[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]); + } +} diff --git a/rust/src/write.rs b/rust/src/write.rs index 4d3c44d33b..3be18f9282 100644 --- a/rust/src/write.rs +++ b/rust/src/write.rs @@ -108,6 +108,7 @@ pub struct WriteOptions { compression: Option, profile: String, chunk_size: Option, + use_chunks: bool, } impl Default for WriteOptions { @@ -119,6 +120,7 @@ impl Default for WriteOptions { compression: None, profile: String::new(), chunk_size: Some(1024 * 768), + use_chunks: true, } } } @@ -151,10 +153,19 @@ impl WriteOptions { /// If `None`, chunks will not be automatically closed and the user must call `flush()` to /// begin a new chunk. pub fn chunk_size(self, chunk_size: Option) -> Self { - Self { - chunk_size: chunk_size, - ..self - } + Self { chunk_size, ..self } + } + + /// specifies whether to use chunks for storing messages. + /// + /// If `false`, messages will be written directly to the data section of the file. + /// This prevents using compression or indexing, but may be useful on small embedded systems + /// that cannot afford the memory overhead of storing chunk metadata for the entire recording. + /// + /// Note that it's often useful to post-process a non-chunked file using `mcap recover` to add + /// indexes for efficient processing. + pub fn use_chunks(self, use_chunks: bool) -> Self { + Self { use_chunks, ..self } } /// Creates a [`Writer`] whch writes to `w` using the given options @@ -227,8 +238,21 @@ impl<'a, W: Write + Seek> Writer<'a, W> { .channels .insert(chan.clone(), next_channel_id) .is_none()); - self.chunkin_time()? - .write_channel(next_channel_id, schema_id, chan)?; + if self.options.use_chunks { + self.chunkin_time()? + .write_channel(next_channel_id, schema_id, chan)?; + } else { + write_record( + self.finish_chunk()?, + &Record::Channel(records::Channel { + id: next_channel_id, + schema_id, + topic: chan.topic.clone(), + message_encoding: chan.message_encoding.clone(), + metadata: chan.metadata.clone(), + }), + )?; + } Ok(next_channel_id) } @@ -244,7 +268,21 @@ impl<'a, W: Write + Seek> Writer<'a, W> { .schemas .insert(schema.clone(), next_schema_id) .is_none()); - self.chunkin_time()?.write_schema(next_schema_id, schema)?; + if self.options.use_chunks { + self.chunkin_time()?.write_schema(next_schema_id, schema)?; + } else { + write_record( + self.finish_chunk()?, + &Record::Schema { + header: records::SchemaHeader { + id: next_schema_id, + name: schema.name.clone(), + encoding: schema.encoding.clone(), + }, + data: Cow::Borrowed(&schema.data), + }, + )?; + } Ok(next_schema_id) } @@ -301,7 +339,17 @@ impl<'a, W: Write + Seek> Writer<'a, W> { } } - self.chunkin_time()?.write_message(header, data)?; + if self.options.use_chunks { + self.chunkin_time()?.write_message(header, data)?; + } else { + write_record( + self.finish_chunk()?, + &Record::Message { + header: *header, + data: Cow::Borrowed(data), + }, + )?; + } Ok(()) } @@ -389,6 +437,11 @@ impl<'a, W: Write + Seek> Writer<'a, W> { // (That would leave it in an unspecified state if we bailed here!) 
// Instead briefly swap it out for a null writer while we set up the chunker // The writer will only be None if finish() was called. + assert!( + self.options.use_chunks, + "Trying to write to a chunk when chunking is disabled" + ); + let prev_writer = self.writer.take().expect(Self::WHERE_WRITER); self.writer = Some(match prev_writer { @@ -566,18 +619,23 @@ impl<'a, W: Write + Seek> Writer<'a, W> { }); } - // Write all chunk indexes. - let chunk_indexes_start = channels_end; - for index in chunk_indexes { - write_record(&mut ccw, &Record::ChunkIndex(index))?; - } - let chunk_indexes_end = posit(&mut ccw)?; - if chunk_indexes_end - chunk_indexes_start > 0 { - offsets.push(records::SummaryOffset { - group_opcode: op::CHUNK_INDEX, - group_start: chunk_indexes_start, - group_length: chunk_indexes_end - chunk_indexes_start, - }); + let chunk_indexes_end; + if self.options.use_chunks { + // Write all chunk indexes. + let chunk_indexes_start = channels_end; + for index in chunk_indexes { + write_record(&mut ccw, &Record::ChunkIndex(index))?; + } + chunk_indexes_end = posit(&mut ccw)?; + if chunk_indexes_end - chunk_indexes_start > 0 { + offsets.push(records::SummaryOffset { + group_opcode: op::CHUNK_INDEX, + group_start: chunk_indexes_start, + group_length: chunk_indexes_end - chunk_indexes_start, + }); + } + } else { + chunk_indexes_end = channels_end; } // ...and attachment indexes @@ -650,7 +708,7 @@ enum Compressor { #[cfg(feature = "zstd")] Zstd(zstd::Encoder<'static, W>), #[cfg(feature = "lz4")] - Lz4(lz4_flex::frame::FrameEncoder), + Lz4(lz4::Encoder), } impl Compressor { @@ -660,7 +718,11 @@ impl Compressor { #[cfg(feature = "zstd")] Compressor::Zstd(w) => w.finish()?, #[cfg(feature = "lz4")] - Compressor::Lz4(w) => w.finish()?, + Compressor::Lz4(w) => { + let (output, result) = w.finish(); + result?; + output + } }) } } @@ -736,7 +798,7 @@ impl ChunkWriter { Compressor::Zstd(enc) } #[cfg(feature = "lz4")] - Some(Compression::Lz4) => Compressor::Lz4(lz4_flex::frame::FrameEncoder::new(writer)), + Some(Compression::Lz4) => Compressor::Lz4(lz4::EncoderBuilder::new().build(writer)?), #[cfg(not(any(feature = "zstd", feature = "lz4")))] Some(_) => unreachable!("`Compression` is an empty enum that cannot be instantiated"), None => Compressor::Null(writer), diff --git a/rust/tests/attachment.rs b/rust/tests/attachment.rs index 76b6762777..57eb98b1eb 100644 --- a/rust/tests/attachment.rs +++ b/rust/tests/attachment.rs @@ -66,7 +66,8 @@ fn round_trip() -> Result<()> { ..Default::default() }), attachment_indexes: vec![mcap::records::AttachmentIndex { - offset: 38, // Finicky - depends on the length of the library version string + // offset depends on the length of the embedded library string, which includes the crate version + offset: 33 + (env!("CARGO_PKG_VERSION").len() as u64), length: 78, log_time: 2, create_time: 1, diff --git a/rust/tests/message.rs b/rust/tests/message.rs index 1b63f094ed..eb92a2b61c 100644 --- a/rust/tests/message.rs +++ b/rust/tests/message.rs @@ -39,11 +39,22 @@ fn smoke() -> Result<()> { #[test] fn round_trip() -> Result<()> { + run_round_trip(true) +} + +#[test] +fn round_trip_no_chunks() -> Result<()> { + run_round_trip(false) +} + +fn run_round_trip(use_chunks: bool) -> Result<()> { let mapped = map_mcap("../tests/conformance/data/OneMessage/OneMessage.mcap")?; let messages = mcap::MessageStream::new(&mapped)?; let mut tmp = tempfile()?; - let mut writer = mcap::Writer::new(BufWriter::new(&mut tmp))?; + let mut writer = mcap::WriteOptions::default() + 
.use_chunks(use_chunks) + .create(BufWriter::new(&mut tmp))?; for m in messages { writer.write(&m?)?; @@ -71,7 +82,7 @@ fn round_trip() -> Result<()> { message_count: 1, schema_count: 1, channel_count: 1, - chunk_count: 1, + chunk_count: if use_chunks { 1 } else { 0 }, message_start_time: 2, message_end_time: 2, channel_message_counts: [(0, 1)].into(), diff --git a/rust/tests/metadata.rs b/rust/tests/metadata.rs index 606d81e791..905ab98ff7 100644 --- a/rust/tests/metadata.rs +++ b/rust/tests/metadata.rs @@ -56,7 +56,8 @@ fn round_trip() -> Result<()> { ..Default::default() }), metadata_indexes: vec![mcap::records::MetadataIndex { - offset: 38, // Finicky - depends on the length of the library version string + // offset depends on the length of the embedded library string, which includes the crate version + offset: 33 + (env!("CARGO_PKG_VERSION").len() as u64), length: 41, name: String::from("myMetadata"), }], diff --git a/tests/conformance/scripts/run-tests/runners/RustAsyncReaderTestRunner.ts b/tests/conformance/scripts/run-tests/runners/RustAsyncReaderTestRunner.ts new file mode 100644 index 0000000000..8418d62aed --- /dev/null +++ b/tests/conformance/scripts/run-tests/runners/RustAsyncReaderTestRunner.ts @@ -0,0 +1,22 @@ +import { exec } from "child_process"; +import { join } from "path"; +import { promisify } from "util"; +import { TestVariant } from "variants/types"; + +import { StreamedReadTestRunner } from "./TestRunner"; +import { StreamedReadTestResult } from "../types"; + +export default class RustAsyncReaderTestRunner extends StreamedReadTestRunner { + readonly name = "rust-async-streamed-reader"; + + async runReadTest(filePath: string): Promise { + const { stdout } = await promisify(exec)(`./conformance_reader_async ${filePath}`, { + cwd: join(__dirname, "../../../../../rust/target/debug/examples"), + }); + return JSON.parse(stdout.trim()) as StreamedReadTestResult; + } + + supportsVariant(_variant: TestVariant): boolean { + return true; + } +} diff --git a/tests/conformance/scripts/run-tests/runners/index.ts b/tests/conformance/scripts/run-tests/runners/index.ts index 3c76f02148..6af475da47 100644 --- a/tests/conformance/scripts/run-tests/runners/index.ts +++ b/tests/conformance/scripts/run-tests/runners/index.ts @@ -8,6 +8,7 @@ import KaitaiStructReaderTestRunner from "./KaitaiStructReaderTestRunner"; import PythonIndexedReaderTestRunner from "./PythonIndexedReaderTestRunner"; import PythonStreamedReaderTestRunner from "./PythonStreamedReaderTestRunner"; import PythonWriterTestRunner from "./PythonWriterTestRunner"; +import RustAsyncReaderTestRunner from "./RustAsyncReaderTestRunner"; import RustReaderTestRunner from "./RustReaderTestRunner"; import RustWriterTestRunner from "./RustWriterTestRunner"; import SwiftIndexedReaderTestRunner from "./SwiftIndexedReaderTestRunner"; @@ -31,6 +32,7 @@ const runners: readonly (IndexedReadTestRunner | StreamedReadTestRunner | WriteT new TypescriptIndexedReaderTestRunner(), new TypescriptStreamedReaderTestRunner(), new TypescriptWriterTestRunner(), + new RustAsyncReaderTestRunner(), new RustReaderTestRunner(), new RustWriterTestRunner(), new SwiftWriterTestRunner(), diff --git a/typescript/benchmarks/bench.ts b/typescript/benchmarks/bench.ts new file mode 100644 index 0000000000..a37926fe47 --- /dev/null +++ b/typescript/benchmarks/bench.ts @@ -0,0 +1,116 @@ +import { hrtime, memoryUsage } from "process"; +import { getHeapStatistics } from "v8"; + +const COUNT = 5; + +type BenchmarkResult = + | { + name: string; + gcExposed: true; + 
samples: { + duration: bigint; + memoryUsage: { + usedHeapSize: number; + totalHeapSize: number; + arrayBuffers: number; + }; + }[]; + } + | { + name: string; + gcExposed: false; + samples: { + duration: bigint; + }[]; + }; + +/** runs a benchmark and logs statistics about runtime and memory usage afterward. + * + * @param name A name for the benchmark. + * @param run a routine that runs the benchmark code. + */ +export async function runBenchmark(name: string, run: () => Promise): Promise { + let result: BenchmarkResult; + if (global.gc != undefined) { + result = { + name, + gcExposed: true, + samples: [], + }; + for (let i = 0; i < COUNT; i++) { + global.gc(); + const baseline = getHeapStatistics(); + const baselineArrayBuffers = memoryUsage().arrayBuffers; + const before = hrtime.bigint(); + + await run(); + + const after = hrtime.bigint(); + const currentMemoryUsage = getHeapStatistics(); + const currentArrayBuffers = process.memoryUsage().arrayBuffers; + result.samples.push({ + duration: after - before, + memoryUsage: { + usedHeapSize: currentMemoryUsage.used_heap_size - baseline.used_heap_size, + totalHeapSize: currentMemoryUsage.total_heap_size - baseline.total_heap_size, + arrayBuffers: currentArrayBuffers - baselineArrayBuffers, + }, + }); + } + } else { + result = { + name, + gcExposed: false, + samples: [], + }; + for (let i = 0; i < COUNT; i++) { + const before = hrtime.bigint(); + await run(); + const after = hrtime.bigint(); + result.samples.push({ duration: after - before }); + } + } + printStats(result); +} + +function humanReadableStatistics(values: number[], unit: string): string { + const count = values.length; + if (count < 1) { + return "(No samples)"; + } + if (count < 2) { + return `${values[0]} ${unit}`; + } + const mean = values.reduce((a, b) => a + b, 0) / count; + const stdDev = Math.sqrt( + values.map((value) => (mean - value) ** 2).reduce((a, b) => a + b, 0) / (count - 1), + ); + const stdErr = stdDev / Math.sqrt(count); + return `${mean.toFixed(2)}±${stdErr.toFixed(2)} ${unit}`; +} + +function printStats(result: BenchmarkResult) { + let memoryResult = "(use --expose-gc to gather memory statistics)"; + if (result.gcExposed) { + const used = humanReadableStatistics( + result.samples.map((sample) => sample.memoryUsage.usedHeapSize / 2 ** 20), + "MB/op", + ); + const total = humanReadableStatistics( + result.samples.map((sample) => sample.memoryUsage.totalHeapSize / 2 ** 20), + "MB/op", + ); + const arrayBuffers = humanReadableStatistics( + result.samples.map((sample) => sample.memoryUsage.arrayBuffers / 2 ** 20), + "MB/op", + ); + memoryResult = `Heap Used: ${used}\tHeap Total: ${total}\tArrayBuffers: ${arrayBuffers}`; + } + const name = result.name; + const timeStat = humanReadableStatistics( + result.samples.map((r) => 1 / (Number(r.duration) / 1e9)), + "op/s", + ); + console.log(name); + console.log(`\t${timeStat}\t${memoryResult}`); +} diff --git a/typescript/benchmarks/index.ts b/typescript/benchmarks/index.ts index 19f8069406..818f8f6073 100644 --- a/typescript/benchmarks/index.ts +++ b/typescript/benchmarks/index.ts @@ -1,5 +1,8 @@ -import { McapWriter } from "@mcap/core"; -import { add, complete, cycle, suite } from "benny"; +import { McapIndexedReader, McapStreamReader, McapWriter, TempBuffer } from "@mcap/core"; +import assert from "assert"; +import { program } from "commander"; + +import { runBenchmark } from "./bench"; /** * An IWritable that copies data to memory, but overwrites previous data. 
This allows benchmarking @@ -30,7 +33,78 @@ class FakeMemoryWritable { } } -function addWriteBenchmark({ +async function benchmarkReaders() { + const messageSize = 10; + const chunkSize = 1024 * 1024 * 4; + const numMessages = 1_000_000; + const messageData = new Uint8Array(messageSize).fill(42); + const buf = new TempBuffer(); + const writer = new McapWriter({ writable: buf, chunkSize }); + await writer.start({ library: "", profile: "" }); + const channelId = await writer.registerChannel({ + schemaId: 0, + topic: "", + messageEncoding: "", + metadata: new Map([]), + }); + for (let i = 0; i < numMessages; i++) { + await writer.addMessage({ + channelId, + sequence: i, + logTime: BigInt(i), + publishTime: BigInt(i), + data: messageData, + }); + } + await writer.end(); + await runBenchmark(McapStreamReader.name, async () => { + const reader = new McapStreamReader(); + reader.append(buf.get()); + let messageCount = 0; + for (;;) { + const rec = reader.nextRecord(); + if (rec != undefined) { + if (rec.type === "Message") { + messageCount++; + } + } else { + break; + } + } + assert(messageCount === numMessages, `expected ${numMessages} messages, got ${messageCount}`); + }); + await runBenchmark(McapIndexedReader.name, async () => { + const reader = await McapIndexedReader.Initialize({ readable: buf }); + let messageCount = 0; + for await (const _ of reader.readMessages()) { + messageCount++; + } + assert(messageCount === numMessages, `expected ${numMessages} messages, got ${messageCount}`); + }); + await runBenchmark(McapIndexedReader.name + "_reverse", async () => { + const reader = await McapIndexedReader.Initialize({ readable: buf }); + let messageCount = 0; + for await (const _ of reader.readMessages({ reverse: true })) { + messageCount++; + } + assert(messageCount === numMessages, `expected ${numMessages} messages, got ${messageCount}`); + }); +} + +export async function benchmarkWriter(): Promise { + await runWriteBenchmark({ numMessages: 1_000_000, messageSize: 1, chunkSize: 1024 * 1024 }); + await runWriteBenchmark({ numMessages: 100_000, messageSize: 1000, chunkSize: 1024 * 1024 }); + await runWriteBenchmark({ numMessages: 100, messageSize: 1_000_000, chunkSize: 1024 * 1024 }); + await runWriteBenchmark({ numMessages: 1_000_000, messageSize: 1, chunkSize: 10 * 1024 * 1024 }); + await runWriteBenchmark({ numMessages: 100_000, messageSize: 1000, chunkSize: 10 * 1024 * 1024 }); + await runWriteBenchmark({ + numMessages: 100, + messageSize: 1_000_000, + chunkSize: 10 * 1024 * 1024, + }); +} + +async function runWriteBenchmark({ numMessages, messageSize, chunkSize, @@ -39,54 +113,49 @@ function addWriteBenchmark({ messageSize: number; chunkSize: number; }) { - return add( + const messageData = new Uint8Array(messageSize).fill(42); + const writable = new FakeMemoryWritable(2 * chunkSize); + await runBenchmark( `count=${numMessages.toLocaleString()} size=${messageSize.toLocaleString()} chunkSize=${chunkSize.toLocaleString()} (1 op ≈ ${( numMessages * messageSize ).toLocaleString()} bytes)`, async () => { - const messageData = new Uint8Array(messageSize).fill(42); - const writable = new FakeMemoryWritable(2 * chunkSize); - return async () => { - writable.reset(); - const writer = new McapWriter({ writable, chunkSize }); - await writer.start({ library: "", profile: "" }); - const channelId = await writer.registerChannel({ - schemaId: 0, - topic: "", - messageEncoding: "", - metadata: new Map([]), + writable.reset(); + const writer = new McapWriter({ writable, chunkSize }); + await writer.start({ 
library: "", profile: "" }); + const channelId = await writer.registerChannel({ + schemaId: 0, + topic: "", + messageEncoding: "", + metadata: new Map([]), + }); + for (let i = 0; i < numMessages; i++) { + await writer.addMessage({ + channelId, + sequence: i, + logTime: BigInt(i), + publishTime: BigInt(i), + data: messageData, }); - for (let i = 0; i < numMessages; i++) { - await writer.addMessage({ - channelId, - sequence: i, - logTime: BigInt(i), - publishTime: BigInt(i), - data: messageData, - }); - } - await writer.end(); - }; + } + await writer.end(); }, ); } -async function benchmarkWriter() { - await suite( - McapWriter.name, - addWriteBenchmark({ numMessages: 1_000_000, messageSize: 1, chunkSize: 1024 * 1024 }), - addWriteBenchmark({ numMessages: 100_000, messageSize: 1000, chunkSize: 1024 * 1024 }), - addWriteBenchmark({ numMessages: 100, messageSize: 1_000_000, chunkSize: 1024 * 1024 }), - addWriteBenchmark({ numMessages: 1_000_000, messageSize: 1, chunkSize: 10 * 1024 * 1024 }), - addWriteBenchmark({ numMessages: 100_000, messageSize: 1000, chunkSize: 10 * 1024 * 1024 }), - addWriteBenchmark({ numMessages: 100, messageSize: 1_000_000, chunkSize: 10 * 1024 * 1024 }), - cycle(), - complete(), - ); -} - -async function main() { - await benchmarkWriter(); +async function main(args: { suite?: string }) { + const { suite } = args; + if (suite == undefined || suite === "writer") { + console.log("Running 'writer' suite"); + await benchmarkWriter(); + } + if (suite == undefined || suite === "reader") { + console.log("Running 'reader' suite"); + await benchmarkReaders(); + } } -void main(); +program + .addOption(program.createOption("--suite ", "Name of suite to run")) + .action(main) + .parse(); diff --git a/typescript/benchmarks/package.json b/typescript/benchmarks/package.json index 925a49dce3..686bfd7c12 100644 --- a/typescript/benchmarks/package.json +++ b/typescript/benchmarks/package.json @@ -17,8 +17,8 @@ "typecheck": "tsc -p tsconfig.json --noEmit", "lint:ci": "eslint --report-unused-disable-directives .", "lint": "eslint --report-unused-disable-directives --fix .", - "bench": "ts-node --files --project tsconfig.cjs.json index.ts", - "bench:debug": "NODE_OPTIONS='--inspect-brk' ts-node --files --project tsconfig.cjs.json index.ts" + "bench": "TS_NODE_TRANSPILE_ONLY=true TS_NODE_FILES=true TS_NODE_PROJECT=tsconfig.cjs.json node --huge-max-old-generation-size --expose-gc -r 'ts-node/register' index.ts", + "bench:debug": "TS_NODE_FILES=true TS_NODE_PROJECT=tsconfig.cjs.json node --huge-max-old-generation-size --inspect-brk --expose-gc -r 'ts-node/register' index.ts" }, "devDependencies": { "@foxglove/eslint-plugin": "1.0.1", @@ -27,7 +27,7 @@ "@types/node": "18.13.0", "@typescript-eslint/eslint-plugin": "6.11.0", "@typescript-eslint/parser": "6.11.0", - "benny": "^3.7.1", + "commander": "12.1.0", "eslint": "8.54.0", "eslint-config-prettier": "9.0.0", "eslint-plugin-es": "4.1.0", diff --git a/typescript/browser/README.md b/typescript/browser/README.md index d3050d47f4..0a0c1b5a2c 100644 --- a/typescript/browser/README.md +++ b/typescript/browser/README.md @@ -30,4 +30,4 @@ async function onInputOrDrop(event: InputEvent | DragEvent) { ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, share feedback, and stay up to date on what our team is working on. +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. 
diff --git a/typescript/core/README.md b/typescript/core/README.md index 2e007000cd..38a49310f7 100644 --- a/typescript/core/README.md +++ b/typescript/core/README.md @@ -14,4 +14,4 @@ Examples of how to use the `@mcap/core` APIs can be found in the [TypeScript exa ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, share feedback, and stay up to date on what our team is working on. +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. diff --git a/typescript/core/package.json b/typescript/core/package.json index cf4b2b05e0..07103c5809 100644 --- a/typescript/core/package.json +++ b/typescript/core/package.json @@ -1,6 +1,6 @@ { "name": "@mcap/core", - "version": "2.1.1", + "version": "2.1.5", "description": "MCAP file support in TypeScript", "license": "MIT", "repository": { diff --git a/typescript/core/src/ChunkCursor.ts b/typescript/core/src/ChunkCursor.ts index 3a33303010..2113cc2b42 100644 --- a/typescript/core/src/ChunkCursor.ts +++ b/typescript/core/src/ChunkCursor.ts @@ -1,3 +1,4 @@ +import Reader from "./Reader"; import { parseRecord } from "./parse"; import { sortedIndexBy } from "./sortedIndexBy"; import { sortedLastIndexBy } from "./sortedLastIndex"; @@ -136,31 +137,25 @@ export class ChunkCursor { messageIndexes.byteLength, ); - let offset = 0; + const reader = new Reader(messageIndexesView); const arrayOfMessageOffsets: [logTime: bigint, offset: bigint][][] = []; - for ( - let result; - (result = parseRecord({ view: messageIndexesView, startOffset: offset, validateCrcs: true })), - result.record; - offset += result.usedBytes - ) { - if (result.record.type !== "MessageIndex") { + let record; + while ((record = parseRecord(reader, true))) { + if (record.type !== "MessageIndex") { continue; } if ( - result.record.records.length === 0 || - (this.#relevantChannels && !this.#relevantChannels.has(result.record.channelId)) + record.records.length === 0 || + (this.#relevantChannels && !this.#relevantChannels.has(record.channelId)) ) { continue; } - arrayOfMessageOffsets.push(result.record.records); + arrayOfMessageOffsets.push(record.records); } - if (offset !== messageIndexesView.byteLength) { - throw new Error( - `${messageIndexesView.byteLength - offset} bytes remaining in message index section`, - ); + if (reader.bytesRemaining() !== 0) { + throw new Error(`${reader.bytesRemaining()} bytes remaining in message index section`); } this.#orderedMessageOffsets = arrayOfMessageOffsets diff --git a/typescript/core/src/McapIndexedReader.ts b/typescript/core/src/McapIndexedReader.ts index 5955300a40..51ec00044e 100644 --- a/typescript/core/src/McapIndexedReader.ts +++ b/typescript/core/src/McapIndexedReader.ts @@ -2,6 +2,7 @@ import { crc32, crc32Final, crc32Init, crc32Update } from "@foxglove/crc"; import Heap from "heap-js"; import { ChunkCursor } from "./ChunkCursor"; +import Reader from "./Reader"; import { MCAP_MAGIC } from "./constants"; import { parseMagic, parseRecord } from "./parse"; import { DecompressHandlers, IReadable, TypedMcapRecords } from "./types"; @@ -111,7 +112,7 @@ export class McapIndexedReader { headerPrefix.byteOffset, headerPrefix.byteLength, ); - void parseMagic(headerPrefixView, 0); + void parseMagic(new Reader(headerPrefixView)); const headerContentLength = headerPrefixView.getBigUint64( MCAP_MAGIC.length + /* Opcode.HEADER */ 1, true, @@ -121,26 +122,19 @@ export class McapIndexedReader { const headerRecord = await 
readable.read(BigInt(MCAP_MAGIC.length), headerReadLength); headerEndOffset = BigInt(MCAP_MAGIC.length) + headerReadLength; - const headerResult = parseRecord({ - view: new DataView(headerRecord.buffer, headerRecord.byteOffset, headerRecord.byteLength), - startOffset: 0, - validateCrcs: true, - }); - if (headerResult.record?.type !== "Header") { + const headerReader = new Reader( + new DataView(headerRecord.buffer, headerRecord.byteOffset, headerRecord.byteLength), + ); + const headerResult = parseRecord(headerReader, true); + if (headerResult?.type !== "Header") { throw new Error( - `Unable to read header at beginning of file; found ${ - headerResult.record?.type ?? "nothing" - }`, + `Unable to read header at beginning of file; found ${headerResult?.type ?? "nothing"}`, ); } - if (headerResult.usedBytes !== headerRecord.byteLength) { - throw new Error( - `${ - headerRecord.byteLength - headerResult.usedBytes - } bytes remaining after parsing header`, - ); + if (headerReader.bytesRemaining() !== 0) { + throw new Error(`${headerReader.bytesRemaining()} bytes remaining after parsing header`); } - header = headerResult.record; + header = headerResult; } function errorWithLibrary(message: string): Error { @@ -179,33 +173,32 @@ export class McapIndexedReader { } try { - void parseMagic(footerAndMagicView, footerAndMagicView.byteLength - MCAP_MAGIC.length); + void parseMagic( + new Reader(footerAndMagicView, footerAndMagicView.byteLength - MCAP_MAGIC.length), + ); } catch (error) { throw errorWithLibrary((error as Error).message); } let footer: TypedMcapRecords["Footer"]; { - const footerResult = parseRecord({ - view: footerAndMagicView, - startOffset: 0, - validateCrcs: true, - }); - if (footerResult.record?.type !== "Footer") { + const footerReader = new Reader(footerAndMagicView); + const footerRecord = parseRecord(footerReader, true); + if (footerRecord?.type !== "Footer") { throw errorWithLibrary( `Unable to read footer from end of file (offset ${footerOffset}); found ${ - footerResult.record?.type ?? "nothing" + footerRecord?.type ?? 
"nothing" }`, ); } - if (footerResult.usedBytes !== footerAndMagicView.byteLength - MCAP_MAGIC.length) { + if (footerReader.bytesRemaining() !== MCAP_MAGIC.length) { throw errorWithLibrary( `${ - footerAndMagicView.byteLength - MCAP_MAGIC.length - footerResult.usedBytes + footerReader.bytesRemaining() - MCAP_MAGIC.length } bytes remaining after parsing footer`, ); } - footer = footerResult.record; + footer = footerRecord; } if (footer.summaryStart === 0n) { throw errorWithLibrary("File is not indexed"); @@ -261,6 +254,7 @@ export class McapIndexedReader { dataEndAndSummarySection.byteOffset, dataEndAndSummarySection.byteLength, ); + const indexReader = new Reader(indexView); const channelsById = new Map(); const schemasById = new Map(); @@ -271,46 +265,42 @@ export class McapIndexedReader { let statistics: TypedMcapRecords["Statistics"] | undefined; let dataSectionCrc: number | undefined; - let offset = 0; - for ( - let result; - (result = parseRecord({ view: indexView, startOffset: offset, validateCrcs: true })), - result.record; - offset += result.usedBytes - ) { - if (offset === 0 && result.record.type !== "DataEnd") { + let first = true; + let result; + while ((result = parseRecord(indexReader, true))) { + if (first && result.type !== "DataEnd") { throw errorWithLibrary( - `Expected DataEnd record to precede summary section, but found ${result.record.type}`, + `Expected DataEnd record to precede summary section, but found ${result.type}`, ); } - switch (result.record.type) { + first = false; + switch (result.type) { case "Schema": - schemasById.set(result.record.id, result.record); + schemasById.set(result.id, result); break; case "Channel": - channelsById.set(result.record.id, result.record); + channelsById.set(result.id, result); break; case "ChunkIndex": - chunkIndexes.push(result.record); + chunkIndexes.push(result); break; case "AttachmentIndex": - attachmentIndexes.push(result.record); + attachmentIndexes.push(result); break; case "MetadataIndex": - metadataIndexes.push(result.record); + metadataIndexes.push(result); break; case "Statistics": if (statistics) { throw errorWithLibrary("Duplicate Statistics record"); } - statistics = result.record; + statistics = result; break; case "SummaryOffset": - summaryOffsetsByOpcode.set(result.record.groupOpcode, result.record); + summaryOffsetsByOpcode.set(result.groupOpcode, result); break; case "DataEnd": - dataSectionCrc = - result.record.dataSectionCrc === 0 ? undefined : result.record.dataSectionCrc; + dataSectionCrc = result.dataSectionCrc === 0 ? undefined : result.dataSectionCrc; break; case "Header": case "Footer": @@ -319,13 +309,13 @@ export class McapIndexedReader { case "MessageIndex": case "Attachment": case "Metadata": - throw errorWithLibrary(`${result.record.type} record not allowed in index section`); + throw errorWithLibrary(`${result.type} record not allowed in index section`); case "Unknown": break; } } - if (offset !== indexView.byteLength) { - throw errorWithLibrary(`${indexView.byteLength - offset} bytes remaining in index section`); + if (indexReader.bytesRemaining() !== 0) { + throw errorWithLibrary(`${indexReader.bytesRemaining()} bytes remaining in index section`); } return new McapIndexedReader({ @@ -395,6 +385,7 @@ export class McapIndexedReader { // cursor becomes active (i.e. when we first need to access messages from the chunk) and removed // when the cursor is removed from the heap. 
const chunkViewCache = new Map(); + const chunkReader = new Reader(new DataView(new ArrayBuffer(0))); for (let cursor; (cursor = chunkCursors.peek()); ) { if (!cursor.hasMessageIndexes()) { // If we encounter a chunk whose message indexes have not been loaded yet, load them and re-organize the heap. @@ -421,27 +412,24 @@ export class McapIndexedReader { `Message offset beyond chunk bounds (log time ${logTime}, offset ${offset}, chunk data length ${chunkView.byteLength}) in chunk at offset ${cursor.chunkIndex.chunkStartOffset}`, ); } - const result = parseRecord({ - view: chunkView, - startOffset: Number(offset), - validateCrcs: validateCrcs ?? true, - }); - if (!result.record) { + chunkReader.reset(chunkView, Number(offset)); + const record = parseRecord(chunkReader, validateCrcs ?? true); + if (!record) { throw this.#errorWithLibrary( `Unable to parse record at offset ${offset} in chunk at offset ${cursor.chunkIndex.chunkStartOffset}`, ); } - if (result.record.type !== "Message") { + if (record.type !== "Message") { throw this.#errorWithLibrary( - `Unexpected record type ${result.record.type} in message index (time ${logTime}, offset ${offset} in chunk at offset ${cursor.chunkIndex.chunkStartOffset})`, + `Unexpected record type ${record.type} in message index (time ${logTime}, offset ${offset} in chunk at offset ${cursor.chunkIndex.chunkStartOffset})`, ); } - if (result.record.logTime !== logTime) { + if (record.logTime !== logTime) { throw this.#errorWithLibrary( - `Message log time ${result.record.logTime} did not match message index entry (${logTime} at offset ${offset} in chunk at offset ${cursor.chunkIndex.chunkStartOffset})`, + `Message log time ${record.logTime} did not match message index entry (${logTime} at offset ${offset} in chunk at offset ${cursor.chunkIndex.chunkStartOffset})`, ); } - yield result.record; + yield record; if (cursor.hasMoreMessages()) { // There is no need to reorganize the heap when chunks are ordered and not overlapping. @@ -468,19 +456,18 @@ export class McapIndexedReader { continue; } const metadataData = await this.#readable.read(metadataIndex.offset, metadataIndex.length); - const metadataResult = parseRecord({ - view: new DataView(metadataData.buffer, metadataData.byteOffset, metadataData.byteLength), - startOffset: 0, - validateCrcs: false, - }); - if (metadataResult.record?.type !== "Metadata") { + const metadataReader = new Reader( + new DataView(metadataData.buffer, metadataData.byteOffset, metadataData.byteLength), + ); + const metadataRecord = parseRecord(metadataReader, false); + if (metadataRecord?.type !== "Metadata") { throw this.#errorWithLibrary( `Metadata data at offset ${ metadataIndex.offset - } does not point to metadata record (found ${String(metadataResult.record?.type)})`, + } does not point to metadata record (found ${String(metadataRecord?.type)})`, ); } - yield metadataResult.record; + yield metadataRecord; } } @@ -519,23 +506,18 @@ export class McapIndexedReader { attachmentIndex.offset, attachmentIndex.length, ); - const attachmentResult = parseRecord({ - view: new DataView( - attachmentData.buffer, - attachmentData.byteOffset, - attachmentData.byteLength, - ), - startOffset: 0, - validateCrcs: validateCrcs ?? true, - }); - if (attachmentResult.record?.type !== "Attachment") { + const attachmentReader = new Reader( + new DataView(attachmentData.buffer, attachmentData.byteOffset, attachmentData.byteLength), + ); + const attachmentRecord = parseRecord(attachmentReader, validateCrcs ?? 
true); + if (attachmentRecord?.type !== "Attachment") { throw this.#errorWithLibrary( `Attachment data at offset ${ attachmentIndex.offset - } does not point to attachment record (found ${String(attachmentResult.record?.type)})`, + } does not point to attachment record (found ${String(attachmentRecord?.type)})`, ); } - yield attachmentResult.record; + yield attachmentRecord; } } @@ -547,20 +529,19 @@ export class McapIndexedReader { chunkIndex.chunkStartOffset, chunkIndex.chunkLength, ); - const chunkResult = parseRecord({ - view: new DataView(chunkData.buffer, chunkData.byteOffset, chunkData.byteLength), - startOffset: 0, - validateCrcs: options?.validateCrcs ?? true, - }); - if (chunkResult.record?.type !== "Chunk") { + const chunkReader = new Reader( + new DataView(chunkData.buffer, chunkData.byteOffset, chunkData.byteLength), + ); + const chunkRecord = parseRecord(chunkReader, options?.validateCrcs ?? true); + if (chunkRecord?.type !== "Chunk") { throw this.#errorWithLibrary( `Chunk start offset ${ chunkIndex.chunkStartOffset - } does not point to chunk record (found ${String(chunkResult.record?.type)})`, + } does not point to chunk record (found ${String(chunkRecord?.type)})`, ); } - const chunk = chunkResult.record; + const chunk = chunkRecord; let buffer = chunk.records; if (chunk.compression !== "" && buffer.byteLength > 0) { const decompress = this.#decompressHandlers?.[chunk.compression]; diff --git a/typescript/core/src/McapStreamReader.test.ts b/typescript/core/src/McapStreamReader.test.ts index 2d392979c3..4b5bdde719 100644 --- a/typescript/core/src/McapStreamReader.test.ts +++ b/typescript/core/src/McapStreamReader.test.ts @@ -1,5 +1,6 @@ import { crc32 } from "@foxglove/crc"; +import { McapRecordBuilder } from "./McapRecordBuilder"; import McapStreamReader from "./McapStreamReader"; import { MCAP_MAGIC, Opcode } from "./constants"; import { @@ -654,4 +655,64 @@ describe("McapStreamReader", () => { }); expect(reader.done()).toBe(true); }); + + it("correctly appends new data to internal buffer", () => { + const streamReader = new McapStreamReader({ includeChunks: true, noMagicPrefix: true }); + const recordBuilder = new McapRecordBuilder(); + + const channel = { + id: 0, + messageEncoding: "json", + schemaId: 0, + topic: "foo", + metadata: new Map(), + }; + const messageSize = 1_000; + const messageRecordBytes = 1 + 8 + 2 + 4 + 8 + 8 + messageSize; + + const makeMessage = (fillNumber: number) => ({ + channelId: 0, + data: new Uint8Array(messageSize).fill(fillNumber), + logTime: 0n, + publishTime: 0n, + sequence: 0, + }); + + const channelByteSize = recordBuilder.writeChannel(channel); + streamReader.append(recordBuilder.buffer); + expect(streamReader.bytesRemaining()).toBe(Number(channelByteSize)); + expect(streamReader.nextRecord()).toEqual({ ...channel, type: "Channel" }); + expect(streamReader.bytesRemaining()).toBe(0); + + // Add some messages and append them to the reader. + recordBuilder.reset(); + recordBuilder.writeMessage(makeMessage(1)); + recordBuilder.writeMessage(makeMessage(2)); + streamReader.append(recordBuilder.buffer); + expect(streamReader.bytesRemaining()).toBe(2 * messageRecordBytes); + + // Add one more message. Nothing has been consumed yet, but the internal buffer should be + // large enough to simply append the new data. 
+ recordBuilder.reset(); + recordBuilder.writeMessage(makeMessage(3)); + streamReader.append(recordBuilder.buffer); + expect(streamReader.bytesRemaining()).toBe(3 * messageRecordBytes); + + // Read some (but not all) messages to forward the reader's internal offset + expect(streamReader.nextRecord()).toEqual({ ...makeMessage(1), type: "Message" }); + expect(streamReader.nextRecord()).toEqual({ ...makeMessage(2), type: "Message" }); + expect(streamReader.bytesRemaining()).toBe(1 * messageRecordBytes); + + // Add more messages. This will cause existing data to be shifted to the beginning of the buffer. + recordBuilder.reset(); + recordBuilder.writeMessage(makeMessage(4)); + recordBuilder.writeMessage(makeMessage(5)); + streamReader.append(recordBuilder.buffer); + expect(streamReader.bytesRemaining()).toBe(3 * messageRecordBytes); + + expect(streamReader.nextRecord()).toEqual({ ...makeMessage(3), type: "Message" }); + expect(streamReader.nextRecord()).toEqual({ ...makeMessage(4), type: "Message" }); + expect(streamReader.nextRecord()).toEqual({ ...makeMessage(5), type: "Message" }); + expect(streamReader.bytesRemaining()).toBe(0); + }); }); diff --git a/typescript/core/src/McapStreamReader.ts b/typescript/core/src/McapStreamReader.ts index ddd412ef38..f04db47739 100644 --- a/typescript/core/src/McapStreamReader.ts +++ b/typescript/core/src/McapStreamReader.ts @@ -1,6 +1,6 @@ import { crc32 } from "@foxglove/crc"; -import StreamBuffer from "./StreamBuffer"; +import Reader from "./Reader"; import { MCAP_MAGIC } from "./constants"; import { parseMagic, parseRecord } from "./parse"; import { Channel, DecompressHandlers, McapMagic, TypedMcapRecord, TypedMcapRecords } from "./types"; @@ -50,7 +50,9 @@ type McapReaderOptions = { * ``` */ export default class McapStreamReader { - #buffer = new StreamBuffer(MCAP_MAGIC.length * 2); + #buffer = new ArrayBuffer(MCAP_MAGIC.length * 2); + #view = new DataView(this.#buffer, 0, 0); + #reader = new Reader(this.#view); #decompressHandlers; #includeChunks; #validateCrcs; @@ -78,7 +80,7 @@ export default class McapStreamReader { /** @returns The number of bytes that have been received by `append()` but not yet parsed. */ bytesRemaining(): number { - return this.#buffer.bytesRemaining(); + return this.#reader.bytesRemaining(); } /** @@ -89,7 +91,62 @@ export default class McapStreamReader { if (this.#doneReading) { throw new Error("Already done reading"); } - this.#buffer.append(data); + this.#appendOrShift(data); + } + + #appendOrShift(data: Uint8Array): void { + /** Add data to the buffer, shifting existing data or reallocating if necessary. */ + const consumedBytes = this.#reader.offset; + const unconsumedBytes = this.#view.byteLength - consumedBytes; + const neededCapacity = unconsumedBytes + data.byteLength; + + if (neededCapacity <= this.#buffer.byteLength) { + // Data fits in the current buffer + if ( + this.#view.byteOffset + this.#view.byteLength + data.byteLength <= + this.#buffer.byteLength + ) { + // Data fits by appending only + const array = new Uint8Array(this.#buffer, this.#view.byteOffset); + array.set(data, this.#view.byteLength); + this.#view = new DataView( + this.#buffer, + this.#view.byteOffset, + this.#view.byteLength + data.byteLength, + ); + // Reset the reader to use the new larger view. We keep the reader's previous offset as the + // view's byte offset didn't change, it only got larger. 
+ this.#reader.reset(this.#view, this.#reader.offset); + } else { + // Data fits but requires moving existing data to start of buffer + const existingData = new Uint8Array( + this.#buffer, + this.#view.byteOffset + consumedBytes, + unconsumedBytes, + ); + const array = new Uint8Array(this.#buffer); + array.set(existingData, 0); + array.set(data, existingData.byteLength); + this.#view = new DataView(this.#buffer, 0, existingData.byteLength + data.byteLength); + this.#reader.reset(this.#view); + } + } else { + // New data doesn't fit, copy to a new buffer + + // Currently, the new buffer size may be smaller than the old size. For future optimizations, + // we could consider making the buffer size increase monotonically. + this.#buffer = new ArrayBuffer(neededCapacity * 2); + const array = new Uint8Array(this.#buffer); + const existingData = new Uint8Array( + this.#view.buffer, + this.#view.byteOffset + consumedBytes, + unconsumedBytes, + ); + array.set(existingData, 0); + array.set(data, existingData.byteLength); + this.#view = new DataView(this.#buffer, 0, existingData.byteLength + data.byteLength); + this.#reader.reset(this.#view); + } } /** @@ -129,11 +186,10 @@ export default class McapStreamReader { *#read(): Generator { if (!this.#noMagicPrefix) { - let magic: McapMagic | undefined, usedBytes: number | undefined; - while ((({ magic, usedBytes } = parseMagic(this.#buffer.view, 0)), !magic)) { + let magic: McapMagic | undefined; + while (((magic = parseMagic(this.#reader)), !magic)) { yield; } - this.#buffer.consume(usedBytes); } let header: TypedMcapRecords["Header"] | undefined; @@ -144,20 +200,10 @@ export default class McapStreamReader { for (;;) { let record; - { - let usedBytes; - while ( - (({ record, usedBytes } = parseRecord({ - view: this.#buffer.view, - startOffset: 0, - validateCrcs: this.#validateCrcs, - })), - !record) - ) { - yield; - } - this.#buffer.consume(usedBytes); + while (((record = parseRecord(this.#reader, this.#validateCrcs)), !record)) { + yield; } + switch (record.type) { case "Unknown": break; @@ -206,18 +252,10 @@ export default class McapStreamReader { } } const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength); - let chunkOffset = 0; - for ( - let chunkResult; - (chunkResult = parseRecord({ - view, - startOffset: chunkOffset, - validateCrcs: this.#validateCrcs, - })), - chunkResult.record; - chunkOffset += chunkResult.usedBytes - ) { - switch (chunkResult.record.type) { + const chunkReader = new Reader(view); + let chunkRecord; + while ((chunkRecord = parseRecord(chunkReader, this.#validateCrcs))) { + switch (chunkRecord.type) { case "Unknown": break; case "Header": @@ -232,34 +270,31 @@ export default class McapStreamReader { case "MetadataIndex": case "SummaryOffset": case "DataEnd": - throw errorWithLibrary( - `${chunkResult.record.type} record not allowed inside a chunk`, - ); + throw errorWithLibrary(`${chunkRecord.type} record not allowed inside a chunk`); case "Schema": case "Channel": case "Message": - yield chunkResult.record; + yield chunkRecord; break; } } - if (chunkOffset !== buffer.byteLength) { - throw errorWithLibrary(`${buffer.byteLength - chunkOffset} bytes remaining in chunk`); + if (chunkReader.bytesRemaining() !== 0) { + throw errorWithLibrary(`${chunkReader.bytesRemaining()} bytes remaining in chunk`); } break; } case "Footer": try { - let magic, usedBytes; - while ((({ magic, usedBytes } = parseMagic(this.#buffer.view, 0)), !magic)) { + let magic; + while (((magic = parseMagic(this.#reader)), !magic)) { yield; 
} - this.#buffer.consume(usedBytes); } catch (error) { throw errorWithLibrary((error as Error).message); } - if (this.#buffer.bytesRemaining() !== 0) { + if (this.#reader.bytesRemaining() !== 0) { throw errorWithLibrary( - `${this.#buffer.bytesRemaining()} bytes remaining after MCAP footer and trailing magic`, + `${this.#reader.bytesRemaining()} bytes remaining after MCAP footer and trailing magic`, ); } return record; diff --git a/typescript/core/src/McapWriter.test.ts b/typescript/core/src/McapWriter.test.ts index 57bdcd82fc..e16bc515d5 100644 --- a/typescript/core/src/McapWriter.test.ts +++ b/typescript/core/src/McapWriter.test.ts @@ -3,6 +3,7 @@ import { crc32 } from "@foxglove/crc"; import { McapIndexedReader } from "./McapIndexedReader"; import McapStreamReader from "./McapStreamReader"; import { McapWriter } from "./McapWriter"; +import Reader from "./Reader"; import { TempBuffer } from "./TempBuffer"; import { MCAP_MAGIC, Opcode } from "./constants"; import { parseMagic, parseRecord } from "./parse"; @@ -278,13 +279,12 @@ describe("McapWriter", () => { const array = tempBuffer.get(); const view = new DataView(array.buffer, array.byteOffset, array.byteLength); + const reader = new Reader(view); const records: TypedMcapRecord[] = []; - for ( - let offset = parseMagic(view, 0).usedBytes, result; - (result = parseRecord({ view, startOffset: offset, validateCrcs: true })), result.record; - offset += result.usedBytes - ) { - records.push(result.record); + parseMagic(reader); + let result; + while ((result = parseRecord(reader, true))) { + records.push(result); } const expectedChunkData = new Uint8Array([ diff --git a/typescript/core/src/Reader.ts b/typescript/core/src/Reader.ts index fcc2887237..d0136c648b 100644 --- a/typescript/core/src/Reader.ts +++ b/typescript/core/src/Reader.ts @@ -7,13 +7,27 @@ const textDecoder = new TextDecoder(); export default class Reader { #view: DataView; + #viewU8: Uint8Array; offset: number; constructor(view: DataView, offset = 0) { this.#view = view; + this.#viewU8 = new Uint8Array(view.buffer, view.byteOffset, view.byteLength); this.offset = offset; } + // Should be ~identical to the constructor, it allows us to reinitialize the reader when + // the view changes, without creating a new instance, avoiding allocation / GC overhead + reset(view: DataView, offset = 0): void { + this.#view = view; + this.#viewU8 = new Uint8Array(view.buffer, view.byteOffset, view.byteLength); + this.offset = offset; + } + + bytesRemaining(): number { + return this.#viewU8.length - this.offset; + } + uint8(): number { const value = this.#view.getUint8(this.offset); this.offset += 1; @@ -40,14 +54,12 @@ export default class Reader { string(): string { const length = this.uint32(); - if (this.offset + length > this.#view.byteLength) { + if (length === 0) { + return ""; + } else if (length > this.bytesRemaining()) { throw new Error(`String length ${length} exceeds bounds of buffer`); } - const value = textDecoder.decode( - new Uint8Array(this.#view.buffer, this.#view.byteOffset + this.offset, length), - ); - this.offset += length; - return value; + return textDecoder.decode(this.u8ArrayBorrow(length)); } keyValuePairs(readKey: (reader: Reader) => K, readValue: (reader: Reader) => V): [K, V][] { @@ -103,4 +115,18 @@ export default class Reader { } return result; } + + // Read a borrowed Uint8Array, useful temp references or borrow semantics + u8ArrayBorrow(length: number): Uint8Array { + const result = this.#viewU8.subarray(this.offset, this.offset + length); + this.offset += 
length; + return result; + } + + // Read a copied Uint8Array from the underlying buffer, use when you need to keep the data around + u8ArrayCopy(length: number): Uint8Array { + const result = this.#viewU8.slice(this.offset, this.offset + length); + this.offset += length; + return result; + } } diff --git a/typescript/core/src/StreamBuffer.test.ts b/typescript/core/src/StreamBuffer.test.ts deleted file mode 100644 index a45175b3e3..0000000000 --- a/typescript/core/src/StreamBuffer.test.ts +++ /dev/null @@ -1,47 +0,0 @@ -import StreamBuffer from "./StreamBuffer"; - -function toArray(view: DataView) { - return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); -} - -describe("ByteStorage", () => { - it("handles basic append and consume", () => { - const buffer = new StreamBuffer(); - expect(buffer.bytesRemaining()).toBe(0); - - buffer.append(new Uint8Array([1, 2, 3])); - expect(buffer.bytesRemaining()).toBe(3); - expect(() => { - buffer.consume(4); - }).toThrow(); - - expect(toArray(buffer.view)).toEqual(new Uint8Array([1, 2, 3])); - buffer.consume(3); - expect(buffer.bytesRemaining()).toBe(0); - }); - - it("handles partial consume", () => { - const buffer = new StreamBuffer(); - - buffer.append(new Uint8Array([1, 2, 3, 4, 5])); - expect(buffer.bytesRemaining()).toBe(5); - buffer.consume(2); - expect(buffer.bytesRemaining()).toBe(3); - - expect(toArray(buffer.view)).toEqual(new Uint8Array([3, 4, 5])); - buffer.consume(3); - expect(buffer.bytesRemaining()).toBe(0); - }); - - it("reuses buffer within allocated capacity", () => { - const buffer = new StreamBuffer(5); - const rawBuffer = buffer.view.buffer; - buffer.append(new Uint8Array([1, 2])); - expect(buffer.view.buffer).toBe(rawBuffer); - buffer.append(new Uint8Array([3, 4, 5])); - expect(buffer.view.buffer).toBe(rawBuffer); - buffer.append(new Uint8Array([6, 7])); - expect(buffer.view.buffer).not.toBe(rawBuffer); - expect(toArray(buffer.view)).toEqual(new Uint8Array([1, 2, 3, 4, 5, 6, 7])); - }); -}); diff --git a/typescript/core/src/StreamBuffer.ts b/typescript/core/src/StreamBuffer.ts deleted file mode 100644 index 98eaa785d5..0000000000 --- a/typescript/core/src/StreamBuffer.ts +++ /dev/null @@ -1,58 +0,0 @@ -/** - * A growable buffer for use when processing a stream of data. - */ -export default class StreamBuffer { - #buffer: ArrayBuffer; - public view: DataView; - - constructor(initialCapacity = 0) { - this.#buffer = new ArrayBuffer(initialCapacity); - this.view = new DataView(this.#buffer, 0, 0); - } - - bytesRemaining(): number { - return this.view.byteLength; - } - - /** Mark some data as consumed, so the memory can be reused when new data is appended. */ - consume(count: number): void { - this.view = new DataView( - this.#buffer, - this.view.byteOffset + count, - this.view.byteLength - count, - ); - } - - /** Add data to the buffer, shifting existing data or reallocating if necessary. 
*/ - append(data: Uint8Array): void { - if (this.view.byteOffset + this.view.byteLength + data.byteLength <= this.#buffer.byteLength) { - // Data fits by appending only - const array = new Uint8Array(this.view.buffer, this.view.byteOffset); - array.set(data, this.view.byteLength); - this.view = new DataView( - this.#buffer, - this.view.byteOffset, - this.view.byteLength + data.byteLength, - ); - } else if (this.view.byteLength + data.byteLength <= this.#buffer.byteLength) { - // Data fits in allocated buffer but requires moving existing data to start of buffer - const oldData = new Uint8Array(this.#buffer, this.view.byteOffset, this.view.byteLength); - const array = new Uint8Array(this.#buffer); - array.set(oldData, 0); - array.set(data, oldData.byteLength); - this.view = new DataView(this.#buffer, 0, this.view.byteLength + data.byteLength); - } else { - // New data doesn't fit, copy to a new buffer - - // Currently, the new buffer size may be smaller than the old size. For future optimizations, - // we could consider making the buffer size increase monotonically. - - const oldData = new Uint8Array(this.#buffer, this.view.byteOffset, this.view.byteLength); - this.#buffer = new ArrayBuffer((this.view.byteLength + data.byteLength) * 2); - const array = new Uint8Array(this.#buffer); - array.set(oldData, 0); - array.set(data, oldData.byteLength); - this.view = new DataView(this.#buffer, 0, this.view.byteLength + data.byteLength); - } - } -} diff --git a/typescript/core/src/parse.ts b/typescript/core/src/parse.ts index 1d9672fea8..95d0105750 100644 --- a/typescript/core/src/parse.ts +++ b/typescript/core/src/parse.ts @@ -1,373 +1,419 @@ import { crc32 } from "@foxglove/crc"; import Reader from "./Reader"; -import { isKnownOpcode, MCAP_MAGIC, Opcode } from "./constants"; +import { MCAP_MAGIC, Opcode } from "./constants"; import { McapMagic, TypedMcapRecord } from "./types"; /** * Parse a MCAP magic string at `startOffset` in `view`. */ -export function parseMagic( - view: DataView, - startOffset: number, -): { magic: McapMagic; usedBytes: number } | { magic?: undefined; usedBytes: 0 } { - if (startOffset + MCAP_MAGIC.length > view.byteLength) { - return { usedBytes: 0 }; +export function parseMagic(reader: Reader): McapMagic | undefined { + if (reader.bytesRemaining() < MCAP_MAGIC.length) { + return undefined; } - if (!MCAP_MAGIC.every((val, i) => val === view.getUint8(startOffset + i))) { + const magic = reader.u8ArrayBorrow(MCAP_MAGIC.length); + if (!MCAP_MAGIC.every((val, i) => val === magic[i])) { throw new Error( `Expected MCAP magic '${MCAP_MAGIC.map((val) => val.toString(16).padStart(2, "0")).join( " ", - )}', found '${Array.from(MCAP_MAGIC, (_, i) => - view - .getUint8(startOffset + i) - .toString(16) - .padStart(2, "0"), - ).join(" ")}'`, + )}', found '${Array.from(magic, (_, i) => magic[i]!.toString(16).padStart(2, "0")).join( + " ", + )}'`, ); } - return { - magic: { specVersion: "0" }, - usedBytes: MCAP_MAGIC.length, - }; + return { specVersion: "0" }; } /** - * Parse a MCAP record beginning at `startOffset` in `view`. 
+ * Parse a MCAP record from the given reader */ -export function parseRecord({ - view, - startOffset, - validateCrcs, -}: { - view: DataView; - startOffset: number; - validateCrcs: boolean; -}): { record: TypedMcapRecord; usedBytes: number } | { record?: undefined; usedBytes: 0 } { - if (startOffset + /*opcode*/ 1 + /*record content length*/ 8 >= view.byteLength) { - return { usedBytes: 0 }; +// NOTE: internal function in the hot path, (de)structuring args would be wasteful, acceptable perf/clarity tradeoff +// eslint-disable-next-line @foxglove/no-boolean-parameters +export function parseRecord(reader: Reader, validateCrcs = false): TypedMcapRecord | undefined { + const RECORD_HEADER_SIZE = 1 /*opcode*/ + 8; /*record content length*/ + if (reader.bytesRemaining() < RECORD_HEADER_SIZE) { + return undefined; } - const headerReader = new Reader(view, startOffset); + const start = reader.offset; + const opcode = reader.uint8(); + const recordLength = reader.uint64(); - const opcode = headerReader.uint8(); - - const recordLength = headerReader.uint64(); if (recordLength > Number.MAX_SAFE_INTEGER) { throw new Error(`Record content length ${recordLength} is too large`); } + const recordLengthNum = Number(recordLength); - const recordEndOffset = headerReader.offset + recordLengthNum; - if (recordEndOffset > view.byteLength) { - return { usedBytes: 0 }; + + if (reader.bytesRemaining() < recordLengthNum) { + reader.offset = start; // Rewind to the start of the record + return undefined; } - if (!isKnownOpcode(opcode)) { - const record: TypedMcapRecord = { - type: "Unknown", - opcode, - data: new Uint8Array(view.buffer, view.byteOffset + headerReader.offset, recordLengthNum), - }; - return { record, usedBytes: recordEndOffset - startOffset }; + let result: TypedMcapRecord; + switch (opcode as Opcode) { + case Opcode.HEADER: + result = parseHeader(reader, recordLengthNum); + break; + case Opcode.FOOTER: + result = parseFooter(reader, recordLengthNum); + break; + case Opcode.SCHEMA: + result = parseSchema(reader, recordLengthNum); + break; + case Opcode.CHANNEL: + result = parseChannel(reader, recordLengthNum); + break; + case Opcode.MESSAGE: + result = parseMessage(reader, recordLengthNum); + break; + case Opcode.CHUNK: + result = parseChunk(reader, recordLengthNum); + break; + case Opcode.MESSAGE_INDEX: + result = parseMessageIndex(reader, recordLengthNum); + break; + case Opcode.CHUNK_INDEX: + result = parseChunkIndex(reader, recordLengthNum); + break; + case Opcode.ATTACHMENT: + result = parseAttachment(reader, recordLengthNum, validateCrcs); + break; + case Opcode.ATTACHMENT_INDEX: + result = parseAttachmentIndex(reader, recordLengthNum); + break; + case Opcode.STATISTICS: + result = parseStatistics(reader, recordLengthNum); + break; + case Opcode.METADATA: + result = parseMetadata(reader, recordLengthNum); + break; + case Opcode.METADATA_INDEX: + result = parseMetadataIndex(reader, recordLengthNum); + break; + case Opcode.SUMMARY_OFFSET: + result = parseSummaryOffset(reader, recordLengthNum); + break; + case Opcode.DATA_END: + result = parseDataEnd(reader, recordLengthNum); + break; + default: + result = parseUnknown(reader, recordLengthNum, opcode); + break; } - const recordView = new DataView( - view.buffer, - view.byteOffset + headerReader.offset, - recordLengthNum, + // NOTE: a bit redundant, but ensures we've advanced by the full record length + // TODO: simplify this when we explore monomorphic paths + reader.offset = start + RECORD_HEADER_SIZE + recordLengthNum; + + return result; +} + 
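To make the shape of the refactored parsing API concrete, here is a minimal sketch of the cursor-style loop that `McapStreamReader`, `McapIndexedReader`, and the updated tests now use: a single `Reader` is advanced in place, and `parseRecord` returns `undefined` once no complete record remains. Assumptions: imports are relative to `typescript/core/src`, and `bytes` holds a complete in-memory MCAP file.

```ts
// Sketch only: read every record from an in-memory MCAP file with the Reader-based API.
import Reader from "./Reader";
import { parseMagic, parseRecord } from "./parse";
import { TypedMcapRecord } from "./types";

function readAllRecords(bytes: Uint8Array): TypedMcapRecord[] {
  const reader = new Reader(new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength));
  // Throws on a bad magic; returns undefined only when fewer than 8 bytes are available.
  parseMagic(reader);
  const records: TypedMcapRecord[] = [];
  let record;
  while ((record = parseRecord(reader, true))) {
    records.push(record);
  }
  // The 8 trailing magic bytes are shorter than a record header, so the loop ends there.
  return records;
}
```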
+function parseUnknown(reader: Reader, recordLength: number, opcode: number): TypedMcapRecord { + const data = reader.u8ArrayBorrow(recordLength); + return { + type: "Unknown", + opcode, + data, + }; +} + +function parseHeader(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const profile = reader.string(); + const library = reader.string(); + reader.offset = startOffset + recordLength; + return { type: "Header", profile, library }; +} + +function parseFooter(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const summaryStart = reader.uint64(); + const summaryOffsetStart = reader.uint64(); + const summaryCrc = reader.uint32(); + reader.offset = startOffset + recordLength; + return { + type: "Footer", + summaryStart, + summaryOffsetStart, + summaryCrc, + }; +} + +function parseSchema(reader: Reader, recordLength: number): TypedMcapRecord { + const start = reader.offset; + const id = reader.uint16(); + const name = reader.string(); + const encoding = reader.string(); + const dataLen = reader.uint32(); + const end = reader.offset; + if (recordLength - (end - start) < dataLen) { + throw new Error(`Schema data length ${dataLen} exceeds bounds of record`); + } + const data = reader.u8ArrayCopy(dataLen); + reader.offset = start + recordLength; + + return { + type: "Schema", + id, + encoding, + name, + data, + }; +} + +function parseChannel(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const channelId = reader.uint16(); + const schemaId = reader.uint16(); + const topicName = reader.string(); + const messageEncoding = reader.string(); + const metadata = reader.map( + (r) => r.string(), + (r) => r.string(), ); - const reader = new Reader(recordView); - - switch (opcode) { - case Opcode.HEADER: { - const profile = reader.string(); - const library = reader.string(); - const record: TypedMcapRecord = { type: "Header", profile, library }; - return { record, usedBytes: recordEndOffset - startOffset }; - } + reader.offset = startOffset + recordLength; - case Opcode.FOOTER: { - const summaryStart = reader.uint64(); - const summaryOffsetStart = reader.uint64(); - const summaryCrc = reader.uint32(); - const record: TypedMcapRecord = { - type: "Footer", - summaryStart, - summaryOffsetStart, - summaryCrc, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } + return { + type: "Channel", + id: channelId, + schemaId, + topic: topicName, + messageEncoding, + metadata, + }; +} - case Opcode.SCHEMA: { - const id = reader.uint16(); - const name = reader.string(); - const encoding = reader.string(); - const dataLen = reader.uint32(); - if (reader.offset + dataLen > recordView.byteLength) { - throw new Error(`Schema data length ${dataLen} exceeds bounds of record`); - } - const data = new Uint8Array( - recordView.buffer.slice( - recordView.byteOffset + reader.offset, - recordView.byteOffset + reader.offset + dataLen, - ), - ); - reader.offset += dataLen; - - const record: TypedMcapRecord = { - type: "Schema", - id, - encoding, - name, - data, - }; - - return { record, usedBytes: recordEndOffset - startOffset }; - } +function parseMessage(reader: Reader, recordLength: number): TypedMcapRecord { + const MESSAGE_PREFIX_SIZE = 2 + 4 + 8 + 8; // channelId, sequence, logTime, publishTime + const channelId = reader.uint16(); + const sequence = reader.uint32(); + const logTime = reader.uint64(); + const publishTime = reader.uint64(); + const data = 
reader.u8ArrayCopy(recordLength - MESSAGE_PREFIX_SIZE); + return { + type: "Message", + channelId, + sequence, + logTime, + publishTime, + data, + }; +} - case Opcode.CHANNEL: { - const channelId = reader.uint16(); - const schemaId = reader.uint16(); - const topicName = reader.string(); - const messageEncoding = reader.string(); - const metadata = reader.map( - (r) => r.string(), - (r) => r.string(), - ); - - const record: TypedMcapRecord = { - type: "Channel", - id: channelId, - schemaId, - topic: topicName, - messageEncoding, - metadata, - }; - - return { record, usedBytes: recordEndOffset - startOffset }; - } +function parseChunk(reader: Reader, recordLength: number): TypedMcapRecord { + const start = reader.offset; + const startTime = reader.uint64(); + const endTime = reader.uint64(); + const uncompressedSize = reader.uint64(); + const uncompressedCrc = reader.uint32(); + const compression = reader.string(); + const recordsByteLength = Number(reader.uint64()); + const end = reader.offset; + const prefixSize = end - start; + if (recordsByteLength + prefixSize > recordLength) { + throw new Error("Chunk records length exceeds remaining record size"); + } + const records = reader.u8ArrayCopy(recordsByteLength); + reader.offset = start + recordLength; + return { + type: "Chunk", + messageStartTime: startTime, + messageEndTime: endTime, + compression, + uncompressedSize, + uncompressedCrc, + records, + }; +} - case Opcode.MESSAGE: { - const channelId = reader.uint16(); - const sequence = reader.uint32(); - const logTime = reader.uint64(); - const publishTime = reader.uint64(); - const data = new Uint8Array( - recordView.buffer.slice( - recordView.byteOffset + reader.offset, - recordView.byteOffset + recordView.byteLength, - ), - ); - const record: TypedMcapRecord = { - type: "Message", - channelId, - sequence, - logTime, - publishTime, - data, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } +function parseMessageIndex(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const channelId = reader.uint16(); + const records = reader.keyValuePairs( + (r) => r.uint64(), + (r) => r.uint64(), + ); + reader.offset = startOffset + recordLength; + return { + type: "MessageIndex", + channelId, + records, + }; +} - case Opcode.CHUNK: { - const startTime = reader.uint64(); - const endTime = reader.uint64(); - const uncompressedSize = reader.uint64(); - const uncompressedCrc = reader.uint32(); - const compression = reader.string(); - const recordByteLength = Number(reader.uint64()); - if (recordByteLength + reader.offset > recordView.byteLength) { - throw new Error("Chunk records length exceeds remaining record size"); - } - const records = new Uint8Array( - recordView.buffer.slice( - recordView.byteOffset + reader.offset, - recordView.byteOffset + reader.offset + recordByteLength, - ), - ); - const record: TypedMcapRecord = { - type: "Chunk", - messageStartTime: startTime, - messageEndTime: endTime, - compression, - uncompressedSize, - uncompressedCrc, - records, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } +function parseChunkIndex(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const messageStartTime = reader.uint64(); + const messageEndTime = reader.uint64(); + const chunkStartOffset = reader.uint64(); + const chunkLength = reader.uint64(); + const messageIndexOffsets = reader.map( + (r) => r.uint16(), + (r) => r.uint64(), + ); + const messageIndexLength = 
reader.uint64(); + const compression = reader.string(); + const compressedSize = reader.uint64(); + const uncompressedSize = reader.uint64(); + reader.offset = startOffset + recordLength; + return { + type: "ChunkIndex", + messageStartTime, + messageEndTime, + chunkStartOffset, + chunkLength, + messageIndexOffsets, + messageIndexLength, + compression, + compressedSize, + uncompressedSize, + }; +} - case Opcode.MESSAGE_INDEX: { - const channelId = reader.uint16(); - const records = reader.keyValuePairs( - (r) => r.uint64(), - (r) => r.uint64(), - ); - const record: TypedMcapRecord = { - type: "MessageIndex", - channelId, - records, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.CHUNK_INDEX: { - const messageStartTime = reader.uint64(); - const messageEndTime = reader.uint64(); - const chunkStartOffset = reader.uint64(); - const chunkLength = reader.uint64(); - const messageIndexOffsets = reader.map( - (r) => r.uint16(), - (r) => r.uint64(), - ); - const messageIndexLength = reader.uint64(); - const compression = reader.string(); - const compressedSize = reader.uint64(); - const uncompressedSize = reader.uint64(); - const record: TypedMcapRecord = { - type: "ChunkIndex", - messageStartTime, - messageEndTime, - chunkStartOffset, - chunkLength, - messageIndexOffsets, - messageIndexLength, - compression, - compressedSize, - uncompressedSize, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.ATTACHMENT: { - const logTime = reader.uint64(); - const createTime = reader.uint64(); - const name = reader.string(); - const mediaType = reader.string(); - const dataLen = reader.uint64(); - if (BigInt(recordView.byteOffset + reader.offset) + dataLen > Number.MAX_SAFE_INTEGER) { - throw new Error(`Attachment too large: ${dataLen}`); - } - if (reader.offset + Number(dataLen) + 4 /*crc*/ > recordView.byteLength) { - throw new Error(`Attachment data length ${dataLen} exceeds bounds of record`); - } - const data = new Uint8Array( - recordView.buffer.slice( - recordView.byteOffset + reader.offset, - recordView.byteOffset + reader.offset + Number(dataLen), - ), - ); - reader.offset += Number(dataLen); - const crcLength = reader.offset; - const expectedCrc = reader.uint32(); - if (validateCrcs && expectedCrc !== 0) { - const actualCrc = crc32(new DataView(recordView.buffer, recordView.byteOffset, crcLength)); - if (actualCrc !== expectedCrc) { - throw new Error( - `Attachment CRC32 mismatch: expected ${expectedCrc}, actual ${actualCrc}`, - ); - } - } - - const record: TypedMcapRecord = { - type: "Attachment", - logTime, - createTime, - name, - mediaType, - data, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.ATTACHMENT_INDEX: { - const offset = reader.uint64(); - const length = reader.uint64(); - const logTime = reader.uint64(); - const createTime = reader.uint64(); - const dataSize = reader.uint64(); - const name = reader.string(); - const mediaType = reader.string(); - - const record: TypedMcapRecord = { - type: "AttachmentIndex", - offset, - length, - logTime, - createTime, - dataSize, - name, - mediaType, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.STATISTICS: { - const messageCount = reader.uint64(); - const schemaCount = reader.uint16(); - const channelCount = reader.uint32(); - const attachmentCount = reader.uint32(); - const metadataCount = reader.uint32(); - const chunkCount = reader.uint32(); - const messageStartTime = reader.uint64(); - const 
messageEndTime = reader.uint64(); - const channelMessageCounts = reader.map( - (r) => r.uint16(), - (r) => r.uint64(), - ); - - const record: TypedMcapRecord = { - type: "Statistics", - messageCount, - schemaCount, - channelCount, - attachmentCount, - metadataCount, - chunkCount, - messageStartTime, - messageEndTime, - channelMessageCounts, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.METADATA: { - const name = reader.string(); - const metadata = reader.map( - (r) => r.string(), - (r) => r.string(), - ); - const record: TypedMcapRecord = { type: "Metadata", metadata, name }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.METADATA_INDEX: { - const offset = reader.uint64(); - const length = reader.uint64(); - const name = reader.string(); - - const record: TypedMcapRecord = { - type: "MetadataIndex", - offset, - length, - name, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.SUMMARY_OFFSET: { - const groupOpcode = reader.uint8(); - const groupStart = reader.uint64(); - const groupLength = reader.uint64(); - - const record: TypedMcapRecord = { - type: "SummaryOffset", - groupOpcode, - groupStart, - groupLength, - }; - return { record, usedBytes: recordEndOffset - startOffset }; - } - case Opcode.DATA_END: { - const dataSectionCrc = reader.uint32(); - const record: TypedMcapRecord = { - type: "DataEnd", - dataSectionCrc, - }; - return { record, usedBytes: recordEndOffset - startOffset }; +function parseAttachment( + reader: Reader, + recordLength: number, + // NOTE: internal function in the hot path, (de)structuring args would be wasteful, acceptable perf/clarity tradeoff + // eslint-disable-next-line @foxglove/no-boolean-parameters + validateCrcs: boolean, +): TypedMcapRecord { + const startOffset = reader.offset; + const logTime = reader.uint64(); + const createTime = reader.uint64(); + const name = reader.string(); + const mediaType = reader.string(); + const dataLen = reader.uint64(); + // NOTE: probably not necessary, but just in case + if (BigInt(reader.offset) + dataLen > Number.MAX_SAFE_INTEGER) { + throw new Error(`Attachment too large: ${dataLen}`); + } + if (reader.offset + Number(dataLen) + 4 /*crc*/ > startOffset + recordLength) { + throw new Error(`Attachment data length ${dataLen} exceeds bounds of record`); + } + const data = reader.u8ArrayCopy(Number(dataLen)); + const crcLength = reader.offset - startOffset; + const expectedCrc = reader.uint32(); + if (validateCrcs && expectedCrc !== 0) { + reader.offset = startOffset; + const fullData = reader.u8ArrayBorrow(crcLength); + const actualCrc = crc32(fullData); + reader.offset = startOffset + crcLength + 4; + if (actualCrc !== expectedCrc) { + throw new Error(`Attachment CRC32 mismatch: expected ${expectedCrc}, actual ${actualCrc}`); } } + reader.offset = startOffset + recordLength; + + return { + type: "Attachment", + logTime, + createTime, + name, + mediaType, + data, + }; +} + +function parseAttachmentIndex(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const offset = reader.uint64(); + const length = reader.uint64(); + const logTime = reader.uint64(); + const createTime = reader.uint64(); + const dataSize = reader.uint64(); + const name = reader.string(); + const mediaType = reader.string(); + reader.offset = startOffset + recordLength; + + return { + type: "AttachmentIndex", + offset, + length, + logTime, + createTime, + dataSize, + name, + mediaType, + }; +} + 
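// Every parse* helper in this patch follows the same contract: it is handed the Reader
// positioned at the start of the record body plus the record's declared length, reads its
// fields, and then snaps reader.offset to startOffset + recordLength so trailing bytes
// (for example fields added by a later spec revision) are skipped instead of misparsed.
// A minimal sketch of how a top-level dispatcher might route opcodes to these helpers —
// the patch's actual dispatch code is not shown in this excerpt, so the signature below is
// an assumption:
function parseRecordBody(
  reader: Reader,
  opcode: Opcode,
  recordLength: number,
  // eslint-disable-next-line @foxglove/no-boolean-parameters
  validateCrcs: boolean,
): TypedMcapRecord {
  switch (opcode) {
    case Opcode.CHUNK:
      return parseChunk(reader, recordLength);
    case Opcode.MESSAGE_INDEX:
      return parseMessageIndex(reader, recordLength);
    case Opcode.CHUNK_INDEX:
      return parseChunkIndex(reader, recordLength);
    case Opcode.ATTACHMENT:
      return parseAttachment(reader, recordLength, validateCrcs);
    case Opcode.ATTACHMENT_INDEX:
      return parseAttachmentIndex(reader, recordLength);
    case Opcode.STATISTICS:
      return parseStatistics(reader, recordLength);
    case Opcode.METADATA:
      return parseMetadata(reader, recordLength);
    case Opcode.METADATA_INDEX:
      return parseMetadataIndex(reader, recordLength);
    case Opcode.SUMMARY_OFFSET:
      return parseSummaryOffset(reader, recordLength);
    case Opcode.DATA_END:
      return parseDataEnd(reader, recordLength);
    default:
      // Header/Schema/Channel/Message presumably have analogous helpers earlier in the file;
      // anything else is rejected here rather than silently skipped.
      throw new Error(`unhandled opcode ${opcode}`);
  }
}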
+function parseStatistics(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const messageCount = reader.uint64(); + const schemaCount = reader.uint16(); + const channelCount = reader.uint32(); + const attachmentCount = reader.uint32(); + const metadataCount = reader.uint32(); + const chunkCount = reader.uint32(); + const messageStartTime = reader.uint64(); + const messageEndTime = reader.uint64(); + const channelMessageCounts = reader.map( + (r) => r.uint16(), + (r) => r.uint64(), + ); + reader.offset = startOffset + recordLength; + + return { + type: "Statistics", + messageCount, + schemaCount, + channelCount, + attachmentCount, + metadataCount, + chunkCount, + messageStartTime, + messageEndTime, + channelMessageCounts, + }; +} + +function parseMetadata(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const name = reader.string(); + const metadata = reader.map( + (r) => r.string(), + (r) => r.string(), + ); + reader.offset = startOffset + recordLength; + return { type: "Metadata", metadata, name }; +} + +function parseMetadataIndex(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const offset = reader.uint64(); + const length = reader.uint64(); + const name = reader.string(); + reader.offset = startOffset + recordLength; + + return { + type: "MetadataIndex", + offset, + length, + name, + }; +} + +function parseSummaryOffset(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const groupOpcode = reader.uint8(); + const groupStart = reader.uint64(); + const groupLength = reader.uint64(); + reader.offset = startOffset + recordLength; + + return { + type: "SummaryOffset", + groupOpcode, + groupStart, + groupLength, + }; +} + +function parseDataEnd(reader: Reader, recordLength: number): TypedMcapRecord { + const startOffset = reader.offset; + const dataSectionCrc = reader.uint32(); + reader.offset = startOffset + recordLength; + return { + type: "DataEnd", + dataSectionCrc, + }; } diff --git a/typescript/examples/validate/scripts/validate.ts b/typescript/examples/validate/scripts/validate.ts index 150186f794..457bec0fcc 100644 --- a/typescript/examples/validate/scripts/validate.ts +++ b/typescript/examples/validate/scripts/validate.ts @@ -139,7 +139,7 @@ async function validate( throw new Error(`Missing schema ${record.schemaId} for channel ${record.id}`); } let messageDeserializer: (data: ArrayBufferView) => unknown; - if (record.messageEncoding === "ros1") { + if (schema.encoding === "ros1msg" && record.messageEncoding === "ros1") { const reader = new ROS1LazyMessageReader( parseMessageDefinition(new TextDecoder().decode(schema.data)), ); @@ -150,14 +150,14 @@ async function validate( } return reader.readMessage(data).toJSON(); }; - } else if (record.messageEncoding === "ros2") { + } else if (schema.encoding === "ros2msg" && record.messageEncoding === "cdr") { const reader = new ROS2MessageReader( parseMessageDefinition(new TextDecoder().decode(schema.data), { ros2: true, }), ); messageDeserializer = (data) => reader.readMessage(data); - } else if (record.messageEncoding === "protobuf") { + } else if (schema.encoding === "protobuf" && record.messageEncoding === "protobuf") { const root = protobufjs.Root.fromDescriptor(FileDescriptorSet.decode(schema.data)); const type = root.lookupType(schema.name); @@ -167,7 +167,9 @@ async function validate( const textDecoder = new TextDecoder(); messageDeserializer = (data) 
=> JSON.parse(textDecoder.decode(data)); } else { - throw new Error(`unsupported encoding ${record.messageEncoding}`); + throw new Error( + `unsupported message encoding ${record.messageEncoding} with schema encoding ${schema.encoding}`, + ); } channelInfoById.set(record.id, { info: record, messageDeserializer }); break; diff --git a/typescript/nodejs/README.md b/typescript/nodejs/README.md index cf4b87998d..fe3e135290 100644 --- a/typescript/nodejs/README.md +++ b/typescript/nodejs/README.md @@ -47,4 +47,4 @@ const writer = new McapWriter({ ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, share feedback, and stay up to date on what our team is working on. +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. diff --git a/typescript/support/README.md b/typescript/support/README.md index 73491b3c73..4bd7261041 100644 --- a/typescript/support/README.md +++ b/typescript/support/README.md @@ -46,4 +46,4 @@ const reader = await McapIndexedReader.Initialize({ ## Stay in touch -Join our [Slack channel](https://foxglove.dev/slack) to ask questions, share feedback, and stay up to date on what our team is working on. +Join our [Discord community](https://foxglove.dev/chat) to ask questions, share feedback, and stay up to date on what our team is working on. diff --git a/website/docs/guides/cli.md b/website/docs/guides/cli.md new file mode 100644 index 0000000000..feac70e4d2 --- /dev/null +++ b/website/docs/guides/cli.md @@ -0,0 +1,188 @@ +--- +sidebar_position: 3 +--- + +# CLI + +The MCAP command line tool is useful for working with MCAP files. + +## Installation + +### Release binaries + +Download binaries for your platform from [the latest Github release](https://github.com/foxglove/mcap/releases/latest). + +Then, mark it executable: + + $ chmod +x mcap + +If required, move the binary onto your path. + +### Homebrew + +To install using [Homebrew](https://brew.sh) on macOS or Linux, run: + + $ brew install mcap + +### From Source + +:::caution +Installing via `go install` is not supported. To build from source you must clone the repository. +::: + +1. Clone the [mcap repository](https://github.com/foxglove/mcap). +2. `$ cd go/cli/mcap` +3. `$ make build` +4. The binary will be built into the a newly created `bin` folder. + +## Usage + +Run `mcap --help` for detailed usage information. + + $ mcap --help + + Usage: + mcap [command] + + Available Commands: + add Add records to an existing MCAP file + cat Cat the messages in an MCAP file to stdout + completion Generate the autocompletion script for the specified shell + compress Create a compressed copy of an MCAP file + convert Convert a bag file to an MCAP file + decompress Create an uncompressed copy of an MCAP file + doctor Check an MCAP file structure + filter Copy some filtered MCAP data to a new file + get Get a record from an MCAP file + help Help about any command + info Report statistics about an MCAP file + list List records of an MCAP file + merge Merge a selection of MCAP files by record timestamp + recover Recover data from a potentially corrupt MCAP file + version Output version information + + Flags: + --config string Config file (default is $HOME/.mcap.yaml) + -h, --help help for mcap + -v, --verbose Verbose output + + Use "mcap [command] --help" for more information about a command. 
+ +### ROS Bag to MCAP conversion + +Convert a ROS 1 bag file to mcap: + + + + $ mcap convert demo.bag demo.mcap + + + +Convert a ROS 2 db3 file to mcap: + + + + $ mcap convert demo.db3 demo.mcap + + + +In ROS 2 releases prior to Iron, db3 files did not contain message definitions (schemas). When converting to MCAP, you should first source the same ROS 2 workspace that the original file was recorded with. If this is not available, you will need to specify a search directory for message definitions (e.g `/opt/ros/humble` from the original system): + + $ mcap convert demo.db3 demo.mcap --ament-prefix-path /path/to/humble + +Alternatively, the [`ros2 bag convert`](https://github.com/ros2/rosbag2#converting-bags) utility may be used to convert between db3 and mcap. + +### File summarization + +Report summary statistics on an MCAP file: + + + + $ mcap info demo.mcap + library: mcap go #(devel) + profile: ros1 + messages: 1606 + duration: 7.780758504s + start: 2017-03-21T19:26:20.103843113-07:00 (1490149580.103843113) + end: 2017-03-21T19:26:27.884601617-07:00 (1490149587.884601617) + compression: + zstd: [14/14 chunks] (50.73%) + channels: + (0) /diagnostics 52 msgs (6.68 Hz) : diagnostic_msgs/DiagnosticArray [ros1msg] + (1) /image_color/compressed 234 msgs (30.07 Hz) : sensor_msgs/CompressedImage [ros1msg] + (2) /tf 774 msgs (99.48 Hz) : tf2_msgs/TFMessage [ros1msg] + (3) /radar/points 156 msgs (20.05 Hz) : sensor_msgs/PointCloud2 [ros1msg] + (4) /radar/range 156 msgs (20.05 Hz) : sensor_msgs/Range [ros1msg] + (5) /radar/tracks 156 msgs (20.05 Hz) : radar_driver/RadarTracks [ros1msg] + (6) /velodyne_points 78 msgs (10.02 Hz) : sensor_msgs/PointCloud2 [ros1msg] + attachments: 0 + + + +### Indexed reading + +Echo messages for a specific topic to stdout as JSON: + + $ mcap cat demo.mcap --topics /tf --json | head -n 10 + {"topic":"/tf","sequence":2,"log_time":1490149580.103843113,"publish_time":1490149580.103843113,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.117017840,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":3,"log_time":1490149580.113944947,"publish_time":1490149580.113944947,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.127078895,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":8,"log_time":1490149580.124028613,"publish_time":1490149580.124028613,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.137141823,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":10,"log_time":1490149580.134219155,"publish_time":1490149580.134219155,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.147199242,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":11,"log_time":1490149580.144292780,"publish_time":1490149580.144292780,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.157286100,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + 
{"topic":"/tf","sequence":12,"log_time":1490149580.154895238,"publish_time":1490149580.154895238,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.167376974,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":15,"log_time":1490149580.165152280,"publish_time":1490149580.165152280,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.177463023,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":20,"log_time":1490149580.175192697,"publish_time":1490149580.175192697,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.187523449,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":21,"log_time":1490149580.185428613,"publish_time":1490149580.185428613,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.197612248,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + {"topic":"/tf","sequence":22,"log_time":1490149580.196638030,"publish_time":1490149580.196638030,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.207699065,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} + +### Remote file support + +All commands except `convert` support reading from remote files stored in GCS: + + + + $ mcap info gs://your-remote-bucket/demo.mcap + library: mcap go #(devel) + profile: ros1 + messages: 1606 + duration: 7.780758504s + start: 2017-03-21T19:26:20.103843113-07:00 (1490149580.103843113) + end: 2017-03-21T19:26:27.884601617-07:00 (1490149587.884601617) + compression: + zstd: [14/14 chunks] (50.73%) + channels: + (0) /diagnostics 52 msgs (6.68 Hz) : diagnostic_msgs/DiagnosticArray [ros1msg] + (1) /image_color/compressed 234 msgs (30.07 Hz) : sensor_msgs/CompressedImage [ros1msg] + (2) /tf 774 msgs (99.48 Hz) : tf2_msgs/TFMessage [ros1msg] + (3) /radar/points 156 msgs (20.05 Hz) : sensor_msgs/PointCloud2 [ros1msg] + (4) /radar/range 156 msgs (20.05 Hz) : sensor_msgs/Range [ros1msg] + (5) /radar/tracks 156 msgs (20.05 Hz) : radar_driver/RadarTracks [ros1msg] + (6) /velodyne_points 78 msgs (10.02 Hz) : sensor_msgs/PointCloud2 [ros1msg] + attachments: 0 + + + +Remote reads will use the index at the end of the file to minimize latency and data transfer. 
+ +### File Diagnostics + +#### List chunks in a file + +The `mcap list` command can be used with chunks or attachments: + + $ mcap list chunks ~/data/mcap/demo.mcap + offset length start end compression compressed size uncompressed size compression ratio + 43 4529455 1490149580103843113 1490149580608392239 zstd 4529402 9400437 0.481829 + 4531299 4751426 1490149580618484655 1490149581212757989 zstd 4751373 9621973 0.493804 + 9284910 4726518 1490149581222848447 1490149581811286531 zstd 4726465 9617327 0.491453 + 14013453 4734289 1490149581821378989 1490149582418243031 zstd 4734236 9624850 0.491876 + 18749879 4742989 1490149582428402906 1490149583010292990 zstd 4742936 9646234 0.491688 + 23494877 4712785 1490149583020377156 1490149583617657323 zstd 4712732 9619341 0.489923 + 28209799 4662983 1490149583627720990 1490149584217852199 zstd 4662930 9533042 0.489133 + 32874919 4643191 1490149584227924615 1490149584813214116 zstd 4643138 9499481 0.488778 + 37520119 4726655 1490149584823300282 1490149585411567366 zstd 4726602 9591399 0.492796 + 42248895 4748884 1490149585421596866 1490149586021460449 zstd 4748831 9621776 0.493550 + 46999820 4746828 1490149586031607908 1490149586617282658 zstd 4746775 9632302 0.492798 + 51748769 4759213 1490149586627453408 1490149587217501700 zstd 4759160 9634744 0.493958 + 56510103 4750731 1490149587227624742 1490149587814043200 zstd 4750678 9622778 0.493691 + 61262859 217330 1490149587824113700 1490149587884601617 zstd 217277 217255 1.000101 diff --git a/website/docs/guides/cli.mdx b/website/docs/guides/cli.mdx deleted file mode 100644 index 974c1d0759..0000000000 --- a/website/docs/guides/cli.mdx +++ /dev/null @@ -1,216 +0,0 @@ ---- -sidebar_position: 3 ---- - -import CLIDownloadScript from "../../src/components/CLIDownloadScript"; - -# CLI - -The MCAP command line tool is useful for working with MCAP files. - -## Installation - -### Release binaries - -Use the asset links on https://github.com/foxglove/mcap/releases to download the latest binary for your platform: - - - -Then, mark it executable: - -```bash -$ chmod +x mcap -``` - -If required, move the binary onto your path. - -### Homebrew - -To install using [Homebrew](https://brew.sh) on macOS or Linux, run: - -```bash -$ brew install mcap -``` - -### From Source - -:::caution -Installing via `go install` is not supported. To build from source you must clone the repository. -::: - -1. Clone the [mcap repository](https://github.com/foxglove/mcap). -2. `$ cd go/cli/mcap` -3. `$ make build` -4. The binary will be built into the a newly created `bin` folder. - -## Usage - -Run `mcap --help` for detailed usage information. 
- -``` -$ mcap --help - -Usage: -mcap [command] - -Available Commands: -add Add records to an existing MCAP file -cat Cat the messages in an MCAP file to stdout -completion Generate the autocompletion script for the specified shell -compress Create a compressed copy of an MCAP file -convert Convert a bag file to an MCAP file -decompress Create an uncompressed copy of an MCAP file -doctor Check an MCAP file structure -filter Copy some filtered MCAP data to a new file -get Get a record from an MCAP file -help Help about any command -info Report statistics about an MCAP file -list List records of an MCAP file -merge Merge a selection of MCAP files by record timestamp -recover Recover data from a potentially corrupt MCAP file -version Output version information - -Flags: - --config string Config file (default is $HOME/.mcap.yaml) --h, --help help for mcap --v, --verbose Verbose output - -Use "mcap [command] --help" for more information about a command. -``` - -### ROS Bag to MCAP conversion - -Convert a ROS 1 bag file to mcap: - -{/* cspell: disable */} - -```bash -$ mcap convert demo.bag demo.mcap -``` - -{/* cspell: enable */} - -Convert a ROS 2 db3 file to mcap: - -{/* cspell: disable */} - -```bash -$ mcap convert demo.db3 demo.mcap -``` - -{/* cspell: enable */} - -In ROS 2 releases prior to Iron, db3 files did not contain message definitions (schemas). When converting to MCAP, you should first source the same ROS 2 workspace that the original file was recorded with. If this is not available, you will need to specify a search directory for message definitions (e.g `/opt/ros/humble` from the original system): - -```bash -$ mcap convert demo.db3 demo.mcap --ament-prefix-path /path/to/humble -``` - -Alternatively, the [`ros2 bag convert`](https://github.com/ros2/rosbag2#converting-bags) utility may be used to convert between db3 and mcap. 
- -### File summarization - -Report summary statistics on an MCAP file: - -{/* cspell: disable */} - -``` -$ mcap info demo.mcap -library: mcap go #(devel) -profile: ros1 -messages: 1606 -duration: 7.780758504s -start: 2017-03-21T19:26:20.103843113-07:00 (1490149580.103843113) -end: 2017-03-21T19:26:27.884601617-07:00 (1490149587.884601617) -compression: - zstd: [14/14 chunks] (50.73%) -channels: - (0) /diagnostics 52 msgs (6.68 Hz) : diagnostic_msgs/DiagnosticArray [ros1msg] - (1) /image_color/compressed 234 msgs (30.07 Hz) : sensor_msgs/CompressedImage [ros1msg] - (2) /tf 774 msgs (99.48 Hz) : tf2_msgs/TFMessage [ros1msg] - (3) /radar/points 156 msgs (20.05 Hz) : sensor_msgs/PointCloud2 [ros1msg] - (4) /radar/range 156 msgs (20.05 Hz) : sensor_msgs/Range [ros1msg] - (5) /radar/tracks 156 msgs (20.05 Hz) : radar_driver/RadarTracks [ros1msg] - (6) /velodyne_points 78 msgs (10.02 Hz) : sensor_msgs/PointCloud2 [ros1msg] -attachments: 0 -``` - -{/* cspell: enable */} - -### Indexed reading - -Echo messages for a specific topic to stdout as JSON: - -``` -$ mcap cat demo.mcap --topics /tf --json | head -n 10 -{"topic":"/tf","sequence":2,"log_time":1490149580.103843113,"publish_time":1490149580.103843113,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.117017840,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":3,"log_time":1490149580.113944947,"publish_time":1490149580.113944947,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.127078895,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":8,"log_time":1490149580.124028613,"publish_time":1490149580.124028613,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.137141823,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":10,"log_time":1490149580.134219155,"publish_time":1490149580.134219155,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.147199242,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":11,"log_time":1490149580.144292780,"publish_time":1490149580.144292780,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.157286100,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":12,"log_time":1490149580.154895238,"publish_time":1490149580.154895238,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.167376974,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":15,"log_time":1490149580.165152280,"publish_time":1490149580.165152280,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.177463023,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} 
-{"topic":"/tf","sequence":20,"log_time":1490149580.175192697,"publish_time":1490149580.175192697,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.187523449,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":21,"log_time":1490149580.185428613,"publish_time":1490149580.185428613,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.197612248,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -{"topic":"/tf","sequence":22,"log_time":1490149580.196638030,"publish_time":1490149580.196638030,"data":{"transforms":[{"header":{"seq":0,"stamp":1490149580.207699065,"frame_id":"base_link"},"child_frame_id":"radar","transform":{"translation":{"x":3.835,"y":0,"z":0},"rotation":{"x":0,"y":0,"z":0,"w":1}}}]}} -``` - -### Remote file support - -All commands except `convert` support reading from remote files stored in GCS: - -{/* cspell: disable */} - -``` -$ mcap info gs://your-remote-bucket/demo.mcap -library: mcap go #(devel) -profile: ros1 -messages: 1606 -duration: 7.780758504s -start: 2017-03-21T19:26:20.103843113-07:00 (1490149580.103843113) -end: 2017-03-21T19:26:27.884601617-07:00 (1490149587.884601617) -compression: - zstd: [14/14 chunks] (50.73%) -channels: - (0) /diagnostics 52 msgs (6.68 Hz) : diagnostic_msgs/DiagnosticArray [ros1msg] - (1) /image_color/compressed 234 msgs (30.07 Hz) : sensor_msgs/CompressedImage [ros1msg] - (2) /tf 774 msgs (99.48 Hz) : tf2_msgs/TFMessage [ros1msg] - (3) /radar/points 156 msgs (20.05 Hz) : sensor_msgs/PointCloud2 [ros1msg] - (4) /radar/range 156 msgs (20.05 Hz) : sensor_msgs/Range [ros1msg] - (5) /radar/tracks 156 msgs (20.05 Hz) : radar_driver/RadarTracks [ros1msg] - (6) /velodyne_points 78 msgs (10.02 Hz) : sensor_msgs/PointCloud2 [ros1msg] -attachments: 0 -``` - -{/* cspell: enable */} - -Remote reads will use the index at the end of the file to minimize latency and data transfer. 
- -### File Diagnostics - -#### List chunks in a file - -The `mcap list` command can be used with chunks or attachments: - -{/* cspell: disable */} - -``` -$ mcap list chunks ~/data/mcap/demo.mcap -offset length start end compression compressed size uncompressed size compression ratio -43 4529455 1490149580103843113 1490149580608392239 zstd 4529402 9400437 0.481829 -4531299 4751426 1490149580618484655 1490149581212757989 zstd 4751373 9621973 0.493804 -9284910 4726518 1490149581222848447 1490149581811286531 zstd 4726465 9617327 0.491453 -14013453 4734289 1490149581821378989 1490149582418243031 zstd 4734236 9624850 0.491876 -18749879 4742989 1490149582428402906 1490149583010292990 zstd 4742936 9646234 0.491688 -23494877 4712785 1490149583020377156 1490149583617657323 zstd 4712732 9619341 0.489923 -28209799 4662983 1490149583627720990 1490149584217852199 zstd 4662930 9533042 0.489133 -32874919 4643191 1490149584227924615 1490149584813214116 zstd 4643138 9499481 0.488778 -37520119 4726655 1490149584823300282 1490149585411567366 zstd 4726602 9591399 0.492796 -42248895 4748884 1490149585421596866 1490149586021460449 zstd 4748831 9621776 0.493550 -46999820 4746828 1490149586031607908 1490149586617282658 zstd 4746775 9632302 0.492798 -51748769 4759213 1490149586627453408 1490149587217501700 zstd 4759160 9634744 0.493958 -56510103 4750731 1490149587227624742 1490149587814043200 zstd 4750678 9622778 0.493691 -61262859 217330 1490149587824113700 1490149587884601617 zstd 217277 217255 1.000101 -``` - -{/* cspell: enable */} diff --git a/website/docs/guides/cpp/cmake.md b/website/docs/guides/cpp/cmake.md new file mode 100644 index 0000000000..af42e95d7b --- /dev/null +++ b/website/docs/guides/cpp/cmake.md @@ -0,0 +1,9 @@ +--- +description: Build and use MCAP C++ library with CMake. +--- + +# Build MCAP with CMake + +If you want to add MCAP to your C++ project that uses CMake, the third-party [olympus-robotics/mcap_builder](https://github.com/olympus-robotics/mcap_builder) repository provides a helpful wrapper. + +The readme file in that repository provides the steps and the context needed. diff --git a/website/docs/guides/cpp/protobuf.md b/website/docs/guides/cpp/protobuf.md index 89affa05e6..4930db5da8 100644 --- a/website/docs/guides/cpp/protobuf.md +++ b/website/docs/guides/cpp/protobuf.md @@ -31,6 +31,12 @@ We also include the MCAP reader implementation: #include "mcap/reader.hpp" ``` +And standard library dependencies: + +```cpp +#include +``` + Use the `mcap::McapReader::open()` method to open an MCAP file for reading: ```cpp @@ -103,7 +109,7 @@ auto messageView = reader.readMessages(); #### Load schema definitions -We build a `DynamicMessageFactory`, using a `google::Protobuf::SimpleDescriptorDatabase` as the underlying descriptor database. By constructing this ourselves and retaining a reference to the database, we can more easily load that database with definitions from the MCAP file. +We build a `DynamicMessageFactory`, using a `google::protobuf::SimpleDescriptorDatabase` as the underlying descriptor database. By constructing this ourselves and retaining a reference to the database, we can more easily load that database with definitions from the MCAP file. 
```cpp gp::SimpleDescriptorDatabase protoDb; @@ -157,7 +163,7 @@ descriptor = protoPool.FindMessageTypeByName(it->schema->name); We can use this descriptor to parse our message: ```cpp -gp::Message* message = protoFactory.GetPrototype(descriptor)->New(); +auto message = std::unique_ptr<gp::Message>(protoFactory.GetPrototype(descriptor)->New()); if (!message->ParseFromArray(static_cast<const void*>(it->message.data), it->message.dataSize)) { std::cerr << "failed to parse message using included schema" << std::endl; diff --git a/website/docs/spec/index.md b/website/docs/spec/index.md index a2b031ba80..05b6794e83 100644 --- a/website/docs/spec/index.md +++ b/website/docs/spec/index.md @@ -181,7 +181,7 @@ The message encoding and schema must match that of the Channel record correspond ### Chunk (op=0x06) -A Chunk contains a batch of Schema, Channel, and Message records. The batch of records contained in a chunk may be compressed or uncompressed. +A Chunk contains a batch of records. Readers should expect Schema, Channel, and Message records to be present in chunks, but future spec changes or user extensions may include others. The batch of records contained in a chunk may be compressed or uncompressed. All messages in the chunk must reference channels recorded earlier in the file (in a previous chunk, earlier in the current chunk, or earlier in the data section). @@ -225,7 +225,7 @@ A Chunk Index record exists for every Chunk in the file. | 8 | compressed_size | uint64 | The size of the chunk `records` field. | | 8 | uncompressed_size | uint64 | The uncompressed size of the chunk `records` field. This field should match the value in the corresponding Chunk record. | -A Schema and Channel record MUST exist in the summary section for all channels referenced by chunk index records. +A Schema and Channel record MUST exist in the summary section for all messages in chunks that are indexed by Chunk Index records. > Why? The typical use case for file readers using an index is fast random access to a specific message timestamp. Channel is a prerequisite for decoding Message record data. Without an easy-to-access copy of the Channel records, readers would need to search for Channel records from the start of the file, degrading random access read performance. diff --git a/website/docs/spec/registry.md b/website/docs/spec/registry.md index eb50a9c5ec..722f143157 100644 --- a/website/docs/spec/registry.md +++ b/website/docs/spec/registry.md @@ -60,7 +60,7 @@ Schema `encoding` may only be omitted for self-describing message encodings such - `name`: Fully qualified name to the message within the descriptor set. For example, in a proto file containing `package foo.bar; message Baz {}` the fully qualified message name is `foo.bar.Baz`. - `encoding`: `protobuf` -- `data`: A binary [FileDescriptorSet](https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/descriptor.proto) as produced by `protoc --descriptor_set_out`. +- `data`: A binary [FileDescriptorSet](https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/descriptor.proto) as produced by `protoc --include_imports --descriptor_set_out`.
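The `--include_imports` flag matters because readers resolve the named type, and every type it imports, from the stored descriptor set alone; the validate example earlier in this diff does exactly that with protobufjs. A minimal consumption sketch, assuming the `protobufjs/ext/descriptor` entry point provides `FileDescriptorSet` and `Root.fromDescriptor`:

```typescript
import protobufjs from "protobufjs";
import { FileDescriptorSet } from "protobufjs/ext/descriptor";

// Build a protobufjs Root from a Schema record's data and look up the message type by the
// schema's fully qualified name (e.g. "foo.bar.Baz"), then decode Message record payloads.
function makeProtobufDeserializer(schemaName: string, schemaData: Uint8Array) {
  const root = protobufjs.Root.fromDescriptor(FileDescriptorSet.decode(schemaData));
  const type = root.lookupType(schemaName);
  return (data: Uint8Array): unknown => type.toObject(type.decode(data));
}
```

If the descriptor set was produced without `--include_imports`, `lookupType` cannot resolve types defined in imported `.proto` files, so decoding fails even though the top-level message is present.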
### flatbuffer diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index afd8b2ac37..a2c78a0d09 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -5,9 +5,7 @@ const path = require("path"); const darkCodeTheme = require("prism-react-renderer/themes/dracula"); const lightCodeTheme = require("prism-react-renderer/themes/github"); -const util = require("util"); const webpack = require("webpack"); -const execAsync = util.promisify(require("child_process").exec); const modifySvgoConfigInPlace = require("./modifySvgoConfigInPlace"); @@ -67,32 +65,6 @@ const config = { }; }, }), - () => { - // determines the current CLI download link to display by fetching the latest tag matching - // releases/mcap-cli/* at build time. - return { - name: "latestCLIReleaseTag", - async loadContent() { - /* cspell:disable */ - const result = await execAsync( - `git tag --sort=-creatordate --list "releases/mcap-cli/*"`, - ); - /* cspell:enable */ - if (result.stdout.length === 0) { - throw new Error( - `could not determine latest MCAP CLI tag ${JSON.stringify( - result, - )}`, - ); - } - const latest = result.stdout.split("\n")[0]; - return latest; - }, - async contentLoaded({ content, actions }) { - actions.setGlobalData({ tag: content }); - }, - }; - }, ], presets: [ @@ -143,8 +115,8 @@ const config = { label: "Specification", }, { - href: "https://foxglove.dev/slack", - label: "Slack", + href: "https://foxglove.dev/chat", + label: "Discord", position: "right", }, { @@ -182,8 +154,8 @@ const config = { href: "https://github.com/foxglove/mcap", }, { - label: "Slack", - href: "https://foxglove.dev/slack", + label: "Discord", + href: "https://foxglove.dev/chat", }, { label: "Stack Overflow", diff --git a/website/src/components/CLIDownloadScript.tsx b/website/src/components/CLIDownloadScript.tsx deleted file mode 100644 index b0bda8daa6..0000000000 --- a/website/src/components/CLIDownloadScript.tsx +++ /dev/null @@ -1,15 +0,0 @@ -import { usePluginData } from "@docusaurus/useGlobalData"; -import CodeBlock from "@theme/CodeBlock"; -import React from "react"; - -export default function CLIDownloadScript(): JSX.Element { - const latestVersion = usePluginData("latestCLIReleaseTag") as { tag: string }; - const tag = encodeURIComponent(latestVersion.tag); - - return ( - - $ wget https://github.com/foxglove/mcap/releases/download/{tag} - /mcap-linux-$(arch) -O mcap - - ); -} diff --git a/yarn.lock b/yarn.lock index 8d6b97d997..90c5ddef7e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -189,50 +189,6 @@ __metadata: languageName: node linkType: hard -"@arrows/array@npm:^1.4.1": - version: 1.4.1 - resolution: "@arrows/array@npm:1.4.1" - dependencies: - "@arrows/composition": ^1.2.2 - checksum: 39de47a49709376d91360955665f5cc33ad6fce85125a5b1fde777bf963bd2d053cc77a587253a55e6f4241a75ad7db991aacc26eb36edb7a746d824eb8ebd8a - languageName: node - linkType: hard - -"@arrows/composition@npm:^1.0.0, @arrows/composition@npm:^1.2.2": - version: 1.2.2 - resolution: "@arrows/composition@npm:1.2.2" - checksum: 3219e9a4e220c9778d8919fef329608b9966667b61f26e403d368646ebc65d96b68abcb7a73621992baad678e444ceb36914f1f2db2d6502ddfe738e9230e737 - languageName: node - linkType: hard - -"@arrows/dispatch@npm:^1.0.2": - version: 1.0.3 - resolution: "@arrows/dispatch@npm:1.0.3" - dependencies: - "@arrows/composition": ^1.2.2 - checksum: 2bd0b1ad5345b056cd300b63eedf3a1b9f17e8f891a5b5d1e70e9a3d8c426ec05828c38cd437f742e75387fbc98b3082fef23f62fe97688b63d060376d50dcd9 - languageName: node - linkType: hard - 
-"@arrows/error@npm:^1.0.2": - version: 1.0.2 - resolution: "@arrows/error@npm:1.0.2" - checksum: 35ad67e8d2781879a22711f5c7ba3907d6772ff42b24abc8b94b5165414e802f6c207f2024f50508c8f40637465a91da268ebf321c0eef5aaf44fc3d4acc7a58 - languageName: node - linkType: hard - -"@arrows/multimethod@npm:^1.1.6": - version: 1.4.1 - resolution: "@arrows/multimethod@npm:1.4.1" - dependencies: - "@arrows/array": ^1.4.1 - "@arrows/composition": ^1.2.2 - "@arrows/error": ^1.0.2 - fast-deep-equal: ^3.1.3 - checksum: 2a3a6b62debb163448ce1e90c9a0508866e605895967a67ef3c65f5248e5e7318ae95a92d4a62aff0518eea63755cc0467deb3265c3c9b41e00a892802ae729a - languageName: node - linkType: hard - "@babel/code-frame@npm:^7.0.0, @babel/code-frame@npm:^7.10.4, @babel/code-frame@npm:^7.12.13, @babel/code-frame@npm:^7.16.0, @babel/code-frame@npm:^7.18.6, @babel/code-frame@npm:^7.21.4, @babel/code-frame@npm:^7.8.3": version: 7.21.4 resolution: "@babel/code-frame@npm:7.21.4" @@ -2832,7 +2788,7 @@ __metadata: "@types/node": 18.13.0 "@typescript-eslint/eslint-plugin": 6.11.0 "@typescript-eslint/parser": 6.11.0 - benny: ^3.7.1 + commander: 12.1.0 eslint: 8.54.0 eslint-config-prettier: 9.0.0 eslint-plugin-es: 4.1.0 @@ -5123,7 +5079,7 @@ __metadata: languageName: node linkType: hard -"ansi-escapes@npm:^4.2.1, ansi-escapes@npm:^4.3.0": +"ansi-escapes@npm:^4.2.1": version: 4.3.2 resolution: "ansi-escapes@npm:4.3.2" dependencies: @@ -5374,13 +5330,6 @@ __metadata: languageName: node linkType: hard -"astral-regex@npm:^2.0.0": - version: 2.0.0 - resolution: "astral-regex@npm:2.0.0" - checksum: 876231688c66400473ba505731df37ea436e574dd524520294cc3bbc54ea40334865e01fa0d074d74d036ee874ee7e62f486ea38bc421ee8e6a871c06f011766 - languageName: node - linkType: hard - "asynciterator.prototype@npm:^1.0.0": version: 1.0.0 resolution: "asynciterator.prototype@npm:1.0.0" @@ -5623,33 +5572,6 @@ __metadata: languageName: node linkType: hard -"benchmark@npm:^2.1.4": - version: 2.1.4 - resolution: "benchmark@npm:2.1.4" - dependencies: - lodash: ^4.17.4 - platform: ^1.3.3 - checksum: aa466561d4f2b0a2419a3069b8f90fd35ffacf26849697eea9de525ecfbd10b44da11070cc51c88d772076db8cb2415641b493de7d6c024fdf8551019c6fcf1c - languageName: node - linkType: hard - -"benny@npm:^3.7.1": - version: 3.7.1 - resolution: "benny@npm:3.7.1" - dependencies: - "@arrows/composition": ^1.0.0 - "@arrows/dispatch": ^1.0.2 - "@arrows/multimethod": ^1.1.6 - benchmark: ^2.1.4 - common-tags: ^1.8.0 - fs-extra: ^10.0.0 - json2csv: ^5.0.6 - kleur: ^4.1.4 - log-update: ^4.0.0 - checksum: 8dcca91afb6e97b986a16fc73a2a12b2d51c306dc1e9fca6ace988b3ca26405dffcb85309083a449d27cfab440d8164b5cff3a0deba034879da401305412af34 - languageName: node - linkType: hard - "big-integer@npm:^1.6.44": version: 1.6.51 resolution: "big-integer@npm:1.6.51" @@ -6162,15 +6084,6 @@ __metadata: languageName: node linkType: hard -"cli-cursor@npm:^3.1.0": - version: 3.1.0 - resolution: "cli-cursor@npm:3.1.0" - dependencies: - restore-cursor: ^3.1.0 - checksum: 2692784c6cd2fd85cfdbd11f53aea73a463a6d64a77c3e098b2b4697a20443f430c220629e1ca3b195ea5ac4a97a74c2ee411f3807abf6df2b66211fec0c0a29 - languageName: node - linkType: hard - "cli-table3@npm:^0.6.2": version: 0.6.3 resolution: "cli-table3@npm:0.6.3" @@ -6326,6 +6239,13 @@ __metadata: languageName: node linkType: hard +"commander@npm:12.1.0": + version: 12.1.0 + resolution: "commander@npm:12.1.0" + checksum: 68e9818b00fc1ed9cdab9eb16905551c2b768a317ae69a5e3c43924c2b20ac9bb65b27e1cab36aeda7b6496376d4da908996ba2c0b5d79463e0fb1e77935d514 + languageName: node + linkType: hard + 
"commander@npm:^2.20.0": version: 2.20.3 resolution: "commander@npm:2.20.3" @@ -6340,13 +6260,6 @@ __metadata: languageName: node linkType: hard -"commander@npm:^6.1.0": - version: 6.2.1 - resolution: "commander@npm:6.2.1" - checksum: d7090410c0de6bc5c67d3ca41c41760d6d268f3c799e530aafb73b7437d1826bbf0d2a3edac33f8b57cc9887b4a986dce307fa5557e109be40eadb7c43b21742 - languageName: node - linkType: hard - "commander@npm:^7.2.0": version: 7.2.0 resolution: "commander@npm:7.2.0" @@ -6374,13 +6287,6 @@ __metadata: languageName: node linkType: hard -"common-tags@npm:^1.8.0": - version: 1.8.2 - resolution: "common-tags@npm:1.8.2" - checksum: 767a6255a84bbc47df49a60ab583053bb29a7d9687066a18500a516188a062c4e4cd52de341f22de0b07062e699b1b8fe3cfa1cb55b241cb9301aeb4f45b4dff - languageName: node - linkType: hard - "commondir@npm:^1.0.1": version: 1.0.1 resolution: "commondir@npm:1.0.1" @@ -8527,7 +8433,7 @@ __metadata: languageName: node linkType: hard -"fs-extra@npm:^10.0.0, fs-extra@npm:^10.1.0": +"fs-extra@npm:^10.1.0": version: 10.1.0 resolution: "fs-extra@npm:10.1.0" dependencies: @@ -10735,19 +10641,6 @@ __metadata: languageName: node linkType: hard -"json2csv@npm:^5.0.6": - version: 5.0.7 - resolution: "json2csv@npm:5.0.7" - dependencies: - commander: ^6.1.0 - jsonparse: ^1.3.1 - lodash.get: ^4.4.2 - bin: - json2csv: bin/json2csv.js - checksum: 81b511e4f5abba1dcda90593c193d15e5f05f1def91377b6289536e31fdb629889da6a2b4612b9ff699116a29b1758d20c0d71f7921fcfb09863da5b2d883139 - languageName: node - linkType: hard - "json5@npm:^1.0.2": version: 1.0.2 resolution: "json5@npm:1.0.2" @@ -10795,13 +10688,6 @@ __metadata: languageName: node linkType: hard -"jsonparse@npm:^1.3.1": - version: 1.3.1 - resolution: "jsonparse@npm:1.3.1" - checksum: 6514a7be4674ebf407afca0eda3ba284b69b07f9958a8d3113ef1005f7ec610860c312be067e450c569aab8b89635e332cee3696789c750692bb60daba627f4d - languageName: node - linkType: hard - "jsx-ast-utils@npm:^2.4.1 || ^3.0.0": version: 3.3.3 resolution: "jsx-ast-utils@npm:3.3.3" @@ -10858,13 +10744,6 @@ __metadata: languageName: node linkType: hard -"kleur@npm:^4.1.4": - version: 4.1.5 - resolution: "kleur@npm:4.1.5" - checksum: 1dc476e32741acf0b1b5b0627ffd0d722e342c1b0da14de3e8ae97821327ca08f9fb944542fb3c126d90ac5f27f9d804edbe7c585bf7d12ef495d115e0f22c12 - languageName: node - linkType: hard - "klona@npm:^2.0.6": version: 2.0.6 resolution: "klona@npm:2.0.6" @@ -11012,13 +10891,6 @@ __metadata: languageName: node linkType: hard -"lodash.get@npm:^4.4.2": - version: 4.4.2 - resolution: "lodash.get@npm:4.4.2" - checksum: e403047ddb03181c9d0e92df9556570e2b67e0f0a930fcbbbd779370972368f5568e914f913e93f3b08f6d492abc71e14d4e9b7a18916c31fa04bd2306efe545 - languageName: node - linkType: hard - "lodash.kebabcase@npm:4.1.1": version: 4.1.1 resolution: "lodash.kebabcase@npm:4.1.1" @@ -11061,25 +10933,13 @@ __metadata: languageName: node linkType: hard -"lodash@npm:4.17.21, lodash@npm:^4.17.19, lodash@npm:^4.17.20, lodash@npm:^4.17.21, lodash@npm:^4.17.4": +"lodash@npm:4.17.21, lodash@npm:^4.17.19, lodash@npm:^4.17.20, lodash@npm:^4.17.21": version: 4.17.21 resolution: "lodash@npm:4.17.21" checksum: eb835a2e51d381e561e508ce932ea50a8e5a68f4ebdd771ea240d3048244a8d13658acbd502cd4829768c56f2e16bdd4340b9ea141297d472517b83868e677f7 languageName: node linkType: hard -"log-update@npm:^4.0.0": - version: 4.0.0 - resolution: "log-update@npm:4.0.0" - dependencies: - ansi-escapes: ^4.3.0 - cli-cursor: ^3.1.0 - slice-ansi: ^4.0.0 - wrap-ansi: ^6.2.0 - checksum: 
ae2f85bbabc1906034154fb7d4c4477c79b3e703d22d78adee8b3862fa913942772e7fa11713e3d96fb46de4e3cabefbf5d0a544344f03b58d3c4bff52aa9eb2 - languageName: node - linkType: hard - "long@npm:^5.0.0": version: 5.2.1 resolution: "long@npm:5.2.1" @@ -11883,7 +11743,7 @@ __metadata: languageName: node linkType: hard -"onetime@npm:^5.1.0, onetime@npm:^5.1.2": +"onetime@npm:^5.1.2": version: 5.1.2 resolution: "onetime@npm:5.1.2" dependencies: @@ -12291,13 +12151,6 @@ __metadata: languageName: node linkType: hard -"platform@npm:^1.3.3": - version: 1.3.6 - resolution: "platform@npm:1.3.6" - checksum: 6f472a09c61d418c7e26c1c16d0bdc029549d512dbec6526216a1e59ec68100d07007d0097dcba69dddad883d6f2a83361b4bdfe0094a3d9a2af24158643d85e - languageName: node - linkType: hard - "postcss-calc@npm:^8.2.3": version: 8.2.4 resolution: "postcss-calc@npm:8.2.4" @@ -13656,16 +13509,6 @@ __metadata: languageName: node linkType: hard -"restore-cursor@npm:^3.1.0": - version: 3.1.0 - resolution: "restore-cursor@npm:3.1.0" - dependencies: - onetime: ^5.1.0 - signal-exit: ^3.0.2 - checksum: f877dd8741796b909f2a82454ec111afb84eb45890eb49ac947d87991379406b3b83ff9673a46012fca0d7844bb989f45cc5b788254cf1a39b6b5a9659de0630 - languageName: node - linkType: hard - "retry@npm:^0.12.0": version: 0.12.0 resolution: "retry@npm:0.12.0" @@ -14183,17 +14026,6 @@ __metadata: languageName: node linkType: hard -"slice-ansi@npm:^4.0.0": - version: 4.0.0 - resolution: "slice-ansi@npm:4.0.0" - dependencies: - ansi-styles: ^4.0.0 - astral-regex: ^2.0.0 - is-fullwidth-code-point: ^3.0.0 - checksum: 4a82d7f085b0e1b070e004941ada3c40d3818563ac44766cca4ceadd2080427d337554f9f99a13aaeb3b4a94d9964d9466c807b3d7b7541d1ec37ee32d308756 - languageName: node - linkType: hard - "smart-buffer@npm:^4.2.0": version: 4.2.0 resolution: "smart-buffer@npm:4.2.0" @@ -15948,17 +15780,6 @@ __metadata: languageName: node linkType: hard -"wrap-ansi@npm:^6.2.0": - version: 6.2.0 - resolution: "wrap-ansi@npm:6.2.0" - dependencies: - ansi-styles: ^4.0.0 - string-width: ^4.1.0 - strip-ansi: ^6.0.0 - checksum: 6cd96a410161ff617b63581a08376f0cb9162375adeb7956e10c8cd397821f7eb2a6de24eb22a0b28401300bf228c86e50617cd568209b5f6775b93c97d2fe3a - languageName: node - linkType: hard - "wrap-ansi@npm:^7.0.0": version: 7.0.0 resolution: "wrap-ansi@npm:7.0.0"