From a6b5f8c42b36420f1b8b4578380dd4481ba0789b Mon Sep 17 00:00:00 2001 From: Ramkumar Chinchani Date: Fri, 14 Jun 2024 05:06:59 +0000 Subject: [PATCH] feat(erofs): initial commit for erofs support Fixes https://github.com/opencontainers/image-spec/issues/1190 Signed-off-by: Ramkumar Chinchani --- pkg/erofs/README.md | 20 + pkg/erofs/compression.go | 2369 ++++++++++++++++++++++++ pkg/erofs/compression_test.go | 91 + pkg/erofs/defs.go | 99 + pkg/erofs/defs_test.go | 52 + pkg/erofs/erofs.go | 282 +++ pkg/erofs/erofs_test.go | 281 +++ pkg/erofs/inode_types.go | 174 ++ pkg/erofs/mediatype.go | 3 + pkg/erofs/uncompressed_inode_writer.go | 126 ++ 10 files changed, 3497 insertions(+) create mode 100644 pkg/erofs/README.md create mode 100644 pkg/erofs/compression.go create mode 100644 pkg/erofs/compression_test.go create mode 100644 pkg/erofs/defs.go create mode 100644 pkg/erofs/defs_test.go create mode 100644 pkg/erofs/erofs.go create mode 100644 pkg/erofs/erofs_test.go create mode 100644 pkg/erofs/inode_types.go create mode 100644 pkg/erofs/mediatype.go create mode 100644 pkg/erofs/uncompressed_inode_writer.go diff --git a/pkg/erofs/README.md b/pkg/erofs/README.md new file mode 100644 index 00000000..fb309122 --- /dev/null +++ b/pkg/erofs/README.md @@ -0,0 +1,20 @@ +Files in this package have been initially copied from The Monogon Monorepo [1]. +The copied portion is under these copyright and license terms. + +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +[1] https://github.com/monogon-dev/monogon/tree/main/metropolis/pkg/erofs diff --git a/pkg/erofs/compression.go b/pkg/erofs/compression.go new file mode 100644 index 00000000..dd8236ea --- /dev/null +++ b/pkg/erofs/compression.go @@ -0,0 +1,2369 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + monogon/metropolis/pkg/erofs/compression.go at main · monogon-dev/monogon · GitHub + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ Skip to content + + + + + + + + + + + + + + + + + + + + +
+
+ + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + +
+ + + + + + + + + +
+
+
+ + + + + + + + + + + + +
+ +
+ +
+ +
+ + + + / + + monogon + + + Public +
+ + +
+ +
+ + +
+
+ +
+
+ + + + +
+ + + + + + +
+ + + + + + + + + + + + + + + + +

Latest commit

 

History

History
59 lines (53 loc) · 2 KB

compression.go

File metadata and controls

59 lines (53 loc) · 2 KB
+
+ + + + +
+ +
+ +
+
+ +
+ +
+

Footer

+ + + + +
+
+ + + + + © 2024 GitHub, Inc. + +
+ + +
+
+ + + + + + + + + + + + + + + + + + + +
+ +
+
+ + + diff --git a/pkg/erofs/compression_test.go b/pkg/erofs/compression_test.go new file mode 100644 index 00000000..8d5d656e --- /dev/null +++ b/pkg/erofs/compression_test.go @@ -0,0 +1,91 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +import ( + "reflect" + "testing" +) + +func TestEncodeSmallVLEBlock(t *testing.T) { + type args struct { + vals [2]uint16 + blkaddr uint32 + } + tests := []struct { + name string + args args + want [8]byte + }{ + { + name: "Reference", + args: args{vals: [2]uint16{vleClusterTypeHead | 1527, vleClusterTypeNonhead | 1}, blkaddr: 1}, + want: [8]byte{0xf7, 0x15, 0x01, 0x20, 0x01, 0x00, 0x00, 0x00}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := encodeSmallVLEBlock(tt.args.vals, tt.args.blkaddr); !reflect.DeepEqual(got, tt.want) { + t.Errorf("encodeSmallVLEBlock() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestEncodeBigVLEBlock(t *testing.T) { + type args struct { + vals [16]uint16 + blkaddr uint32 + } + tests := []struct { + name string + args args + want [32]byte + }{ + { + name: "Reference", + args: args{ + vals: [16]uint16{ + vleClusterTypeNonhead | 2, + vleClusterTypeHead | 1460, + vleClusterTypeNonhead | 1, + vleClusterTypeNonhead | 2, + vleClusterTypeHead | 2751, + vleClusterTypeNonhead | 1, + vleClusterTypeNonhead | 2, + vleClusterTypeHead | 940, + vleClusterTypeNonhead | 1, + vleClusterTypeHead | 3142, + vleClusterTypeNonhead | 1, + vleClusterTypeNonhead | 2, + vleClusterTypeHead | 1750, + vleClusterTypeNonhead | 1, + vleClusterTypeNonhead | 2, + vleClusterTypeHead | 683, + }, + blkaddr: 3, + }, + want: [32]byte{0x02, 0x20, 0x6d, 0x15, 0x00, 0x0a, 0x80, 0xbf, 0x5a, 0x00, 0x28, 0x00, 0xb2, 0x4e, 0x01, 0xa0, 0x11, 0x17, 0x00, 0x0a, 0x80, 0xd6, 0x56, 0x00, 0x28, 0x00, 0xae, 0x4a, 0x03, 0x00, 0x00, 0x00}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := encodeBigVLEBlock(tt.args.vals, tt.args.blkaddr); !reflect.DeepEqual(got, tt.want) { + t.Errorf("encodeBigVLEBlock() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/erofs/defs.go b/pkg/erofs/defs.go new file mode 100644 index 00000000..85898bf6 --- /dev/null +++ b/pkg/erofs/defs.go @@ -0,0 +1,99 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +// This file contains definitions coming from the in-Kernel implementation of +// the EROFS filesystem. All definitions come from @linux//fs/erofs:erofs_fs.h +// unless stated otherwise. + +// Magic contains the 4 magic bytes starting at position 1024 identifying an +// EROFS filesystem. Defined in @linux//include/uapi/linux/magic.h +// EROFS_SUPER_MAGIC_V1 +var Magic = [4]byte{0xe2, 0xe1, 0xf5, 0xe0} + +const blockSizeBits = 12 +const BlockSize = 1 << blockSizeBits + +// Defined in @linux//include/linux:fs_types.h starting at FT_UNKNOWN +const ( + fileTypeUnknown = iota + fileTypeRegularFile + fileTypeDirectory + fileTypeCharacterDevice + fileTypeBlockDevice + fileTypeFIFO + fileTypeSocket + fileTypeSymbolicLink +) + +// Anonymous enum starting at EROFS_INODE_FLAT_PLAIN +const ( + inodeFlatPlain = 0 + inodeFlatCompressionLegacy = 1 + inodeFlatInline = 2 + inodeFlatCompression = 3 +) + +// struct erofs_dirent +type directoryEntryRaw struct { + NodeNumber uint64 + NameStartOffset uint16 + FileType uint8 + Reserved uint8 +} + +// struct erofs_super_block +type superblock struct { + Magic [4]byte + Checksum uint32 + FeatureCompat uint32 + BlockSizeBits uint8 + Reserved0 uint8 + RootNodeNumber uint16 + TotalInodes uint64 + BuildTimeSeconds uint64 + BuildTimeNanoseconds uint32 + Blocks uint32 + MetaStartAddr uint32 + SharedXattrStartAddr uint32 + UUID [16]byte + VolumeName [16]byte + FeaturesIncompatible uint32 + Reserved1 [44]byte +} + +// struct erofs_inode_compact +type inodeCompact struct { + Format uint16 + XattrCount uint16 + Mode uint16 + HardlinkCount uint16 + Size uint32 + Reserved0 uint32 + Union uint32 + InodeNumCompat uint32 + UID uint16 + GID uint16 + Reserved1 uint32 +} + +// Anonymous enum starting at Z_EROFS_VLE_CLUSTER_TYPE_PLAIN +const ( + vleClusterTypePlain = iota << 12 + vleClusterTypeHead + vleClusterTypeNonhead +) diff --git a/pkg/erofs/defs_test.go b/pkg/erofs/defs_test.go new file mode 100644 index 00000000..1d31bffe --- /dev/null +++ b/pkg/erofs/defs_test.go @@ -0,0 +1,52 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +import ( + "bytes" + "encoding/binary" + "testing" + + "github.com/stretchr/testify/assert" +) + +// These test that the specified structures serialize to the same number of +// bytes as the ones in the EROFS kernel module. + +func TestSuperblockSize(t *testing.T) { + var buf bytes.Buffer + if err := binary.Write(&buf, binary.LittleEndian, &superblock{}); err != nil { + t.Fatalf("failed to write superblock: %v", err) + } + assert.Equal(t, 128, buf.Len()) +} + +func TestDirectoryEntrySize(t *testing.T) { + var buf bytes.Buffer + if err := binary.Write(&buf, binary.LittleEndian, &directoryEntryRaw{}); err != nil { + t.Fatalf("failed to write directory entry: %v", err) + } + assert.Equal(t, 12, buf.Len()) +} + +func TestInodeCompactSize(t *testing.T) { + var buf bytes.Buffer + if err := binary.Write(&buf, binary.LittleEndian, &inodeCompact{}); err != nil { + t.Fatalf("failed to write compact inode: %v", err) + } + assert.Equal(t, 32, buf.Len()) +} diff --git a/pkg/erofs/erofs.go b/pkg/erofs/erofs.go new file mode 100644 index 00000000..ab871158 --- /dev/null +++ b/pkg/erofs/erofs.go @@ -0,0 +1,282 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "path" + + "golang.org/x/sys/unix" +) + +// Writer writes a new EROFS filesystem. +type Writer struct { + w io.WriteSeeker + // fixDirectoryEntry contains for each referenced path where it is + // referenced from. Since self-references are required anyways (for the "." + // and ".." entries) we let the user write files in any order and just + // point the directory entries to the right target nid and file type on + // Close(). + fixDirectoryEntry map[string][]direntFixupLocation + pathInodeMeta map[string]*uncompressedInodeMeta + // legacyInodeIndex stores the next legacy (32-bit) inode to be allocated. + // 64 bit inodes are automatically calculated by EROFS on mount. + legacyInodeIndex uint32 + blockAllocatorIndex uint32 + metadataBlocksFree metadataBlocksMeta +} + +// NewWriter creates a new EROFS filesystem writer. The given WriteSeeker needs +// to be at the start. +func NewWriter(w io.WriteSeeker) (*Writer, error) { + erofsWriter := &Writer{ + w: w, + fixDirectoryEntry: make(map[string][]direntFixupLocation), + pathInodeMeta: make(map[string]*uncompressedInodeMeta), + } + _, err := erofsWriter.allocateMetadata(1024+binary.Size(&superblock{}), 0) + if err != nil { + return nil, fmt.Errorf("cannot allocate first metadata block: %w", err) + } + if _, err := erofsWriter.w.Write(make([]byte, 1024)); err != nil { // Padding + return nil, fmt.Errorf("failed to write initial padding: %w", err) + } + if err := binary.Write(erofsWriter.w, binary.LittleEndian, &superblock{ + Magic: Magic, + BlockSizeBits: blockSizeBits, + // 1024 (padding) + 128 (superblock) / 32, not eligible for fixup as + // different int size + RootNodeNumber: 36, + }); err != nil { + return nil, fmt.Errorf("failed to write superblock: %w", err) + } + return erofsWriter, nil +} + +// allocateMetadata allocates metadata space of size bytes with a given +// alignment and seeks to the first byte of the newly-allocated metadata space. +// It also returns the position of that first byte. +func (w *Writer) allocateMetadata(size int, alignment uint16) (int64, error) { + if size > BlockSize { + panic("cannot allocate a metadata object bigger than BlockSize bytes") + } + sizeU16 := uint16(size) + pos, ok := w.metadataBlocksFree.findBlock(sizeU16, alignment) + if !ok { + blockNumber, err := w.allocateBlocks(1) + if err != nil { + return 0, fmt.Errorf("failed to allocate additional metadata space: %w", err) + } + w.metadataBlocksFree = append(w.metadataBlocksFree, metadataBlockMeta{blockNumber: blockNumber, freeBytes: BlockSize - sizeU16}) + if _, err := w.w.Write(make([]byte, BlockSize)); err != nil { + return 0, fmt.Errorf("failed to write metadata: %w", err) + } + pos = int64(blockNumber) * BlockSize // Always aligned to BlockSize, bigger alignments are unsupported anyways + } + if _, err := w.w.Seek(pos, io.SeekStart); err != nil { + return 0, fmt.Errorf("cannot seek to existing metadata nid, likely misaligned meta write") + } + return pos, nil +} + +// allocateBlocks allocates n new BlockSize-sized block and seeks to the +// beginning of the first newly-allocated block. It also returns the first +// newly-allocated block number. The caller is expected to write these blocks +// completely before calling allocateBlocks again. +func (w *Writer) allocateBlocks(n uint32) (uint32, error) { + if _, err := w.w.Seek(int64(w.blockAllocatorIndex)*BlockSize, io.SeekStart); err != nil { + return 0, fmt.Errorf("cannot seek to end of last block, check write alignment: %w", err) + } + firstBlock := w.blockAllocatorIndex + w.blockAllocatorIndex += n + return firstBlock, nil +} + +func (w *Writer) create(pathname string, inode Inode) *uncompressedInodeWriter { + i := &uncompressedInodeWriter{ + writer: w, + inode: *inode.inode(), + legacyInodeNumber: w.legacyInodeIndex, + pathname: path.Clean(pathname), + } + w.legacyInodeIndex++ + return i +} + +// CreateFile adds a new file to the EROFS. It returns a WriteCloser to which +// the file contents should be written and which then needs to be closed. The +// last writer obtained by calling CreateFile() needs to be closed first before +// opening a new one. The given pathname needs to be referenced by a directory +// created using Create(), otherwise it will not be accessible. +func (w *Writer) CreateFile(pathname string, meta *FileMeta) io.WriteCloser { + return w.create(pathname, meta) +} + +// Create adds a new non-file inode to the EROFS. This includes directories, +// device nodes, symlinks and FIFOs. The first call to Create() needs to be +// with pathname "." and a directory inode. The given pathname needs to be +// referenced by a directory, otherwise it will not be accessible (with the +// exception of the directory "."). +func (w *Writer) Create(pathname string, inode Inode) error { + iw := w.create(pathname, inode) + switch i := inode.(type) { + case *Directory: + if err := i.writeTo(iw); err != nil { + return fmt.Errorf("failed to write directory contents: %w", err) + } + case *SymbolicLink: + if err := i.writeTo(iw); err != nil { + return fmt.Errorf("failed to write symbolic link contents: %w", err) + } + } + return iw.Close() +} + +// Close finishes writing an EROFS filesystem. Errors by this function need to +// be handled as they indicate if the written filesystem is consistent (i.e. +// there are no directory entries pointing to nonexistent inodes). +func (w *Writer) Close() error { + for targetPath, entries := range w.fixDirectoryEntry { + for _, entry := range entries { + targetMeta, ok := w.pathInodeMeta[targetPath] + if !ok { + return fmt.Errorf("failed to link filesystem tree: dangling reference to %v", targetPath) + } + if err := direntFixup(w.pathInodeMeta[entry.path], int64(entry.entryIndex), targetMeta); err != nil { + return err + } + } + } + return nil +} + +// uncompressedInodeMeta tracks enough metadata about a written inode to be +// able to point dirents to it and to provide a WriteSeeker into the inode +// itself. +type uncompressedInodeMeta struct { + nid uint64 + ftype uint8 + + // Physical placement metdata + blockStart int64 + blockLength int64 + inlineStart int64 + inlineLength int64 + + writer *Writer + currentOffset int64 +} + +func (a *uncompressedInodeMeta) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekCurrent: + break + case io.SeekStart: + a.currentOffset = 0 + case io.SeekEnd: + a.currentOffset = a.blockLength + a.inlineLength + } + a.currentOffset += offset + return a.currentOffset, nil +} + +func (a *uncompressedInodeMeta) Write(p []byte) (int, error) { + if a.currentOffset < a.blockLength { + // TODO(lorenz): Handle the special case where a directory inode is + // spread across multiple blocks (depending on other factors this + // occurs around ~200 direct children). + return 0, errors.New("relocating dirents in multi-block directory inodes is unimplemented") + } + if _, err := a.writer.w.Seek(a.inlineStart+a.currentOffset, io.SeekStart); err != nil { + return 0, err + } + a.currentOffset += int64(len(p)) + return a.writer.w.Write(p) +} + +type direntFixupLocation struct { + path string + entryIndex uint16 +} + +// direntFixup overrides nid and file type from the path the dirent is pointing +// to. The given iw is expected to be at the start of the dirent inode to be +// fixed up. +func direntFixup(iw io.WriteSeeker, entryIndex int64, meta *uncompressedInodeMeta) error { + if _, err := iw.Seek(entryIndex*12, io.SeekStart); err != nil { + return fmt.Errorf("failed to seek to dirent: %w", err) + } + if err := binary.Write(iw, binary.LittleEndian, meta.nid); err != nil { + return fmt.Errorf("failed to write nid: %w", err) + } + if _, err := iw.Seek(2, io.SeekCurrent); err != nil { // Skip NameStartOffset + return fmt.Errorf("failed to seek to dirent: %w", err) + } + if err := binary.Write(iw, binary.LittleEndian, meta.ftype); err != nil { + return fmt.Errorf("failed to write ftype: %w", err) + } + return nil +} + +type metadataBlockMeta struct { + blockNumber uint32 + freeBytes uint16 +} + +// metadataBlocksMeta contains metadata about all metadata blocks, most +// importantly the amount of free bytes in each block. This is not a map for +// reproducibility (map ordering). +type metadataBlocksMeta []metadataBlockMeta + +// findBlock returns the absolute position where `size` bytes with the +// specified alignment can still fit. If there is not enough space in any +// metadata block it returns false as the second return value. +func (m metadataBlocksMeta) findBlock(size uint16, alignment uint16) (int64, bool) { + for i, blockMeta := range m { + freeBytesAligned := blockMeta.freeBytes + if alignment > 0 { + freeBytesAligned = blockMeta.freeBytes - (blockMeta.freeBytes % alignment) + } + if freeBytesAligned > size { + m[i] = metadataBlockMeta{ + blockNumber: blockMeta.blockNumber, + freeBytes: freeBytesAligned - size, + } + pos := int64(blockMeta.blockNumber+1)*BlockSize - int64(freeBytesAligned) + return pos, true + } + } + return 0, false +} + +var unixModeToFTMap = map[uint16]uint8{ + unix.S_IFREG: fileTypeRegularFile, + unix.S_IFDIR: fileTypeDirectory, + unix.S_IFCHR: fileTypeCharacterDevice, + unix.S_IFBLK: fileTypeBlockDevice, + unix.S_IFIFO: fileTypeFIFO, + unix.S_IFSOCK: fileTypeSocket, + unix.S_IFLNK: fileTypeSymbolicLink, +} + +// unixModeToFT maps a Unix file type to an EROFS file type. +func unixModeToFT(mode uint16) uint8 { + return unixModeToFTMap[mode&unix.S_IFMT] +} diff --git a/pkg/erofs/erofs_test.go b/pkg/erofs/erofs_test.go new file mode 100644 index 00000000..fa5f2481 --- /dev/null +++ b/pkg/erofs/erofs_test.go @@ -0,0 +1,281 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +import ( + "io" + "log" + "math/rand" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" +) + +func TestKernelInterop(t *testing.T) { + if os.Getenv("IN_KTEST") != "true" { + t.Skip("Not in ktest") + } + + type testCase struct { + name string + setup func(w *Writer) error + validate func(t *testing.T) error + } + + tests := []testCase{ + { + name: "SimpleFolder", + setup: func(w *Writer) error { + return w.Create(".", &Directory{ + Base: Base{GID: 123, UID: 124, Permissions: 0753}, + Children: []string{}, + }) + }, + validate: func(t *testing.T) error { + var stat unix.Stat_t + if err := unix.Stat("/test", &stat); err != nil { + t.Errorf("failed to stat output: %v", err) + } + require.EqualValues(t, 124, stat.Uid, "wrong Uid") + require.EqualValues(t, 123, stat.Gid, "wrong Gid") + require.EqualValues(t, 0753, stat.Mode&^unix.S_IFMT, "wrong mode") + return nil + }, + }, + { + name: "FolderHierarchy", + setup: func(w *Writer) error { + if err := w.Create(".", &Directory{ + Base: Base{GID: 123, UID: 124, Permissions: 0753}, + Children: []string{"subdir"}, + }); err != nil { + return err + } + if err := w.Create("subdir", &Directory{ + Base: Base{GID: 123, UID: 124, Permissions: 0753}, + Children: []string{}, + }); err != nil { + return err + } + return nil + }, + validate: func(t *testing.T) error { + dirInfo, err := os.ReadDir("/test") + if err != nil { + t.Fatalf("Failed to read top-level directory: %v", err) + } + require.Len(t, dirInfo, 1, "more subdirs than expected") + require.Equal(t, "subdir", dirInfo[0].Name(), "unexpected subdir") + require.True(t, dirInfo[0].IsDir(), "subdir not a directory") + subdirInfo, err := os.ReadDir("/test/subdir") + assert.NoError(t, err, "cannot read empty subdir") + require.Len(t, subdirInfo, 0, "unexpected subdirs in empty directory") + return nil + }, + }, + { + name: "SmallFile", + setup: func(w *Writer) error { + if err := w.Create(".", &Directory{ + Base: Base{GID: 123, UID: 123, Permissions: 0755}, + Children: []string{"test.bin"}, + }); err != nil { + return err + } + writer := w.CreateFile("test.bin", &FileMeta{ + Base: Base{GID: 123, UID: 124, Permissions: 0644}, + }) + r := rand.New(rand.NewSource(0)) // Random but deterministic data + if _, err := io.CopyN(writer, r, 128); err != nil { + return err + } + if err := writer.Close(); err != nil { + return err + } + return nil + }, + validate: func(t *testing.T) error { + var stat unix.Stat_t + err := unix.Stat("/test/test.bin", &stat) + assert.NoError(t, err, "failed to stat file") + require.EqualValues(t, 124, stat.Uid, "wrong Uid") + require.EqualValues(t, 123, stat.Gid, "wrong Gid") + require.EqualValues(t, 0644, stat.Mode&^unix.S_IFMT, "wrong mode") + file, err := os.Open("/test/test.bin") + assert.NoError(t, err, "failed to open test file") + defer file.Close() + r := io.LimitReader(rand.New(rand.NewSource(0)), 128) // Random but deterministic data + expected, _ := io.ReadAll(r) + actual, err := io.ReadAll(file) + assert.NoError(t, err, "failed to read test file") + assert.Equal(t, expected, actual, "content not identical") + return nil + }, + }, + { + name: "Chardev", + setup: func(w *Writer) error { + if err := w.Create(".", &Directory{ + Base: Base{GID: 123, UID: 123, Permissions: 0755}, + Children: []string{"ttyS0"}, + }); err != nil { + return err + } + err := w.Create("ttyS0", &CharacterDevice{ + Base: Base{GID: 0, UID: 0, Permissions: 0600}, + Major: 4, + Minor: 64, + }) + if err != nil { + return err + } + return nil + }, + validate: func(t *testing.T) error { + var stat unix.Statx_t + err := unix.Statx(0, "/test/ttyS0", 0, unix.STATX_ALL, &stat) + assert.NoError(t, err, "failed to statx file") + require.EqualValues(t, 0, stat.Uid, "wrong Uid") + require.EqualValues(t, 0, stat.Gid, "wrong Gid") + require.EqualValues(t, 0600, stat.Mode&^unix.S_IFMT, "wrong mode") + require.EqualValues(t, unix.S_IFCHR, stat.Mode&unix.S_IFMT, "wrong file type") + require.EqualValues(t, 4, stat.Rdev_major, "wrong dev major") + require.EqualValues(t, 64, stat.Rdev_minor, "wrong dev minor") + return nil + }, + }, + { + name: "LargeFile", + setup: func(w *Writer) error { + if err := w.Create(".", &Directory{ + Base: Base{GID: 123, UID: 123, Permissions: 0755}, + Children: []string{"test.bin"}, + }); err != nil { + return err + } + writer := w.CreateFile("test.bin", &FileMeta{ + Base: Base{GID: 123, UID: 124, Permissions: 0644}, + }) + r := rand.New(rand.NewSource(1)) // Random but deterministic data + if _, err := io.CopyN(writer, r, 6500); err != nil { + return err + } + if err := writer.Close(); err != nil { + return err + } + return nil + }, + validate: func(t *testing.T) error { + var stat unix.Stat_t + rawContents, err := os.ReadFile("/dev/ram0") + assert.NoError(t, err, "failed to read test data") + log.Printf("%x", rawContents) + err = unix.Stat("/test/test.bin", &stat) + assert.NoError(t, err, "failed to stat file") + require.EqualValues(t, 124, stat.Uid, "wrong Uid") + require.EqualValues(t, 123, stat.Gid, "wrong Gid") + require.EqualValues(t, 0644, stat.Mode&^unix.S_IFMT, "wrong mode") + require.EqualValues(t, 6500, stat.Size, "wrong size") + file, err := os.Open("/test/test.bin") + assert.NoError(t, err, "failed to open test file") + defer file.Close() + r := io.LimitReader(rand.New(rand.NewSource(1)), 6500) // Random but deterministic data + expected, _ := io.ReadAll(r) + actual, err := io.ReadAll(file) + assert.NoError(t, err, "failed to read test file") + assert.Equal(t, expected, actual, "content not identical") + return nil + }, + }, + { + name: "MultipleMetaBlocks", + setup: func(w *Writer) error { + testFileNames := []string{"test1.bin", "test2.bin", "test3.bin"} + if err := w.Create(".", &Directory{ + Base: Base{GID: 123, UID: 123, Permissions: 0755}, + Children: testFileNames, + }); err != nil { + return err + } + for i, fileName := range testFileNames { + writer := w.CreateFile(fileName, &FileMeta{ + Base: Base{GID: 123, UID: 124, Permissions: 0644}, + }) + r := rand.New(rand.NewSource(int64(i))) // Random but deterministic data + if _, err := io.CopyN(writer, r, 2053); err != nil { + return err + } + if err := writer.Close(); err != nil { + return err + } + } + return nil + }, + validate: func(t *testing.T) error { + testFileNames := []string{"test1.bin", "test2.bin", "test3.bin"} + for i, fileName := range testFileNames { + file, err := os.Open("/test/" + fileName) + assert.NoError(t, err, "failed to open test file") + defer file.Close() + r := io.LimitReader(rand.New(rand.NewSource(int64(i))), 2053) // Random but deterministic data + expected, _ := io.ReadAll(r) + actual, err := io.ReadAll(file) + assert.NoError(t, err, "failed to read test file") + require.Equal(t, expected, actual, "content not identical") + } + return nil + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + file, err := os.OpenFile("/dev/ram0", os.O_WRONLY, 0644) + if err != nil { + t.Fatalf("failed to create test image: %v", err) + } + defer file.Close() + w, err := NewWriter(file) + if err != nil { + t.Fatalf("failed to initialize EROFS writer: %v", err) + } + if err := test.setup(w); err != nil { + t.Fatalf("setup failed: %v", err) + } + if err := w.Close(); err != nil { + t.Errorf("failed close: %v", err) + } + _ = file.Close() + if err := os.MkdirAll("/test", 0755); err != nil { + t.Error(err) + } + if err := unix.Mount("/dev/ram0", "/test", "erofs", unix.MS_NOEXEC|unix.MS_NODEV, ""); err != nil { + t.Fatal(err) + } + if err := test.validate(t); err != nil { + t.Errorf("validation failure: %v", err) + } + if err := unix.Unmount("/test", 0); err != nil { + t.Fatalf("failed to unmount: %v", err) + } + }) + + } +} diff --git a/pkg/erofs/inode_types.go b/pkg/erofs/inode_types.go new file mode 100644 index 00000000..8147892e --- /dev/null +++ b/pkg/erofs/inode_types.go @@ -0,0 +1,174 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "path" + "sort" + + "golang.org/x/sys/unix" +) + +// Inode specifies an interface that all inodes that can be written to an EROFS +// filesystem implement. +type Inode interface { + inode() *inodeCompact +} + +// Base contains generic inode metadata independent from the specific inode +// type. +type Base struct { + Permissions uint16 + UID, GID uint16 +} + +func (b *Base) baseInode(fileType uint16) *inodeCompact { + return &inodeCompact{ + UID: b.UID, + GID: b.GID, + Mode: b.Permissions | fileType, + } +} + +// Directory represents a directory inode. The Children property contains the +// directories' direct children (just the name, not the full path). +type Directory struct { + Base + Children []string +} + +func (d *Directory) inode() *inodeCompact { + return d.baseInode(unix.S_IFDIR) +} + +func (d *Directory) writeTo(w *uncompressedInodeWriter) error { + // children is d.Children with appended backrefs (. and ..), copied to not + // pollute source + children := make([]string, len(d.Children)) + copy(children, d.Children) + children = append(children, ".", "..") + sort.Strings(children) + + nameStartOffset := binary.Size(directoryEntryRaw{}) * len(children) + var rawEntries []directoryEntryRaw + for _, ent := range children { + if nameStartOffset > math.MaxUint16 { + return errors.New("directory name offset out of range, too many or too big entries") + } + var entData directoryEntryRaw + entData.NameStartOffset = uint16(nameStartOffset) + rawEntries = append(rawEntries, entData) + nameStartOffset += len(ent) + } + for i, ent := range rawEntries { + targetPath := path.Join(w.pathname, children[i]) + if targetPath == ".." { + targetPath = "." + } + w.writer.fixDirectoryEntry[targetPath] = append(w.writer.fixDirectoryEntry[targetPath], direntFixupLocation{ + path: w.pathname, + entryIndex: uint16(i), + }) + if err := binary.Write(w, binary.LittleEndian, ent); err != nil { + return fmt.Errorf("failed to write dirent: %w", err) + } + } + for _, childName := range children { + if _, err := w.Write([]byte(childName)); err != nil { + return fmt.Errorf("failed to write dirent name: %w", err) + } + } + return nil +} + +// CharacterDevice represents a Unix character device inode with major and +// minor numbers. +type CharacterDevice struct { + Base + Major uint32 + Minor uint32 +} + +func (c *CharacterDevice) inode() *inodeCompact { + i := c.baseInode(unix.S_IFCHR) + i.Union = uint32(unix.Mkdev(c.Major, c.Minor)) + return i +} + +// BlockDevice represents a Unix block device inode with major and minor +// numbers. +type BlockDevice struct { + Base + Major uint32 + Minor uint32 +} + +func (b *BlockDevice) inode() *inodeCompact { + i := b.baseInode(unix.S_IFBLK) + i.Union = uint32(unix.Mkdev(b.Major, b.Minor)) + return i +} + +// FIFO represents a Unix FIFO inode. +type FIFO struct { + Base +} + +func (f *FIFO) inode() *inodeCompact { + return f.baseInode(unix.S_IFIFO) +} + +// Socket represents a Unix socket inode. +type Socket struct { + Base +} + +func (s *Socket) inode() *inodeCompact { + return s.baseInode(unix.S_IFSOCK) +} + +// SymbolicLink represents a symbolic link/symlink to another inode. Target is +// the literal string target of the symlink. +type SymbolicLink struct { + Base + Target string +} + +func (s *SymbolicLink) inode() *inodeCompact { + return s.baseInode(unix.S_IFLNK) +} + +func (s *SymbolicLink) writeTo(w io.Writer) error { + _, err := w.Write([]byte(s.Target)) + return err +} + +// FileMeta represents the metadata of a regular file. In this case the +// contents are written to a Writer returned by the CreateFile function on the +// EROFS Writer and not included in the structure itself. +type FileMeta struct { + Base +} + +func (f *FileMeta) inode() *inodeCompact { + return f.baseInode(unix.S_IFREG) +} diff --git a/pkg/erofs/mediatype.go b/pkg/erofs/mediatype.go new file mode 100644 index 00000000..c601c25f --- /dev/null +++ b/pkg/erofs/mediatype.go @@ -0,0 +1,3 @@ +package erofs + +const BaseMediaTypeLayerErofs = "application/vnd.erofs" diff --git a/pkg/erofs/uncompressed_inode_writer.go b/pkg/erofs/uncompressed_inode_writer.go new file mode 100644 index 00000000..97aefc07 --- /dev/null +++ b/pkg/erofs/uncompressed_inode_writer.go @@ -0,0 +1,126 @@ +// Copyright 2020 The Monogon Project Authors. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package erofs + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "math" +) + +// uncompressedInodeWriter exposes a io.Write-style interface for a single +// uncompressed inode. It splits the Write-calls into blocks and writes both +// the blocks and inode metadata. It is required to call Close() to ensure +// everything is properly written down before writing another inode. +type uncompressedInodeWriter struct { + buf bytes.Buffer + writer *Writer + inode inodeCompact + baseBlock uint32 // baseBlock == 0 implies this inode didn't allocate a block (yet). + writtenBytes int + legacyInodeNumber uint32 + pathname string +} + +func (i *uncompressedInodeWriter) allocateBlock() error { + bb, err := i.writer.allocateBlocks(1) + if err != nil { + return err + } + if i.baseBlock == 0 { + i.baseBlock = bb + } + return nil +} + +func (i *uncompressedInodeWriter) flush(n int) error { + if err := i.allocateBlock(); err != nil { + return err + } + slice := i.buf.Next(n) + if _, err := i.writer.w.Write(slice); err != nil { + return err + } + // Always pad to BlockSize. + _, err := i.writer.w.Write(make([]byte, BlockSize-len(slice))) + return err +} + +func (i *uncompressedInodeWriter) Write(b []byte) (int, error) { + i.writtenBytes += len(b) + if _, err := i.buf.Write(b); err != nil { + return 0, err + } + for i.buf.Len() >= BlockSize { + if err := i.flush(BlockSize); err != nil { + return 0, err + } + } + return len(b), nil +} + +func (i *uncompressedInodeWriter) Close() error { + if i.buf.Len() > BlockSize { + panic("programming error") + } + inodeSize := binary.Size(i.inode) + if i.buf.Len()+inodeSize > BlockSize { + // Can't fit last part of data inline, write it in its own block. + if err := i.flush(i.buf.Len()); err != nil { + return err + } + } + if i.buf.Len() == 0 { + i.inode.Format = inodeFlatPlain << 1 + } else { + // Colocate last part of data with inode. + i.inode.Format = inodeFlatInline << 1 + } + if i.writtenBytes > math.MaxUint32 { + return errors.New("inodes bigger than 2^32 need the extended inode format which is unsupported by this library") + } + i.inode.Size = uint32(i.writtenBytes) + if i.baseBlock != 0 { + i.inode.Union = i.baseBlock + } + i.inode.HardlinkCount = 1 + i.inode.InodeNumCompat = i.legacyInodeNumber + basePos, err := i.writer.allocateMetadata(inodeSize+i.buf.Len(), 32) + if err != nil { + return fmt.Errorf("failed to allocate metadata: %w", err) + } + i.writer.pathInodeMeta[i.pathname] = &uncompressedInodeMeta{ + nid: uint64(basePos) / 32, + ftype: unixModeToFT(i.inode.Mode), + blockStart: int64(i.baseBlock), + blockLength: (int64(i.writtenBytes) / BlockSize) * BlockSize, + inlineStart: basePos + 32, + inlineLength: int64(i.buf.Len()), + writer: i.writer, + } + if err := binary.Write(i.writer.w, binary.LittleEndian, &i.inode); err != nil { + return err + } + if i.inode.Format&(inodeFlatInline<<1) != 0 { + // Data colocated in inode, if any. + _, err := i.writer.w.Write(i.buf.Bytes()) + return err + } + return nil +}