From b607d23e6b629e9174cb910ee2539ba961aff748 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Sat, 8 Jun 2024 17:51:03 +0530 Subject: [PATCH] Add APFS driver --- 8001-Add-APFS-driver.patch | 29279 ++++++++++++++++ ...cations-to-build-APFS-with-the-kerne.patch | 92 + extra_config | 1 + 3 files changed, 29372 insertions(+) create mode 100644 8001-Add-APFS-driver.patch create mode 100644 8002-Necessary-modifications-to-build-APFS-with-the-kerne.patch diff --git a/8001-Add-APFS-driver.patch b/8001-Add-APFS-driver.patch new file mode 100644 index 0000000..7090a4b --- /dev/null +++ b/8001-Add-APFS-driver.patch @@ -0,0 +1,29279 @@ +From 5832ef25589bd06d9625f3e5c000961007a7eada Mon Sep 17 00:00:00 2001 +From: Aditya Garg +Date: Sat, 8 Jun 2024 17:37:54 +0530 +Subject: [PATCH] Add APFS driver + +--- + fs/apfs/Makefile | 23 + + fs/apfs/apfs.h | 1188 ++++++++++ + fs/apfs/apfs_raw.h | 1562 +++++++++++++ + fs/apfs/btree.c | 1174 ++++++++++ + fs/apfs/compress.c | 474 ++++ + fs/apfs/dir.c | 1544 +++++++++++++ + fs/apfs/extents.c | 2392 ++++++++++++++++++++ + fs/apfs/file.c | 220 ++ + fs/apfs/inode.c | 2569 ++++++++++++++++++++++ + fs/apfs/key.c | 334 +++ + fs/apfs/libzbitmap.c | 444 ++++ + fs/apfs/libzbitmap.h | 31 + + fs/apfs/lzfse/lzfse.h | 136 ++ + fs/apfs/lzfse/lzfse_decode.c | 74 + + fs/apfs/lzfse/lzfse_decode_base.c | 652 ++++++ + fs/apfs/lzfse/lzfse_encode.c | 163 ++ + fs/apfs/lzfse/lzfse_encode_base.c | 826 +++++++ + fs/apfs/lzfse/lzfse_encode_tables.h | 218 ++ + fs/apfs/lzfse/lzfse_fse.c | 217 ++ + fs/apfs/lzfse/lzfse_fse.h | 606 +++++ + fs/apfs/lzfse/lzfse_internal.h | 612 ++++++ + fs/apfs/lzfse/lzfse_main.c | 336 +++ + fs/apfs/lzfse/lzfse_tunables.h | 60 + + fs/apfs/lzfse/lzvn_decode_base.c | 721 ++++++ + fs/apfs/lzfse/lzvn_decode_base.h | 68 + + fs/apfs/lzfse/lzvn_encode_base.c | 593 +++++ + fs/apfs/lzfse/lzvn_encode_base.h | 116 + + fs/apfs/message.c | 29 + + fs/apfs/namei.c | 142 ++ + fs/apfs/node.c | 2069 ++++++++++++++++++ + fs/apfs/object.c | 315 +++ + 
fs/apfs/snapshot.c | 612 ++++++ + fs/apfs/spaceman.c | 1305 +++++++++++ + fs/apfs/super.c | 1788 +++++++++++++++ + fs/apfs/symlink.c | 80 + + fs/apfs/transaction.c | 978 +++++++++ + fs/apfs/unicode.c | 3157 +++++++++++++++++++++++++++ + fs/apfs/unicode.h | 27 + + fs/apfs/xattr.c | 922 ++++++++ + fs/apfs/xfield.c | 171 ++ + 40 files changed, 28948 insertions(+) + create mode 100644 fs/apfs/Makefile + create mode 100644 fs/apfs/apfs.h + create mode 100644 fs/apfs/apfs_raw.h + create mode 100644 fs/apfs/btree.c + create mode 100644 fs/apfs/compress.c + create mode 100644 fs/apfs/dir.c + create mode 100644 fs/apfs/extents.c + create mode 100644 fs/apfs/file.c + create mode 100644 fs/apfs/inode.c + create mode 100644 fs/apfs/key.c + create mode 100644 fs/apfs/libzbitmap.c + create mode 100644 fs/apfs/libzbitmap.h + create mode 100644 fs/apfs/lzfse/lzfse.h + create mode 100644 fs/apfs/lzfse/lzfse_decode.c + create mode 100644 fs/apfs/lzfse/lzfse_decode_base.c + create mode 100644 fs/apfs/lzfse/lzfse_encode.c + create mode 100644 fs/apfs/lzfse/lzfse_encode_base.c + create mode 100644 fs/apfs/lzfse/lzfse_encode_tables.h + create mode 100644 fs/apfs/lzfse/lzfse_fse.c + create mode 100644 fs/apfs/lzfse/lzfse_fse.h + create mode 100644 fs/apfs/lzfse/lzfse_internal.h + create mode 100644 fs/apfs/lzfse/lzfse_main.c + create mode 100644 fs/apfs/lzfse/lzfse_tunables.h + create mode 100644 fs/apfs/lzfse/lzvn_decode_base.c + create mode 100644 fs/apfs/lzfse/lzvn_decode_base.h + create mode 100644 fs/apfs/lzfse/lzvn_encode_base.c + create mode 100644 fs/apfs/lzfse/lzvn_encode_base.h + create mode 100644 fs/apfs/message.c + create mode 100644 fs/apfs/namei.c + create mode 100644 fs/apfs/node.c + create mode 100644 fs/apfs/object.c + create mode 100644 fs/apfs/snapshot.c + create mode 100644 fs/apfs/spaceman.c + create mode 100644 fs/apfs/super.c + create mode 100644 fs/apfs/symlink.c + create mode 100644 fs/apfs/transaction.c + create mode 100644 fs/apfs/unicode.c + create mode 
100644 fs/apfs/unicode.h + create mode 100644 fs/apfs/xattr.c + create mode 100644 fs/apfs/xfield.c + +diff --git a/fs/apfs/Makefile b/fs/apfs/Makefile +new file mode 100644 +index 000000000..ab4c49d55 +--- /dev/null ++++ b/fs/apfs/Makefile +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++# ++# Makefile for the out-of-tree Linux APFS module. ++# ++ ++KERNELRELEASE ?= $(shell uname -r) ++KERNEL_DIR ?= /lib/modules/$(KERNELRELEASE)/build ++PWD := $(shell pwd) ++ ++obj-m = apfs.o ++apfs-y := btree.o compress.o dir.o extents.o file.o inode.o key.o libzbitmap.o \ ++ lzfse/lzfse_decode.o lzfse/lzfse_decode_base.o lzfse/lzfse_fse.o \ ++ lzfse/lzvn_decode_base.o message.o namei.o node.o object.o snapshot.o \ ++ spaceman.o super.o symlink.o transaction.o unicode.o xattr.o xfield.o ++ ++default: ++ ./genver.sh ++ make -C $(KERNEL_DIR) M=$(PWD) ++install: ++ make -C $(KERNEL_DIR) M=$(PWD) modules_install ++clean: ++ rm -f version.h ++ make -C $(KERNEL_DIR) M=$(PWD) clean +diff --git a/fs/apfs/apfs.h b/fs/apfs/apfs.h +new file mode 100644 +index 000000000..b6d135c63 +--- /dev/null ++++ b/fs/apfs/apfs.h +@@ -0,0 +1,1188 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2018 Ernesto A. 
Fernández ++ */ ++ ++#ifndef _APFS_H ++#define _APFS_H ++ ++#include ++#include ++#include ++#include ++#include ++#include "apfs_raw.h" ++ ++#define EFSBADCRC EBADMSG /* Bad CRC detected */ ++#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0) /* SB_RDONLY came in 4.14 */ ++#define SB_RDONLY MS_RDONLY ++#define SB_SILENT MS_SILENT ++#define SB_NOSEC MS_NOSEC ++#define SB_ACTIVE MS_ACTIVE ++static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) ++#define lockdep_assert_held_write(l) ((void)(l)) ++#endif ++ ++/* Compatibility wrapper around submit_bh() */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 0, 0) ++#define apfs_submit_bh(op, op_flags, bh) submit_bh(op, op_flags, bh) ++#else ++#define apfs_submit_bh(op, op_flags, bh) submit_bh(op | op_flags, bh) ++#endif ++ ++/* ++ * Parameter for the snapshot creation ioctl ++ */ ++struct apfs_ioctl_snap_name { ++ char name[APFS_SNAP_MAX_NAMELEN + 1]; ++}; ++ ++#define APFS_IOC_SET_DFLT_PFK _IOW('@', 0x80, struct apfs_wrapped_crypto_state) ++#define APFS_IOC_SET_DIR_CLASS _IOW('@', 0x81, u32) ++#define APFS_IOC_SET_PFK _IOW('@', 0x82, struct apfs_wrapped_crypto_state) ++#define APFS_IOC_GET_CLASS _IOR('@', 0x83, u32) ++#define APFS_IOC_GET_PFK _IOR('@', 0x84, struct apfs_wrapped_crypto_state) ++#define APFS_IOC_TAKE_SNAPSHOT _IOW('@', 0x85, struct apfs_ioctl_snap_name) ++ ++/* ++ * In-memory representation of an APFS object ++ */ ++struct apfs_object { ++ struct super_block *sb; ++ u64 block_nr; ++ u64 oid; ++ ++ /* ++ * Buffer head containing the one block of the object, may be NULL if ++ * the object is only in memory. TODO: support objects with more than ++ * one block. ++ */ ++ struct buffer_head *o_bh; ++ char *data; /* The raw object */ ++ bool ephemeral; /* Is this an ephemeral object? 
*/ ++}; ++ ++/* Constants used in managing the size of a node's table of contents */ ++#define APFS_BTREE_TOC_ENTRY_INCREMENT 8 ++#define APFS_BTREE_TOC_ENTRY_MAX_UNUSED (2 * BTREE_TOC_ENTRY_INCREMENT) ++ ++/* ++ * In-memory representation of an APFS node ++ */ ++struct apfs_node { ++ u32 tree_type; /* Tree type (subtype of the node object) */ ++ u16 flags; /* Node flags */ ++ u32 records; /* Number of records in the node */ ++ ++ int key; /* Offset of the key area in the block */ ++ int free; /* Offset of the free area in the block */ ++ int data; /* Offset of the data area in the block */ ++ ++ int key_free_list_len; /* Length of the fragmented free key space */ ++ int val_free_list_len; /* Length of the fragmented free value space */ ++ ++ struct apfs_object object; /* Object holding the node */ ++}; ++ ++/** ++ * apfs_node_is_leaf - Check if a b-tree node is a leaf ++ * @node: the node to check ++ */ ++static inline bool apfs_node_is_leaf(struct apfs_node *node) ++{ ++ return (node->flags & APFS_BTNODE_LEAF) != 0; ++} ++ ++/** ++ * apfs_node_is_root - Check if a b-tree node is the root ++ * @node: the node to check ++ */ ++static inline bool apfs_node_is_root(struct apfs_node *node) ++{ ++ return (node->flags & APFS_BTNODE_ROOT) != 0; ++} ++ ++/** ++ * apfs_node_has_fixed_kv_size - Check if a b-tree node has fixed key/value ++ * sizes ++ * @node: the node to check ++ */ ++static inline bool apfs_node_has_fixed_kv_size(struct apfs_node *node) ++{ ++ return (node->flags & APFS_BTNODE_FIXED_KV_SIZE) != 0; ++} ++ ++/* ++ * Space manager data in memory. 
++ */ ++struct apfs_spaceman { ++ struct apfs_spaceman_phys *sm_raw; /* On-disk spaceman structure */ ++ struct apfs_nxsb_info *sm_nxi; /* Container superblock */ ++ u32 sm_size; /* Size of @sm_raw in bytes */ ++ ++ u32 sm_blocks_per_chunk; /* Blocks covered by a bitmap block */ ++ u32 sm_chunks_per_cib; /* Chunk count in a chunk-info block */ ++ u64 sm_block_count; /* Block count for the container */ ++ u64 sm_chunk_count; /* Number of bitmap blocks */ ++ u32 sm_cib_count; /* Number of chunk-info blocks */ ++ u64 sm_free_count; /* Number of free blocks */ ++ u32 sm_addr_offset; /* Offset of cib addresses in @sm_raw */ ++ ++ /* ++ * A range of freed blocks not yet put in the free queue. Extend this as ++ * much as possible before creating an actual record. ++ */ ++ u64 sm_free_cache_base; ++ u64 sm_free_cache_blkcnt; ++ ++ /* Shift to match an ip block with its bitmap in the array */ ++ int sm_ip_bmaps_shift; ++ /* Mask to find an ip block's offset inside its ip bitmap */ ++ u32 sm_ip_bmaps_mask; ++ /* Number of ip bitmaps */ ++ u32 sm_ip_bmaps_count; ++ /* List of ip bitmaps, in order */ ++ struct buffer_head *sm_ip_bmaps[]; ++}; ++ ++#define TRANSACTION_MAIN_QUEUE_MAX 4096 ++#define TRANSACTION_BUFFERS_MAX 65536 ++#define TRANSACTION_STARTS_MAX 65536 ++ ++/* Possible states for the container transaction structure */ ++#define APFS_NX_TRANS_FORCE_COMMIT 1 /* Commit guaranteed */ ++#define APFS_NX_TRANS_DEFER_COMMIT 2 /* Commit banned right now */ ++#define APFS_NX_TRANS_COMMITTING 4 /* Commit ongoing */ ++#define APFS_NX_TRANS_INCOMPLETE_BLOCK 8 /* A data block is not written in full */ ++ ++/* ++ * Structure that keeps track of a container transaction. 
++ */ ++struct apfs_nx_transaction { ++ unsigned int t_state; ++ ++ struct list_head t_inodes; /* List of inodes in the transaction */ ++ struct list_head t_buffers; /* List of buffers in the transaction */ ++ size_t t_buffers_count; /* Count of items on the list */ ++ int t_starts_count; /* Count of starts for transaction */ ++}; ++ ++/* ++ * Structure that keeps track of a volume transaction. ++ */ ++struct apfs_vol_transaction { ++ struct buffer_head *t_old_vsb; /* Volume superblock being replaced */ ++ ++ struct apfs_node t_old_omap_root; /* Omap root node being replaced */ ++ struct apfs_node t_old_cat_root; /* Catalog root node being replaced */ ++}; ++ ++/* State bits for buffer heads in a transaction */ ++#define BH_TRANS BH_PrivateStart /* Attached to a transaction */ ++#define BH_CSUM (BH_PrivateStart + 1) /* Requires checksum update */ ++BUFFER_FNS(TRANS, trans); ++BUFFER_FNS(CSUM, csum); ++ ++/* ++ * Additional information for a buffer in a transaction. ++ */ ++struct apfs_bh_info { ++ struct buffer_head *bh; /* The buffer head */ ++ struct list_head list; /* List of buffers in the transaction */ ++}; ++ ++/* ++ * Used to report how many operations may be needed for a transaction ++ */ ++struct apfs_max_ops { ++ int cat; /* Maximum catalog records that may need changing */ ++ int blks; /* Maximum extent blocks that may need changing */ ++}; ++ ++/* ++ * List entry for an in-memory ephemeral object ++ */ ++struct apfs_ephemeral_object_info { ++ u64 oid; /* Ephemeral object id */ ++ u32 size; /* Size of the object in bytes */ ++ void *object; /* In-memory address of the object */ ++}; ++ ++/* ++ * We allocate a fixed space for the list of ephemeral objects. I don't ++ * actually know how big this should be allowed to get, but all the objects ++ * must be written down with each transaction commit, so probably not too big. 
++ */ ++#define APFS_EPHEMERAL_LIST_SIZE 32768 ++#define APFS_EPHEMERAL_LIST_LIMIT (APFS_EPHEMERAL_LIST_SIZE / sizeof(struct apfs_ephemeral_object_info)) ++ ++/* Mount option flags for a container */ ++#define APFS_CHECK_NODES 1 ++#define APFS_READWRITE 2 ++ ++/* ++ * Container superblock data in memory ++ */ ++struct apfs_nxsb_info { ++ struct block_device *nx_bdev; /* Device for the container */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 9, 0) ++ struct file *nx_bdev_file; ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++ struct bdev_handle *nx_bdev_handle; ++#endif ++ struct apfs_nx_superblock *nx_raw; /* On-disk main sb */ ++ u64 nx_bno; /* Current block number for the checkpoint superblock */ ++ u64 nx_xid; /* Latest transaction id */ ++ ++ /* List of ephemeral objects in memory (except the superblock) */ ++ struct apfs_ephemeral_object_info *nx_eph_list; ++ int nx_eph_count; ++ ++ struct list_head vol_list; /* List of mounted volumes in container */ ++ ++ unsigned int nx_flags; /* Mount options shared by all volumes */ ++ unsigned int nx_refcnt; /* Number of mounted volumes in container */ ++ ++ /* TODO: handle block sizes above the maximum of PAGE_SIZE? */ ++ unsigned long nx_blocksize; ++ unsigned char nx_blocksize_bits; ++ ++ struct apfs_spaceman *nx_spaceman; ++ struct apfs_nx_transaction nx_transaction; ++ ++ /* For now, a single semaphore for every operation */ ++ struct rw_semaphore nx_big_sem; ++ ++ /* List of currently mounted containers */ ++ struct list_head nx_list; ++}; ++ ++extern struct mutex nxs_mutex; ++ ++/* ++ * Omap mapping in memory. ++ * TODO: could this and apfs_omap_rec be the same struct? 
++ */ ++struct apfs_omap_map { ++ u64 xid; ++ u64 bno; ++ u32 flags; ++}; ++ ++/* ++ * Omap record data in memory ++ */ ++struct apfs_omap_rec { ++ u64 oid; ++ u64 bno; ++}; ++ ++#define APFS_OMAP_CACHE_SLOTS 128 ++#define APFS_OMAP_CACHE_SLOT_MASK (APFS_OMAP_CACHE_SLOTS - 1) ++ ++/** ++ * Cache of omap records ++ */ ++struct apfs_omap_cache { ++ struct apfs_omap_rec recs[APFS_OMAP_CACHE_SLOTS]; ++ bool disabled; ++ spinlock_t lock; ++}; ++ ++/* ++ * Omap structure shared by all snapshots for the same volume. ++ */ ++struct apfs_omap { ++ struct apfs_node *omap_root; ++ struct apfs_omap_cache omap_cache; ++ ++ /* Transaction id for most recent snapshot */ ++ u64 omap_latest_snap; ++ ++ /* Number of snapshots sharing this omap */ ++ unsigned int omap_refcnt; ++}; ++ ++/* ++ * Volume superblock data in memory ++ */ ++struct apfs_sb_info { ++ struct apfs_nxsb_info *s_nxi; /* In-memory container sb for volume */ ++ struct list_head list; /* List of mounted volumes in container */ ++ struct apfs_superblock *s_vsb_raw; /* On-disk volume sb */ ++ ++ dev_t s_anon_dev; /* Anonymous device for this volume-snapshot */ ++ ++ char *s_snap_name; /* Label for the mounted snapshot */ ++ u64 s_snap_xid; /* Transaction id for mounted snapshot */ ++ ++ struct apfs_node *s_cat_root; /* Root of the catalog tree */ ++ struct apfs_omap *s_omap; /* The object map */ ++ ++ struct apfs_object s_vobject; /* Volume superblock object */ ++ ++ /* Mount options */ ++ unsigned int s_vol_nr; /* Index of the volume in the sb list */ ++ kuid_t s_uid; /* uid to override on-disk uid */ ++ kgid_t s_gid; /* gid to override on-disk gid */ ++ ++ struct apfs_crypto_state_val *s_dflt_pfk; /* default per-file key */ ++ ++ struct apfs_vol_transaction s_transaction; ++ int s_trans_buffers_max; ++ ++ struct inode *s_private_dir; /* Inode for the private directory */ ++ struct work_struct s_orphan_cleanup_work; ++}; ++ ++static inline struct apfs_sb_info *APFS_SB(struct super_block *sb) ++{ ++ return 
sb->s_fs_info; ++} ++ ++static inline bool apfs_is_sealed(struct super_block *sb) ++{ ++ u64 flags = le64_to_cpu(APFS_SB(sb)->s_vsb_raw->apfs_incompatible_features); ++ ++ return flags & APFS_INCOMPAT_SEALED_VOLUME; ++} ++ ++/** ++ * apfs_vol_is_encrypted - Check if a volume is encrypting files ++ * @sb: superblock ++ */ ++static inline bool apfs_vol_is_encrypted(struct super_block *sb) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ ++ return (vsb_raw->apfs_fs_flags & cpu_to_le64(APFS_FS_UNENCRYPTED)) == 0; ++} ++ ++/** ++ * APFS_NXI - Get the shared container info for a volume's superblock ++ * @sb: superblock structure ++ */ ++static inline struct apfs_nxsb_info *APFS_NXI(struct super_block *sb) ++{ ++ return APFS_SB(sb)->s_nxi; ++} ++ ++/** ++ * APFS_SM - Get the shared spaceman struct for a volume's superblock ++ * @sb: superblock structure ++ */ ++static inline struct apfs_spaceman *APFS_SM(struct super_block *sb) ++{ ++ return APFS_NXI(sb)->nx_spaceman; ++} ++ ++static inline bool apfs_is_case_insensitive(struct super_block *sb) ++{ ++ return (APFS_SB(sb)->s_vsb_raw->apfs_incompatible_features & ++ cpu_to_le64(APFS_INCOMPAT_CASE_INSENSITIVE)) != 0; ++} ++ ++static inline bool apfs_is_normalization_insensitive(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ u64 flags = le64_to_cpu(sbi->s_vsb_raw->apfs_incompatible_features); ++ ++ if (apfs_is_case_insensitive(sb)) ++ return true; ++ if (flags & APFS_INCOMPAT_NORMALIZATION_INSENSITIVE) ++ return true; ++ return false; ++} ++ ++/** ++ * apfs_max_maps_per_block - Find the maximum map count for a mapping block ++ * @sb: superblock structure ++ */ ++static inline int apfs_max_maps_per_block(struct super_block *sb) ++{ ++ unsigned long maps_size; ++ ++ maps_size = (sb->s_blocksize - sizeof(struct apfs_checkpoint_map_phys)); ++ return maps_size / sizeof(struct apfs_checkpoint_mapping); ++} ++ ++/* ++ * In-memory representation of a key, as relevant for a b-tree query. 
++ */ ++struct apfs_key { ++ u64 id; ++ u64 number; /* Extent offset, name hash or transaction id */ ++ const char *name; /* On-disk name string */ ++ u8 type; /* Record type (0 for the omap) */ ++}; ++ ++/** ++ * apfs_init_free_queue_key - Initialize an in-memory key for a free queue query ++ * @xid: transaction id ++ * @paddr: block number ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_free_queue_key(u64 xid, u64 paddr, ++ struct apfs_key *key) ++{ ++ key->id = xid; ++ key->type = 0; ++ key->number = paddr; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_omap_key - Initialize an in-memory key for an omap query ++ * @oid: object id ++ * @xid: latest transaction id ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_omap_key(u64 oid, u64 xid, struct apfs_key *key) ++{ ++ key->id = oid; ++ key->type = 0; ++ key->number = xid; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_extent_key - Initialize an in-memory key for an extentref query ++ * @bno: physical block number for the start of the extent ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_extent_key(u64 bno, struct apfs_key *key) ++{ ++ key->id = bno; ++ key->type = APFS_TYPE_EXTENT; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_inode_key - Initialize an in-memory key for an inode query ++ * @ino: inode number ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_inode_key(u64 ino, struct apfs_key *key) ++{ ++ key->id = ino; ++ key->type = APFS_TYPE_INODE; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_file_extent_key - Initialize an in-memory key for an extent query ++ * @id: extent id ++ * @offset: logical address (0 for a multiple query) ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_file_extent_key(u64 id, u64 offset, ++ struct apfs_key *key) ++{ ++ key->id = id; ++ key->type = APFS_TYPE_FILE_EXTENT; ++ 
key->number = offset; ++ key->name = NULL; ++} ++ ++static inline void apfs_init_fext_key(u64 id, u64 offset, struct apfs_key *key) ++{ ++ key->id = id; ++ key->type = 0; ++ key->number = offset; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_dstream_id_key - Initialize an in-memory key for a dstream query ++ * @id: data stream id ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_dstream_id_key(u64 id, struct apfs_key *key) ++{ ++ key->id = id; ++ key->type = APFS_TYPE_DSTREAM_ID; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_crypto_state_key - Initialize an in-memory key for a crypto query ++ * @id: crypto state id ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_crypto_state_key(u64 id, struct apfs_key *key) ++{ ++ key->id = id; ++ key->type = APFS_TYPE_CRYPTO_STATE; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_sibling_link_key - Initialize an in-memory key for a sibling query ++ * @ino: inode number ++ * @id: sibling id ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_sibling_link_key(u64 ino, u64 id, ++ struct apfs_key *key) ++{ ++ key->id = ino; ++ key->type = APFS_TYPE_SIBLING_LINK; ++ key->number = id; /* Only guessing (TODO) */ ++ key->name = NULL; ++} ++ ++/** ++ * apfs_init_sibling_map_key - Initialize in-memory key for a sibling map query ++ * @id: sibling id ++ * @key: apfs_key structure to initialize ++ */ ++static inline void apfs_init_sibling_map_key(u64 id, struct apfs_key *key) ++{ ++ key->id = id; ++ key->type = APFS_TYPE_SIBLING_MAP; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++extern void apfs_init_drec_key(struct super_block *sb, u64 ino, const char *name, ++ unsigned int name_len, struct apfs_key *key); ++ ++/** ++ * apfs_init_xattr_key - Initialize an in-memory key for a xattr query ++ * @ino: inode number of the parent file ++ * @name: xattr name (NULL for a multiple query) ++ * @key: apfs_key 
structure to initialize ++ */ ++static inline void apfs_init_xattr_key(u64 ino, const char *name, ++ struct apfs_key *key) ++{ ++ key->id = ino; ++ key->type = APFS_TYPE_XATTR; ++ key->number = 0; ++ key->name = name; ++} ++ ++static inline void apfs_init_snap_metadata_key(u64 xid, struct apfs_key *key) ++{ ++ key->id = xid; ++ key->type = APFS_TYPE_SNAP_METADATA; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++static inline void apfs_init_snap_name_key(const char *name, struct apfs_key *key) ++{ ++ key->id = APFS_SNAP_NAME_OBJ_ID; ++ key->type = APFS_TYPE_SNAP_NAME; ++ key->number = 0; ++ key->name = name; ++} ++ ++static inline void apfs_init_omap_snap_key(u64 xid, struct apfs_key *key) ++{ ++ key->id = xid; ++ key->type = 0; ++ key->number = 0; ++ key->name = NULL; ++} ++ ++/** ++ * apfs_key_set_hdr - Set the header for a raw catalog key ++ * @type: record type ++ * @id: record id ++ * @key: the key to initialize ++ */ ++static inline void apfs_key_set_hdr(u64 type, u64 id, void *key) ++{ ++ struct apfs_key_header *hdr = key; ++ ++ hdr->obj_id_and_type = cpu_to_le64(id | type << APFS_OBJ_TYPE_SHIFT); ++} ++ ++/** ++ * apfs_cat_type - Read the record type of a catalog key ++ * @key: the raw catalog key ++ */ ++static inline int apfs_cat_type(struct apfs_key_header *key) ++{ ++ return (le64_to_cpu(key->obj_id_and_type) & APFS_OBJ_TYPE_MASK) >> APFS_OBJ_TYPE_SHIFT; ++} ++ ++/** ++ * apfs_cat_cnid - Read the cnid value on a catalog key ++ * @key: the raw catalog key ++ * ++ * TODO: rename this function, since it's not just for the catalog anymore ++ */ ++static inline u64 apfs_cat_cnid(struct apfs_key_header *key) ++{ ++ return le64_to_cpu(key->obj_id_and_type) & APFS_OBJ_ID_MASK; ++} ++ ++/* Flags for the query structure */ ++#define APFS_QUERY_TREE_MASK 000177 /* Which b-tree we query */ ++#define APFS_QUERY_OMAP 000001 /* This is a b-tree object map query */ ++#define APFS_QUERY_CAT 000002 /* This is a catalog tree query */ ++#define APFS_QUERY_FREE_QUEUE 
000004 /* This is a free queue query */ ++#define APFS_QUERY_EXTENTREF 000010 /* This is an extent reference query */ ++#define APFS_QUERY_FEXT 000020 /* This is a fext tree query */ ++#define APFS_QUERY_SNAP_META 000040 /* This is a snapshot meta query */ ++#define APFS_QUERY_OMAP_SNAP 000100 /* This is an omap snapshots query */ ++#define APFS_QUERY_NEXT 000200 /* Find next of multiple matches */ ++#define APFS_QUERY_EXACT 000400 /* Search for an exact match */ ++#define APFS_QUERY_DONE 001000 /* The search at this level is over */ ++#define APFS_QUERY_ANY_NAME 002000 /* Multiple search for any name */ ++#define APFS_QUERY_ANY_NUMBER 004000 /* Multiple search for any number */ ++#define APFS_QUERY_MULTIPLE (APFS_QUERY_ANY_NAME | APFS_QUERY_ANY_NUMBER) ++#define APFS_QUERY_PREV 010000 /* Find previous record */ ++ ++/* ++ * Structure used to retrieve data from an APFS B-Tree. For now only used ++ * on the calalog and the object map. ++ */ ++struct apfs_query { ++ struct apfs_node *node; /* Node being searched */ ++ struct apfs_key key; /* What the query is looking for */ ++ ++ struct apfs_query *parent; /* Query for parent node */ ++ unsigned int flags; ++ ++ /* Set by the query on success */ ++ int index; /* Index of the entry in the node */ ++ int key_off; /* Offset of the key in the node */ ++ int key_len; /* Length of the key */ ++ int off; /* Offset of the data in the node */ ++ int len; /* Length of the data */ ++ ++ int depth; /* Put a limit on recursion */ ++}; ++ ++/** ++ * apfs_query_storage - Get the storage type for a query's btree ++ * @query: the query structure ++ */ ++static inline u32 apfs_query_storage(struct apfs_query *query) ++{ ++ if (query->flags & APFS_QUERY_OMAP) ++ return APFS_OBJ_PHYSICAL; ++ if (query->flags & APFS_QUERY_CAT) ++ return APFS_OBJ_VIRTUAL; ++ if (query->flags & APFS_QUERY_FEXT) ++ return APFS_OBJ_PHYSICAL; ++ if (query->flags & APFS_QUERY_FREE_QUEUE) ++ return APFS_OBJ_EPHEMERAL; ++ if (query->flags & APFS_QUERY_EXTENTREF) 
++ return APFS_OBJ_PHYSICAL; ++ if (query->flags & APFS_QUERY_SNAP_META) ++ return APFS_OBJ_PHYSICAL; ++ if (query->flags & APFS_QUERY_OMAP_SNAP) ++ return APFS_OBJ_PHYSICAL; ++ ++ /* Absurd, but don't panic: let the callers fail and report it */ ++ return -1; ++} ++ ++/* ++ * Extent record data in memory ++ */ ++struct apfs_file_extent { ++ u64 logical_addr; ++ u64 phys_block_num; ++ u64 len; ++ u64 crypto_id; ++}; ++ ++/* ++ * Physical extent record data in memory ++ */ ++struct apfs_phys_extent { ++ u64 bno; ++ u64 blkcount; ++ u64 len; /* In bytes */ ++ u32 refcnt; ++ u8 kind; ++}; ++ ++/* ++ * Data stream info in memory ++ */ ++struct apfs_dstream_info { ++ struct super_block *ds_sb; /* Filesystem superblock */ ++ struct inode *ds_inode; /* NULL for xattr dstreams */ ++ u64 ds_id; /* ID of the extent records */ ++ u64 ds_size; /* Length of the stream */ ++ u64 ds_sparse_bytes;/* Hole byte count in stream */ ++ struct apfs_file_extent ds_cached_ext; /* Latest extent record */ ++ bool ds_ext_dirty; /* Is ds_cached_ext dirty? */ ++ spinlock_t ds_ext_lock; /* Protects ds_cached_ext */ ++ bool ds_shared; /* Has multiple references? */ ++}; ++ ++/** ++ * apfs_alloced_size - Return the alloced size for a data stream ++ * @dstream: data stream info ++ * ++ * TODO: is this always correct? Or could the extents have an unused tail? 
++ */ ++static inline u64 apfs_alloced_size(struct apfs_dstream_info *dstream) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ u64 blks = (dstream->ds_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; ++ ++ return blks << sb->s_blocksize_bits; ++} ++ ++/* ++ * APFS inode data in memory ++ */ ++struct apfs_inode_info { ++ u64 i_ino64; /* 32-bit-safe inode number */ ++ u64 i_parent_id; /* ID of primary parent */ ++ struct timespec64 i_crtime; /* Time of creation */ ++ u32 i_nchildren; /* Child count for directory */ ++ uid_t i_saved_uid; /* User ID on disk */ ++ gid_t i_saved_gid; /* Group ID on disk */ ++ u32 i_key_class; /* Security class for directory */ ++ u64 i_int_flags; /* Internal flags */ ++ u32 i_bsd_flags; /* BSD flags */ ++ struct list_head i_list; /* List of inodes in transaction */ ++ ++ bool i_has_dstream; /* Is there a dstream record? */ ++ struct apfs_dstream_info i_dstream; /* Dstream data, if any */ ++ ++ bool i_cleaned; /* Orphan data already deleted */ ++ ++ struct inode vfs_inode; ++}; ++ ++static inline struct apfs_inode_info *APFS_I(const struct inode *inode) ++{ ++ return container_of(inode, struct apfs_inode_info, vfs_inode); ++} ++ ++/** ++ * apfs_ino - Get the 64-bit id of an inode ++ * @inode: the vfs inode ++ * ++ * Returns all 64 bits of @inode's id, even on 32-bit architectures. ++ */ ++static inline u64 apfs_ino(const struct inode *inode) ++{ ++ return APFS_I(inode)->i_ino64; ++} ++ ++/** ++ * apfs_set_ino - Set a 64-bit id on an inode ++ * @inode: the vfs inode ++ * @id: id to set ++ * ++ * Sets both the vfs inode number and the actual 32-bit-safe id. 
++ */ ++static inline void apfs_set_ino(struct inode *inode, u64 id) ++{ ++ inode->i_ino = id; /* Higher bits may be lost, but it doesn't matter */ ++ APFS_I(inode)->i_ino64 = id; ++} ++ ++/* Make the compiler complain if we ever access i_ino directly by mistake */ ++#define i_ino DONT_USE_I_INO ++ ++/* ++ * Directory entry record in memory ++ */ ++struct apfs_drec { ++ u8 *name; ++ u64 ino; ++ u64 sibling_id; /* The sibling id; 0 if none */ ++ int name_len; ++ unsigned int type; ++}; ++ ++/* ++ * Xattr record data in memory ++ */ ++struct apfs_xattr { ++ u8 *name; ++ u8 *xdata; ++ int name_len; ++ int xdata_len; ++ bool has_dstream; ++}; ++ ++struct apfs_compressed_data { ++ bool has_dstream; ++ u64 size; ++ union { ++ struct apfs_dstream_info *dstream; ++ void *data; ++ }; ++}; ++ ++/* ++ * Report function name and line number for the message types that are likely ++ * to signal a bug, to make things easier for reporters. Don't do this for the ++ * common messages, there is no point and it makes the console look too busy. ++ */ ++#define apfs_emerg(sb, fmt, ...) apfs_msg(sb, KERN_EMERG, __func__, __LINE__, fmt, ##__VA_ARGS__) ++#define apfs_alert(sb, fmt, ...) apfs_msg(sb, KERN_ALERT, __func__, __LINE__, fmt, ##__VA_ARGS__) ++#define apfs_crit(sb, fmt, ...) apfs_msg(sb, KERN_CRIT, __func__, __LINE__, fmt, ##__VA_ARGS__) ++#define apfs_err(sb, fmt, ...) apfs_msg(sb, KERN_ERR, __func__, __LINE__, fmt, ##__VA_ARGS__) ++#define apfs_warn(sb, fmt, ...) apfs_msg(sb, KERN_WARNING, NULL, 0, fmt, ##__VA_ARGS__) ++#define apfs_notice(sb, fmt, ...) apfs_msg(sb, KERN_NOTICE, NULL, 0, fmt, ##__VA_ARGS__) ++#define apfs_info(sb, fmt, ...) apfs_msg(sb, KERN_INFO, NULL, 0, fmt, ##__VA_ARGS__) ++ ++#ifdef CONFIG_APFS_DEBUG ++#define ASSERT(expr) WARN_ON(!(expr)) ++#define apfs_debug(sb, fmt, ...) apfs_msg(sb, KERN_DEBUG, __func__, __LINE__, fmt, ##__VA_ARGS__) ++#else ++#define ASSERT(expr) ((void)0) ++#define apfs_debug(sb, fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) ++#endif /* CONFIG_APFS_DEBUG */ ++ ++/** ++ * apfs_assert_in_transaction - Assert that the object is in current transaction ++ * @sb: superblock structure ++ * @obj: on-disk object to check ++ */ ++#define apfs_assert_in_transaction(sb, obj) \ ++do { \ ++ (void)sb; \ ++ (void)obj; \ ++ ASSERT(le64_to_cpu((obj)->o_xid) == APFS_NXI(sb)->nx_xid); \ ++} while (0) ++ ++/* btree.c */ ++extern struct apfs_node *apfs_query_root(const struct apfs_query *query); ++extern struct apfs_query *apfs_alloc_query(struct apfs_node *node, ++ struct apfs_query *parent); ++extern void apfs_free_query(struct apfs_query *query); ++extern int apfs_btree_query(struct super_block *sb, struct apfs_query **query); ++extern int apfs_omap_lookup_block(struct super_block *sb, struct apfs_omap *omap, u64 id, u64 *block, bool write); ++extern int apfs_omap_lookup_newest_block(struct super_block *sb, struct apfs_omap *omap, u64 id, u64 *block, bool write); ++extern int apfs_create_omap_rec(struct super_block *sb, u64 oid, u64 bno); ++extern int apfs_delete_omap_rec(struct super_block *sb, u64 oid); ++extern int apfs_query_join_transaction(struct apfs_query *query); ++extern int __apfs_btree_insert(struct apfs_query *query, void *key, int key_len, void *val, int val_len); ++extern int apfs_btree_insert(struct apfs_query *query, void *key, int key_len, ++ void *val, int val_len); ++extern int apfs_btree_remove(struct apfs_query *query); ++extern void apfs_btree_change_node_count(struct apfs_query *query, int change); ++extern int apfs_btree_replace(struct apfs_query *query, void *key, int key_len, ++ void *val, int val_len); ++extern void apfs_query_direct_forward(struct apfs_query *query); ++ ++/* compress.c */ ++extern int apfs_compress_get_size(struct inode *inode, loff_t *size); ++ ++/* dir.c */ ++extern int apfs_inode_by_name(struct inode *dir, const struct qstr *child, ++ u64 *ino); ++extern int apfs_mkany(struct inode *dir, struct dentry *dentry, ++ umode_t mode, 
dev_t rdev, const char *symname); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++extern int apfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t rdev); ++extern int apfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); ++extern int apfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry, ++ unsigned int flags); ++extern int apfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ bool excl); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++extern int apfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, dev_t rdev); ++extern int apfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode); ++extern int apfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, ++ struct dentry *old_dentry, struct inode *new_dir, ++ struct dentry *new_dentry, unsigned int flags); ++extern int apfs_create(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool excl); ++#else ++extern int apfs_mknod(struct mnt_idmap *idmap, struct inode *dir, ++ struct dentry *dentry, umode_t mode, dev_t rdev); ++extern int apfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, ++ struct dentry *dentry, umode_t mode); ++extern int apfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, ++ struct dentry *old_dentry, struct inode *new_dir, ++ struct dentry *new_dentry, unsigned int flags); ++extern int apfs_create(struct mnt_idmap *idmap, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool excl); ++#endif ++ ++extern int apfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *dentry); ++extern int apfs_unlink(struct inode *dir, struct dentry *dentry); ++extern int apfs_rmdir(struct inode *dir, struct dentry *dentry); ++extern int apfs_delete_orphan_link(struct inode *inode); ++extern int 
APFS_DELETE_ORPHAN_LINK_MAXOPS(void); ++extern u64 apfs_any_orphan_ino(struct super_block *sb, u64 *ino_p); ++ ++/* extents.c */ ++extern int apfs_extent_from_query(struct apfs_query *query, ++ struct apfs_file_extent *extent); ++extern int apfs_logic_to_phys_bno(struct apfs_dstream_info *dstream, sector_t dsblock, u64 *bno); ++extern int __apfs_get_block(struct apfs_dstream_info *dstream, sector_t iblock, ++ struct buffer_head *bh_result, int create); ++extern int apfs_get_block(struct inode *inode, sector_t iblock, ++ struct buffer_head *bh_result, int create); ++extern int apfs_flush_extent_cache(struct apfs_dstream_info *dstream); ++extern int apfs_dstream_get_new_bno(struct apfs_dstream_info *dstream, u64 dsblock, u64 *bno); ++extern int apfs_get_new_block(struct inode *inode, sector_t iblock, ++ struct buffer_head *bh_result, int create); ++extern int APFS_GET_NEW_BLOCK_MAXOPS(void); ++extern int apfs_truncate(struct apfs_dstream_info *dstream, loff_t new_size); ++extern int apfs_inode_delete_front(struct inode *inode); ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) ++extern loff_t apfs_remap_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags); ++#else ++extern int apfs_clone_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, u64 len); ++#endif ++extern int apfs_clone_extents(struct apfs_dstream_info *dstream, u64 new_id); ++extern int apfs_nonsparse_dstream_read(struct apfs_dstream_info *dstream, void *buf, size_t count, u64 offset); ++extern void apfs_nonsparse_dstream_preread(struct apfs_dstream_info *dstream); ++ ++/* file.c */ ++extern int apfs_file_mmap(struct file *file, struct vm_area_struct *vma); ++extern int apfs_fsync(struct file *file, loff_t start, loff_t end, int datasync); ++ ++/* inode.c */ ++extern struct inode *apfs_iget(struct super_block *sb, u64 cnid); ++extern int apfs_update_inode(struct inode *inode, char *new_name); ++extern 
int APFS_UPDATE_INODE_MAXOPS(void); ++extern void apfs_orphan_cleanup_work(struct work_struct *work); ++extern void apfs_evict_inode(struct inode *inode); ++extern struct inode *apfs_new_inode(struct inode *dir, umode_t mode, ++ dev_t rdev); ++extern int apfs_create_inode_rec(struct super_block *sb, struct inode *inode, ++ struct dentry *dentry); ++extern int apfs_inode_create_exclusive_dstream(struct inode *inode); ++extern int APFS_CREATE_INODE_REC_MAXOPS(void); ++extern int __apfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int flags, struct page **pagep, void **fsdata); ++extern int __apfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int copied, struct page *page, void *fsdata); ++extern int apfs_dstream_adj_refcnt(struct apfs_dstream_info *dstream, u32 delta); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++extern int apfs_setattr(struct dentry *dentry, struct iattr *iattr); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++extern int apfs_setattr(struct user_namespace *mnt_userns, ++ struct dentry *dentry, struct iattr *iattr); ++#else ++extern int apfs_setattr(struct mnt_idmap *idmap, ++ struct dentry *dentry, struct iattr *iattr); ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++extern int apfs_update_time(struct inode *inode, struct timespec64 *time, int flags); ++#else ++extern int apfs_update_time(struct inode *inode, int flags); ++#endif ++long apfs_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++long apfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) /* No statx yet... 
*/ ++extern int apfs_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++extern int apfs_getattr(const struct path *path, struct kstat *stat, ++ u32 request_mask, unsigned int query_flags); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++extern int apfs_getattr(struct user_namespace *mnt_userns, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags); ++#else ++extern int apfs_getattr(struct mnt_idmap *idmap, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags); ++#endif ++ ++extern int apfs_crypto_adj_refcnt(struct super_block *sb, u64 crypto_id, int delta); ++extern int APFS_CRYPTO_ADJ_REFCNT_MAXOPS(void); ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) ++extern int apfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); ++extern int apfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0) ++extern int apfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); ++extern int apfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); ++#endif ++ ++/* key.c */ ++extern int apfs_filename_cmp(struct super_block *sb, const char *name1, unsigned int len1, const char *name2, unsigned int len2); ++extern int apfs_keycmp(struct apfs_key *k1, struct apfs_key *k2); ++extern int apfs_read_cat_key(void *raw, int size, struct apfs_key *key, bool hashed); ++extern int apfs_read_fext_key(void *raw, int size, struct apfs_key *key); ++extern int apfs_read_free_queue_key(void *raw, int size, struct apfs_key *key); ++extern int apfs_read_omap_key(void *raw, int size, struct apfs_key *key); ++extern int apfs_read_extentref_key(void *raw, int size, struct apfs_key *key); ++extern int apfs_read_snap_meta_key(void *raw, int size, struct apfs_key *key); ++extern int 
apfs_read_omap_snap_key(void *raw, int size, struct apfs_key *key); ++ ++/* message.c */ ++extern __printf(5, 6) void apfs_msg(struct super_block *sb, const char *prefix, const char *func, int line, const char *fmt, ...); ++ ++/* node.c */ ++extern struct apfs_node *apfs_read_node(struct super_block *sb, u64 oid, ++ u32 storage, bool write); ++extern void apfs_update_node(struct apfs_node *node); ++extern int apfs_delete_node(struct apfs_node *node, int type); ++extern int apfs_node_query(struct super_block *sb, struct apfs_query *query); ++extern void apfs_node_query_first(struct apfs_query *query); ++extern int apfs_omap_map_from_query(struct apfs_query *query, struct apfs_omap_map *map); ++extern int apfs_node_split(struct apfs_query *query); ++extern int apfs_node_locate_key(struct apfs_node *node, int index, int *off); ++extern void apfs_node_free(struct apfs_node *node); ++extern void apfs_node_free_range(struct apfs_node *node, u16 off, u16 len); ++extern bool apfs_node_has_room(struct apfs_node *node, int length, bool replace); ++extern int apfs_node_replace(struct apfs_query *query, void *key, int key_len, void *val, int val_len); ++extern int apfs_node_insert(struct apfs_query *query, void *key, int key_len, void *val, int val_len); ++extern int apfs_create_single_rec_node(struct apfs_query *query, void *key, int key_len, void *val, int val_len); ++extern int apfs_make_empty_btree_root(struct super_block *sb, u32 subtype, u64 *oid); ++ ++/* object.c */ ++extern int apfs_obj_verify_csum(struct super_block *sb, struct buffer_head *bh); ++extern void apfs_obj_set_csum(struct super_block *sb, struct apfs_obj_phys *obj); ++extern int apfs_multiblock_verify_csum(char *object, u32 size); ++extern void apfs_multiblock_set_csum(char *object, u32 size); ++extern int apfs_create_cpm_block(struct super_block *sb, u64 bno, struct buffer_head **bh_p); ++extern int apfs_create_cpoint_map(struct super_block *sb, struct apfs_checkpoint_map_phys *cpm, struct apfs_obj_phys 
*obj, u64 bno, u32 size); ++extern struct apfs_ephemeral_object_info *apfs_ephemeral_object_lookup(struct super_block *sb, u64 oid); ++extern struct buffer_head *apfs_read_object_block(struct super_block *sb, u64 bno, bool write, bool preserve); ++extern u32 apfs_index_in_data_area(struct super_block *sb, u64 bno); ++extern u64 apfs_data_index_to_bno(struct super_block *sb, u32 index); ++ ++/* snapshot.c */ ++extern int apfs_ioc_take_snapshot(struct file *file, void __user *user_arg); ++extern int apfs_switch_to_snapshot(struct super_block *sb); ++ ++/* spaceman.c */ ++extern int apfs_read_spaceman(struct super_block *sb); ++extern int apfs_free_queue_insert_nocache(struct super_block *sb, u64 bno, u64 count); ++extern int apfs_free_queue_insert(struct super_block *sb, u64 bno, u64 count); ++extern int apfs_spaceman_allocate_block(struct super_block *sb, u64 *bno, bool backwards); ++ ++/* super.c */ ++extern int apfs_map_volume_super_bno(struct super_block *sb, u64 bno, bool check); ++extern int apfs_map_volume_super(struct super_block *sb, bool write); ++extern void apfs_unmap_volume_super(struct super_block *sb); ++extern int apfs_read_omap(struct super_block *sb, bool write); ++extern int apfs_read_catalog(struct super_block *sb, bool write); ++extern int apfs_sync_fs(struct super_block *sb, int wait); ++ ++/* transaction.c */ ++extern int apfs_cpoint_data_free(struct super_block *sb, u64 bno); ++extern int apfs_transaction_start(struct super_block *sb, struct apfs_max_ops maxops); ++extern int apfs_transaction_commit(struct super_block *sb); ++extern void apfs_inode_join_transaction(struct super_block *sb, struct inode *inode); ++extern int apfs_transaction_join(struct super_block *sb, ++ struct buffer_head *bh); ++void apfs_transaction_abort(struct super_block *sb); ++extern int apfs_transaction_flush_all_inodes(struct super_block *sb); ++ ++/* xattr.c */ ++extern int ____apfs_xattr_get(struct inode *inode, const char *name, void *buffer, ++ size_t size, bool 
only_whole); ++extern int __apfs_xattr_get(struct inode *inode, const char *name, void *buffer, ++ size_t size); ++extern int apfs_delete_all_xattrs(struct inode *inode); ++extern int apfs_xattr_set(struct inode *inode, const char *name, const void *value, ++ size_t size, int flags); ++extern int APFS_XATTR_SET_MAXOPS(void); ++extern ssize_t apfs_listxattr(struct dentry *dentry, char *buffer, size_t size); ++extern int apfs_xattr_get_compressed_data(struct inode *inode, const char *name, struct apfs_compressed_data *cdata); ++extern void apfs_release_compressed_data(struct apfs_compressed_data *cdata); ++extern int apfs_compressed_data_read(struct apfs_compressed_data *cdata, void *buf, size_t count, u64 offset); ++ ++/* xfield.c */ ++extern int apfs_find_xfield(u8 *xfields, int len, u8 xtype, char **xval); ++extern int apfs_init_xfields(u8 *buffer, int buflen); ++extern int apfs_insert_xfield(u8 *buffer, int buflen, ++ const struct apfs_x_field *xkey, ++ const void *xval); ++ ++/* ++ * Inode and file operations ++ */ ++ ++/* compress.c */ ++extern const struct address_space_operations apfs_compress_aops; ++extern const struct file_operations apfs_compress_file_operations; ++ ++/* dir.c */ ++extern const struct file_operations apfs_dir_operations; ++ ++/* file.c */ ++extern const struct file_operations apfs_file_operations; ++extern const struct inode_operations apfs_file_inode_operations; ++ ++/* namei.c */ ++extern const struct inode_operations apfs_dir_inode_operations; ++extern const struct inode_operations apfs_special_inode_operations; ++extern const struct dentry_operations apfs_dentry_operations; ++ ++/* symlink.c */ ++extern const struct inode_operations apfs_symlink_inode_operations; ++ ++/* xattr.c */ ++extern const struct xattr_handler *apfs_xattr_handlers[]; ++ ++/** ++ * apfs_assert_query_is_valid - Assert that all of a query's ancestors are set ++ * @query: the query to check ++ * ++ * A query may lose some of its ancestors during a node split, but 
nothing ++ * should be done to such a query until it gets refreshed. ++ */ ++static inline void apfs_assert_query_is_valid(const struct apfs_query *query) ++{ ++ ASSERT(apfs_node_is_root(apfs_query_root(query))); ++} ++ ++/* ++ * TODO: the following are modified variants of buffer head functions that will ++ * work with the shared block device for the container. The correct approach ++ * here would be to avoid buffer heads and use bios, but for now this will do. ++ */ ++ ++static inline void ++apfs_map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) ++{ ++ set_buffer_mapped(bh); ++ bh->b_bdev = APFS_NXI(sb)->nx_bdev; ++ bh->b_blocknr = block; ++ bh->b_size = sb->s_blocksize; ++} ++ ++static inline struct buffer_head * ++apfs_sb_bread(struct super_block *sb, sector_t block) ++{ ++ return __bread_gfp(APFS_NXI(sb)->nx_bdev, block, sb->s_blocksize, __GFP_MOVABLE); ++} ++ ++/* Use instead of apfs_sb_bread() for blocks that will just be overwritten */ ++static inline struct buffer_head * ++apfs_getblk(struct super_block *sb, sector_t block) ++{ ++ struct buffer_head *bh; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ bh = __getblk_gfp(APFS_NXI(sb)->nx_bdev, block, sb->s_blocksize, __GFP_MOVABLE); ++#else ++ bh = bdev_getblk(APFS_NXI(sb)->nx_bdev, block, sb->s_blocksize, __GFP_MOVABLE); ++#endif ++ if (bh) ++ set_buffer_uptodate(bh); ++ return bh; ++} ++ ++#endif /* _APFS_H */ +diff --git a/fs/apfs/apfs_raw.h b/fs/apfs/apfs_raw.h +new file mode 100644 +index 000000000..2195e8a78 +--- /dev/null ++++ b/fs/apfs/apfs_raw.h +@@ -0,0 +1,1562 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2019 Ernesto A. Fernández ++ * ++ * Definitions for all on-disk data structures. 
++ */ ++ ++#ifndef _APFS_RAW_H ++#define _APFS_RAW_H ++ ++#include ++#include ++ ++/* Object identifiers constants */ ++#define APFS_OID_NX_SUPERBLOCK 1 ++#define APFS_OID_INVALID 0ULL ++#define APFS_OID_RESERVED_COUNT 1024 ++ ++/* Object type masks */ ++#define APFS_OBJECT_TYPE_MASK 0x0000ffff ++#define APFS_OBJECT_TYPE_FLAGS_MASK 0xffff0000 ++#define APFS_OBJ_STORAGETYPE_MASK 0xc0000000 ++#define APFS_OBJECT_TYPE_FLAGS_DEFINED_MASK 0xf8000000 ++ ++/* Object types */ ++#define APFS_OBJECT_TYPE_NX_SUPERBLOCK 0x00000001 ++#define APFS_OBJECT_TYPE_BTREE 0x00000002 ++#define APFS_OBJECT_TYPE_BTREE_NODE 0x00000003 ++#define APFS_OBJECT_TYPE_SPACEMAN 0x00000005 ++#define APFS_OBJECT_TYPE_SPACEMAN_CAB 0x00000006 ++#define APFS_OBJECT_TYPE_SPACEMAN_CIB 0x00000007 ++#define APFS_OBJECT_TYPE_SPACEMAN_BITMAP 0x00000008 ++#define APFS_OBJECT_TYPE_SPACEMAN_FREE_QUEUE 0x00000009 ++#define APFS_OBJECT_TYPE_EXTENT_LIST_TREE 0x0000000a ++#define APFS_OBJECT_TYPE_OMAP 0x0000000b ++#define APFS_OBJECT_TYPE_CHECKPOINT_MAP 0x0000000c ++#define APFS_OBJECT_TYPE_FS 0x0000000d ++#define APFS_OBJECT_TYPE_FSTREE 0x0000000e ++#define APFS_OBJECT_TYPE_BLOCKREFTREE 0x0000000f ++#define APFS_OBJECT_TYPE_SNAPMETATREE 0x00000010 ++#define APFS_OBJECT_TYPE_NX_REAPER 0x00000011 ++#define APFS_OBJECT_TYPE_NX_REAP_LIST 0x00000012 ++#define APFS_OBJECT_TYPE_OMAP_SNAPSHOT 0x00000013 ++#define APFS_OBJECT_TYPE_EFI_JUMPSTART 0x00000014 ++#define APFS_OBJECT_TYPE_FUSION_MIDDLE_TREE 0x00000015 ++#define APFS_OBJECT_TYPE_NX_FUSION_WBC 0x00000016 ++#define APFS_OBJECT_TYPE_NX_FUSION_WBC_LIST 0x00000017 ++#define APFS_OBJECT_TYPE_ER_STATE 0x00000018 ++#define APFS_OBJECT_TYPE_GBITMAP 0x00000019 ++#define APFS_OBJECT_TYPE_GBITMAP_TREE 0x0000001a ++#define APFS_OBJECT_TYPE_GBITMAP_BLOCK 0x0000001b ++#define APFS_OBJECT_TYPE_ER_RECOVERY_BLOCK 0x0000001c ++#define APFS_OBJECT_TYPE_SNAP_META_EXT 0x0000001d ++#define APFS_OBJECT_TYPE_INTEGRITY_META 0x0000001e ++#define APFS_OBJECT_TYPE_FEXT_TREE 0x0000001f 
++#define APFS_OBJECT_TYPE_RESERVED_20 0x00000020 ++#define APFS_OBJECT_TYPE_INVALID 0x00000000 ++#define APFS_OBJECT_TYPE_TEST 0x000000ff ++ ++/* Object type flags */ ++#define APFS_OBJ_VIRTUAL 0x00000000 ++#define APFS_OBJ_EPHEMERAL 0x80000000 ++#define APFS_OBJ_PHYSICAL 0x40000000 ++#define APFS_OBJ_NOHEADER 0x20000000 ++#define APFS_OBJ_ENCRYPTED 0x10000000 ++#define APFS_OBJ_NONPERSISTENT 0x08000000 ++ ++#define APFS_MAX_CKSUM_SIZE 8 ++ ++/* ++ * On-disk representation of an APFS object ++ */ ++struct apfs_obj_phys { ++/*00*/ __le64 o_cksum; /* Fletcher checksum */ ++ __le64 o_oid; /* Object-id */ ++/*10*/ __le64 o_xid; /* Transaction ID */ ++ __le32 o_type; /* Object type */ ++ __le32 o_subtype; /* Object subtype */ ++} __packed; ++ ++/* Flags for the object map structure */ ++#define APFS_OMAP_MANUALLY_MANAGED 0x00000001 ++#define APFS_OMAP_ENCRYPTING 0x00000002 ++#define APFS_OMAP_DECRYPTING 0x00000004 ++#define APFS_OMAP_KEYROLLING 0x00000008 ++#define APFS_OMAP_CRYPTO_GENERATION 0x00000010 ++#define APFS_OMAP_FLAGS_VALID_MASK (APFS_OMAP_MANUALLY_MANAGED \ ++ | APFS_OMAP_ENCRYPTING \ ++ | APFS_OMAP_DECRYPTING \ ++ | APFS_OMAP_KEYROLLING \ ++ | APFS_OMAP_CRYPTO_GENERATION) ++ ++/* ++ * On-disk representation of an object map ++ */ ++struct apfs_omap_phys { ++/*00*/ struct apfs_obj_phys om_o; ++/*20*/ __le32 om_flags; ++ __le32 om_snap_count; ++ __le32 om_tree_type; ++ __le32 om_snapshot_tree_type; ++/*30*/ __le64 om_tree_oid; ++ __le64 om_snapshot_tree_oid; ++/*40*/ __le64 om_most_recent_snap; ++ __le64 om_pending_revert_min; ++ __le64 om_pending_revert_max; ++} __packed; ++ ++/* Object map value flags */ ++#define APFS_OMAP_VAL_DELETED 0x00000001 ++#define APFS_OMAP_VAL_SAVED 0x00000002 ++#define APFS_OMAP_VAL_ENCRYPTED 0x00000004 ++#define APFS_OMAP_VAL_NOHEADER 0x00000008 ++#define APFS_OMAP_VAL_CRYPTO_GENERATION 0x00000010 ++#define APFS_OMAP_VAL_FLAGS_VALID_MASK (APFS_OMAP_VAL_DELETED \ ++ | APFS_OMAP_VAL_SAVED \ ++ | APFS_OMAP_VAL_ENCRYPTED \ ++ | 
APFS_OMAP_VAL_NOHEADER \ ++ | APFS_OMAP_VAL_CRYPTO_GENERATION) ++ ++/* ++ * Structure of a value in an object map B-tree ++ */ ++struct apfs_omap_val { ++ __le32 ov_flags; ++ __le32 ov_size; ++ __le64 ov_paddr; ++} __packed; ++ ++/* ++ * Structure of a value in an omap's snapshot tree ++ */ ++struct apfs_omap_snapshot { ++ __le32 oms_flags; ++ __le32 oms_pad; ++ __le64 oms_oid; ++} __packed; ++ ++/* B-tree node flags */ ++#define APFS_BTNODE_ROOT 0x0001 ++#define APFS_BTNODE_LEAF 0x0002 ++#define APFS_BTNODE_FIXED_KV_SIZE 0x0004 ++#define APFS_BTNODE_CHECK_KOFF_INVAL 0x8000 ++#define APFS_BTNODE_MASK 0x0007 /* Valid on-disk flags */ ++ ++/* B-tree location constants */ ++#define APFS_BTOFF_INVALID 0xffff ++ ++/* ++ * Structure storing a location inside a B-tree node ++ */ ++struct apfs_nloc { ++ __le16 off; ++ __le16 len; ++} __packed; ++ ++/* ++ * Structure storing the location of a key/value pair within a B-tree node ++ */ ++struct apfs_kvloc { ++ struct apfs_nloc k; ++ struct apfs_nloc v; ++} __packed; ++ ++/* ++ * Structure storing the location of a key/value pair within a B-tree node ++ * having fixed-size key and value (flag APFS_BTNODE_FIXED_KV_SIZE is present) ++ */ ++struct apfs_kvoff { ++ __le16 k; ++ __le16 v; ++} __packed; ++ ++/* ++ * On-disk representation of a B-tree node ++ */ ++struct apfs_btree_node_phys { ++/*00*/ struct apfs_obj_phys btn_o; ++/*20*/ __le16 btn_flags; ++ __le16 btn_level; ++ __le32 btn_nkeys; ++/*28*/ struct apfs_nloc btn_table_space; ++ struct apfs_nloc btn_free_space; ++ struct apfs_nloc btn_key_free_list; ++ struct apfs_nloc btn_val_free_list; ++/*38*/ __le64 btn_data[]; ++} __packed; ++ ++/* B-tree info flags */ ++#define APFS_BTREE_UINT64_KEYS 0x00000001 ++#define APFS_BTREE_SEQUENTIAL_INSERT 0x00000002 ++#define APFS_BTREE_ALLOW_GHOSTS 0x00000004 ++#define APFS_BTREE_EPHEMERAL 0x00000008 ++#define APFS_BTREE_PHYSICAL 0x00000010 ++#define APFS_BTREE_NONPERSISTENT 0x00000020 ++#define APFS_BTREE_KV_NONALIGNED 0x00000040 
++#define APFS_BTREE_FLAGS_VALID_MASK (APFS_BTREE_UINT64_KEYS \ ++ | APFS_BTREE_SEQUENTIAL_INSERT \ ++ | APFS_BTREE_ALLOW_GHOSTS \ ++ | APFS_BTREE_EPHEMERAL \ ++ | APFS_BTREE_PHYSICAL \ ++ | APFS_BTREE_NONPERSISTENT \ ++ | APFS_BTREE_KV_NONALIGNED) ++ ++/* ++ * Structure used to store information about a B-tree that won't change ++ * over time ++ */ ++struct apfs_btree_info_fixed { ++ __le32 bt_flags; ++ __le32 bt_node_size; ++ __le32 bt_key_size; ++ __le32 bt_val_size; ++} __packed; ++ ++/* ++ * Structure used to store information about a B-tree (located at the end of ++ * a B-tree root node block) ++ */ ++struct apfs_btree_info { ++ struct apfs_btree_info_fixed bt_fixed; ++ __le32 bt_longest_key; /* Longest key ever stored */ ++ __le32 bt_longest_val; /* Longest value ever stored */ ++ __le64 bt_key_count; ++ __le64 bt_node_count; ++} __packed; ++ ++/* ++ * Structure of the value of a directory entry. This is the data in ++ * the catalog nodes for record type APFS_TYPE_DIR_REC. ++ */ ++struct apfs_drec_val { ++ __le64 file_id; ++ __le64 date_added; ++ __le16 flags; ++ u8 xfields[]; ++} __packed; ++ ++/* Physical extent records */ ++#define APFS_PEXT_LEN_MASK 0x0fffffffffffffffULL ++#define APFS_PEXT_KIND_MASK 0xf000000000000000ULL ++#define APFS_PEXT_KIND_SHIFT 60 ++ ++/* The kind of a physical extent record */ ++enum { ++ APFS_KIND_ANY = 0, ++ APFS_KIND_NEW = 1, ++ APFS_KIND_UPDATE = 2, ++ APFS_KIND_DEAD = 3, ++ APFS_KIND_UPDATE_REFCNT = 4, ++ ++ APFS_KIND_INVALID = 255 /* This is weird, won't fit in 4 bits */ ++}; ++ ++#define APFS_OWNING_OBJ_ID_INVALID (~0ULL) ++#define APFS_OWNING_OBJ_ID_UNKNOWN (~1ULL) ++ ++/* ++ * Structure of a physical extent record ++ */ ++struct apfs_phys_ext_val { ++ __le64 len_and_kind; ++ __le64 owning_obj_id; ++ __le32 refcnt; ++} __packed; ++ ++/* File extent records */ ++#define APFS_FILE_EXTENT_LEN_MASK 0x00ffffffffffffffULL ++#define APFS_FILE_EXTENT_FLAG_MASK 0xff00000000000000ULL ++#define APFS_FILE_EXTENT_FLAG_SHIFT 56 
++#define APFS_FILE_EXTENT_CRYPTO_FLAG 0x01 /* Made-up name */ ++#define APFS_FILE_EXTENT_PREALLOCATED 0x02 /* Made-up name */ ++#define APFS_VALID_FILE_EXTENT_FLAGS (APFS_FILE_EXTENT_CRYPTO_FLAG \ ++ | APFS_FILE_EXTENT_PREALLOCATED) ++ ++/* ++ * Put a bound on maximum file size so that a growing truncation will always ++ * produce a single hole extent, even if 64k block sizes were in use. Larger ++ * file sizes could be supported with multiple extents of course, but it takes ++ * some work and I don't see the point. ++ */ ++#define APFS_MAX_FILE_SIZE 0x00ffffffffff0000ULL ++ ++/* ++ * Structure of a file extent record ++ */ ++struct apfs_file_extent_val { ++ __le64 len_and_flags; ++ __le64 phys_block_num; ++ __le64 crypto_id; ++} __packed; ++ ++/* ++ * Structure of a data stream record ++ */ ++struct apfs_dstream_id_val { ++ __le32 refcnt; ++} __packed; ++ ++#define APFS_CP_MAX_WRAPPEDKEYSIZE 128 ++ ++/* ++ * Structure used to store the encryption state for PFKs ++ */ ++struct apfs_wrapped_crypto_state { ++ __le16 major_version; ++ __le16 minor_version; ++ __le32 cpflags; ++ __le32 persistent_class; ++ __le32 key_os_version; ++ __le16 key_revision; ++ __le16 key_len; ++ u8 persistent_key[]; ++} __packed; ++ ++/* ++ * Structure of a crypto state record ++ */ ++struct apfs_crypto_state_val { ++ __le32 refcnt; ++ struct apfs_wrapped_crypto_state state; ++} __packed; ++ ++/* Inode numbers for special inodes */ ++#define APFS_INVALID_INO_NUM 0 ++ ++#define APFS_ROOT_DIR_PARENT 1 /* Root directory parent */ ++#define APFS_ROOT_DIR_INO_NUM 2 /* Root directory */ ++#define APFS_PRIV_DIR_INO_NUM 3 /* Private directory */ ++#define APFS_SNAP_DIR_INO_NUM 6 /* Snapshots metadata */ ++#define APFS_PURGEABLE_DIR_INO_NUM 7 /* Parent of purgeable files */ ++ ++/* Smallest inode number available for user content */ ++#define APFS_MIN_USER_INO_NUM 16 ++ ++#define APFS_UNIFIED_ID_SPACE_MARK 0x0800000000000000 ++ ++/* Inode internal flags */ ++#define APFS_INODE_IS_APFS_PRIVATE 
0x00000001 ++#define APFS_INODE_MAINTAIN_DIR_STATS 0x00000002 ++#define APFS_INODE_DIR_STATS_ORIGIN 0x00000004 ++#define APFS_INODE_PROT_CLASS_EXPLICIT 0x00000008 ++#define APFS_INODE_WAS_CLONED 0x00000010 ++#define APFS_INODE_FLAG_UNUSED 0x00000020 ++#define APFS_INODE_HAS_SECURITY_EA 0x00000040 ++#define APFS_INODE_BEING_TRUNCATED 0x00000080 ++#define APFS_INODE_HAS_FINDER_INFO 0x00000100 ++#define APFS_INODE_IS_SPARSE 0x00000200 ++#define APFS_INODE_WAS_EVER_CLONED 0x00000400 ++#define APFS_INODE_ACTIVE_FILE_TRIMMED 0x00000800 ++#define APFS_INODE_PINNED_TO_MAIN 0x00001000 ++#define APFS_INODE_PINNED_TO_TIER2 0x00002000 ++#define APFS_INODE_HAS_RSRC_FORK 0x00004000 ++#define APFS_INODE_NO_RSRC_FORK 0x00008000 ++#define APFS_INODE_ALLOCATION_SPILLEDOVER 0x00010000 ++#define APFS_INODE_FAST_PROMOTE 0x00020000 ++#define APFS_INODE_HAS_UNCOMPRESSED_SIZE 0x00040000 ++#define APFS_INODE_IS_PURGEABLE 0x00080000 ++#define APFS_INODE_WANTS_TO_BE_PURGEABLE 0x00100000 ++#define APFS_INODE_IS_SYNC_ROOT 0x00200000 ++#define APFS_INODE_SNAPSHOT_COW_EXEMPTION 0x00400000 ++/* This flag is not documented */ ++#define APFS_INODE_HAS_PURGEABLE_FLAGS 0x02000000 ++ ++/* Masks for internal flags */ ++#define APFS_VALID_INTERNAL_INODE_FLAGS (APFS_INODE_IS_APFS_PRIVATE \ ++ | APFS_INODE_MAINTAIN_DIR_STATS \ ++ | APFS_INODE_DIR_STATS_ORIGIN \ ++ | APFS_INODE_PROT_CLASS_EXPLICIT \ ++ | APFS_INODE_WAS_CLONED \ ++ | APFS_INODE_HAS_SECURITY_EA \ ++ | APFS_INODE_BEING_TRUNCATED \ ++ | APFS_INODE_HAS_FINDER_INFO \ ++ | APFS_INODE_IS_SPARSE \ ++ | APFS_INODE_WAS_EVER_CLONED \ ++ | APFS_INODE_ACTIVE_FILE_TRIMMED \ ++ | APFS_INODE_PINNED_TO_MAIN \ ++ | APFS_INODE_PINNED_TO_TIER2 \ ++ | APFS_INODE_HAS_RSRC_FORK \ ++ | APFS_INODE_NO_RSRC_FORK \ ++ | APFS_INODE_ALLOCATION_SPILLEDOVER \ ++ | APFS_INODE_FAST_PROMOTE \ ++ | APFS_INODE_HAS_UNCOMPRESSED_SIZE \ ++ | APFS_INODE_IS_PURGEABLE \ ++ | APFS_INODE_WANTS_TO_BE_PURGEABLE \ ++ | APFS_INODE_IS_SYNC_ROOT \ ++ | APFS_INODE_SNAPSHOT_COW_EXEMPTION \ ++ 
| APFS_INODE_HAS_PURGEABLE_FLAGS) ++#define APFS_INODE_INHERITED_INTERNAL_FLAGS (APFS_INODE_MAINTAIN_DIR_STATS) ++#define APFS_INDOE_CLONED_INTERNAL_FLAGS (APFS_INODE_HAS_RSRC_FORK \ ++ | APFS_INODE_NO_RSRC_FORK \ ++ | APFS_INODE_HAS_FINDER_INFO) ++#define APFS_INODE_PINNED_MASK (APFS_INODE_PINNED_TO_MAIN \ ++ | APFS_INODE_PINNED_TO_TIER2) ++ ++/* BSD flags */ ++#define APFS_INOBSD_NODUMP 0x00000001 ++#define APFS_INOBSD_IMMUTABLE 0x00000002 ++#define APFS_INOBSD_APPEND 0x00000004 ++#define APFS_INOBSD_COMPRESSED 0x00000020 ++ ++/* ++ * Structure of an inode as stored as a B-tree value ++ */ ++struct apfs_inode_val { ++/*00*/ __le64 parent_id; ++ __le64 private_id; ++/*10*/ __le64 create_time; ++ __le64 mod_time; ++ __le64 change_time; ++ __le64 access_time; ++/*30*/ __le64 internal_flags; ++ union { ++ __le32 nchildren; ++ __le32 nlink; ++ }; ++ __le32 default_protection_class; ++/*40*/ __le32 write_generation_counter; ++ __le32 bsd_flags; ++ __le32 owner; ++ __le32 group; ++/*50*/ __le16 mode; ++ __le16 pad1; ++ __le64 uncompressed_size; ++/*5C*/ u8 xfields[]; ++} __packed; ++ ++/* Extended field types for dentries */ ++#define APFS_DREC_EXT_TYPE_SIBLING_ID 1 ++ ++/* Extended field types for inodes */ ++#define APFS_INO_EXT_TYPE_SNAP_XID 1 ++#define APFS_INO_EXT_TYPE_DELTA_TREE_OID 2 ++#define APFS_INO_EXT_TYPE_DOCUMENT_ID 3 ++#define APFS_INO_EXT_TYPE_NAME 4 ++#define APFS_INO_EXT_TYPE_PREV_FSIZE 5 ++#define APFS_INO_EXT_TYPE_RESERVED_6 6 ++#define APFS_INO_EXT_TYPE_FINDER_INFO 7 ++#define APFS_INO_EXT_TYPE_DSTREAM 8 ++#define APFS_INO_EXT_TYPE_RESERVED_9 9 ++#define APFS_INO_EXT_TYPE_DIR_STATS_KEY 10 ++#define APFS_INO_EXT_TYPE_FS_UUID 11 ++#define APFS_INO_EXT_TYPE_RESERVED_12 12 ++#define APFS_INO_EXT_TYPE_SPARSE_BYTES 13 ++#define APFS_INO_EXT_TYPE_RDEV 14 ++#define APFS_INO_EXT_TYPE_PURGEABLE_FLAGS 15 ++#define APFS_INO_EXT_TYPE_ORIG_SYNC_ROOT_ID 16 ++ ++/* Extended field flags */ ++#define APFS_XF_DATA_DEPENDENT 0x01 ++#define APFS_XF_DO_NOT_COPY 0x02 
++#define APFS_XF_RESERVED_4 0x04 ++#define APFS_XF_CHILDREN_INHERIT 0x08 ++#define APFS_XF_USER_FIELD 0x10 ++#define APFS_XF_SYSTEM_FIELD 0x20 ++#define APFS_XF_RESERVED_40 0x40 ++#define APFS_XF_RESERVED_80 0x80 ++ ++/* Constants for extended fields */ ++#define APFS_MIN_DOC_ID 3 /* Smallest not reserved document id */ ++ ++/* ++ * Structure used to store the number and size of an xfield collection. The ++ * official reference seems to be wrong about @xf_used_data: it's the size of ++ * the xfield values alone, without the metadata. ++ */ ++struct apfs_xf_blob { ++ __le16 xf_num_exts; ++ __le16 xf_used_data; ++ u8 xf_data[]; ++} __packed; ++ ++/* ++ * Structure used to describe an extended field ++ */ ++struct apfs_x_field { ++ u8 x_type; ++ u8 x_flags; ++ __le16 x_size; ++} __packed; ++ ++/* ++ * Structure used to store information about a data stream ++ */ ++struct apfs_dstream { ++ __le64 size; ++ __le64 alloced_size; ++ __le64 default_crypto_id; ++ __le64 total_bytes_written; ++ __le64 total_bytes_read; ++} __packed; ++ ++/* ++ * Structure used to store directory information ++ */ ++struct apfs_dir_stats_val { ++ __le64 num_children; ++ __le64 total_size; ++ __le64 chained_key; ++ __le64 gen_count; ++} __packed; ++ ++/* ++ * Structure of the value for a sibling link record. These are used to ++ * list the hard links for a given inode. ++ */ ++struct apfs_sibling_val { ++ __le64 parent_id; ++ __le16 name_len; ++ u8 name[]; ++} __packed; ++ ++/* ++ * Structure of the value for a sibling map record. No idea what these are for. 
++ */ ++struct apfs_sibling_map_val { ++ __le64 file_id; ++} __packed; ++ ++/* ++ * Structure of a key in an object map B-tree ++ */ ++struct apfs_omap_key { ++ __le64 ok_oid; ++ __le64 ok_xid; ++} __packed; ++ ++/* ++ * Structure of a key in a free-space queue b-tree ++ */ ++struct apfs_spaceman_free_queue_key { ++ __le64 sfqk_xid; ++ __le64 sfqk_paddr; ++} __packed; ++ ++/* Catalog records types */ ++enum { ++ APFS_TYPE_ANY = 0, ++ APFS_TYPE_SNAP_METADATA = 1, ++ APFS_TYPE_EXTENT = 2, ++ APFS_TYPE_INODE = 3, ++ APFS_TYPE_XATTR = 4, ++ APFS_TYPE_SIBLING_LINK = 5, ++ APFS_TYPE_DSTREAM_ID = 6, ++ APFS_TYPE_CRYPTO_STATE = 7, ++ APFS_TYPE_FILE_EXTENT = 8, ++ APFS_TYPE_DIR_REC = 9, ++ APFS_TYPE_DIR_STATS = 10, ++ APFS_TYPE_SNAP_NAME = 11, ++ APFS_TYPE_SIBLING_MAP = 12, ++ APFS_TYPE_MAX_VALID = 12, ++ APFS_TYPE_MAX = 15, ++ APFS_TYPE_INVALID = 15, ++}; ++ ++/* Bit masks for the 'obj_id_and_type' field of a key header */ ++#define APFS_OBJ_ID_MASK 0x0fffffffffffffffULL ++#define APFS_OBJ_TYPE_MASK 0xf000000000000000ULL ++#define APFS_OBJ_TYPE_SHIFT 60 ++ ++/* Key header for filesystem-object keys */ ++struct apfs_key_header { ++ __le64 obj_id_and_type; ++} __packed; ++ ++/* ++ * Structure of the key for a physical extent record ++ */ ++struct apfs_phys_ext_key { ++ struct apfs_key_header hdr; ++} __packed; ++ ++/* ++ * Structure of the key for an inode record ++ */ ++struct apfs_inode_key { ++ struct apfs_key_header hdr; ++} __packed; ++ ++/* ++ * Structure of the key for a file extent record ++ */ ++struct apfs_file_extent_key { ++ struct apfs_key_header hdr; ++ __le64 logical_addr; ++} __packed; ++ ++/* ++ * Structure of the key for a data stream record ++ */ ++struct apfs_dstream_id_key { ++ struct apfs_key_header hdr; ++} __packed; ++ ++/* ++ * Structure of the key for a crypto state record ++ */ ++struct apfs_crypto_state_key { ++ struct apfs_key_header hdr; ++} __packed; ++ ++/* Bit masks for the 'name_len_and_hash' field of a directory entry */ ++#define 
APFS_DREC_LEN_MASK 0x000003ff ++#define APFS_DREC_HASH_MASK 0xfffffc00 ++#define APFS_DREC_HASH_SHIFT 10 ++ ++/* We could support bigger filenames, but I don't know if Linux allows it */ ++#define APFS_NAME_LEN NAME_MAX ++ ++/* Bit masks for the 'type' field of a directory entry */ ++enum { ++ APFS_DREC_TYPE_MASK = 0x000f, ++ APFS_DREC_RESERVED_10 = 0x0010, ++ ++ /* These flags are not documented */ ++ APFS_DREC_PURGEABLE_2 = 0x0200, ++ APFS_DREC_PURGEABLE_8 = 0x0800, ++}; ++ ++#define APFS_DREC_PURGEABLE (APFS_DREC_PURGEABLE_2 | APFS_DREC_PURGEABLE_8) ++ ++/* ++ * Structure of the key for a directory entry - no hash, used on normalization ++ * sensitive volumes ++ */ ++struct apfs_drec_key { ++ struct apfs_key_header hdr; ++ __le16 name_len; ++ u8 name[]; ++} __packed; ++ ++/* ++ * Structure of the key for a directory entry, including a precomputed ++ * hash of its name ++ */ ++struct apfs_drec_hashed_key { ++ struct apfs_key_header hdr; ++ __le32 name_len_and_hash; ++ u8 name[]; ++} __packed; ++ ++/* ++ * Structure of the key for an extended attributes record ++ */ ++struct apfs_xattr_key { ++ struct apfs_key_header hdr; ++ __le16 name_len; ++ u8 name[]; ++} __packed; ++ ++/* ++ * Structure of the key for a snapshot metadata record ++ */ ++struct apfs_snap_metadata_key { ++ struct apfs_key_header hdr; ++} __packed; ++ ++/* The snapshot name headers always have this placeholder object id */ ++#define APFS_SNAP_NAME_OBJ_ID (~0ULL & APFS_OBJ_ID_MASK) ++ ++/* ++ * Structure of the key for a snapshot name record ++ */ ++struct apfs_snap_name_key { ++ struct apfs_key_header hdr; ++ __le16 name_len; ++ u8 name[]; ++} __packed; ++ ++/* ++ * Structure of the key for a sibling link record ++ */ ++struct apfs_sibling_link_key { ++ struct apfs_key_header hdr; ++ __le64 sibling_id; ++} __packed; ++ ++/* ++ * Structure of the key for a siblink map record ++ */ ++struct apfs_sibling_map_key { ++ struct apfs_key_header hdr; ++} __packed; ++ ++/* ++ * On-disk allocation info for 
a chunk of blocks ++ */ ++struct apfs_chunk_info { ++ __le64 ci_xid; ++ __le64 ci_addr; ++ __le32 ci_block_count; ++ __le32 ci_free_count; ++ __le64 ci_bitmap_addr; ++} __packed; ++ ++/* Constants for the chunk info block */ ++#define APFS_CI_COUNT_MASK 0x000FFFFF ++#define APFS_CI_COUNT_RESERVED_MASK 0xFFF00000 ++ ++/* ++ * Structure of a block with an array of chunk allocation info structures ++ */ ++struct apfs_chunk_info_block { ++ struct apfs_obj_phys cib_o; ++ __le32 cib_index; ++ __le32 cib_chunk_info_count; ++ struct apfs_chunk_info cib_chunk_info[]; ++} __packed; ++ ++/* ++ * Structure of a block with an array of addresses to chunk information blocks ++ */ ++struct apfs_cib_addr_block { ++ struct apfs_obj_phys cab_o; ++ __le32 cab_index; ++ __le32 cab_cib_count; ++ __le64 cab_cib_addr[]; ++} __packed; ++ ++/* ++ * On-disk structure for a free queue ++ */ ++struct apfs_spaceman_free_queue { ++ __le64 sfq_count; ++ __le64 sfq_tree_oid; ++ __le64 sfq_oldest_xid; ++ __le16 sfq_tree_node_limit; ++ __le16 sfq_pad16; ++ __le32 sfq_pad32; ++ __le64 sfq_reserved; ++} __packed; ++ ++/* Indexes for a free queue array */ ++enum { ++ APFS_SFQ_IP = 0, ++ APFS_SFQ_MAIN = 1, ++ APFS_SFQ_TIER2 = 2, ++ APFS_SFQ_COUNT = 3 ++}; ++ ++/* ++ * On-disk structure for device allocation information ++ */ ++struct apfs_spaceman_device { ++ __le64 sm_block_count; ++ __le64 sm_chunk_count; ++ __le32 sm_cib_count; ++ __le32 sm_cab_count; ++ __le64 sm_free_count; ++ __le32 sm_addr_offset; ++ __le32 sm_reserved; ++ __le64 sm_reserved2; ++} __packed; ++ ++/* Indexes for a device array */ ++enum { ++ APFS_SD_MAIN = 0, ++ APFS_SD_TIER2 = 1, ++ APFS_SD_COUNT = 2 ++}; ++ ++/* ++ * On-disk structure to describe allocation zone boundaries ++ */ ++struct apfs_spaceman_allocation_zone_boundaries { ++ __le64 saz_zone_start; ++ __le64 saz_zone_end; ++} __packed; ++ ++/* Allocation zone constants */ ++#define APFS_SM_ALLOCZONE_INVALID_END_BOUNDARY 0 ++#define APFS_SM_ALLOCZONE_NUM_PREVIOUS_BOUNDARIES 
7 ++ ++struct apfs_spaceman_allocation_zone_info_phys { ++ struct apfs_spaceman_allocation_zone_boundaries saz_current_boundaries; ++ struct apfs_spaceman_allocation_zone_boundaries ++ saz_previous_boundaries[APFS_SM_ALLOCZONE_NUM_PREVIOUS_BOUNDARIES]; ++ ++ __le16 saz_zone_id; ++ __le16 saz_previous_boundary_index; ++ __le32 saz_reserved; ++} __packed; ++ ++/* Datazone constants */ ++#define APFS_SM_DATAZONE_ALLOCZONE_COUNT 8 ++ ++struct apfs_spaceman_datazone_info_phys { ++ struct apfs_spaceman_allocation_zone_info_phys ++ sdz_allocation_zones[APFS_SD_COUNT][APFS_SM_DATAZONE_ALLOCZONE_COUNT]; ++} __packed; ++ ++/* Internal-pool bitmap constants */ ++#define APFS_SPACEMAN_IP_BM_TX_MULTIPLIER 16 ++#define APFS_SPACEMAN_IP_BM_INDEX_INVALID 0xFFFF ++#define APFS_SPACEMAN_IP_BM_BLOCK_COUNT_MAX 0xFFFE ++ ++/* Space manager flags */ ++#define APFS_SM_FLAG_VERSIONED 0x00000001 ++#define APFS_SM_FLAGS_VALID_MASK APFS_SM_FLAG_VERSIONED ++ ++/* ++ * On-disk structure for the space manager ++ */ ++struct apfs_spaceman_phys { ++ struct apfs_obj_phys sm_o; ++ __le32 sm_block_size; ++ __le32 sm_blocks_per_chunk; ++ __le32 sm_chunks_per_cib; ++ __le32 sm_cibs_per_cab; ++ struct apfs_spaceman_device sm_dev[APFS_SD_COUNT]; ++ __le32 sm_flags; ++ __le32 sm_ip_bm_tx_multiplier; ++ __le64 sm_ip_block_count; ++ __le32 sm_ip_bm_size_in_blocks; ++ __le32 sm_ip_bm_block_count; ++ __le64 sm_ip_bm_base; ++ __le64 sm_ip_base; ++ __le64 sm_fs_reserve_block_count; ++ __le64 sm_fs_reserve_alloc_count; ++ struct apfs_spaceman_free_queue sm_fq[APFS_SFQ_COUNT]; ++ __le16 sm_ip_bm_free_head; ++ __le16 sm_ip_bm_free_tail; ++ __le32 sm_ip_bm_xid_offset; ++ __le32 sm_ip_bitmap_offset; ++ __le32 sm_ip_bm_free_next_offset; ++ __le32 sm_version; ++ __le32 sm_struct_size; ++ struct apfs_spaceman_datazone_info_phys sm_datazone; ++} __packed; ++ ++/* ++ * Structure used to store a range of physical blocks ++ */ ++struct apfs_prange { ++ __le64 pr_start_paddr; ++ __le64 pr_block_count; ++} __packed; ++ ++/* 
Reaper flags */ ++#define APFS_NR_BHM_FLAG 0x00000001 ++#define APFS_NR_CONTINUE 0x00000002 ++#define APFS_NR_FLAGS_VALID_MASK (APFS_NR_BHM_FLAG | APFS_NR_CONTINUE) ++ ++/* ++ * On-disk reaper structure ++ */ ++struct apfs_nx_reaper_phys { ++ struct apfs_obj_phys nr_o; ++ __le64 nr_next_reap_id; ++ __le64 nr_completed_id; ++ __le64 nr_head; ++ __le64 nr_tail; ++ __le32 nr_flags; ++ __le32 nr_rlcount; ++ __le32 nr_type; ++ __le32 nr_size; ++ __le64 nr_fs_oid; ++ __le64 nr_oid; ++ __le64 nr_xid; ++ __le32 nr_nrle_flags; ++ __le32 nr_state_buffer_size; ++ u8 nr_state_buffer[]; ++} __packed; ++ ++struct apfs_nx_reap_list_entry { ++ __le32 nrle_next; ++ __le32 nrle_flags; ++ __le32 nrle_type; ++ __le32 nrle_size; ++ __le64 nrle_fs_oid; ++ __le64 nrle_oid; ++ __le64 nrle_xid; ++} __packed; ++ ++struct apfs_nx_reap_list_phys { ++ struct apfs_obj_phys nrl_o; ++ __le64 nrl_next; ++ __le32 nrl_flags; ++ __le32 nrl_max; ++ __le32 nrl_count; ++ __le32 nrl_first; ++ __le32 nrl_last; ++ __le32 nrl_free; ++ struct apfs_nx_reap_list_entry nrl_entries[]; ++} __packed; ++ ++/* EFI constants */ ++#define APFS_NX_EFI_JUMPSTART_MAGIC 0x5244534A ++#define APFS_NX_EFI_JUMPSTART_VERSION 1 ++ ++/* ++ * Information about the embedded EFI driver ++ */ ++struct apfs_nx_efi_jumpstart { ++ struct apfs_obj_phys nej_o; ++ __le32 nej_magic; ++ __le32 nej_version; ++ __le32 nej_efi_file_len; ++ __le32 nej_num_extents; ++ __le64 nej_reserved[16]; ++ struct apfs_prange nej_rec_extents[]; ++} __packed; ++ ++/* Main container */ ++ ++/* Container constants */ ++#define APFS_NX_MAGIC 0x4253584E ++#define APFS_NX_BLOCK_NUM 0 ++#define APFS_NX_MAX_FILE_SYSTEMS 100 ++ ++#define APFS_NX_EPH_INFO_COUNT 4 ++#define APFS_NX_EPH_MIN_BLOCK_COUNT 8 ++#define APFS_NX_MAX_FILE_SYSTEM_EPH_STRUCTS 4 ++#define APFS_NX_TX_MIN_CHECKPOINT_COUNT 4 ++#define APFS_NX_EPH_INFO_VERSION_1 1 ++ ++/* Container flags */ ++#define APFS_NX_RESERVED_1 0x00000001LL ++#define APFS_NX_RESERVED_2 0x00000002LL ++#define APFS_NX_CRYPTO_SW 
0x00000004LL ++#define APFS_NX_FLAGS_VALID_MASK (APFS_NX_RESERVED_1 \ ++ | APFS_NX_RESERVED_2 \ ++ | APFS_NX_CRYPTO_SW) ++ ++/* Optional container feature flags */ ++#define APFS_NX_FEATURE_DEFRAG 0x0000000000000001ULL ++#define APFS_NX_FEATURE_LCFD 0x0000000000000002ULL ++#define APFS_NX_SUPPORTED_FEATURES_MASK (APFS_NX_FEATURE_DEFRAG | \ ++ APFS_NX_FEATURE_LCFD) ++ ++/* Read-only compatible container feature flags */ ++#define APFS_NX_SUPPORTED_ROCOMPAT_MASK (0x0ULL) ++ ++/* Incompatible container feature flags */ ++#define APFS_NX_INCOMPAT_VERSION1 0x0000000000000001ULL ++#define APFS_NX_INCOMPAT_VERSION2 0x0000000000000002ULL ++#define APFS_NX_INCOMPAT_FUSION 0x0000000000000100ULL ++#define APFS_NX_SUPPORTED_INCOMPAT_MASK (APFS_NX_INCOMPAT_VERSION2 \ ++ | APFS_NX_INCOMPAT_FUSION) ++ ++/* Block and container sizes */ ++#define APFS_NX_MINIMUM_BLOCK_SIZE 4096 ++#define APFS_NX_DEFAULT_BLOCK_SIZE 4096 ++#define APFS_NX_MAXIMUM_BLOCK_SIZE 65536 ++#define APFS_NX_MINIMUM_CONTAINER_SIZE 1048576 ++ ++/* Indexes into a container superblock's array of counters */ ++enum { ++ APFS_NX_CNTR_OBJ_CKSUM_SET = 0, ++ APFS_NX_CNTR_OBJ_CKSUM_FAIL = 1, ++ ++ APFS_NX_NUM_COUNTERS = 32 ++}; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) ++#define UUID_SIZE 16 ++#endif ++ ++/* ++ * On-disk representation of the container superblock ++ */ ++struct apfs_nx_superblock { ++/*00*/ struct apfs_obj_phys nx_o; ++/*20*/ __le32 nx_magic; ++ __le32 nx_block_size; ++ __le64 nx_block_count; ++ ++/*30*/ __le64 nx_features; ++ __le64 nx_readonly_compatible_features; ++ __le64 nx_incompatible_features; ++ ++/*48*/ char nx_uuid[UUID_SIZE]; ++ ++/*58*/ __le64 nx_next_oid; ++ __le64 nx_next_xid; ++ ++/*68*/ __le32 nx_xp_desc_blocks; ++ __le32 nx_xp_data_blocks; ++/*70*/ __le64 nx_xp_desc_base; ++ __le64 nx_xp_data_base; ++ __le32 nx_xp_desc_next; ++ __le32 nx_xp_data_next; ++/*88*/ __le32 nx_xp_desc_index; ++ __le32 nx_xp_desc_len; ++ __le32 nx_xp_data_index; ++ __le32 nx_xp_data_len; ++ 
++/*98*/ __le64 nx_spaceman_oid; ++ __le64 nx_omap_oid; ++ __le64 nx_reaper_oid; ++ ++/*B0*/ __le32 nx_test_type; ++ ++ __le32 nx_max_file_systems; ++/*B8*/ __le64 nx_fs_oid[APFS_NX_MAX_FILE_SYSTEMS]; ++/*3D8*/ __le64 nx_counters[APFS_NX_NUM_COUNTERS]; ++/*4D8*/ struct apfs_prange nx_blocked_out_prange; ++ __le64 nx_evict_mapping_tree_oid; ++/*4F0*/ __le64 nx_flags; ++ __le64 nx_efi_jumpstart; ++/*500*/ char nx_fusion_uuid[UUID_SIZE]; ++ struct apfs_prange nx_keylocker; ++/*520*/ __le64 nx_ephemeral_info[APFS_NX_EPH_INFO_COUNT]; ++ ++/*540*/ __le64 nx_test_oid; ++ ++ __le64 nx_fusion_mt_oid; ++/*550*/ __le64 nx_fusion_wbc_oid; ++ struct apfs_prange nx_fusion_wbc; ++ ++ __le64 nx_newest_mounted_version; ++ ++/*570*/ struct apfs_prange nx_mkb_locker; ++} __packed; ++ ++/* ++ * A mapping from an ephemeral object id to its physical address ++ */ ++struct apfs_checkpoint_mapping { ++ __le32 cpm_type; ++ __le32 cpm_subtype; ++ __le32 cpm_size; ++ __le32 cpm_pad; ++ __le64 cpm_fs_oid; ++ __le64 cpm_oid; ++ __le64 cpm_paddr; ++} __packed; ++ ++/* Checkpoint flags */ ++#define APFS_CHECKPOINT_MAP_LAST 0x00000001 ++ ++/* ++ * A checkpoint-mapping block ++ */ ++struct apfs_checkpoint_map_phys { ++ struct apfs_obj_phys cpm_o; ++ __le32 cpm_flags; ++ __le32 cpm_count; ++ struct apfs_checkpoint_mapping cpm_map[]; ++} __packed; ++ ++/* Volume */ ++ ++/* Volume constants */ ++#define APFS_MAGIC 0x42535041 ++ ++#define APFS_MAX_HIST 8 ++#define APFS_VOLNAME_LEN 256 ++ ++/* Volume flags */ ++#define APFS_FS_UNENCRYPTED 0x00000001LL ++#define APFS_FS_EFFACEABLE 0x00000002LL ++#define APFS_FS_RESERVED_4 0x00000004LL ++#define APFS_FS_ONEKEY 0x00000008LL ++#define APFS_FS_SPILLEDOVER 0x00000010LL ++#define APFS_FS_RUN_SPILLOVER_CLEANER 0x00000020LL ++#define APFS_FS_ALWAYS_CHECK_EXTENTREF 0x00000040LL ++#define APFS_FS_PREVIOUSLY_SEALED 0x00000080LL /* Made-up name */ ++#define APFS_FS_PFK 0x00000100LL /* Made-up name */ ++#define APFS_FS_UNKNOWN_200 0x00000200LL ++#define 
APFS_FS_FLAGS_VALID_MASK (APFS_FS_UNENCRYPTED \ ++ | APFS_FS_EFFACEABLE \ ++ | APFS_FS_RESERVED_4 \ ++ | APFS_FS_ONEKEY \ ++ | APFS_FS_SPILLEDOVER \ ++ | APFS_FS_RUN_SPILLOVER_CLEANER \ ++ | APFS_FS_ALWAYS_CHECK_EXTENTREF \ ++ | APFS_FS_PREVIOUSLY_SEALED \ ++ | APFS_FS_PFK \ ++ | APFS_FS_UNKNOWN_200) ++ ++#define APFS_FS_CRYPTOFLAGS (APFS_FS_UNENCRYPTED \ ++ | APFS_FS_EFFACEABLE \ ++ | APFS_FS_ONEKEY) ++ ++/* Volume roles */ ++#define APFS_VOLUME_ENUM_SHIFT 6 ++#define APFS_VOL_ROLE_NONE 0x0000 ++#define APFS_VOL_ROLE_SYSTEM 0x0001 ++#define APFS_VOL_ROLE_USER 0x0002 ++#define APFS_VOL_ROLE_RECOVERY 0x0004 ++#define APFS_VOL_ROLE_VM 0x0008 ++#define APFS_VOL_ROLE_PREBOOT 0x0010 ++#define APFS_VOL_ROLE_INSTALLER 0x0020 ++#define APFS_VOL_ROLE_DATA (1 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_BASEBAND (2 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_UPDATE (3 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_XART (4 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_HARDWARE (5 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_BACKUP (6 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_RESERVED_7 (7 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_RESERVED_8 (8 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_ENTERPRISE (9 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_RESERVED_10 (10 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLE_PRELOGIN (11 << APFS_VOLUME_ENUM_SHIFT) ++#define APFS_VOL_ROLES_VALID_MASK (APFS_VOL_ROLE_SYSTEM \ ++ | APFS_VOL_ROLE_USER \ ++ | APFS_VOL_ROLE_RECOVERY \ ++ | APFS_VOL_ROLE_VM \ ++ | APFS_VOL_ROLE_PREBOOT \ ++ | APFS_VOL_ROLE_INSTALLER \ ++ | APFS_VOL_ROLE_DATA \ ++ | APFS_VOL_ROLE_BASEBAND \ ++ | APFS_VOL_ROLE_UPDATE \ ++ | APFS_VOL_ROLE_XART \ ++ | APFS_VOL_ROLE_HARDWARE \ ++ | APFS_VOL_ROLE_BACKUP \ ++ | APFS_VOL_ROLE_RESERVED_7 \ ++ | APFS_VOL_ROLE_RESERVED_8 \ ++ | APFS_VOL_ROLE_ENTERPRISE \ ++ | APFS_VOL_ROLE_RESERVED_10 \ ++ | APFS_VOL_ROLE_PRELOGIN) ++ ++/* Optional volume feature flags */ ++#define 
APFS_FEATURE_DEFRAG_PRERELEASE 0x00000001LL ++#define APFS_FEATURE_HARDLINK_MAP_RECORDS 0x00000002LL ++#define APFS_FEATURE_DEFRAG 0x00000004LL ++#define APFS_FEATURE_STRICTATIME 0x00000008LL ++#define APFS_FEATURE_VOLGRP_SYSTEM_INO_SPACE 0x00000010LL ++ ++#define APFS_SUPPORTED_FEATURES_MASK (APFS_FEATURE_DEFRAG \ ++ | APFS_FEATURE_DEFRAG_PRERELEASE \ ++ | APFS_FEATURE_HARDLINK_MAP_RECORDS \ ++ | APFS_FEATURE_STRICTATIME \ ++ | APFS_FEATURE_VOLGRP_SYSTEM_INO_SPACE) ++ ++/* Read-only compatible volume feature flags */ ++#define APFS_SUPPORTED_ROCOMPAT_MASK (0x0ULL) ++ ++/* Incompatible volume feature flags */ ++#define APFS_INCOMPAT_CASE_INSENSITIVE 0x00000001LL ++#define APFS_INCOMPAT_DATALESS_SNAPS 0x00000002LL ++#define APFS_INCOMPAT_ENC_ROLLED 0x00000004LL ++#define APFS_INCOMPAT_NORMALIZATION_INSENSITIVE 0x00000008LL ++#define APFS_INCOMPAT_INCOMPLETE_RESTORE 0x00000010LL ++#define APFS_INCOMPAT_SEALED_VOLUME 0x00000020LL ++#define APFS_INCOMPAT_PFK 0x00000040LL /* Made-up name */ ++#define APFS_INCOMPAT_EXTENT_PREALLOC_FLAG 0x00000080LL /* Made-up name */ ++#define APFS_INCOMPAT_SECONDARY_FSROOT 0x00000100LL /* Made-up name */ ++ ++#define APFS_SUPPORTED_INCOMPAT_MASK (APFS_INCOMPAT_CASE_INSENSITIVE \ ++ | APFS_INCOMPAT_DATALESS_SNAPS \ ++ | APFS_INCOMPAT_ENC_ROLLED \ ++ | APFS_INCOMPAT_NORMALIZATION_INSENSITIVE \ ++ | APFS_INCOMPAT_INCOMPLETE_RESTORE \ ++ | APFS_INCOMPAT_SEALED_VOLUME \ ++ | APFS_INCOMPAT_PFK \ ++ | APFS_INCOMPAT_EXTENT_PREALLOC_FLAG \ ++ | APFS_INCOMPAT_SECONDARY_FSROOT) ++ ++#define APFS_MODIFIED_NAMELEN 32 ++ ++/* ++ * Structure containing information about a program that modified the volume ++ */ ++struct apfs_modified_by { ++ u8 id[APFS_MODIFIED_NAMELEN]; ++ __le64 timestamp; ++ __le64 last_xid; ++} __packed; ++ ++/* Version constants for wrapped meta crypto state */ ++#define APFS_WMCS_MAJOR_VERSION 5 ++#define APFS_WMCS_MINOR_VERSION 0 ++ ++/* Protection classes */ ++#define APFS_PROTECTION_CLASS_DIR_NONE 0 /* Inherits the directory's 
default */ ++#define APFS_PROTECTION_CLASS_A 1 ++#define APFS_PROTECTION_CLASS_B 2 ++#define APFS_PROTECTION_CLASS_C 3 ++#define APFS_PROTECTION_CLASS_D 4 /* No protection */ ++#define APFS_PROTECTION_CLASS_F 6 /* No protection, nonpersistent key */ ++ ++/* Encryption identifiers */ ++#define APFS_CRYPTO_SW_ID 4 ++#define APFS_CRYPTO_RESERVED_5 5 ++#define APFS_UNASSIGNED_CRYPTO_ID (~0ULL) ++ ++/* Doc id index flags. I'm making up the names for now. */ ++#define APFS_DOC_ID_HAS_PREV_TREE 0x00000001 ++#define APFS_DOC_ID_UNKNOWN_02 0x00000002 ++#define APFS_DOC_ID_UNKNOWN_04 0x00000004 ++#define APFS_DOC_ID_UNKNOWN_08 0x00000008 ++#define APFS_DOC_ID_UNKNOWN_10 0x00000010 ++#define APFS_DOC_ID_VALID_FLAGS (APFS_DOC_ID_HAS_PREV_TREE \ ++ | APFS_DOC_ID_UNKNOWN_02 \ ++ | APFS_DOC_ID_UNKNOWN_04 \ ++ | APFS_DOC_ID_UNKNOWN_08 \ ++ | APFS_DOC_ID_UNKNOWN_10) ++ ++/* ++ * Structure used to store the encryption state ++ */ ++struct apfs_wrapped_meta_crypto_state { ++ __le16 major_version; ++ __le16 minor_version; ++ __le32 cpflags; ++ __le32 persistent_class; ++ __le32 key_os_version; ++ __le16 key_revision; ++ __le16 unused; ++} __packed; ++ ++/* ++ * On-disk representation of a volume superblock ++ */ ++struct apfs_superblock { ++/*00*/ struct apfs_obj_phys apfs_o; ++ ++/*20*/ __le32 apfs_magic; ++ __le32 apfs_fs_index; ++ ++/*28*/ __le64 apfs_features; ++ __le64 apfs_readonly_compatible_features; ++ __le64 apfs_incompatible_features; ++ ++/*40*/ __le64 apfs_unmount_time; ++ ++ __le64 apfs_fs_reserve_block_count; ++ __le64 apfs_fs_quota_block_count; ++ __le64 apfs_fs_alloc_count; ++ ++/*60*/ struct apfs_wrapped_meta_crypto_state apfs_meta_crypto; ++ ++/*74*/ __le32 apfs_root_tree_type; ++ __le32 apfs_extentref_tree_type; ++ __le32 apfs_snap_meta_tree_type; ++ ++/*80*/ __le64 apfs_omap_oid; ++ __le64 apfs_root_tree_oid; ++ __le64 apfs_extentref_tree_oid; ++ __le64 apfs_snap_meta_tree_oid; ++ ++/*A0*/ __le64 apfs_revert_to_xid; ++ __le64 apfs_revert_to_sblock_oid; ++ ++/*B0*/ 
__le64 apfs_next_obj_id; ++ ++/*B8*/ __le64 apfs_num_files; ++ __le64 apfs_num_directories; ++ __le64 apfs_num_symlinks; ++ __le64 apfs_num_other_fsobjects; ++ __le64 apfs_num_snapshots; ++ ++/*E0*/ __le64 apfs_total_blocks_alloced; ++ __le64 apfs_total_blocks_freed; ++ ++/*F0*/ char apfs_vol_uuid[UUID_SIZE]; ++/*100*/ __le64 apfs_last_mod_time; ++ ++ __le64 apfs_fs_flags; ++ ++/*110*/ struct apfs_modified_by apfs_formatted_by; ++/*140*/ struct apfs_modified_by apfs_modified_by[APFS_MAX_HIST]; ++ ++/*2C0*/ u8 apfs_volname[APFS_VOLNAME_LEN]; ++/*3C0*/ __le32 apfs_next_doc_id; ++ ++ __le16 apfs_role; ++ __le16 reserved; ++ ++/*3C8*/ __le64 apfs_root_to_xid; ++ __le64 apfs_er_state_oid; ++ ++ __le64 apfs_cloneinfo_id_epoch; ++ __le64 apfs_cloneinfo_xid; ++ ++ __le64 apfs_snap_meta_ext_oid; ++ ++/*3F0*/ char apfs_volume_group_id[UUID_SIZE]; ++ ++/*400*/ __le64 apfs_integrity_meta_oid; ++ ++ __le64 apfs_fext_tree_oid; ++/*410*/ __le32 apfs_fext_tree_type; ++ ++ __le32 reserved_type; ++ __le64 reserved_oid; ++ ++/*420*/ __le64 apfs_doc_id_index_xid; ++ __le32 apfs_doc_id_index_flags; ++ __le32 apfs_doc_id_tree_type; ++/*430*/ __le64 apfs_doc_id_tree_oid; /* Made-up name */ ++ __le64 apfs_prev_doc_id_tree_oid; ++ __le64 apfs_doc_id_fixup_cursor; ++ __le64 apfs_sec_root_tree_oid; ++/*450*/ __le32 apfs_sec_root_tree_type; ++} __packed; ++ ++/* Extended attributes constants */ ++#define APFS_XATTR_MAX_EMBEDDED_SIZE 3804 ++ ++/* Extended attributes names */ ++#define APFS_XATTR_NAME_SYMLINK "com.apple.fs.symlink" ++#define APFS_XATTR_NAME_COMPRESSED "com.apple.decmpfs" ++#define APFS_XATTR_NAME_RSRC_FORK "com.apple.ResourceFork" ++#define APFS_XATTR_NAME_SECURITY "com.apple.system.Security" ++#define APFS_XATTR_NAME_FINDER_INFO "com.apple.FinderInfo" ++ ++/* Extended attributes flags */ ++enum { ++ APFS_XATTR_DATA_STREAM = 0x00000001, ++ APFS_XATTR_DATA_EMBEDDED = 0x00000002, ++ APFS_XATTR_FILE_SYSTEM_OWNED = 0x00000004, ++ APFS_XATTR_RESERVED_8 = 0x00000008, ++ 
APFS_XATTR_UNKNOWN_10 = 0x00000010, ++}; ++ ++#define APFS_XATTR_VALID_FLAGS 0x0000001f ++ ++/* ++ * Structure of the value of an extended attributes record ++ */ ++struct apfs_xattr_val { ++ __le16 flags; ++ __le16 xdata_len; ++ u8 xdata[]; ++} __packed; ++ ++/* ++ * Structure used to store the data of an extended attributes record ++ */ ++struct apfs_xattr_dstream { ++ __le64 xattr_obj_id; ++ struct apfs_dstream dstream; ++} __packed; ++ ++/* ++ * Integrity metadata for a sealed volume ++ */ ++struct apfs_integrity_meta_phys { ++ struct apfs_obj_phys im_o; ++ ++ __le32 im_version; ++ __le32 im_flags; ++ __le32 im_hash_type; ++ __le32 im_root_hash_offset; ++ __le64 im_broken_xid; ++ __le64 im_reserved[9]; ++} __packed; ++ ++/* ++ * Version numbers for the integrity metadata structure ++ */ ++enum { ++ APFS_INTEGRITY_META_VERSION_INVALID = 0, ++ APFS_INTEGRITY_META_VERSION_1 = 1, ++ APFS_INTEGRITY_META_VERSION_2 = 2, ++ APFS_INTEGRITY_META_VERSION_HIGHEST = APFS_INTEGRITY_META_VERSION_2 ++}; ++ ++/* Flags used by integrity metadata */ ++#define APFS_SEAL_BROKEN (1U << 0) ++ ++/* ++ * Constants used to identify hash algorithms ++ */ ++enum { ++ APFS_HASH_INVALID = 0, ++ APFS_HASH_SHA256 = 0x1, ++ APFS_HASH_SHA512_256 = 0x2, ++ APFS_HASH_SHA384 = 0x3, ++ APFS_HASH_SHA512 = 0x4, ++ ++ APFS_HASH_MIN = APFS_HASH_SHA256, ++ APFS_HASH_MAX = APFS_HASH_SHA512, ++ APFS_HASH_DEFAULT = APFS_HASH_SHA256, ++}; ++ ++#define APFS_HASH_CCSHA256_SIZE 32 ++#define APFS_HASH_CCSHA512_256_SIZE 32 ++#define APFS_HASH_CCSHA384_SIZE 48 ++#define APFS_HASH_CCSHA512_SIZE 64 ++ ++#define APFS_HASH_MAX_SIZE 64 ++ ++/* ++ * Structure of a key in a fext tree ++ */ ++struct apfs_fext_tree_key { ++ __le64 private_id; ++ __le64 logical_addr; ++} __packed; ++ ++/* ++ * Structure of a value in a fext tree ++ */ ++struct apfs_fext_tree_val { ++ __le64 len_and_flags; ++ __le64 phys_block_num; ++} __packed; ++ ++/* ++ * Structure of the key for a file info record ++ */ ++struct apfs_file_info_key { ++ 
struct apfs_key_header hdr; ++ __le64 info_and_lba; ++} __packed; ++ ++#define APFS_FILE_INFO_LBA_MASK 0x00ffffffffffffffULL ++#define APFS_FILE_INFO_TYPE_MASK 0xff00000000000000ULL ++#define APFS_FILE_INFO_TYPE_SHIFT 56 ++ ++/* ++ * A hash of file data ++ */ ++struct apfs_file_data_hash_val { ++ __le16 hashed_len; ++ u8 hash_size; ++ u8 hash[]; ++} __packed; ++ ++#define APFS_FILE_INFO_DATA_HASH 1 ++ ++/* ++ * Structure of the value for a file info record ++ */ ++struct apfs_file_info_val { ++ union { ++ struct apfs_file_data_hash_val dhash; ++ }; ++} __packed; ++ ++#define APFS_BTREE_NODE_HASH_SIZE_MAX 64 ++ ++/* ++ * Structure of the value of an index record for a hashed catalog tree ++ */ ++struct apfs_btn_index_node_val { ++ __le64 binv_child_oid; ++ /* ++ * The reference seems to be wrong about the hash size, at least for ++ * SHA-256. TODO: check what happens with other hash functions. ++ */ ++ u8 binv_child_hash[APFS_HASH_CCSHA256_SIZE]; ++} __packed; ++ ++/* ++ * Compressed file header ++ */ ++struct apfs_compress_hdr { ++ __le32 signature; ++ __le32 algo; ++ __le64 size; ++} __packed; ++ ++#define APFS_COMPRESS_ZLIB_ATTR 3 ++#define APFS_COMPRESS_ZLIB_RSRC 4 ++#define APFS_COMPRESS_LZVN_ATTR 7 ++#define APFS_COMPRESS_LZVN_RSRC 8 ++#define APFS_COMPRESS_PLAIN_ATTR 9 ++#define APFS_COMPRESS_PLAIN_RSRC 10 ++#define APFS_COMPRESS_LZFSE_ATTR 11 ++#define APFS_COMPRESS_LZFSE_RSRC 12 ++#define APFS_COMPRESS_LZBITMAP_ATTR 13 ++#define APFS_COMPRESS_LZBITMAP_RSRC 14 ++ ++struct apfs_compress_rsrc_hdr { ++ __be32 data_offs; ++ __be32 mgmt_offs; ++ __be32 data_size; ++ __be32 mgmt_size; ++} __packed; ++ ++#define APFS_COMPRESS_BLOCK 0x10000 ++ ++struct apfs_compress_rsrc_data { ++ __le32 unknown; ++ __le32 num; ++ struct apfs_compress_rsrc_block { ++ __le32 offs; ++ __le32 size; ++ } __packed block[]; ++} __packed; ++ ++/* ++ * TODO: this is a placeholder, create some snapshots with the official ++ * implementation to find the actual limit. 
++ */ ++#define APFS_SNAP_MAX_NAMELEN 255 ++ ++/* ++ * Structure of the value of a snapshot metadata record ++ */ ++struct apfs_snap_metadata_val { ++ __le64 extentref_tree_oid; ++ __le64 sblock_oid; ++ __le64 create_time; ++ __le64 change_time; ++ __le64 inum; ++ __le32 extentref_tree_type; ++ __le32 flags; ++ __le16 name_len; ++ u8 name[]; ++} __packed; ++ ++/* ++ * Structure of the value of a snapshot name record ++ */ ++struct apfs_snap_name_val { ++ __le64 snap_xid; ++} __packed; ++ ++/* ++ * Structure of the extended snapshot metadata ++ */ ++struct apfs_snap_meta_ext { ++ struct apfs_obj_phys sme_o; ++ ++ __le32 sme_version; ++ __le32 sme_flags; ++ __le64 sme_snap_xid; ++ char sme_uuid[UUID_SIZE]; ++ __le64 sme_token; ++} __packed; ++ ++#define APFS_OBJECT_TYPE_KEYBAG 0x6b657973 /* Spells 'syek' */ ++ ++#define APFS_VOL_KEYBAG_ENTRY_MAX_SIZE 512 ++#define APFS_FV_PERSONAL_RECOVERY_KEY_UUID "EBC6C064-0000-11AA-AA11-00306543ECAC" ++ ++/* Keybag entry types */ ++enum { ++ KB_TAG_UNKNOWN = 0, ++ KB_TAG_RESERVED_1 = 1, ++ ++ KB_TAG_VOLUME_KEY = 2, ++ KB_TAG_VOLUME_UNLOCK_RECORDS = 3, ++ KB_TAG_VOLUME_PASSPHRASE_HINT = 4, ++ ++ KB_TAG_WRAPPING_M_KEY = 5, ++ KB_TAG_VOLUME_M_KEY = 6, ++ ++ KB_TAG_RESERVED_F8 = 0xF8 ++}; ++ ++/* ++ * Structure of a single entry in the keybag ++ */ ++struct apfs_keybag_entry { ++ char ke_uuid[UUID_SIZE]; ++ __le16 ke_tag; ++ __le16 ke_keylen; ++ __le32 padding; ++ u8 ke_keydata[]; ++} __packed; ++ ++#define APFS_KEYBAG_VERSION 2 ++ ++/* ++ * Structure of the locker in the keybag ++ */ ++struct apfs_kb_locker { ++ __le16 kl_version; ++ __le16 kl_nkeys; ++ __le32 kl_nbytes; ++ __le64 padding; ++ struct apfs_keybag_entry kl_entries[]; ++} __packed; ++ ++#endif /* _APFS_RAW_H */ +diff --git a/fs/apfs/btree.c b/fs/apfs/btree.c +new file mode 100644 +index 000000000..c5af1100a +--- /dev/null ++++ b/fs/apfs/btree.c +@@ -0,0 +1,1174 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. 
Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++struct apfs_node *apfs_query_root(const struct apfs_query *query) ++{ ++ while (query->parent) ++ query = query->parent; ++ ASSERT(apfs_node_is_root(query->node)); ++ return query->node; ++} ++ ++static u64 apfs_catalog_base_oid(struct apfs_query *query) ++{ ++ struct apfs_query *root_query = NULL; ++ ++ root_query = query; ++ while (root_query->parent) ++ root_query = root_query->parent; ++ ++ return root_query->node->object.oid; ++} ++ ++/** ++ * apfs_child_from_query - Read the child id found by a successful nonleaf query ++ * @query: the query that found the record ++ * @child: Return parameter. The child id found. ++ * ++ * Reads the child id in the nonleaf node record into @child and performs a ++ * basic sanity check as a protection against crafted filesystems. Returns 0 ++ * on success or -EFSCORRUPTED otherwise. ++ */ ++static int apfs_child_from_query(struct apfs_query *query, u64 *child) ++{ ++ struct super_block *sb = query->node->object.sb; ++ char *raw = query->node->object.data; ++ ++ if (query->flags & APFS_QUERY_CAT && apfs_is_sealed(sb)) { ++ struct apfs_btn_index_node_val *index_val = NULL; ++ ++ if (query->len != sizeof(*index_val)) { ++ apfs_err(sb, "bad sealed index value length (%d)", query->len); ++ return -EFSCORRUPTED; ++ } ++ index_val = (struct apfs_btn_index_node_val *)(raw + query->off); ++ *child = le64_to_cpu(index_val->binv_child_oid) + apfs_catalog_base_oid(query); ++ } else { ++ if (query->len != 8) { /* The data on a nonleaf node is the child id */ ++ apfs_err(sb, "bad index value length (%d)", query->len); ++ return -EFSCORRUPTED; ++ } ++ *child = le64_to_cpup((__le64 *)(raw + query->off)); ++ } ++ return 0; ++} ++ ++/** ++ * apfs_omap_cache_lookup - Look for an oid in an omap's cache ++ * @omap: the object map ++ * @oid: object id to look up ++ * @bno: on return, the block number for the oid ++ * ++ * Returns 0 on success, or -1 if this mapping is not cached. 
++ */ ++static int apfs_omap_cache_lookup(struct apfs_omap *omap, u64 oid, u64 *bno) ++{ ++ struct apfs_omap_cache *cache = &omap->omap_cache; ++ struct apfs_omap_rec *record = NULL; ++ int slot; ++ int ret = -1; ++ ++ if (cache->disabled) ++ return -1; ++ ++ /* Uninitialized cache records use OID 0, so check this just in case */ ++ if (!oid) ++ return -1; ++ ++ slot = oid & APFS_OMAP_CACHE_SLOT_MASK; ++ record = &cache->recs[slot]; ++ ++ spin_lock(&cache->lock); ++ if (record->oid == oid) { ++ *bno = record->bno; ++ ret = 0; ++ } ++ spin_unlock(&cache->lock); ++ ++ return ret; ++} ++ ++/** ++ * apfs_omap_cache_save - Save a record in an omap's cache ++ * @omap: the object map ++ * @oid: object id of the record ++ * @bno: block number for the oid ++ */ ++static void apfs_omap_cache_save(struct apfs_omap *omap, u64 oid, u64 bno) ++{ ++ struct apfs_omap_cache *cache = &omap->omap_cache; ++ struct apfs_omap_rec *record = NULL; ++ int slot; ++ ++ if (cache->disabled) ++ return; ++ ++ slot = oid & APFS_OMAP_CACHE_SLOT_MASK; ++ record = &cache->recs[slot]; ++ ++ spin_lock(&cache->lock); ++ record->oid = oid; ++ record->bno = bno; ++ spin_unlock(&cache->lock); ++} ++ ++/** ++ * apfs_omap_cache_delete - Try to delete a record from an omap's cache ++ * @omap: the object map ++ * @oid: object id of the record ++ */ ++static void apfs_omap_cache_delete(struct apfs_omap *omap, u64 oid) ++{ ++ struct apfs_omap_cache *cache = &omap->omap_cache; ++ struct apfs_omap_rec *record = NULL; ++ int slot; ++ ++ if (cache->disabled) ++ return; ++ ++ slot = oid & APFS_OMAP_CACHE_SLOT_MASK; ++ record = &cache->recs[slot]; ++ ++ spin_lock(&cache->lock); ++ if (record->oid == oid) { ++ record->oid = 0; ++ record->bno = 0; ++ } ++ spin_unlock(&cache->lock); ++} ++ ++/** ++ * apfs_mounted_xid - Returns the mounted xid for this superblock ++ * @sb: superblock structure ++ * ++ * This function is needed instead of APFS_NXI(@sb)->nx_xid in situations where ++ * we might be working with a snapshot. 
Snapshots are read-only and should ++ * mostly ignore xids, so this only appears to matter for omap lookups. ++ */ ++static inline u64 apfs_mounted_xid(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ ++ return sbi->s_snap_xid ? sbi->s_snap_xid : nxi->nx_xid; ++} ++ ++/** ++ * apfs_xid_in_snapshot - Check if an xid is part of a snapshot ++ * @omap: the object map ++ * @xid: the xid to check ++ */ ++static inline bool apfs_xid_in_snapshot(struct apfs_omap *omap, u64 xid) ++{ ++ return xid <= omap->omap_latest_snap; ++} ++ ++/** ++ * apfs_omap_lookup_block_with_xid - Find bno of a virtual object from oid/xid ++ * @sb: filesystem superblock ++ * @omap: object map to be searched ++ * @id: id of the node ++ * @xid: transaction id ++ * @block: on return, the found block number ++ * @write: get write access to the object? ++ * ++ * Searches @omap for the most recent matching object with a transaction id ++ * below @xid. Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_omap_lookup_block_with_xid(struct super_block *sb, struct apfs_omap *omap, u64 id, u64 xid, u64 *block, bool write) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_query *query; ++ struct apfs_omap_map map = {0}; ++ int ret = 0; ++ ++ if (!write) { ++ if (!apfs_omap_cache_lookup(omap, id, block)) ++ return 0; ++ } ++ ++ query = apfs_alloc_query(omap->omap_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_omap_key(id, xid, &query->key); ++ query->flags |= APFS_QUERY_OMAP; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ if (ret != -ENODATA) ++ apfs_err(sb, "query failed for oid 0x%llx, xid 0x%llx", id, xid); ++ goto fail; ++ } ++ ++ ret = apfs_omap_map_from_query(query, &map); ++ if (ret) { ++ apfs_alert(sb, "bad object map leaf block: 0x%llx", ++ query->node->object.block_nr); ++ goto fail; ++ } ++ *block = map.bno; ++ ++ if (write) { ++ struct apfs_omap_key key; ++ struct apfs_omap_val val; ++ struct buffer_head *new_bh; ++ bool preserve; ++ ++ preserve = apfs_xid_in_snapshot(omap, map.xid); ++ ++ new_bh = apfs_read_object_block(sb, *block, write, preserve); ++ if (IS_ERR(new_bh)) { ++ apfs_err(sb, "CoW failed for oid 0x%llx, xid 0x%llx", id, xid); ++ ret = PTR_ERR(new_bh); ++ goto fail; ++ } ++ ++ key.ok_oid = cpu_to_le64(id); ++ key.ok_xid = cpu_to_le64(nxi->nx_xid); ++ val.ov_flags = cpu_to_le32(map.flags); ++ val.ov_size = cpu_to_le32(sb->s_blocksize); ++ val.ov_paddr = cpu_to_le64(new_bh->b_blocknr); ++ ++ if (preserve) ++ ret = apfs_btree_insert(query, &key, sizeof(key), &val, sizeof(val)); ++ else ++ ret = apfs_btree_replace(query, &key, sizeof(key), &val, sizeof(val)); ++ if (ret) ++ apfs_err(sb, "CoW omap update failed (oid 0x%llx, xid 0x%llx)", id, xid); ++ ++ *block = new_bh->b_blocknr; ++ brelse(new_bh); ++ } ++ ++ apfs_omap_cache_save(omap, id, *block); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_omap_lookup_block - Find the block number of a 
b-tree node from its id ++ * @sb: filesystem superblock ++ * @omap: object map to be searched ++ * @id: id of the node ++ * @block: on return, the found block number ++ * @write: get write access to the object? ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_omap_lookup_block(struct super_block *sb, struct apfs_omap *omap, u64 id, u64 *block, bool write) ++{ ++ return apfs_omap_lookup_block_with_xid(sb, omap, id, apfs_mounted_xid(sb), block, write); ++} ++ ++/** ++ * apfs_omap_lookup_newest_block - Find newest bno for a virtual object's oid ++ * @sb: filesystem superblock ++ * @omap: object map to be searched ++ * @id: id of the object ++ * @block: on return, the found block number ++ * @write: get write access to the object? ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_omap_lookup_newest_block(struct super_block *sb, struct apfs_omap *omap, u64 id, u64 *block, bool write) ++{ ++ return apfs_omap_lookup_block_with_xid(sb, omap, id, -1, block, write); ++} ++ ++/** ++ * apfs_create_omap_rec - Create a record in the volume's omap tree ++ * @sb: filesystem superblock ++ * @oid: object id ++ * @bno: block number ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_create_omap_rec(struct super_block *sb, u64 oid, u64 bno) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_omap *omap = sbi->s_omap; ++ struct apfs_query *query; ++ struct apfs_omap_key raw_key; ++ struct apfs_omap_val raw_val; ++ int ret; ++ ++ query = apfs_alloc_query(omap->omap_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_omap_key(oid, nxi->nx_xid, &query->key); ++ query->flags |= APFS_QUERY_OMAP; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for oid 0x%llx, bno 0x%llx", oid, bno); ++ goto fail; ++ } ++ ++ raw_key.ok_oid = cpu_to_le64(oid); ++ raw_key.ok_xid = cpu_to_le64(nxi->nx_xid); ++ raw_val.ov_flags = 0; ++ raw_val.ov_size = cpu_to_le32(sb->s_blocksize); ++ raw_val.ov_paddr = cpu_to_le64(bno); ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), ++ &raw_val, sizeof(raw_val)); ++ if (ret) { ++ apfs_err(sb, "insertion failed for oid 0x%llx, bno 0x%llx", oid, bno); ++ goto fail; ++ } ++ ++ apfs_omap_cache_save(omap, oid, bno); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_delete_omap_rec - Delete an existing record from the volume's omap tree ++ * @sb: filesystem superblock ++ * @oid: object id for the record ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_delete_omap_rec(struct super_block *sb, u64 oid) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_omap *omap = sbi->s_omap; ++ struct apfs_query *query; ++ int ret; ++ ++ query = apfs_alloc_query(omap->omap_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_omap_key(oid, nxi->nx_xid, &query->key); ++ query->flags |= APFS_QUERY_OMAP; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret == -ENODATA) { ++ apfs_err(sb, "nonexistent record (oid 0x%llx)", oid); ++ ret = -EFSCORRUPTED; ++ goto fail; ++ } ++ if (ret) { ++ apfs_err(sb, "query failed (oid 0x%llx)", oid); ++ goto fail; ++ } ++ ret = apfs_btree_remove(query); ++ if (ret) { ++ apfs_err(sb, "removal failed (oid 0x%llx)", oid); ++ goto fail; ++ } ++ apfs_omap_cache_delete(omap, oid); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_alloc_query - Allocates a query structure ++ * @node: node to be searched ++ * @parent: query for the parent node ++ * ++ * Callers other than apfs_btree_query() should set @parent to NULL, and @node ++ * to the root of the b-tree. They should also initialize most of the query ++ * fields themselves; when @parent is not NULL the query will inherit them. ++ * ++ * Returns the allocated query, or NULL in case of failure. ++ */ ++struct apfs_query *apfs_alloc_query(struct apfs_node *node, ++ struct apfs_query *parent) ++{ ++ struct apfs_query *query; ++ ++ query = kzalloc(sizeof(*query), GFP_KERNEL); ++ if (!query) ++ return NULL; ++ ++ /* To be released by free_query. */ ++ query->node = node; ++ ++ if (parent) { ++ query->key = parent->key; ++ query->flags = parent->flags & ~(APFS_QUERY_DONE | APFS_QUERY_NEXT); ++ query->parent = parent; ++ query->depth = parent->depth + 1; ++ } ++ ++ /* ++ * We start the search with the last record and go backwards, but ++ * some queries later use the PREV flag to list them in order. 
++ */ ++ if (query->flags & APFS_QUERY_PREV) ++ query->index = -1; ++ else ++ query->index = node->records; ++ ++ return query; ++} ++ ++/** ++ * apfs_free_query - Free a query structure ++ * @query: query to free ++ * ++ * Also frees the ancestor queries, if they are kept. ++ */ ++void apfs_free_query(struct apfs_query *query) ++{ ++ while (query) { ++ struct apfs_query *parent = query->parent; ++ ++ /* The caller decides whether to free the root node */ ++ if (query->depth != 0) ++ apfs_node_free(query->node); ++ kfree(query); ++ query = parent; ++ } ++} ++ ++/** ++ * apfs_query_set_before_first - Set the query to point before the first record ++ * @sb: superblock structure ++ * @query: the query to set ++ * ++ * Queries set in this way are used to insert a record before the first one. ++ * Only the leaf gets set to the -1 entry; queries for other levels must be set ++ * to 0, since the first entry in each index node will need to be modified. ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_query_set_before_first(struct super_block *sb, struct apfs_query **query) ++{ ++ struct apfs_node *node = NULL; ++ struct apfs_query *parent = NULL; ++ u64 child_id; ++ u32 storage = apfs_query_storage(*query); ++ int err; ++ ++ while ((*query)->depth < 12) { ++ if (apfs_node_is_leaf((*query)->node)) { ++ (*query)->index = -1; ++ return 0; ++ } ++ apfs_node_query_first(*query); ++ ++ err = apfs_child_from_query(*query, &child_id); ++ if (err) { ++ apfs_alert(sb, "bad index block: 0x%llx", ++ (*query)->node->object.block_nr); ++ return err; ++ } ++ ++ /* Now go a level deeper */ ++ node = apfs_read_node(sb, child_id, storage, false /* write */); ++ if (IS_ERR(node)) { ++ apfs_err(sb, "failed to read child 0x%llx of node 0x%llx", child_id, (*query)->node->object.oid); ++ return PTR_ERR(node); ++ } ++ ++ parent = *query; ++ *query = apfs_alloc_query(node, parent); ++ if (!*query) { ++ apfs_node_free(node); ++ *query = parent; ++ return -ENOMEM; ++ } ++ node = NULL; ++ } ++ ++ apfs_err(sb, "btree is too high"); ++ return -EFSCORRUPTED; ++} ++ ++/** ++ * apfs_btree_query - Execute a query on a b-tree ++ * @sb: filesystem superblock ++ * @query: the query to execute ++ * ++ * Searches the b-tree starting at @query->index in @query->node, looking for ++ * the record corresponding to @query->key. ++ * ++ * Returns 0 in case of success and sets the @query->len, @query->off and ++ * @query->index fields to the results of the query. @query->node will now ++ * point to the leaf node holding the record. ++ * ++ * In case of failure returns an appropriate error code. ++ */ ++int apfs_btree_query(struct super_block *sb, struct apfs_query **query) ++{ ++ struct apfs_node *node; ++ struct apfs_query *parent; ++ u64 child_id; ++ u32 storage = apfs_query_storage(*query); ++ int err; ++ ++next_node: ++ if ((*query)->depth >= 12) { ++ /* ++ * We need a maximum depth for the tree so we can't loop ++ * forever if the filesystem is damaged. 
12 should be more ++ * than enough to map every block. ++ */ ++ apfs_err(sb, "btree is too high"); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ err = apfs_node_query(sb, *query); ++ if (err == -ENODATA && !(*query)->parent && (*query)->index == -1) { ++ /* ++ * We may be trying to insert a record before all others: don't ++ * let the query give up at the root node. ++ */ ++ err = apfs_query_set_before_first(sb, query); ++ if (err) { ++ apfs_err(sb, "failed to set before the first record"); ++ goto fail; ++ } ++ err = -ENODATA; ++ goto fail; ++ } else if (err == -EAGAIN) { ++ if (!(*query)->parent) { ++ /* We are at the root of the tree */ ++ err = -ENODATA; ++ goto fail; ++ } ++ ++ /* Move back up one level and continue the query */ ++ parent = (*query)->parent; ++ (*query)->parent = NULL; /* Don't free the parent */ ++ apfs_free_query(*query); ++ *query = parent; ++ goto next_node; ++ } else if (err) { ++ goto fail; ++ } ++ if (apfs_node_is_leaf((*query)->node)) /* All done */ ++ return 0; ++ ++ err = apfs_child_from_query(*query, &child_id); ++ if (err) { ++ apfs_alert(sb, "bad index block: 0x%llx", ++ (*query)->node->object.block_nr); ++ goto fail; ++ } ++ ++ /* Now go a level deeper and search the child */ ++ node = apfs_read_node(sb, child_id, storage, false /* write */); ++ if (IS_ERR(node)) { ++ apfs_err(sb, "failed to read node 0x%llx", child_id); ++ err = PTR_ERR(node); ++ goto fail; ++ } ++ ++ if (node->object.oid != child_id) ++ apfs_debug(sb, "corrupt b-tree"); ++ ++ /* ++ * Remember the parent node and index in case the search needs ++ * to be continued later. 
++ */ ++ parent = *query; ++ *query = apfs_alloc_query(node, parent); ++ if (!*query) { ++ apfs_node_free(node); ++ *query = parent; ++ err = -ENOMEM; ++ goto fail; ++ } ++ node = NULL; ++ goto next_node; ++ ++fail: ++ /* Don't leave stale record info here or some callers will use it */ ++ (*query)->key_len = (*query)->len = 0; ++ return err; ++} ++ ++static int __apfs_btree_replace(struct apfs_query *query, void *key, int key_len, void *val, int val_len); ++ ++/** ++ * apfs_query_join_transaction - Add the found node to the current transaction ++ * @query: query that found the node ++ */ ++int apfs_query_join_transaction(struct apfs_query *query) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ u64 oid = node->object.oid; ++ u32 storage = apfs_query_storage(query); ++ struct apfs_obj_phys *raw = NULL; ++ ++ /* ++ * Ephemeral objects are checkpoint data, and all of their xids get ++ * updated on commit. There is no real need to do it here as well, but ++ * it's better for consistency with the other object types. 
++ */ ++ if (storage == APFS_OBJ_EPHEMERAL) { ++ ASSERT(node->object.ephemeral); ++ raw = (void *)node->object.data; ++ raw->o_xid = cpu_to_le64(APFS_NXI(sb)->nx_xid); ++ return 0; ++ } ++ ++ if (buffer_trans(node->object.o_bh)) /* Already in the transaction */ ++ return 0; ++ /* Root nodes should join the transaction before the query is created */ ++ ASSERT(!apfs_node_is_root(node)); ++ ++ node = apfs_read_node(sb, oid, storage, true /* write */); ++ if (IS_ERR(node)) { ++ apfs_err(sb, "Cow failed for node 0x%llx", oid); ++ return PTR_ERR(node); ++ } ++ apfs_node_free(query->node); ++ query->node = node; ++ ++ if (storage == APFS_OBJ_PHYSICAL && query->parent) { ++ __le64 bno = cpu_to_le64(node->object.block_nr); ++ ++ /* The parent node needs to report the new location */ ++ return __apfs_btree_replace(query->parent, NULL /* key */, 0 /* key_len */, &bno, sizeof(bno)); ++ } ++ return 0; ++} ++ ++/** ++ * apfs_btree_change_rec_count - Update the b-tree info before a record change ++ * @query: query used to insert/remove/replace the leaf record ++ * @change: change in the record count ++ * @key_len: length of the new leaf record key (0 if removed or unchanged) ++ * @val_len: length of the new leaf record value (0 if removed or unchanged) ++ * ++ * Don't call this function if @query->parent was reset to NULL, or if the same ++ * is true of any of its ancestor queries. 
++ */ ++static void apfs_btree_change_rec_count(struct apfs_query *query, int change, ++ int key_len, int val_len) ++{ ++ struct super_block *sb; ++ struct apfs_node *root; ++ struct apfs_btree_node_phys *root_raw; ++ struct apfs_btree_info *info; ++ ++ if (change == -1) ++ ASSERT(!key_len && !val_len); ++ ASSERT(apfs_node_is_leaf(query->node)); ++ ++ root = apfs_query_root(query); ++ ASSERT(apfs_node_is_root(root)); ++ ++ sb = root->object.sb; ++ root_raw = (void *)root->object.data; ++ info = (void *)root_raw + sb->s_blocksize - sizeof(*info); ++ ++ apfs_assert_in_transaction(sb, &root_raw->btn_o); ++ if (key_len > le32_to_cpu(info->bt_longest_key)) ++ info->bt_longest_key = cpu_to_le32(key_len); ++ if (val_len > le32_to_cpu(info->bt_longest_val)) ++ info->bt_longest_val = cpu_to_le32(val_len); ++ le64_add_cpu(&info->bt_key_count, change); ++} ++ ++/** ++ * apfs_btree_change_node_count - Change the node count for a b-tree ++ * @query: query used to remove/create the node ++ * @change: change in the node count ++ * ++ * Also changes the node count in the volume superblock. Don't call this ++ * function if @query->parent was reset to NULL, or if the same is true of ++ * any of its ancestor queries. ++ */ ++void apfs_btree_change_node_count(struct apfs_query *query, int change) ++{ ++ struct super_block *sb; ++ struct apfs_node *root; ++ struct apfs_btree_node_phys *root_raw; ++ struct apfs_btree_info *info; ++ ++ root = apfs_query_root(query); ++ ASSERT(apfs_node_is_root(root)); ++ ++ sb = root->object.sb; ++ root_raw = (void *)root->object.data; ++ info = (void *)root_raw + sb->s_blocksize - sizeof(*info); ++ ++ apfs_assert_in_transaction(sb, &root_raw->btn_o); ++ le64_add_cpu(&info->bt_node_count, change); ++} ++ ++/** ++ * apfs_query_refresh - Recreate a catalog query invalidated by node splits ++ * @old_query: query chain to refresh ++ * @root: root node of the query chain ++ * @nodata: is the query expected to find nothing? 
++ * ++ * On success, @old_query is left pointing to the same leaf record, but with ++ * valid ancestor queries as well. Returns a negative error code in case of ++ * failure, or 0 on success. ++ */ ++static int apfs_query_refresh(struct apfs_query *old_query, struct apfs_node *root, bool nodata) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_query *new_query = NULL; ++ int err = 0; ++ ++ sb = root->object.sb; ++ ++ if (!apfs_node_is_leaf(old_query->node)) { ++ apfs_alert(sb, "attempting refresh of non-leaf query"); ++ return -EFSCORRUPTED; ++ } ++ if (apfs_node_is_root(old_query->node)) { ++ apfs_alert(sb, "attempting refresh of root query"); ++ return -EFSCORRUPTED; ++ } ++ ++ new_query = apfs_alloc_query(root, NULL /* parent */); ++ if (!new_query) ++ return -ENOMEM; ++ new_query->key = old_query->key; ++ new_query->flags = old_query->flags & ~(APFS_QUERY_DONE | APFS_QUERY_NEXT); ++ ++ err = apfs_btree_query(sb, &new_query); ++ if (!nodata && err == -ENODATA) { ++ apfs_err(sb, "record should exist"); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ if (err && err != -ENODATA) { ++ apfs_err(sb, "failed to rerun"); ++ goto fail; ++ } ++ err = 0; ++ ++ /* Replace the parent of the original query with the new valid one */ ++ apfs_free_query(old_query->parent); ++ old_query->parent = new_query->parent; ++ new_query->parent = NULL; ++ ++ /* ++ * The records may have moved around so update this too. TODO: rework ++ * the query struct so this stuff is not needed. 
++ */ ++ ASSERT(old_query->node->object.oid == new_query->node->object.oid); ++ old_query->index = new_query->index; ++ old_query->key_off = new_query->key_off; ++ old_query->key_len = new_query->key_len; ++ old_query->off = new_query->off; ++ old_query->len = new_query->len; ++ old_query->depth = new_query->depth; ++ ++fail: ++ apfs_free_query(new_query); ++ return err; ++} ++ ++/** ++ * __apfs_btree_insert - Insert a new record into a b-tree (at any level) ++ * @query: query run to search for the record ++ * @key: on-disk record key ++ * @key_len: length of @key ++ * @val: on-disk record value (NULL for ghost records) ++ * @val_len: length of @val (0 for ghost records) ++ * ++ * The new record is placed right after the one found by @query. On success, ++ * returns 0 and sets @query to the new record; returns a negative error code ++ * in case of failure, which may be -EAGAIN if a split happened and the caller ++ * must retry. ++ */ ++int __apfs_btree_insert(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw; ++ int needed_room; ++ int err; ++ ++ apfs_assert_query_is_valid(query); ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(sb, "query join failed"); ++ return err; ++ } ++ ++ node = query->node; ++ node_raw = (void *)node->object.data; ++ apfs_assert_in_transaction(node->object.sb, &node_raw->btn_o); ++ ++ needed_room = key_len + val_len; ++ if (!apfs_node_has_room(node, needed_room, false /* replace */)) { ++ if (node->records == 1) { ++ /* The new record just won't fit in the node */ ++ err = apfs_create_single_rec_node(query, key, key_len, val, val_len); ++ if (err && err != -EAGAIN) ++ apfs_err(sb, "failed to create single-record node"); ++ return err; ++ } ++ err = apfs_node_split(query); ++ if (err && err != -EAGAIN) { ++ apfs_err(sb, "node split failed"); ++ return err; ++ } ++ 
return -EAGAIN; ++ } ++ ++ apfs_assert_query_is_valid(query); ++ ++ if (query->parent && query->index == -1) { ++ /* We are about to insert a record before all others */ ++ err = __apfs_btree_replace(query->parent, key, key_len, NULL /* val */, 0 /* val_len */); ++ if (err) { ++ if (err != -EAGAIN) ++ apfs_err(sb, "parent update failed"); ++ return err; ++ } ++ } ++ ++ apfs_assert_query_is_valid(query); ++ ++ err = apfs_node_insert(query, key, key_len, val, val_len); ++ if (err) { ++ apfs_err(sb, "node record insertion failed"); ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_btree_insert - Insert a new record into a b-tree leaf ++ * @query: query run to search for the record ++ * @key: on-disk record key ++ * @key_len: length of @key ++ * @val: on-disk record value (NULL for ghost records) ++ * @val_len: length of @val (0 for ghost records) ++ * ++ * The new record is placed right after the one found by @query. On success, ++ * returns 0 and sets @query to the new record; returns a negative error code ++ * in case of failure. 
++ */ ++int apfs_btree_insert(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_node *root = NULL, *leaf = NULL; ++ int err; ++ ++ root = apfs_query_root(query); ++ ASSERT(apfs_node_is_root(root)); ++ leaf = query->node; ++ ASSERT(apfs_node_is_leaf(leaf)); ++ sb = root->object.sb; ++ ++ while (true) { ++ err = __apfs_btree_insert(query, key, key_len, val, val_len); ++ if (err != -EAGAIN) { ++ if (err) ++ return err; ++ break; ++ } ++ err = apfs_query_refresh(query, root, true /* nodata */); ++ if (err) { ++ apfs_err(sb, "query refresh failed"); ++ return err; ++ } ++ } ++ ++ apfs_assert_query_is_valid(query); ++ apfs_btree_change_rec_count(query, 1 /* change */, key_len, val_len); ++ return 0; ++} ++ ++/** ++ * __apfs_btree_remove - Remove a record from a b-tree (at any level) ++ * @query: exact query that found the record ++ * ++ * Returns 0 on success, or a negative error code in case of failure, which may ++ * be -EAGAIN if a split happened and the caller must retry. 
++ */ ++static int __apfs_btree_remove(struct apfs_query *query) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw; ++ int later_entries = node->records - query->index - 1; ++ int err; ++ ++ apfs_assert_query_is_valid(query); ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(sb, "query join failed"); ++ return err; ++ } ++ ++ node = query->node; ++ node_raw = (void *)query->node->object.data; ++ apfs_assert_in_transaction(node->object.sb, &node_raw->btn_o); ++ ++ if (query->parent && node->records == 1) { ++ /* Just get rid of the node */ ++ err = __apfs_btree_remove(query->parent); ++ if (err == -EAGAIN) ++ return -EAGAIN; ++ if (err) { ++ apfs_err(sb, "parent index removal failed"); ++ return err; ++ } ++ apfs_btree_change_node_count(query, -1 /* change */); ++ err = apfs_delete_node(node, query->flags & APFS_QUERY_TREE_MASK); ++ if (err) { ++ apfs_err(sb, "node deletion failed"); ++ return err; ++ } ++ return 0; ++ } ++ ++ apfs_assert_query_is_valid(query); ++ ++ /* The first key in a node must match the parent record's */ ++ if (query->parent && query->index == 0) { ++ int first_key_len, first_key_off; ++ void *key; ++ ++ first_key_len = apfs_node_locate_key(node, 1, &first_key_off); ++ if (!first_key_len) ++ return -EFSCORRUPTED; ++ key = (void *)node_raw + first_key_off; ++ ++ err = __apfs_btree_replace(query->parent, key, first_key_len, NULL /* val */, 0 /* val_len */); ++ if (err == -EAGAIN) ++ return -EAGAIN; ++ if (err) { ++ apfs_err(sb, "parent update failed"); ++ return err; ++ } ++ } ++ ++ apfs_assert_query_is_valid(query); ++ ++ /* Remove the entry from the table of contents */ ++ if (apfs_node_has_fixed_kv_size(node)) { ++ struct apfs_kvoff *toc_entry; ++ ++ toc_entry = (struct apfs_kvoff *)node_raw->btn_data + ++ query->index; ++ memmove(toc_entry, toc_entry + 1, ++ later_entries * sizeof(*toc_entry)); ++ } else { ++ struct apfs_kvloc *toc_entry; 
++ ++ toc_entry = (struct apfs_kvloc *)node_raw->btn_data + ++ query->index; ++ memmove(toc_entry, toc_entry + 1, ++ later_entries * sizeof(*toc_entry)); ++ } ++ ++ apfs_node_free_range(node, query->key_off, query->key_len); ++ apfs_node_free_range(node, query->off, query->len); ++ ++ --node->records; ++ if (node->records == 0) { ++ /* All descendants are gone, root is the whole tree */ ++ node_raw->btn_level = 0; ++ node->flags |= APFS_BTNODE_LEAF; ++ } ++ apfs_update_node(node); ++ ++ --query->index; ++ return 0; ++} ++ ++/** ++ * apfs_btree_remove - Remove a record from a b-tree leaf ++ * @query: exact query that found the record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++int apfs_btree_remove(struct apfs_query *query) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_node *root = NULL, *leaf = NULL; ++ int err; ++ ++ root = apfs_query_root(query); ++ ASSERT(apfs_node_is_root(root)); ++ leaf = query->node; ++ ASSERT(apfs_node_is_leaf(leaf)); ++ sb = root->object.sb; ++ ++ while (true) { ++ err = __apfs_btree_remove(query); ++ if (err != -EAGAIN) { ++ if (err) ++ return err; ++ break; ++ } ++ err = apfs_query_refresh(query, root, false /* nodata */); ++ if (err) { ++ apfs_err(sb, "query refresh failed"); ++ return err; ++ } ++ } ++ ++ apfs_assert_query_is_valid(query); ++ apfs_btree_change_rec_count(query, -1 /* change */, 0 /* key_len */, 0 /* val_len */); ++ return 0; ++} ++ ++/** ++ * __apfs_btree_replace - Replace a record in a b-tree (at any level) ++ * @query: exact query that found the record ++ * @key: new on-disk record key (NULL if unchanged) ++ * @key_len: length of @key ++ * @val: new on-disk record value (NULL if unchanged) ++ * @val_len: length of @val ++ * ++ * It's important that the order of the records is not changed by the new @key. ++ * This function is not needed to replace an old value with a new one of the ++ * same length: it can just be overwritten in place. 
++ * ++ * Returns 0 on success, and @query is left pointing to the same record; returns ++ * a negative error code in case of failure, which may be -EAGAIN if a split ++ * happened and the caller must retry. ++ */ ++static int __apfs_btree_replace(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw; ++ int needed_room; ++ int err; ++ ++ ASSERT(key || val); ++ apfs_assert_query_is_valid(query); ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(sb, "query join failed"); ++ return err; ++ } ++ ++ node = query->node; ++ node_raw = (void *)node->object.data; ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ ++ needed_room = key_len + val_len; ++ /* We can reuse the space of the replaced key/value */ ++ if (key) ++ needed_room -= query->key_len; ++ if (val) ++ needed_room -= query->len; ++ ++ if (!apfs_node_has_room(node, needed_room, true /* replace */)) { ++ if (node->records == 1) { ++ apfs_alert(sb, "no room in empty node?"); ++ return -EFSCORRUPTED; ++ } ++ err = apfs_node_split(query); ++ if (err && err != -EAGAIN) { ++ apfs_err(sb, "node split failed"); ++ return err; ++ } ++ return -EAGAIN; ++ } ++ ++ apfs_assert_query_is_valid(query); ++ ++ /* The first key in a node must match the parent record's */ ++ if (key && query->parent && query->index == 0) { ++ err = __apfs_btree_replace(query->parent, key, key_len, NULL /* val */, 0 /* val_len */); ++ if (err) { ++ if (err != -EAGAIN) ++ apfs_err(sb, "parent update failed"); ++ return err; ++ } ++ } ++ ++ apfs_assert_query_is_valid(query); ++ ++ err = apfs_node_replace(query, key, key_len, val, val_len); ++ if (err) { ++ apfs_err(sb, "node record replacement failed"); ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_btree_replace - Replace a record in a b-tree leaf ++ * @query: exact query that found the record ++ * @key: new 
on-disk record key (NULL if unchanged) ++ * @key_len: length of @key ++ * @val: new on-disk record value (NULL if unchanged) ++ * @val_len: length of @val ++ * ++ * It's important that the order of the records is not changed by the new @key. ++ * This function is not needed to replace an old value with a new one of the ++ * same length: it can just be overwritten in place. ++ * ++ * Returns 0 on success, and @query is left pointing to the same record; returns ++ * a negative error code in case of failure. ++ */ ++int apfs_btree_replace(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_node *root = NULL, *leaf = NULL; ++ int err; ++ ++ root = apfs_query_root(query); ++ ASSERT(apfs_node_is_root(root)); ++ leaf = query->node; ++ ASSERT(apfs_node_is_leaf(leaf)); ++ sb = root->object.sb; ++ ++ while (true) { ++ err = __apfs_btree_replace(query, key, key_len, val, val_len); ++ if (err != -EAGAIN) { ++ if (err) ++ return err; ++ break; ++ } ++ err = apfs_query_refresh(query, root, false /* nodata */); ++ if (err) { ++ apfs_err(sb, "query refresh failed"); ++ return err; ++ } ++ } ++ ++ apfs_assert_query_is_valid(query); ++ apfs_btree_change_rec_count(query, 0 /* change */, key_len, val_len); ++ return 0; ++} ++ ++/** ++ * apfs_query_direct_forward - Set a query to start listing records forwards ++ * @query: a successfully executed query ++ * ++ * Multiple queries list records backwards, but queries marked with this ++ * function after execution will go in the opposite direction. 
++ */ ++void apfs_query_direct_forward(struct apfs_query *query) ++{ ++ if (query->flags & APFS_QUERY_PREV) ++ return; ++ ++ apfs_assert_query_is_valid(query); ++ ASSERT(apfs_node_is_leaf(query->node)); ++ ++ while (query) { ++ query->flags |= APFS_QUERY_PREV; ++ query = query->parent; ++ } ++} +diff --git a/fs/apfs/compress.c b/fs/apfs/compress.c +new file mode 100644 +index 000000000..a36050688 +--- /dev/null ++++ b/fs/apfs/compress.c +@@ -0,0 +1,474 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2020 Corellium LLC ++ */ ++ ++#include ++#include ++#include ++ ++#include "apfs.h" ++#include "libzbitmap.h" ++#include "lzfse/lzfse.h" ++#include "lzfse/lzvn_decode_base.h" ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) ++ ++#include ++ ++static inline void *kvmalloc(size_t size, gfp_t flags) ++{ ++ gfp_t kmalloc_flags = flags; ++ void *ret; ++ ++ if ((flags & GFP_KERNEL) != GFP_KERNEL) ++ return kmalloc(size, flags); ++ ++ if (size > PAGE_SIZE) ++ kmalloc_flags |= __GFP_NOWARN | __GFP_NORETRY; ++ ++ ret = kmalloc(size, flags); ++ if (ret || size < PAGE_SIZE) ++ return ret; ++ ++ return vmalloc(size); ++} ++ ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) */ ++ ++struct apfs_compress_file_data { ++ struct apfs_compress_hdr hdr; ++ u8 *buf; ++ ssize_t bufblk; ++ size_t bufsize; ++ struct mutex mtx; ++ struct super_block *sb; ++ struct apfs_compressed_data cdata; ++}; ++ ++static inline int apfs_compress_is_rsrc(u32 algo) ++{ ++ return (algo & 1) == 0; ++} ++ ++static inline bool apfs_compress_is_supported(u32 algo) ++{ ++ switch (algo) { ++ case APFS_COMPRESS_ZLIB_RSRC: ++ case APFS_COMPRESS_ZLIB_ATTR: ++ case APFS_COMPRESS_LZVN_RSRC: ++ case APFS_COMPRESS_LZVN_ATTR: ++ case APFS_COMPRESS_PLAIN_RSRC: ++ case APFS_COMPRESS_PLAIN_ATTR: ++ case APFS_COMPRESS_LZFSE_RSRC: ++ case APFS_COMPRESS_LZFSE_ATTR: ++ case APFS_COMPRESS_LZBITMAP_RSRC: ++ case APFS_COMPRESS_LZBITMAP_ATTR: ++ return true; ++ default: ++ /* Once will usually be 
enough, don't flood the console */ ++ pr_err_once("APFS: unsupported compression algorithm (%u)\n", algo); ++ return false; ++ } ++} ++ ++static int apfs_compress_file_open(struct inode *inode, struct file *filp) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_compress_file_data *fd; ++ ssize_t res; ++ bool is_rsrc; ++ ++ /* ++ * The official implementation seems to transparently decompress files ++ * when you write to them. Doing that atomically inside the kernel is ++ * probably a chore, so for now I'll just leave it to the user to make ++ * an uncompressed copy themselves and replace the original. I might ++ * fix this in the future, but only if people complain (TODO). ++ */ ++ if (filp->f_mode & FMODE_WRITE) { ++ apfs_warn(sb, "writes to compressed files are not supported"); ++ apfs_warn(sb, "you can work with a copy of the file instead"); ++ return -EOPNOTSUPP; ++ } ++ ++ if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) ++ return -EOVERFLOW; ++ ++ fd = kzalloc(sizeof(*fd), GFP_KERNEL); ++ if (!fd) ++ return -ENOMEM; ++ mutex_init(&fd->mtx); ++ fd->sb = sb; ++ ++ down_read(&nxi->nx_big_sem); ++ ++ res = ____apfs_xattr_get(inode, APFS_XATTR_NAME_COMPRESSED, &fd->hdr, sizeof(fd->hdr), 0); ++ if (res != sizeof(fd->hdr)) { ++ apfs_err(sb, "decmpfs header read failed"); ++ goto fail; ++ } ++ ++ if (!apfs_compress_is_supported(le32_to_cpu(fd->hdr.algo))) { ++ res = -EOPNOTSUPP; ++ goto fail; ++ } ++ ++ fd->buf = kvmalloc(APFS_COMPRESS_BLOCK, GFP_KERNEL); ++ if (!fd->buf) { ++ res = -ENOMEM; ++ goto fail; ++ } ++ fd->bufblk = -1; ++ ++ is_rsrc = apfs_compress_is_rsrc(le32_to_cpu(fd->hdr.algo)); ++ res = apfs_xattr_get_compressed_data(inode, is_rsrc ? 
APFS_XATTR_NAME_RSRC_FORK : APFS_XATTR_NAME_COMPRESSED, &fd->cdata); ++ if (res) { ++ apfs_err(sb, "failed to get compressed data"); ++ goto fail; ++ } ++ ++ up_read(&nxi->nx_big_sem); ++ ++ filp->private_data = fd; ++ return 0; ++ ++fail: ++ apfs_release_compressed_data(&fd->cdata); ++ if (fd->buf) ++ kvfree(fd->buf); ++ up_read(&nxi->nx_big_sem); ++ kfree(fd); ++ if (res > 0) ++ res = -EINVAL; ++ return res; ++} ++ ++static int apfs_compress_file_read_block(struct apfs_compress_file_data *fd, loff_t block) ++{ ++ struct super_block *sb = fd->sb; ++ struct apfs_compressed_data *comp_data = &fd->cdata; ++ u8 *cdata = NULL; ++ u8 *tmp = fd->buf; ++ u32 doffs = 0, coffs; ++ size_t csize, bsize; ++ int res = 0; ++ ++ if (apfs_compress_is_rsrc(le32_to_cpu(fd->hdr.algo)) && ++ le32_to_cpu(fd->hdr.algo) != APFS_COMPRESS_LZBITMAP_RSRC && ++ le32_to_cpu(fd->hdr.algo) != APFS_COMPRESS_LZVN_RSRC && ++ le32_to_cpu(fd->hdr.algo) != APFS_COMPRESS_LZFSE_RSRC) { ++ struct apfs_compress_rsrc_hdr hdr = {0}; ++ struct apfs_compress_rsrc_data cd = {0}; ++ struct apfs_compress_rsrc_block blk = {0}; ++ u32 blk_off; ++ ++ res = apfs_compressed_data_read(comp_data, &hdr, sizeof(hdr), 0 /* offset */); ++ if (res) { ++ apfs_err(sb, "failed to read resource header"); ++ return res; ++ } ++ ++ doffs = be32_to_cpu(hdr.data_offs); ++ res = apfs_compressed_data_read(comp_data, &cd, sizeof(cd), doffs); ++ if (res) { ++ apfs_err(sb, "failed to read resource data header"); ++ return res; ++ } ++ if (block >= le32_to_cpu(cd.num)) ++ return 0; ++ ++ blk_off = doffs + sizeof(cd) + sizeof(blk) * block; ++ res = apfs_compressed_data_read(comp_data, &blk, sizeof(blk), blk_off); ++ if (res) { ++ apfs_err(sb, "failed to read resource block metadata"); ++ return res; ++ } ++ ++ bsize = le64_to_cpu(fd->hdr.size) - block * APFS_COMPRESS_BLOCK; ++ if (bsize > APFS_COMPRESS_BLOCK) ++ bsize = APFS_COMPRESS_BLOCK; ++ ++ csize = le32_to_cpu(blk.size); ++ coffs = le32_to_cpu(blk.offs) + 4; ++ } else if 
(apfs_compress_is_rsrc(le32_to_cpu(fd->hdr.algo))) { ++ __le32 blks[2]; ++ u32 blk_off; ++ ++ blk_off = doffs + sizeof(__le32) * block; ++ res = apfs_compressed_data_read(comp_data, blks, sizeof(blks), blk_off); ++ if (res) { ++ apfs_err(sb, "failed to read resource block metadata"); ++ return res; ++ } ++ ++ bsize = le64_to_cpu(fd->hdr.size) - block * APFS_COMPRESS_BLOCK; ++ if (bsize > APFS_COMPRESS_BLOCK) ++ bsize = APFS_COMPRESS_BLOCK; ++ ++ coffs = le32_to_cpu(blks[0]); ++ csize = le32_to_cpu(blks[1]) - coffs; ++ } else { ++ /* ++ * I think attr compression is only for single-block files, in ++ * fact none of these files ever seem to decompress to more than ++ * 2048 bytes. ++ */ ++ bsize = le64_to_cpu(fd->hdr.size); ++ if (block != 0 || bsize > APFS_COMPRESS_BLOCK) { ++ apfs_err(sb, "file too big for inline compression"); ++ return -EFSCORRUPTED; ++ } ++ ++ /* The first few bytes are the decmpfs header */ ++ coffs = sizeof(struct apfs_compress_hdr); ++ csize = comp_data->size - sizeof(struct apfs_compress_hdr); ++ } ++ ++ cdata = kvmalloc(csize, GFP_KERNEL); ++ if (!cdata) ++ return -ENOMEM; ++ res = apfs_compressed_data_read(comp_data, cdata, csize, doffs + coffs); ++ if (res) { ++ apfs_err(sb, "failed to read compressed block"); ++ goto fail; ++ } ++ ++ switch (le32_to_cpu(fd->hdr.algo)) { ++ case APFS_COMPRESS_ZLIB_RSRC: ++ case APFS_COMPRESS_ZLIB_ATTR: ++ if (cdata[0] == 0x78 && csize >= 2) { ++ res = zlib_inflate_blob(tmp, bsize, cdata + 2, csize - 2); ++ if (res <= 0) { ++ apfs_err(sb, "zlib decompression failed"); ++ goto fail; ++ } ++ bsize = res; ++ res = 0; ++ } else if ((cdata[0] & 0x0F) == 0x0F) { ++ memcpy(tmp, &cdata[1], csize - 1); ++ bsize = csize - 1; ++ } else { ++ apfs_err(sb, "zlib decompression failed"); ++ res = -EINVAL; ++ goto fail; ++ } ++ break; ++ case APFS_COMPRESS_LZVN_RSRC: ++ case APFS_COMPRESS_LZVN_ATTR: ++ if (cdata[0] == 0x06) { ++ memcpy(tmp, &cdata[1], csize - 1); ++ bsize = csize - 1; ++ } else { ++ lzvn_decoder_state 
dstate = {0}; ++ ++ dstate.src = cdata; ++ dstate.src_end = dstate.src + csize; ++ dstate.dst = dstate.dst_begin = tmp; ++ dstate.dst_end = dstate.dst + bsize; ++ lzvn_decode(&dstate); ++ bsize = dstate.dst - tmp; ++ } ++ break; ++ case APFS_COMPRESS_LZBITMAP_RSRC: ++ case APFS_COMPRESS_LZBITMAP_ATTR: ++ if (cdata[0] == 0x5a) { ++ res = zbm_decompress(tmp, bsize, cdata, csize, &bsize); ++ if (res < 0) { ++ apfs_err(sb, "lzbitmap decompression failed"); ++ goto fail; ++ } ++ res = 0; ++ } else if ((cdata[0] & 0x0F) == 0x0F) { ++ memcpy(tmp, &cdata[1], csize - 1); ++ bsize = csize - 1; ++ } else { ++ apfs_err(sb, "lzbitmap decompression failed"); ++ res = -EINVAL; ++ goto fail; ++ } ++ break; ++ case APFS_COMPRESS_LZFSE_RSRC: ++ case APFS_COMPRESS_LZFSE_ATTR: ++ if (cdata[0] == 0x62 && csize >= 2) { ++ res = lzfse_decode_buffer(tmp, bsize, cdata, csize, NULL); ++ if (res == 0) { ++ apfs_err(sb, "lzfse decompression failed"); ++ /* Could be ENOMEM too... */ ++ res = -EINVAL; ++ goto fail; ++ } ++ bsize = res; ++ res = 0; ++ } else { ++ /* cdata[0] == 0xff, apparently */ ++ memcpy(tmp, &cdata[1], csize - 1); ++ bsize = csize - 1; ++ } ++ break; ++ case APFS_COMPRESS_PLAIN_RSRC: ++ case APFS_COMPRESS_PLAIN_ATTR: ++ memcpy(tmp, &cdata[1], csize - 1); ++ bsize = csize - 1; ++ break; ++ default: ++ res = -EINVAL; ++ goto fail; ++ } ++ fd->bufblk = block; ++ fd->bufsize = bsize; ++fail: ++ kvfree(cdata); ++ return res; ++} ++ ++static int apfs_compress_file_release(struct inode *inode, struct file *filp) ++{ ++ struct apfs_compress_file_data *fd = filp->private_data; ++ ++ apfs_release_compressed_data(&fd->cdata); ++ if (fd->buf) ++ kvfree(fd->buf); ++ kfree(fd); ++ return 0; ++} ++ ++static ssize_t apfs_compress_file_read_from_block(struct apfs_compress_file_data *fd, char *buf, size_t size, loff_t off) ++{ ++ struct super_block *sb = fd->sb; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_compressed_data cdata = fd->cdata; ++ loff_t block; ++ size_t bsize; ++ 
ssize_t res; ++ ++ /* ++ * Request reads of all blocks before actually working with any of them. ++ * The compressed data is typically small enough that this is effective. ++ * It would be much better to make an inode for the xattr dstream and ++ * work with readahead as usual, but I'm not confident I can get that ++ * right (TODO). ++ */ ++ if (cdata.has_dstream && off == 0) { ++ down_read(&nxi->nx_big_sem); ++ apfs_nonsparse_dstream_preread(cdata.dstream); ++ up_read(&nxi->nx_big_sem); ++ } ++ ++ if (off >= le64_to_cpu(fd->hdr.size)) ++ return 0; ++ if (size > le64_to_cpu(fd->hdr.size) - off) ++ size = le64_to_cpu(fd->hdr.size) - off; ++ ++ block = off / APFS_COMPRESS_BLOCK; ++ off -= block * APFS_COMPRESS_BLOCK; ++ if (block != fd->bufblk) { ++ down_read(&nxi->nx_big_sem); ++ res = apfs_compress_file_read_block(fd, block); ++ up_read(&nxi->nx_big_sem); ++ if (res) { ++ apfs_err(sb, "failed to read block into buffer"); ++ return res; ++ } ++ } ++ bsize = fd->bufsize; ++ ++ if (bsize < off) ++ return 0; ++ bsize -= off; ++ if (size > bsize) ++ size = bsize; ++ memcpy(buf, fd->buf + off, size); ++ return size; ++} ++ ++static ssize_t apfs_compress_file_read_page(struct file *filp, char *buf, loff_t off) ++{ ++ struct apfs_compress_file_data *fd = filp->private_data; ++ loff_t step; ++ ssize_t block, res; ++ size_t size = PAGE_SIZE; ++ ++ step = 0; ++ while (step < size) { ++ block = APFS_COMPRESS_BLOCK - ((off + step) & (APFS_COMPRESS_BLOCK - 1)); ++ if (block > size - step) ++ block = size - step; ++ mutex_lock(&fd->mtx); ++ res = apfs_compress_file_read_from_block(fd, buf + step, block, off + step); ++ mutex_unlock(&fd->mtx); ++ if (res < block) { ++ if (res < 0 && !step) ++ return res; ++ step += res > 0 ? 
res : 0; ++ break; ++ } ++ step += block; ++ } ++ return step; ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++static int apfs_compress_read_folio(struct file *filp, struct folio *folio) ++{ ++ struct page *page = &folio->page; ++#else ++static int apfs_compress_readpage(struct file *filp, struct page *page) ++{ ++#endif ++ char *addr = NULL; ++ ssize_t ret; ++ loff_t off; ++ ++ /* Mostly copied from ext4_read_inline_page() */ ++ off = page->index << PAGE_SHIFT; ++ addr = kmap(page); ++ ret = apfs_compress_file_read_page(filp, addr, off); ++ flush_dcache_page(page); ++ kunmap(page); ++ if (ret >= 0) { ++ zero_user_segment(page, ret, PAGE_SIZE); ++ SetPageUptodate(page); ++ ret = 0; ++ } ++ ++ unlock_page(page); ++ return ret; ++} ++ ++const struct address_space_operations apfs_compress_aops = { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ .read_folio = apfs_compress_read_folio, ++#else ++ .readpage = apfs_compress_readpage, ++#endif ++}; ++ ++/* TODO: these operations are all happening without proper locks */ ++const struct file_operations apfs_compress_file_operations = { ++ .open = apfs_compress_file_open, ++ .llseek = generic_file_llseek, ++ .read_iter = generic_file_read_iter, ++ .release = apfs_compress_file_release, ++ .mmap = apfs_file_mmap, ++}; ++ ++int apfs_compress_get_size(struct inode *inode, loff_t *size) ++{ ++ struct apfs_compress_hdr hdr; ++ int res = ____apfs_xattr_get(inode, APFS_XATTR_NAME_COMPRESSED, &hdr, sizeof(hdr), 0); ++ ++ if (res < 0) ++ return res; ++ if (res != sizeof(hdr)) { ++ apfs_err(inode->i_sb, "decmpfs header read failed"); ++ return 1; ++ } ++ ++ if (!apfs_compress_is_supported(le32_to_cpu(hdr.algo))) ++ return 1; ++ ++ *size = le64_to_cpu(hdr.size); ++ return 0; ++} +diff --git a/fs/apfs/dir.c b/fs/apfs/dir.c +new file mode 100644 +index 000000000..416f42f02 +--- /dev/null ++++ b/fs/apfs/dir.c +@@ -0,0 +1,1544 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. 
Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_drec_from_query - Read the directory record found by a successful query ++ * @query: the query that found the record ++ * @drec: Return parameter. The directory record found. ++ * @hashed: is this record hashed? ++ * ++ * Reads the directory record into @drec and performs some basic sanity checks ++ * as a protection against crafted filesystems. Returns 0 on success or ++ * -EFSCORRUPTED otherwise. ++ * ++ * The caller must not free @query while @drec is in use, because @drec->name ++ * points to data on disk. ++ */ ++static int apfs_drec_from_query(struct apfs_query *query, struct apfs_drec *drec, bool hashed) ++{ ++ struct super_block *sb = query->node->object.sb; ++ char *raw = query->node->object.data; ++ struct apfs_drec_hashed_key *de_hkey = NULL; ++ struct apfs_drec_key *de_ukey = NULL; ++ struct apfs_drec_val *de; ++ int namelen, xlen; ++ char *xval = NULL, *name; ++ ++ namelen = query->key_len - (hashed ? 
sizeof(*de_hkey) : sizeof(*de_ukey)); ++ if (namelen < 1) { ++ apfs_err(sb, "key is too small (%d)", query->key_len); ++ return -EFSCORRUPTED; ++ } ++ if (query->len < sizeof(*de)) { ++ apfs_err(sb, "value is too small (%d)", query->len); ++ return -EFSCORRUPTED; ++ } ++ ++ de = (struct apfs_drec_val *)(raw + query->off); ++ if (hashed) { ++ de_hkey = (struct apfs_drec_hashed_key *)(raw + query->key_off); ++ if (namelen != (le32_to_cpu(de_hkey->name_len_and_hash) & APFS_DREC_LEN_MASK)) { ++ apfs_err(sb, "inconsistent name length"); ++ return -EFSCORRUPTED; ++ } ++ name = de_hkey->name; ++ } else { ++ de_ukey = (struct apfs_drec_key *)(raw + query->key_off); ++ if (namelen != le16_to_cpu(de_ukey->name_len)) { ++ apfs_err(sb, "inconsistent name length"); ++ return -EFSCORRUPTED; ++ } ++ name = de_ukey->name; ++ } ++ ++ /* Filename must be NULL-terminated */ ++ if (name[namelen - 1] != 0) { ++ apfs_err(sb, "null termination missing"); ++ return -EFSCORRUPTED; ++ } ++ ++ /* The dentry may have at most one xfield: the sibling id */ ++ drec->sibling_id = 0; ++ xlen = apfs_find_xfield(de->xfields, query->len - sizeof(*de), ++ APFS_DREC_EXT_TYPE_SIBLING_ID, &xval); ++ if (xlen >= sizeof(__le64)) { ++ __le64 *sib_id = (__le64 *)xval; ++ ++ drec->sibling_id = le64_to_cpup(sib_id); ++ } ++ ++ drec->name = name; ++ drec->name_len = namelen - 1; /* Don't count the NULL termination */ ++ drec->ino = le64_to_cpu(de->file_id); ++ ++ drec->type = le16_to_cpu(de->flags) & APFS_DREC_TYPE_MASK; ++ if (drec->type != DT_FIFO && drec->type & 1) /* Invalid file type */ ++ drec->type = DT_UNKNOWN; ++ return 0; ++} ++ ++/** ++ * apfs_dentry_lookup - Lookup a dentry record in the catalog b-tree ++ * @dir: parent directory ++ * @child: filename ++ * @drec: on return, the directory record found ++ * ++ * Runs a catalog query for @name in the @dir directory. On success, sets ++ * @drec and returns a pointer to the query structure. On failure, returns ++ * an appropriate error pointer. 
++ */ ++static struct apfs_query *apfs_dentry_lookup(struct inode *dir, ++ const struct qstr *child, ++ struct apfs_drec *drec) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ u64 cnid = apfs_ino(dir); ++ bool hashed = apfs_is_normalization_insensitive(sb); ++ int err; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return ERR_PTR(-ENOMEM); ++ apfs_init_drec_key(sb, cnid, child->name, child->len, &query->key); ++ ++ /* ++ * Distinct filenames in the same directory may (rarely) share the same ++ * hash. The query code cannot handle that because their order in the ++ * b-tree would depend on their unnormalized original names. Just get ++ * all the candidates and check them one by one. ++ * ++ * This is very wasteful for normalization-sensitive filesystems: there ++ * are no hashes so we just check every single file in the directory for ++ * no reason. This would be easy to avoid but does it matter? (TODO) ++ */ ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_ANY_NAME | APFS_QUERY_EXACT; ++ do { ++ err = apfs_btree_query(sb, &query); ++ if (err) ++ goto fail; ++ err = apfs_drec_from_query(query, drec, hashed); ++ if (err) ++ goto fail; ++ } while (unlikely(apfs_filename_cmp(sb, child->name, child->len, drec->name, drec->name_len))); ++ ++ /* ++ * We may need to refresh the query later, but the refresh code doesn't ++ * know how to deal with hash collisions. Instead set the key to the ++ * unnormalized name and pretend that this was never a multiple query ++ * in the first place. 
++ */ ++ query->key.name = drec->name; ++ query->flags &= ~(APFS_QUERY_MULTIPLE | APFS_QUERY_DONE | APFS_QUERY_NEXT); ++ return query; ++ ++fail: ++ if (err != -ENODATA) ++ apfs_err(sb, "query failed in dir 0x%llx", cnid); ++ apfs_free_query(query); ++ return ERR_PTR(err); ++} ++ ++/** ++ * apfs_inode_by_name - Find the cnid for a given filename ++ * @dir: parent directory ++ * @child: filename ++ * @ino: on return, the inode number found ++ * ++ * Returns 0 and the inode number (which is the cnid of the file ++ * record); otherwise, return the appropriate error code. ++ */ ++int apfs_inode_by_name(struct inode *dir, const struct qstr *child, u64 *ino) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_query *query; ++ struct apfs_drec drec; ++ int err = 0; ++ ++ down_read(&nxi->nx_big_sem); ++ query = apfs_dentry_lookup(dir, child, &drec); ++ if (IS_ERR(query)) { ++ err = PTR_ERR(query); ++ goto out; ++ } ++ *ino = drec.ino; ++ apfs_free_query(query); ++out: ++ up_read(&nxi->nx_big_sem); ++ return err; ++} ++ ++static int apfs_readdir(struct file *file, struct dir_context *ctx) ++{ ++ struct inode *inode = file_inode(file); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_query *query; ++ u64 cnid = apfs_ino(inode); ++ loff_t pos; ++ bool hashed = apfs_is_normalization_insensitive(sb); ++ int err = 0; ++ ++ down_read(&nxi->nx_big_sem); ++ ++ /* Inode numbers might overflow here; follow btrfs in ignoring that */ ++ if (!dir_emit_dots(file, ctx)) ++ goto out; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* We want all the children for the cnid, regardless of the name */ ++ apfs_init_drec_key(sb, cnid, NULL /* name */, 0 /* name_len */, &query->key); ++ query->flags = APFS_QUERY_CAT | APFS_QUERY_MULTIPLE | APFS_QUERY_EXACT; ++ ++ pos = 
ctx->pos - 2; ++ while (1) { ++ struct apfs_drec drec; ++ /* ++ * We query for the matching records, one by one. After we ++ * pass ctx->pos we begin to emit them. ++ * ++ * TODO: Faster approach for large directories? ++ */ ++ ++ err = apfs_btree_query(sb, &query); ++ if (err == -ENODATA) { /* Got all the records */ ++ err = 0; ++ break; ++ } ++ if (err) ++ break; ++ ++ err = apfs_drec_from_query(query, &drec, hashed); ++ if (err) { ++ apfs_alert(sb, "bad dentry record in directory 0x%llx", ++ cnid); ++ break; ++ } ++ ++ err = 0; ++ if (pos <= 0) { ++ if (!dir_emit(ctx, drec.name, drec.name_len, ++ drec.ino, drec.type)) ++ break; ++ ++ctx->pos; ++ } ++ pos--; ++ } ++ apfs_free_query(query); ++ ++out: ++ up_read(&nxi->nx_big_sem); ++ return err; ++} ++ ++const struct file_operations apfs_dir_operations = { ++ .llseek = generic_file_llseek, ++ .read = generic_read_dir, ++ .iterate_shared = apfs_readdir, ++ .fsync = apfs_fsync, ++ .unlocked_ioctl = apfs_dir_ioctl, ++}; ++ ++/** ++ * apfs_build_dentry_unhashed_key - Allocate and initialize the key for an unhashed dentry record ++ * @qname: filename ++ * @parent_id: inode number for the parent of the dentry ++ * @key_p: on return, a pointer to the new on-disk key structure ++ * ++ * Returns the length of the key, or a negative error code in case of failure. 
++ */ ++static int apfs_build_dentry_unhashed_key(struct qstr *qname, u64 parent_id, ++ struct apfs_drec_key **key_p) ++{ ++ struct apfs_drec_key *key; ++ u16 namelen = qname->len + 1; /* We count the null-termination */ ++ int key_len; ++ ++ key_len = sizeof(*key) + namelen; ++ key = kmalloc(key_len, GFP_KERNEL); ++ if (!key) ++ return -ENOMEM; ++ ++ apfs_key_set_hdr(APFS_TYPE_DIR_REC, parent_id, key); ++ key->name_len = cpu_to_le16(namelen); ++ strscpy(key->name, qname->name, namelen); ++ ++ *key_p = key; ++ return key_len; ++} ++ ++/** ++ * apfs_build_dentry_hashed_key - Allocate and initialize the key for a hashed dentry record ++ * @qname: filename ++ * @hash: filename hash ++ * @parent_id: inode number for the parent of the dentry ++ * @key_p: on return, a pointer to the new on-disk key structure ++ * ++ * Returns the length of the key, or a negative error code in case of failure. ++ */ ++static int apfs_build_dentry_hashed_key(struct qstr *qname, u64 hash, u64 parent_id, ++ struct apfs_drec_hashed_key **key_p) ++{ ++ struct apfs_drec_hashed_key *key; ++ u16 namelen = qname->len + 1; /* We count the null-termination */ ++ int key_len; ++ ++ key_len = sizeof(*key) + namelen; ++ key = kmalloc(key_len, GFP_KERNEL); ++ if (!key) ++ return -ENOMEM; ++ ++ apfs_key_set_hdr(APFS_TYPE_DIR_REC, parent_id, key); ++ key->name_len_and_hash = cpu_to_le32(namelen | hash); ++ strscpy(key->name, qname->name, namelen); ++ ++ *key_p = key; ++ return key_len; ++} ++ ++/** ++ * apfs_build_dentry_val - Allocate and initialize the value for a dentry record ++ * @inode: vfs inode for the dentry ++ * @sibling_id: sibling id for this hardlink (0 for none) ++ * @val_p: on return, a pointer to the new on-disk value structure ++ * ++ * Returns the length of the value, or a negative error code in case of failure. 
++ */ ++static int apfs_build_dentry_val(struct inode *inode, u64 sibling_id, ++ struct apfs_drec_val **val_p) ++{ ++ struct apfs_drec_val *val; ++ struct apfs_x_field xkey; ++ int total_xlen = 0, val_len; ++ __le64 raw_sibling_id = cpu_to_le64(sibling_id); ++ struct timespec64 now = current_time(inode); ++ ++ /* The dentry record may have one xfield: the sibling id */ ++ if (sibling_id) ++ total_xlen += sizeof(struct apfs_xf_blob) + ++ sizeof(xkey) + sizeof(raw_sibling_id); ++ ++ val_len = sizeof(*val) + total_xlen; ++ val = kmalloc(val_len, GFP_KERNEL); ++ if (!val) ++ return -ENOMEM; ++ *val_p = val; ++ ++ val->file_id = cpu_to_le64(apfs_ino(inode)); ++ val->date_added = cpu_to_le64(timespec64_to_ns(&now)); ++ val->flags = cpu_to_le16((inode->i_mode >> 12) & 15); /* File type */ ++ ++ if (!sibling_id) ++ return val_len; ++ ++ /* The buffer was just allocated: none of these functions should fail */ ++ apfs_init_xfields(val->xfields, val_len - sizeof(*val)); ++ xkey.x_type = APFS_DREC_EXT_TYPE_SIBLING_ID; ++ xkey.x_flags = 0; /* TODO: proper flags here? */ ++ xkey.x_size = cpu_to_le16(sizeof(raw_sibling_id)); ++ apfs_insert_xfield(val->xfields, total_xlen, &xkey, &raw_sibling_id); ++ return val_len; ++} ++ ++/** ++ * apfs_create_dentry_rec - Create a dentry record in the catalog b-tree ++ * @inode: vfs inode for the dentry ++ * @qname: filename ++ * @parent_id: inode number for the parent of the dentry ++ * @sibling_id: sibling id for this hardlink (0 for none) ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_create_dentry_rec(struct inode *inode, struct qstr *qname, ++ u64 parent_id, u64 sibling_id) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ void *raw_key = NULL; ++ struct apfs_drec_val *raw_val = NULL; ++ int key_len, val_len; ++ bool hashed = apfs_is_normalization_insensitive(sb); ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_drec_key(sb, parent_id, qname->name, qname->len, &query->key); ++ query->flags |= APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed in dir 0x%llx (hash 0x%llx)", parent_id, query->key.number); ++ goto fail; ++ } ++ ++ if (hashed) ++ key_len = apfs_build_dentry_hashed_key(qname, query->key.number, parent_id, ++ (struct apfs_drec_hashed_key **)&raw_key); ++ else ++ key_len = apfs_build_dentry_unhashed_key(qname, parent_id, ++ (struct apfs_drec_key **)&raw_key); ++ if (key_len < 0) { ++ ret = key_len; ++ goto fail; ++ } ++ ++ val_len = apfs_build_dentry_val(inode, sibling_id, &raw_val); ++ if (val_len < 0) { ++ ret = val_len; ++ goto fail; ++ } ++ /* TODO: deal with hash collisions */ ++ ret = apfs_btree_insert(query, raw_key, key_len, raw_val, val_len); ++ if (ret) ++ apfs_err(sb, "insertion failed in dir 0x%llx (hash 0x%llx)", parent_id, query->key.number); ++ ++fail: ++ kfree(raw_val); ++ kfree(raw_key); ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_CREATE_DENTRY_REC_MAXOPS 1 ++ ++/** ++ * apfs_build_sibling_val - Allocate and initialize a sibling link's value ++ * @dentry: in-memory dentry for this hardlink ++ * @val_p: on return, a pointer to the new on-disk value structure ++ * ++ * Returns the length of the value, or a negative error code in case of failure. 
++ */ ++static int apfs_build_sibling_val(struct dentry *dentry, ++ struct apfs_sibling_val **val_p) ++{ ++ struct apfs_sibling_val *val; ++ struct qstr *qname = &dentry->d_name; ++ u16 namelen = qname->len + 1; /* We count the null-termination */ ++ struct inode *parent = d_inode(dentry->d_parent); ++ int val_len; ++ ++ val_len = sizeof(*val) + namelen; ++ val = kmalloc(val_len, GFP_KERNEL); ++ if (!val) ++ return -ENOMEM; ++ ++ val->parent_id = cpu_to_le64(apfs_ino(parent)); ++ val->name_len = cpu_to_le16(namelen); ++ strscpy(val->name, qname->name, namelen); ++ ++ *val_p = val; ++ return val_len; ++} ++ ++/** ++ * apfs_create_sibling_link_rec - Create a sibling link record for a dentry ++ * @dentry: the in-memory dentry ++ * @inode: vfs inode for the dentry ++ * @sibling_id: sibling id for this hardlink ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_create_sibling_link_rec(struct dentry *dentry, ++ struct inode *inode, u64 sibling_id) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ struct apfs_sibling_link_key raw_key; ++ struct apfs_sibling_val *raw_val; ++ int val_len; ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_sibling_link_key(apfs_ino(inode), sibling_id, &query->key); ++ query->flags |= APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for ino 0x%llx, sibling 0x%llx", apfs_ino(inode), sibling_id); ++ goto fail; ++ } ++ ++ apfs_key_set_hdr(APFS_TYPE_SIBLING_LINK, apfs_ino(inode), &raw_key); ++ raw_key.sibling_id = cpu_to_le64(sibling_id); ++ val_len = apfs_build_sibling_val(dentry, &raw_val); ++ if (val_len < 0) ++ goto fail; ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), raw_val, val_len); ++ if (ret) ++ apfs_err(sb, "insertion failed for ino 0x%llx, sibling 
0x%llx", apfs_ino(inode), sibling_id); ++ kfree(raw_val); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_CREATE_SIBLING_LINK_REC_MAXOPS 1 ++ ++/** ++ * apfs_create_sibling_map_rec - Create a sibling map record for a dentry ++ * @dentry: the in-memory dentry ++ * @inode: vfs inode for the dentry ++ * @sibling_id: sibling id for this hardlink ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_create_sibling_map_rec(struct dentry *dentry, ++ struct inode *inode, u64 sibling_id) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ struct apfs_sibling_map_key raw_key; ++ struct apfs_sibling_map_val raw_val; ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_sibling_map_key(sibling_id, &query->key); ++ query->flags |= APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for sibling 0x%llx", sibling_id); ++ goto fail; ++ } ++ ++ apfs_key_set_hdr(APFS_TYPE_SIBLING_MAP, sibling_id, &raw_key); ++ raw_val.file_id = cpu_to_le64(apfs_ino(inode)); ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ if (ret) ++ apfs_err(sb, "insertion failed for sibling 0x%llx", sibling_id); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_CREATE_SIBLING_MAP_REC_MAXOPS 1 ++ ++/** ++ * apfs_create_sibling_recs - Create sibling link and map records for a dentry ++ * @dentry: the in-memory dentry ++ * @inode: vfs inode for the dentry ++ * @sibling_id: on return, the sibling id for this hardlink ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_create_sibling_recs(struct dentry *dentry, ++ struct inode *inode, u64 *sibling_id) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ u64 cnid; ++ int ret; ++ ++ /* Sibling ids come from the same pool as the inode numbers */ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ cnid = le64_to_cpu(vsb_raw->apfs_next_obj_id); ++ le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1); ++ ++ ret = apfs_create_sibling_link_rec(dentry, inode, cnid); ++ if (ret) ++ return ret; ++ ret = apfs_create_sibling_map_rec(dentry, inode, cnid); ++ if (ret) ++ return ret; ++ ++ *sibling_id = cnid; ++ return 0; ++} ++#define APFS_CREATE_SIBLING_RECS_MAXOPS (APFS_CREATE_SIBLING_LINK_REC_MAXOPS + \ ++ APFS_CREATE_SIBLING_MAP_REC_MAXOPS) ++ ++/** ++ * apfs_create_dentry - Create all records for a new dentry ++ * @dentry: the in-memory dentry ++ * @inode: vfs inode for the dentry ++ * ++ * Creates the dentry record itself, as well as the sibling records if needed; ++ * also updates the child count for the parent inode. Returns 0 on success or ++ * a negative error code in case of failure. 
++ */ ++static int apfs_create_dentry(struct dentry *dentry, struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct inode *parent = d_inode(dentry->d_parent); ++ u64 sibling_id = 0; ++ int err; ++ ++ if (inode->i_nlink > 1) { ++ /* This is optional for a single link, so don't waste space */ ++ err = apfs_create_sibling_recs(dentry, inode, &sibling_id); ++ if (err) { ++ apfs_err(sb, "failed to create sibling recs for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ } ++ ++ err = apfs_create_dentry_rec(inode, &dentry->d_name, apfs_ino(parent), sibling_id); ++ if (err) { ++ apfs_err(sb, "failed to create drec for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ /* Now update the parent inode */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ parent->i_mtime = parent->i_ctime = current_time(inode); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ parent->i_mtime = inode_set_ctime_current(parent); ++#else ++ inode_set_mtime_to_ts(parent, inode_set_ctime_current(parent)); ++#endif ++ ++APFS_I(parent)->i_nchildren; ++ apfs_inode_join_transaction(parent->i_sb, parent); ++ return 0; ++} ++#define APFS_CREATE_DENTRY_MAXOPS (APFS_CREATE_SIBLING_RECS_MAXOPS + \ ++ APFS_CREATE_DENTRY_REC_MAXOPS + \ ++ APFS_UPDATE_INODE_MAXOPS()) ++ ++/** ++ * apfs_undo_create_dentry - Clean up apfs_create_dentry() ++ * @dentry: the in-memory dentry ++ */ ++static void apfs_undo_create_dentry(struct dentry *dentry) ++{ ++ struct inode *parent = d_inode(dentry->d_parent); ++ ++ --APFS_I(parent)->i_nchildren; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) ++ ++static inline void discard_new_inode(struct inode *inode) ++{ ++ unlock_new_inode(inode); ++ iput(inode); ++} ++ ++#endif ++ ++int apfs_mkany(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t rdev, const char *symname) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct inode *inode; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ maxops.cat = 
APFS_CREATE_INODE_REC_MAXOPS() + APFS_CREATE_DENTRY_MAXOPS; ++ if (symname) ++ maxops.cat += APFS_XATTR_SET_MAXOPS(); ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ inode = apfs_new_inode(dir, mode, rdev); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_abort; ++ } ++ ++ err = apfs_create_inode_rec(sb, inode, dentry); ++ if (err) { ++ apfs_err(sb, "failed to create inode rec for ino 0x%llx", apfs_ino(inode)); ++ goto out_discard_inode; ++ } ++ ++ err = apfs_create_dentry(dentry, inode); ++ if (err) { ++ apfs_err(sb, "failed to create dentry recs for ino 0x%llx", apfs_ino(inode)); ++ goto out_discard_inode; ++ } ++ ++ if (symname) { ++ err = apfs_xattr_set(inode, APFS_XATTR_NAME_SYMLINK, symname, ++ strlen(symname) + 1, 0 /* flags */); ++ if (err == -ERANGE) { ++ err = -ENAMETOOLONG; ++ goto out_undo_create; ++ } ++ if (err) { ++ apfs_err(sb, "failed to set symlink xattr for ino 0x%llx", apfs_ino(inode)); ++ goto out_undo_create; ++ } ++ } ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto out_undo_create; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) ++ /* Apparently there is a lockdep bug here, but it doesn't matter */ ++ unlock_new_inode(inode); ++ d_instantiate(dentry, inode); ++#else ++ d_instantiate_new(dentry, inode); ++#endif ++ return 0; ++ ++out_undo_create: ++ apfs_undo_create_dentry(dentry); ++out_discard_inode: ++ /* Don't reset nlink: on-disk cleanup is unneeded and would deadlock */ ++ discard_new_inode(inode); ++out_abort: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++int apfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t rdev) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++int apfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, dev_t rdev) ++#else ++int apfs_mknod(struct mnt_idmap *idmap, struct inode *dir, ++ struct 
dentry *dentry, umode_t mode, dev_t rdev) ++#endif ++{ ++ return apfs_mkany(dir, dentry, mode, rdev, NULL /* symname */); ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ ++int apfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ++{ ++ return apfs_mknod(dir, dentry, mode | S_IFDIR, 0 /* rdev */); ++} ++ ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ ++int apfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode) ++{ ++ return apfs_mknod(mnt_userns, dir, dentry, mode | S_IFDIR, 0 /* rdev */); ++} ++ ++#else ++ ++int apfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, ++ struct dentry *dentry, umode_t mode) ++{ ++ return apfs_mknod(idmap, dir, dentry, mode | S_IFDIR, 0 /* rdev */); ++} ++ ++#endif ++ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ ++int apfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ++ bool excl) ++{ ++ return apfs_mknod(dir, dentry, mode, 0 /* rdev */); ++} ++ ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ ++int apfs_create(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool excl) ++{ ++ return apfs_mknod(mnt_userns, dir, dentry, mode, 0 /* rdev */); ++} ++ ++#else ++ ++int apfs_create(struct mnt_idmap *idmap, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool excl) ++{ ++ return apfs_mknod(idmap, dir, dentry, mode, 0 /* rdev */); ++} ++ ++#endif ++ ++ ++/** ++ * apfs_prepare_dentry_for_link - Assign a sibling id and records to a dentry ++ * @dentry: the in-memory dentry (should be for a primary link) ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_prepare_dentry_for_link(struct dentry *dentry) ++{ ++ struct inode *parent = d_inode(dentry->d_parent); ++ struct super_block *sb = parent->i_sb; ++ struct apfs_query *query; ++ struct apfs_drec drec; ++ u64 sibling_id; ++ int ret; ++ ++ query = apfs_dentry_lookup(parent, &dentry->d_name, &drec); ++ if (IS_ERR(query)) { ++ apfs_err(sb, "lookup failed in dir 0x%llx", apfs_ino(parent)); ++ return PTR_ERR(query); ++ } ++ if (drec.sibling_id) { ++ /* This dentry already has a sibling id xfield */ ++ apfs_free_query(query); ++ return 0; ++ } ++ ++ /* Don't modify the dentry record, just delete it to make a new one */ ++ ret = apfs_btree_remove(query); ++ apfs_free_query(query); ++ if (ret) { ++ apfs_err(sb, "removal failed in dir 0x%llx", apfs_ino(parent)); ++ return ret; ++ } ++ ++ ret = apfs_create_sibling_recs(dentry, d_inode(dentry), &sibling_id); ++ if (ret) { ++ apfs_err(sb, "failed to create sibling recs in dir 0x%llx", apfs_ino(parent)); ++ return ret; ++ } ++ return apfs_create_dentry_rec(d_inode(dentry), &dentry->d_name, ++ apfs_ino(parent), sibling_id); ++} ++#define APFS_PREPARE_DENTRY_FOR_LINK_MAXOPS (1 + APFS_CREATE_SIBLING_RECS_MAXOPS + \ ++ APFS_CREATE_DENTRY_REC_MAXOPS) ++ ++/** ++ * __apfs_undo_link - Clean up __apfs_link() ++ * @dentry: the in-memory dentry ++ * @inode: target inode ++ */ ++static void __apfs_undo_link(struct dentry *dentry, struct inode *inode) ++{ ++ apfs_undo_create_dentry(dentry); ++ drop_nlink(inode); ++} ++ ++/** ++ * __apfs_link - Link a dentry ++ * @old_dentry: dentry for the old link ++ * @dentry: new dentry to link ++ * ++ * Does the same as apfs_link(), but without starting a transaction, taking a ++ * new reference to @old_dentry->d_inode, or instantiating @dentry. 
++ */ ++static int __apfs_link(struct dentry *old_dentry, struct dentry *dentry) ++{ ++ struct inode *inode = d_inode(old_dentry); ++ struct super_block *sb = inode->i_sb; ++ int err; ++ ++ /* First update the inode's link count */ ++ inc_nlink(inode); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode->i_ctime = current_time(inode); ++#else ++ inode_set_ctime_current(inode); ++#endif ++ apfs_inode_join_transaction(inode->i_sb, inode); ++ ++ if (inode->i_nlink == 2) { ++ /* A single link may lack sibling records, so create them now */ ++ err = apfs_prepare_dentry_for_link(old_dentry); ++ if (err) { ++ apfs_err(sb, "failed to prepare original dentry"); ++ goto fail; ++ } ++ } ++ ++ err = apfs_create_dentry(dentry, inode); ++ if (err) { ++ apfs_err(sb, "failed to create new dentry"); ++ goto fail; ++ } ++ return 0; ++ ++fail: ++ drop_nlink(inode); ++ return err; ++} ++#define __APFS_LINK_MAXOPS (APFS_UPDATE_INODE_MAXOPS() + \ ++ APFS_PREPARE_DENTRY_FOR_LINK_MAXOPS + \ ++ APFS_CREATE_DENTRY_MAXOPS) ++ ++int apfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct inode *inode = d_inode(old_dentry); ++ struct apfs_max_ops maxops; ++ int err; ++ ++ maxops.cat = __APFS_LINK_MAXOPS; ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ err = __apfs_link(old_dentry, dentry); ++ if (err) ++ goto out_abort; ++ ihold(inode); ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto out_undo_link; ++ ++ d_instantiate(dentry, inode); ++ return 0; ++ ++out_undo_link: ++ iput(inode); ++ __apfs_undo_link(dentry, inode); ++out_abort: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_delete_sibling_link_rec - Delete the sibling link record for a dentry ++ * @dentry: the in-memory dentry ++ * @sibling_id: sibling id for this hardlink ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_delete_sibling_link_rec(struct dentry *dentry, u64 sibling_id) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct inode *inode = d_inode(dentry); ++ struct apfs_query *query = NULL; ++ int ret; ++ ++ ASSERT(sibling_id); ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_sibling_link_key(apfs_ino(inode), sibling_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret == -ENODATA) { ++ /* A dentry with a sibling id must have sibling records */ ++ ret = -EFSCORRUPTED; ++ } ++ if (ret) { ++ apfs_err(sb, "query failed for ino 0x%llx, sibling 0x%llx", apfs_ino(inode), sibling_id); ++ goto fail; ++ } ++ ret = apfs_btree_remove(query); ++ if (ret) ++ apfs_err(sb, "removal failed for ino 0x%llx, sibling 0x%llx", apfs_ino(inode), sibling_id); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_DELETE_SIBLING_LINK_REC_MAXOPS 1 ++ ++/** ++ * apfs_delete_sibling_map_rec - Delete the sibling map record for a dentry ++ * @dentry: the in-memory dentry ++ * @sibling_id: sibling id for this hardlink ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_delete_sibling_map_rec(struct dentry *dentry, u64 sibling_id) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ int ret; ++ ++ ASSERT(sibling_id); ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_sibling_map_key(sibling_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret == -ENODATA) { ++ /* A dentry with a sibling id must have sibling records */ ++ ret = -EFSCORRUPTED; ++ } ++ if (ret) { ++ apfs_err(sb, "query failed for sibling 0x%llx", sibling_id); ++ goto fail; ++ } ++ ret = apfs_btree_remove(query); ++ if (ret) ++ apfs_err(sb, "removal failed for sibling 0x%llx", sibling_id); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_DELETE_SIBLING_MAP_REC_MAXOPS 1 ++ ++/** ++ * apfs_delete_dentry - Delete all records for a dentry ++ * @dentry: the in-memory dentry ++ * ++ * Deletes the dentry record itself, as well as the sibling records if they ++ * exist; also updates the child count for the parent inode. Returns 0 on ++ * success or a negative error code in case of failure. 
++ */ ++static int apfs_delete_dentry(struct dentry *dentry) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct inode *parent = d_inode(dentry->d_parent); ++ struct apfs_query *query; ++ struct apfs_drec drec; ++ int err; ++ ++ query = apfs_dentry_lookup(parent, &dentry->d_name, &drec); ++ if (IS_ERR(query)) ++ return PTR_ERR(query); ++ err = apfs_btree_remove(query); ++ apfs_free_query(query); ++ if (err) { ++ apfs_err(sb, "drec removal failed"); ++ return err; ++ } ++ ++ if (drec.sibling_id) { ++ err = apfs_delete_sibling_link_rec(dentry, drec.sibling_id); ++ if (err) { ++ apfs_err(sb, "sibling link removal failed"); ++ return err; ++ } ++ err = apfs_delete_sibling_map_rec(dentry, drec.sibling_id); ++ if (err) { ++ apfs_err(sb, "sibling map removal failed"); ++ return err; ++ } ++ } ++ ++ /* Now update the parent inode */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ parent->i_mtime = parent->i_ctime = current_time(parent); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ parent->i_mtime = inode_set_ctime_current(parent); ++#else ++ inode_set_mtime_to_ts(parent, inode_set_ctime_current(parent)); ++#endif ++ --APFS_I(parent)->i_nchildren; ++ apfs_inode_join_transaction(sb, parent); ++ return err; ++} ++#define APFS_DELETE_DENTRY_MAXOPS (1 + APFS_DELETE_SIBLING_LINK_REC_MAXOPS + \ ++ APFS_DELETE_SIBLING_MAP_REC_MAXOPS + \ ++ APFS_UPDATE_INODE_MAXOPS()) ++ ++/** ++ * apfs_undo_delete_dentry - Clean up apfs_delete_dentry() ++ * @dentry: the in-memory dentry ++ */ ++static inline void apfs_undo_delete_dentry(struct dentry *dentry) ++{ ++ struct inode *parent = d_inode(dentry->d_parent); ++ ++ /* Cleanup for the on-disk changes will happen on transaction abort */ ++ ++APFS_I(parent)->i_nchildren; ++} ++ ++/** ++ * apfs_sibling_link_from_query - Read the sibling link record found by a query ++ * @query: the query that found the record ++ * @name: on return, the name of link ++ * @parent: on return, the inode number for the link's parent ++ * ++ * 
Reads the sibling link information into @parent and @name, and performs some ++ * basic sanity checks as a protection against crafted filesystems. The caller ++ * must free @name after use. Returns 0 on success or a negative error code in ++ * case of failure. ++ */ ++static int apfs_sibling_link_from_query(struct apfs_query *query, ++ char **name, u64 *parent) ++{ ++ struct super_block *sb = query->node->object.sb; ++ char *raw = query->node->object.data; ++ struct apfs_sibling_val *siblink; ++ int namelen = query->len - sizeof(*siblink); ++ ++ if (namelen < 1) { ++ apfs_err(sb, "value is too small (%d)", query->len); ++ return -EFSCORRUPTED; ++ } ++ siblink = (struct apfs_sibling_val *)(raw + query->off); ++ ++ if (namelen != le16_to_cpu(siblink->name_len)) { ++ apfs_err(sb, "inconsistent name length"); ++ return -EFSCORRUPTED; ++ } ++ /* Filename must be NULL-terminated */ ++ if (siblink->name[namelen - 1] != 0) { ++ apfs_err(sb, "null termination missing"); ++ return -EFSCORRUPTED; ++ } ++ ++ *name = kmalloc(namelen, GFP_KERNEL); ++ if (!*name) ++ return -ENOMEM; ++ strscpy(*name, siblink->name, namelen); ++ *parent = le64_to_cpu(siblink->parent_id); ++ return 0; ++} ++ ++/** ++ * apfs_find_primary_link - Find the primary link for an inode ++ * @inode: the vfs inode ++ * @name: on return, the name of the primary link ++ * @parent: on return, the inode number for the primary parent ++ * ++ * On success, returns 0 and sets @parent and @name; the second must be freed ++ * by the caller after use. Returns a negative error code in case of failure. 
++ */ ++static int apfs_find_primary_link(struct inode *inode, char **name, u64 *parent) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ int err; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_sibling_link_key(apfs_ino(inode), 0 /* sibling_id */, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_ANY_NUMBER | APFS_QUERY_EXACT; ++ ++ /* The primary link is the one with the lowest sibling id */ ++ *name = NULL; ++ while (1) { ++ err = apfs_btree_query(sb, &query); ++ if (err == -ENODATA) /* No more link records */ ++ break; ++ kfree(*name); ++ if (err) { ++ apfs_err(sb, "query failed for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ ++ err = apfs_sibling_link_from_query(query, name, parent); ++ if (err) { ++ apfs_err(sb, "bad sibling link record for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ } ++ err = *name ? 0 : -EFSCORRUPTED; /* Sibling records must exist */ ++ if (err) ++ apfs_err(sb, "query failed for ino 0x%llx", apfs_ino(inode)); ++ ++fail: ++ apfs_free_query(query); ++ return err; ++} ++ ++/** ++ * apfs_orphan_name - Get the name for an orphan inode's invisible link ++ * @ino: the inode number ++ * @qname: on return, the name assigned to the link ++ * ++ * Returns 0 on success; the caller must remember to free @qname->name after ++ * use. Returns a negative error code in case of failure. 
++ */ ++static int apfs_orphan_name(u64 ino, struct qstr *qname) ++{ ++ int max_len; ++ char *name; ++ ++ /* The name is the inode number in hex, with '-dead' suffix */ ++ max_len = 2 + 16 + 5 + 1; ++ name = kmalloc(max_len, GFP_KERNEL); ++ if (!name) ++ return -ENOMEM; ++ qname->len = snprintf(name, max_len, "0x%llx-dead", ino); ++ qname->name = name; ++ return 0; ++} ++ ++/** ++ * apfs_create_orphan_link - Create a link for an orphan inode under private-dir ++ * @inode: the vfs inode ++ * ++ * On success, returns 0. Returns a negative error code in case of failure. ++ */ ++static int apfs_create_orphan_link(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct inode *priv_dir = sbi->s_private_dir; ++ struct qstr qname; ++ int err = 0; ++ ++ err = apfs_orphan_name(apfs_ino(inode), &qname); ++ if (err) ++ return err; ++ err = apfs_create_dentry_rec(inode, &qname, apfs_ino(priv_dir), 0 /* sibling_id */); ++ if (err) { ++ apfs_err(sb, "failed to create drec for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ ++ /* Now update the child count for private-dir */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ priv_dir->i_mtime = priv_dir->i_ctime = current_time(priv_dir); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ priv_dir->i_mtime = inode_set_ctime_current(priv_dir); ++#else ++ inode_set_mtime_to_ts(priv_dir, inode_set_ctime_current(priv_dir)); ++#endif ++ ++APFS_I(priv_dir)->i_nchildren; ++ apfs_inode_join_transaction(sb, priv_dir); ++ ++fail: ++ kfree(qname.name); ++ return err; ++} ++#define APFS_CREATE_ORPHAN_LINK_MAXOPS (APFS_CREATE_DENTRY_REC_MAXOPS + \ ++ APFS_UPDATE_INODE_MAXOPS()) ++ ++/** ++ * apfs_delete_orphan_link - Delete the link for an orphan inode ++ * @inode: the vfs inode ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_delete_orphan_link(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct inode *priv_dir = sbi->s_private_dir; ++ struct apfs_query *query; ++ struct qstr qname; ++ struct apfs_drec drec; ++ int err; ++ ++ err = apfs_orphan_name(apfs_ino(inode), &qname); ++ if (err) ++ return err; ++ ++ query = apfs_dentry_lookup(priv_dir, &qname, &drec); ++ if (IS_ERR(query)) { ++ apfs_err(sb, "dentry lookup failed"); ++ err = PTR_ERR(query); ++ query = NULL; ++ goto fail; ++ } ++ err = apfs_btree_remove(query); ++ if (err) { ++ apfs_err(sb, "dentry removal failed"); ++ goto fail; ++ } ++ ++ /* Now update the child count for private-dir */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ priv_dir->i_mtime = priv_dir->i_ctime = current_time(priv_dir); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ priv_dir->i_mtime = inode_set_ctime_current(priv_dir); ++#else ++ inode_set_mtime_to_ts(priv_dir, inode_set_ctime_current(priv_dir)); ++#endif ++ --APFS_I(priv_dir)->i_nchildren; ++ apfs_inode_join_transaction(sb, priv_dir); ++ ++fail: ++ apfs_free_query(query); ++ kfree(qname.name); ++ return err; ++} ++int APFS_DELETE_ORPHAN_LINK_MAXOPS(void) ++{ ++ return 1 + APFS_UPDATE_INODE_MAXOPS(); ++} ++ ++/** ++ * __apfs_undo_unlink - Clean up __apfs_unlink() ++ * @dentry: dentry to unlink ++ */ ++static void __apfs_undo_unlink(struct dentry *dentry) ++{ ++ struct inode *inode = d_inode(dentry); ++ ++ inode->i_state |= I_LINKABLE; /* Silence warning about nlink 0->1 */ ++ inc_nlink(inode); ++ inode->i_state &= ~I_LINKABLE; ++ ++ apfs_undo_delete_dentry(dentry); ++} ++ ++/** ++ * apfs_vol_filecnt_dec - Update the volume file count after a new orphaning ++ * @orphan: the new orphan ++ */ ++static void apfs_vol_filecnt_dec(struct inode *orphan) ++{ ++ struct super_block *sb = orphan->i_sb; ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); 
++ ++ switch (orphan->i_mode & S_IFMT) { ++ case S_IFREG: ++ le64_add_cpu(&vsb_raw->apfs_num_files, -1); ++ break; ++ case S_IFDIR: ++ le64_add_cpu(&vsb_raw->apfs_num_directories, -1); ++ break; ++ case S_IFLNK: ++ le64_add_cpu(&vsb_raw->apfs_num_symlinks, -1); ++ break; ++ default: ++ le64_add_cpu(&vsb_raw->apfs_num_other_fsobjects, -1); ++ break; ++ } ++} ++ ++/** ++ * __apfs_unlink - Unlink a dentry ++ * @dir: parent directory ++ * @dentry: dentry to unlink ++ * ++ * Does the same as apfs_unlink(), but without starting a transaction. ++ */ ++static int __apfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode = d_inode(dentry); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ char *primary_name = NULL; ++ int err; ++ ++ err = apfs_delete_dentry(dentry); ++ if (err) { ++ apfs_err(sb, "failed to delete dentry recs for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ drop_nlink(inode); ++ if (!inode->i_nlink) { ++ /* Orphaned inodes continue to report their old location */ ++ err = apfs_create_orphan_link(inode); ++ /* Orphans are not included in the volume file counts */ ++ apfs_vol_filecnt_dec(inode); ++ } else { ++ /* We may have deleted the primary link, so get the new one */ ++ err = apfs_find_primary_link(inode, &primary_name, ++ &ai->i_parent_id); ++ } ++ if (err) ++ goto fail; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode->i_ctime = dir->i_ctime; ++#else ++ inode_set_ctime_to_ts(inode, inode_get_ctime(dir)); ++#endif ++ /* TODO: defer write of the primary name? 
*/ ++ err = apfs_update_inode(inode, primary_name); ++ if (err) ++ apfs_err(sb, "inode update failed for 0x%llx", apfs_ino(inode)); ++ ++fail: ++ kfree(primary_name); ++ primary_name = NULL; ++ if (err) ++ __apfs_undo_unlink(dentry); ++ return err; ++} ++#define __APFS_UNLINK_MAXOPS (APFS_DELETE_DENTRY_MAXOPS + \ ++ APFS_CREATE_ORPHAN_LINK_MAXOPS + \ ++ APFS_UPDATE_INODE_MAXOPS()) ++ ++int apfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ maxops.cat = __APFS_UNLINK_MAXOPS; ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ err = __apfs_unlink(dir, dentry); ++ if (err) ++ goto out_abort; ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto out_undo_unlink; ++ return 0; ++ ++out_undo_unlink: ++ __apfs_undo_unlink(dentry); ++out_abort: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++int apfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode = d_inode(dentry); ++ ++ if (APFS_I(inode)->i_nchildren) ++ return -ENOTEMPTY; ++ return apfs_unlink(dir, dentry); ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++int apfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry, ++ unsigned int flags) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++int apfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, ++ struct dentry *old_dentry, struct inode *new_dir, ++ struct dentry *new_dentry, unsigned int flags) ++#else ++int apfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, ++ struct dentry *old_dentry, struct inode *new_dir, ++ struct dentry *new_dentry, unsigned int flags) ++#endif ++{ ++ struct super_block *sb = old_dir->i_sb; ++ struct inode *old_inode = d_inode(old_dentry); ++ struct inode *new_inode = d_inode(new_dentry); ++ struct apfs_max_ops maxops; ++ int err; ++ ++ if (new_inode && 
APFS_I(new_inode)->i_nchildren) ++ return -ENOTEMPTY; ++ ++ if (flags & ~RENAME_NOREPLACE) /* TODO: support RENAME_EXCHANGE */ ++ return -EINVAL; ++ ++ maxops.cat = __APFS_UNLINK_MAXOPS + __APFS_LINK_MAXOPS; ++ if (new_inode) ++ maxops.cat += __APFS_UNLINK_MAXOPS; ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ if (new_inode) { ++ err = __apfs_unlink(new_dir, new_dentry); ++ if (err) { ++ apfs_err(sb, "unlink failed for replaced dentry"); ++ goto out_abort; ++ } ++ } ++ ++ err = __apfs_link(old_dentry, new_dentry); ++ if (err) { ++ apfs_err(sb, "link failed for new dentry"); ++ goto out_undo_unlink_new; ++ } ++ ++ err = __apfs_unlink(old_dir, old_dentry); ++ if (err) { ++ apfs_err(sb, "unlink failed for old dentry"); ++ goto out_undo_link; ++ } ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto out_undo_unlink_old; ++ return 0; ++ ++out_undo_unlink_old: ++ __apfs_undo_unlink(old_dentry); ++out_undo_link: ++ __apfs_undo_link(new_dentry, old_inode); ++out_undo_unlink_new: ++ if (new_inode) ++ __apfs_undo_unlink(new_dentry); ++out_abort: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_any_orphan_ino - Find the inode number for any orphaned regular file ++ * @sb: filesystem superblock ++ * @ino_p: on return, the inode number found ++ * ++ * Returns 0 on success, or a negative error code in case of failure, which may ++ be -ENODATA if there are no orphan files. 
++ */ ++u64 apfs_any_orphan_ino(struct super_block *sb, u64 *ino_p) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ struct apfs_drec drec = {0}; ++ struct qstr qname = {0}; ++ bool hashed = apfs_is_normalization_insensitive(sb); ++ bool found = false; ++ int err; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_drec_key(sb, APFS_PRIV_DIR_INO_NUM, NULL /* name */, 0 /* name_len */, &query->key); ++ query->flags = APFS_QUERY_CAT | APFS_QUERY_MULTIPLE | APFS_QUERY_EXACT; ++ ++ while (!found) { ++ err = apfs_btree_query(sb, &query); ++ if (err) { ++ if (err == -ENODATA) ++ goto out; ++ apfs_err(sb, "drec query failed for private dir"); ++ goto out; ++ } ++ err = apfs_drec_from_query(query, &drec, hashed); ++ if (err) { ++ apfs_alert(sb, "bad dentry record in private dir"); ++ goto out; ++ } ++ ++ /* These files are deleted immediately by ->evict_inode() */ ++ if (drec.type != DT_REG) ++ continue; ++ ++ /* ++ * Confirm that this is an orphan file, because the official ++ * reference allows other uses for the private directory. ++ */ ++ err = apfs_orphan_name(drec.ino, &qname); ++ if (err) ++ goto out; ++ found = strcmp(drec.name, qname.name) == 0; ++ kfree(qname.name); ++ qname.name = NULL; ++ } ++ *ino_p = drec.ino; ++ ++out: ++ apfs_free_query(query); ++ query = NULL; ++ return err; ++} +diff --git a/fs/apfs/extents.c b/fs/apfs/extents.c +new file mode 100644 +index 000000000..3ebcd296c +--- /dev/null ++++ b/fs/apfs/extents.c +@@ -0,0 +1,2392 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include ++#include "apfs.h" ++ ++#define MAX(X, Y) ((X) <= (Y) ? (Y) : (X)) ++ ++/** ++ * apfs_ext_is_hole - Does this extent represent a hole in a sparse file? 
++ * @extent: the extent to check ++ */ ++static inline bool apfs_ext_is_hole(struct apfs_file_extent *extent) ++{ ++ return extent->phys_block_num == 0; ++} ++ ++/** ++ * apfs_size_to_blocks - Return the block count for a given size, rounded up ++ * @sb: filesystem superblock ++ * @size: size in bytes ++ * ++ * TODO: reuse for inode.c ++ */ ++static inline u64 apfs_size_to_blocks(struct super_block *sb, u64 size) ++{ ++ return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; ++} ++ ++/** ++ * apfs_extent_from_query - Read the extent found by a successful query ++ * @query: the query that found the record ++ * @extent: Return parameter. The extent found. ++ * ++ * Reads the extent record into @extent and performs some basic sanity checks ++ * as a protection against crafted filesystems. Returns 0 on success or ++ * -EFSCORRUPTED otherwise. ++ */ ++int apfs_extent_from_query(struct apfs_query *query, ++ struct apfs_file_extent *extent) ++{ ++ struct super_block *sb = query->node->object.sb; ++ char *raw = query->node->object.data; ++ u64 ext_len; ++ ++ if (!apfs_is_sealed(sb)) { ++ struct apfs_file_extent_val *ext = NULL; ++ struct apfs_file_extent_key *ext_key = NULL; ++ ++ if (query->len != sizeof(*ext) || query->key_len != sizeof(*ext_key)) { ++ apfs_err(sb, "bad length of key (%d) or value (%d)", query->key_len, query->len); ++ return -EFSCORRUPTED; ++ } ++ ++ ext = (struct apfs_file_extent_val *)(raw + query->off); ++ ext_key = (struct apfs_file_extent_key *)(raw + query->key_off); ++ ext_len = le64_to_cpu(ext->len_and_flags) & APFS_FILE_EXTENT_LEN_MASK; ++ ++ extent->logical_addr = le64_to_cpu(ext_key->logical_addr); ++ extent->phys_block_num = le64_to_cpu(ext->phys_block_num); ++ extent->crypto_id = le64_to_cpu(ext->crypto_id); ++ } else { ++ struct apfs_fext_tree_val *fext_val = NULL; ++ struct apfs_fext_tree_key *fext_key = NULL; ++ ++ if (query->len != sizeof(*fext_val) || query->key_len != sizeof(*fext_key)) { ++ apfs_err(sb, "bad length of sealed 
key (%d) or value (%d)", query->key_len, query->len); ++ return -EFSCORRUPTED; ++ } ++ ++ fext_val = (struct apfs_fext_tree_val *)(raw + query->off); ++ fext_key = (struct apfs_fext_tree_key *)(raw + query->key_off); ++ ext_len = le64_to_cpu(fext_val->len_and_flags) & APFS_FILE_EXTENT_LEN_MASK; ++ ++ extent->logical_addr = le64_to_cpu(fext_key->logical_addr); ++ extent->phys_block_num = le64_to_cpu(fext_val->phys_block_num); ++ extent->crypto_id = 0; ++ } ++ ++ /* Extent length must be a multiple of the block size */ ++ if (ext_len & (sb->s_blocksize - 1)) { ++ apfs_err(sb, "invalid length (0x%llx)", ext_len); ++ return -EFSCORRUPTED; ++ } ++ extent->len = ext_len; ++ return 0; ++} ++ ++/** ++ * apfs_extent_read - Read the extent record that covers a block ++ * @dstream: data stream info ++ * @dsblock: logical number of the wanted block (must be in range) ++ * @extent: Return parameter. The extent found. ++ * ++ * Finds and caches the extent record. On success, returns 0 and fills ++ * @extent; on failure, returns a negative error code. 
++ */ ++static int apfs_extent_read(struct apfs_dstream_info *dstream, sector_t dsblock, ++ struct apfs_file_extent *extent) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct apfs_key key; ++ struct apfs_query *query; ++ struct apfs_file_extent *cache = &dstream->ds_cached_ext; ++ u64 iaddr = dsblock << sb->s_blocksize_bits; ++ struct apfs_node *root = NULL; ++ int ret = 0; ++ ++ spin_lock(&dstream->ds_ext_lock); ++ if (iaddr >= cache->logical_addr && ++ iaddr < cache->logical_addr + cache->len) { ++ *extent = *cache; ++ spin_unlock(&dstream->ds_ext_lock); ++ return 0; ++ } ++ spin_unlock(&dstream->ds_ext_lock); ++ ++ /* We will search for the extent that covers iblock */ ++ if (!apfs_is_sealed(sb)) { ++ apfs_init_file_extent_key(dstream->ds_id, iaddr, &key); ++ root = sbi->s_cat_root; ++ } else { ++ apfs_init_fext_key(dstream->ds_id, iaddr, &key); ++ root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_fext_tree_oid), APFS_OBJ_PHYSICAL, false /* write */); ++ if (IS_ERR(root)) { ++ apfs_err(sb, "failed to read fext root 0x%llx", le64_to_cpu(vsb_raw->apfs_fext_tree_oid)); ++ return PTR_ERR(root); ++ } ++ } ++ ++ query = apfs_alloc_query(root, NULL /* parent */); ++ if (!query) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ query->key = key; ++ query->flags = apfs_is_sealed(sb) ? 
APFS_QUERY_FEXT : APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", dstream->ds_id, iaddr); ++ if (ret == -ENODATA) ++ ret = -EFSCORRUPTED; ++ goto done; ++ } ++ ++ ret = apfs_extent_from_query(query, extent); ++ if (ret) { ++ apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id); ++ goto done; ++ } ++ if (iaddr < extent->logical_addr || iaddr >= extent->logical_addr + extent->len) { ++ apfs_err(sb, "no extent for addr 0x%llx in dstream 0x%llx", iaddr, dstream->ds_id); ++ ret = -EFSCORRUPTED; ++ goto done; ++ } ++ ++ /* ++ * For now prioritize the deferral of writes. ++ * i_extent_dirty is protected by the read semaphore. ++ */ ++ if (!dstream->ds_ext_dirty) { ++ spin_lock(&dstream->ds_ext_lock); ++ *cache = *extent; ++ spin_unlock(&dstream->ds_ext_lock); ++ } ++ ++done: ++ apfs_free_query(query); ++ if (apfs_is_sealed(sb)) ++ apfs_node_free(root); ++ return ret; ++} ++ ++/** ++ * apfs_logic_to_phys_bno - Find the physical block number for a dstream block ++ * @dstream: data stream info ++ * @dsblock: logical number of the wanted block ++ * @bno: on return, the physical block number (or zero for holes) ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_logic_to_phys_bno(struct apfs_dstream_info *dstream, sector_t dsblock, u64 *bno) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_file_extent ext; ++ u64 blk_off; ++ int ret; ++ ++ ret = apfs_extent_read(dstream, dsblock, &ext); ++ if (ret) ++ return ret; ++ ++ if (apfs_ext_is_hole(&ext)) { ++ *bno = 0; ++ return 0; ++ } ++ ++ /* Find the block offset of iblock within the extent */ ++ blk_off = dsblock - (ext.logical_addr >> sb->s_blocksize_bits); ++ *bno = ext.phys_block_num + blk_off; ++ return 0; ++} ++ ++/* This does the same as apfs_get_block(), but without taking any locks */ ++int __apfs_get_block(struct apfs_dstream_info *dstream, sector_t dsblock, ++ struct buffer_head *bh_result, int create) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_file_extent ext; ++ u64 blk_off, bno, map_len; ++ int ret; ++ ++ ASSERT(!create); ++ ++ if (dsblock >= apfs_size_to_blocks(sb, dstream->ds_size)) ++ return 0; ++ ++ ret = apfs_extent_read(dstream, dsblock, &ext); ++ if (ret) { ++ apfs_err(sb, "extent read failed"); ++ return ret; ++ } ++ ++ /* Find the block offset of iblock within the extent */ ++ blk_off = dsblock - (ext.logical_addr >> sb->s_blocksize_bits); ++ ++ /* Make sure we don't read past the extent boundaries */ ++ map_len = ext.len - (blk_off << sb->s_blocksize_bits); ++ if (bh_result->b_size > map_len) ++ bh_result->b_size = map_len; ++ ++ /* ++ * Save the requested mapping length as apfs_map_bh() replaces it with ++ * the filesystem block size ++ */ ++ map_len = bh_result->b_size; ++ /* Extents representing holes have block number 0 */ ++ if (!apfs_ext_is_hole(&ext)) { ++ /* Find the block number of iblock within the disk */ ++ bno = ext.phys_block_num + blk_off; ++ apfs_map_bh(bh_result, sb, bno); ++ } ++ bh_result->b_size = map_len; ++ return 0; ++} ++ ++int apfs_get_block(struct inode *inode, sector_t iblock, ++ struct buffer_head *bh_result, int create) ++{ ++ struct apfs_nxsb_info *nxi = 
APFS_NXI(inode->i_sb); ++ struct apfs_inode_info *ai = APFS_I(inode); ++ int ret; ++ ++ down_read(&nxi->nx_big_sem); ++ ret = __apfs_get_block(&ai->i_dstream, iblock, bh_result, create); ++ up_read(&nxi->nx_big_sem); ++ return ret; ++} ++ ++/** ++ * apfs_set_extent_length - Set a new length in an extent record's value ++ * @ext: the extent record's value ++ * @len: the new length ++ * ++ * Preserves the flags, though none are defined yet and I don't know if that ++ * will ever be important. ++ */ ++static inline void apfs_set_extent_length(struct apfs_file_extent_val *ext, u64 len) ++{ ++ u64 len_and_flags = le64_to_cpu(ext->len_and_flags); ++ u64 flags = len_and_flags & APFS_FILE_EXTENT_FLAG_MASK; ++ ++ ext->len_and_flags = cpu_to_le64(flags | len); ++} ++ ++static int apfs_range_put_reference(struct super_block *sb, u64 paddr, u64 length); ++ ++/** ++ * apfs_shrink_extent_head - Shrink an extent record in its head ++ * @query: the query that found the record ++ * @dstream: data stream info ++ * @start: new logical start for the extent ++ * ++ * Also deletes the physical extent records for the head. Returns 0 on success ++ * or a negative error code in case of failure. 
++ */ ++static int apfs_shrink_extent_head(struct apfs_query *query, struct apfs_dstream_info *dstream, u64 start) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_file_extent_key key; ++ struct apfs_file_extent_val val; ++ struct apfs_file_extent extent; ++ u64 new_len, head_len; ++ void *raw = NULL; ++ int err = 0; ++ ++ err = apfs_extent_from_query(query, &extent); ++ if (err) { ++ apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ raw = query->node->object.data; ++ key = *(struct apfs_file_extent_key *)(raw + query->key_off); ++ val = *(struct apfs_file_extent_val *)(raw + query->off); ++ ++ new_len = extent.logical_addr + extent.len - start; ++ head_len = extent.len - new_len; ++ ++ /* Delete the physical records for the blocks lost in the shrinkage */ ++ if (!apfs_ext_is_hole(&extent)) { ++ err = apfs_range_put_reference(sb, extent.phys_block_num, head_len); ++ if (err) { ++ apfs_err(sb, "failed to put range 0x%llx-0x%llx", extent.phys_block_num, head_len); ++ return err; ++ } ++ } else { ++ dstream->ds_sparse_bytes -= head_len; ++ } ++ ++ /* This is the actual shrinkage of the logical extent */ ++ key.logical_addr = cpu_to_le64(start); ++ apfs_set_extent_length(&val, new_len); ++ if (!apfs_ext_is_hole(&extent)) ++ le64_add_cpu(&val.phys_block_num, head_len >> sb->s_blocksize_bits); ++ return apfs_btree_replace(query, &key, sizeof(key), &val, sizeof(val)); ++} ++ ++/** ++ * apfs_shrink_extent_tail - Shrink an extent record in its tail ++ * @query: the query that found the record ++ * @dstream: data stream info ++ * @end: new logical end for the extent ++ * ++ * Also puts the physical extent records for the tail. Returns 0 on success or ++ * a negative error code in case of failure. 
++ */ ++static int apfs_shrink_extent_tail(struct apfs_query *query, struct apfs_dstream_info *dstream, u64 end) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_file_extent_val *val; ++ struct apfs_file_extent extent; ++ u64 new_len, new_blkcount, tail_len; ++ void *raw; ++ int err = 0; ++ ++ ASSERT((end & (sb->s_blocksize - 1)) == 0); ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(sb, "query join failed"); ++ return err; ++ } ++ raw = query->node->object.data; ++ ++ err = apfs_extent_from_query(query, &extent); ++ if (err) { ++ apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ val = raw + query->off; ++ ++ new_len = end - extent.logical_addr; ++ new_blkcount = new_len >> sb->s_blocksize_bits; ++ tail_len = extent.len - new_len; ++ ++ /* Delete the physical records for the blocks lost in the shrinkage */ ++ if (!apfs_ext_is_hole(&extent)) { ++ err = apfs_range_put_reference(sb, extent.phys_block_num + new_blkcount, tail_len); ++ if (err) { ++ apfs_err(sb, "failed to put range 0x%llx-0x%llx", extent.phys_block_num + new_blkcount, tail_len); ++ return err; ++ } ++ } else { ++ dstream->ds_sparse_bytes -= tail_len; ++ } ++ ++ /* This is the actual shrinkage of the logical extent */ ++ apfs_set_extent_length(val, new_len); ++ return err; ++} ++ ++/** ++ * apfs_query_found_extent - Did this query find an extent with the right id? 
 * @query: the (successful) query that found the record
 */
static inline bool apfs_query_found_extent(struct apfs_query *query)
{
	void *raw = query->node->object.data;
	struct apfs_key_header *hdr;

	if (query->key_len < sizeof(*hdr))
		return false;
	hdr = raw + query->key_off;

	if (apfs_cat_type(hdr) != APFS_TYPE_FILE_EXTENT)
		return false;
	if (apfs_cat_cnid(hdr) != query->key.id)
		return false;
	return true;
}

/**
 * apfs_update_tail_extent - Grow the tail extent for a data stream
 * @dstream:	data stream info
 * @extent:	new in-memory extent
 *
 * Also takes care of any needed changes to the physical extent records. Returns
 * 0 on success or a negative error code in case of failure.
 */
static int apfs_update_tail_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
{
	struct super_block *sb = dstream->ds_sb;
	struct apfs_sb_info *sbi = APFS_SB(sb);
	struct apfs_query *query;
	struct apfs_file_extent_key raw_key;
	struct apfs_file_extent_val raw_val;
	u64 extent_id = dstream->ds_id;
	int ret;
	u64 new_crypto;

	/* Build the on-disk record for the new extent up front */
	apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, extent_id, &raw_key);
	raw_key.logical_addr = cpu_to_le64(extent->logical_addr);
	raw_val.len_and_flags = cpu_to_le64(extent->len);
	raw_val.phys_block_num = cpu_to_le64(extent->phys_block_num);
	/* NOTE(review): encrypted volumes seem to use the dstream id as crypto id — confirm */
	if (apfs_vol_is_encrypted(sb))
		new_crypto = extent_id;
	else
		new_crypto = 0;
	raw_val.crypto_id = cpu_to_le64(new_crypto);

	query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
	if (!query)
		return -ENOMEM;
	/* We want the last extent record */
	apfs_init_file_extent_key(extent_id, -1, &query->key);
	query->flags = APFS_QUERY_CAT;

	ret = apfs_btree_query(sb, &query);
	if (ret && ret != -ENODATA) {
		apfs_err(sb, "query failed for last extent of id 0x%llx", extent_id);
		goto out;
	}

	if (ret == -ENODATA || !apfs_query_found_extent(query)) {
		/* We are creating the first extent for the file */
		ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
		if (ret) {
			apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
			goto out;
		}
	} else {
		struct apfs_file_extent tail;

		ret = apfs_extent_from_query(query, &tail);
		if (ret) {
			apfs_err(sb, "bad extent record for dstream 0x%llx", dstream->ds_id);
			goto out;
		}

		if (tail.logical_addr > extent->logical_addr) {
			apfs_alert(sb, "extent is not tail - bug!");
			ret = -EOPNOTSUPP;
			goto out;
		} else if (tail.logical_addr == extent->logical_addr) {
			/* The new extent replaces the old tail outright */
			ret = apfs_btree_replace(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
			if (ret) {
				apfs_err(sb, "update failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
				goto out;
			}
			if (apfs_ext_is_hole(&tail)) {
				dstream->ds_sparse_bytes -= tail.len;
			} else if (tail.phys_block_num != extent->phys_block_num) {
				ret = apfs_range_put_reference(sb, tail.phys_block_num, tail.len);
				if (ret) {
					apfs_err(sb, "failed to put range 0x%llx-0x%llx", tail.phys_block_num, tail.len);
					goto out;
				}
			}
			/* If the crypto id is unchanged, skip both refcount adjustments */
			if (new_crypto == tail.crypto_id)
				goto out;
			ret = apfs_crypto_adj_refcnt(sb, tail.crypto_id, -1);
			if (ret) {
				apfs_err(sb, "failed to put crypto id 0x%llx", tail.crypto_id);
				goto out;
			}
		} else {
			/*
			 * TODO: we could actually also continue the tail extent
			 * if it's right next to the new one (both logically and
			 * physically), even if they don't overlap. Or maybe we
			 * should always make sure that the tail extent is in
			 * the cache before a write...
			 */
			if (extent->logical_addr < tail.logical_addr + tail.len) {
				/* The new extent overlaps the old tail: trim it first */
				ret = apfs_shrink_extent_tail(query, dstream, extent->logical_addr);
				if (ret) {
					apfs_err(sb, "failed to shrink tail of dstream 0x%llx", extent_id);
					goto out;
				}
			}
			ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
			if (ret) {
				apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
				goto out;
			}
		}
	}

	ret = apfs_crypto_adj_refcnt(sb, new_crypto, 1);
	if (ret)
		apfs_err(sb, "failed to take crypto id 0x%llx", new_crypto);

out:
	apfs_free_query(query);
	return ret;
}

/**
 * apfs_split_extent - Break an extent in two
 * @query:	query pointing to the extent
 * @div:	logical address for the division
 *
 * Returns 0 on success or a negative error code in case of failure.
 */
static int apfs_split_extent(struct apfs_query *query, u64 div)
{
	struct super_block *sb = query->node->object.sb;
	struct apfs_file_extent_val *val1;
	struct apfs_file_extent_key key2;
	struct apfs_file_extent_val val2;
	struct apfs_file_extent extent;
	u64 len1, len2, blkcount1;
	void *raw;
	int err = 0;

	/* Join before touching the node data: the first half is edited in place */
	err = apfs_query_join_transaction(query);
	if (err) {
		apfs_err(sb, "query join failed");
		return err;
	}
	raw = query->node->object.data;

	err = apfs_extent_from_query(query, &extent);
	if (err) {
		apfs_err(sb, "bad extent record");
		return err;
	}
	/* val1 points into the node; val2/key2 are copies for the new record */
	val1 = raw + query->off;
	val2 = *(struct apfs_file_extent_val *)(raw + query->off);
	key2 = *(struct apfs_file_extent_key *)(raw + query->key_off);

	len1 = div - extent.logical_addr;
	blkcount1 = len1 >> sb->s_blocksize_bits;
	len2 = extent.len - len1;

	/* Modify the current extent in place to become the first half */
	apfs_set_extent_length(val1, len1);

	/* Insert the second half right after the first */
	key2.logical_addr = cpu_to_le64(div);
	if (!apfs_ext_is_hole(&extent))
		val2.phys_block_num = cpu_to_le64(extent.phys_block_num + blkcount1);
	apfs_set_extent_length(&val2, len2);
	err = apfs_btree_insert(query, &key2, sizeof(key2), &val2, sizeof(val2));
	if (err) {
		apfs_err(sb, "insertion failed in division 0x%llx", div);
		return err;
	}

	/* Both halves share the crypto id, so it gains a reference */
	return apfs_crypto_adj_refcnt(sb, extent.crypto_id, 1);
}

/**
 * apfs_update_mid_extent - Create or update a non-tail extent for a dstream
 * @dstream:	data stream info
 * @extent:	new in-memory extent
 *
 * Also takes care of any needed changes to the physical extent records. Returns
 * 0 on success or a negative error code in case of failure.
 */
static int apfs_update_mid_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
{
	struct super_block *sb = dstream->ds_sb;
	struct apfs_sb_info *sbi = APFS_SB(sb);
	struct apfs_key key;
	struct apfs_query *query;
	struct apfs_file_extent_key raw_key;
	struct apfs_file_extent_val raw_val;
	struct apfs_file_extent prev_ext;
	u64 extent_id = dstream->ds_id;
	u64 prev_crypto, new_crypto;
	u64 prev_start, prev_end;
	bool second_run = false;
	int ret;

	/* Build the on-disk record for the new extent up front */
	apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, extent_id, &raw_key);
	raw_key.logical_addr = cpu_to_le64(extent->logical_addr);
	raw_val.len_and_flags = cpu_to_le64(extent->len);
	raw_val.phys_block_num = cpu_to_le64(extent->phys_block_num);
	if (apfs_vol_is_encrypted(sb))
		new_crypto = extent_id;
	else
		new_crypto = 0;
	raw_val.crypto_id = cpu_to_le64(new_crypto);

	apfs_init_file_extent_key(extent_id, extent->logical_addr, &key);

search_and_insert:
	query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */);
	if (!query)
		return -ENOMEM;
	query->key = key;
	query->flags = APFS_QUERY_CAT;

	ret = apfs_btree_query(sb, &query);
	if (ret && ret != -ENODATA) {
		apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
		goto out;
	}

	if (ret == -ENODATA || !apfs_query_found_extent(query)) {
		/*
		 * The new extent goes in a hole we just made, right at the
		 * beginning of the file.
		 */
		if (!second_run) {
			/* A mid extent must overlap an existing one on the first pass */
			apfs_err(sb, "missing extent on dstream 0x%llx", extent_id);
			ret = -EFSCORRUPTED;
		} else {
			ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
			if (ret)
				apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
		}
		goto out;
	}

	if (apfs_extent_from_query(query, &prev_ext)) {
		apfs_err(sb, "bad mid extent record on dstream 0x%llx", extent_id);
		ret = -EFSCORRUPTED;
		goto out;
	}
	prev_crypto = prev_ext.crypto_id;
	prev_start = prev_ext.logical_addr;
	prev_end = prev_ext.logical_addr + prev_ext.len;

	if (prev_end == extent->logical_addr && second_run) {
		/* The new extent goes in the hole we just made */
		ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
		if (ret) {
			apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
			goto out;
		}
	} else if (prev_start == extent->logical_addr && prev_ext.len == extent->len) {
		/* The old and new extents are the same logical block */
		ret = apfs_btree_replace(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
		if (ret) {
			apfs_err(sb, "update failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
			goto out;
		}
		if (apfs_ext_is_hole(&prev_ext)) {
			dstream->ds_sparse_bytes -= prev_ext.len;
		} else if (prev_ext.phys_block_num != extent->phys_block_num) {
			ret = apfs_range_put_reference(sb, prev_ext.phys_block_num, prev_ext.len);
			if (ret) {
				apfs_err(sb, "failed to put range 0x%llx-0x%llx", prev_ext.phys_block_num, prev_ext.len);
				goto out;
			}
		}
		ret = apfs_crypto_adj_refcnt(sb, prev_crypto, -1);
		if (ret) {
			apfs_err(sb, "failed to put crypto id 0x%llx", prev_crypto);
			goto out;
		}
	} else if (prev_start == extent->logical_addr) {
		/* The new extent is the first logical block of the old one */
		if (second_run) {
			/* I don't know if this is possible, but be safe */
			apfs_alert(sb, "recursion shrinking extent head for dstream 0x%llx", extent_id);
			ret = -EFSCORRUPTED;
			goto out;
		}
		ret = apfs_shrink_extent_head(query, dstream, extent->logical_addr + extent->len);
		if (ret) {
			apfs_err(sb, "failed to shrink extent in dstream 0x%llx", extent_id);
			goto out;
		}
		/* The query should point to the previous record, start again */
		apfs_free_query(query);
		second_run = true;
		goto search_and_insert;
	} else if (prev_end == extent->logical_addr + extent->len) {
		/* The new extent is the last logical block of the old one */
		ret = apfs_shrink_extent_tail(query, dstream, extent->logical_addr);
		if (ret) {
			apfs_err(sb, "failed to shrink extent in dstream 0x%llx", extent_id);
			goto out;
		}
		ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
		if (ret) {
			apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, extent->logical_addr);
			goto out;
		}
	} else if (prev_start < extent->logical_addr && prev_end > extent->logical_addr + extent->len) {
		/* The new extent is logically in the middle of the old one */
		if (second_run) {
			/* I don't know if this is possible, but be safe */
			apfs_alert(sb, "recursion when splitting extents for dstream 0x%llx", extent_id);
			ret = -EFSCORRUPTED;
			goto out;
		}
		ret = apfs_split_extent(query, extent->logical_addr + extent->len);
		if (ret) {
			apfs_err(sb, "failed to split extent in dstream 0x%llx", extent_id);
			goto out;
		}
		/* The split may make the query invalid */
		apfs_free_query(query);
		second_run = true;
		goto search_and_insert;
	} else {
		/* I don't know what this is, be safe */
		apfs_alert(sb, "strange extents for dstream 0x%llx", extent_id);
		ret = -EFSCORRUPTED;
		goto out;
	}

	ret = apfs_crypto_adj_refcnt(sb, new_crypto, 1);
	if (ret)
		apfs_err(sb, "failed to take crypto id 0x%llx", new_crypto);

out:
	apfs_free_query(query);
	return ret;
}

/**
 * apfs_update_extent - Create or update the extent record for an extent
 * @dstream:	data stream info
 * @extent:	new in-memory file extent
 *
 * The @extent must either be a new tail for the dstream, or a single block.
 * Returns 0 on success or a negative error code in case of failure.
 */
static int apfs_update_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
{
	struct super_block *sb = dstream->ds_sb;

	if (extent->logical_addr + extent->len >= dstream->ds_size)
		return apfs_update_tail_extent(dstream, extent);
	if (extent->len > sb->s_blocksize) {
		apfs_err(sb, "can't create mid extents of length 0x%llx", extent->len);
		return -EOPNOTSUPP;
	}
	return apfs_update_mid_extent(dstream, extent);
}
#define APFS_UPDATE_EXTENTS_MAXOPS (1 + 2 * APFS_CRYPTO_ADJ_REFCNT_MAXOPS())

/* Overwrite an existing physical extent record with a fresh APFS_KIND_NEW one */
static int apfs_extend_phys_extent(struct apfs_query *query, u64 bno, u64 blkcnt, u64 dstream_id)
{
	struct apfs_phys_ext_key raw_key;
	struct apfs_phys_ext_val raw_val;
	u64 kind = (u64)APFS_KIND_NEW << APFS_PEXT_KIND_SHIFT;

	apfs_key_set_hdr(APFS_TYPE_EXTENT, bno, &raw_key);
	raw_val.len_and_kind = cpu_to_le64(kind | blkcnt);
	raw_val.owning_obj_id = cpu_to_le64(dstream_id);
	raw_val.refcnt = cpu_to_le32(1);
	return apfs_btree_replace(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
}

/* Insert a fresh APFS_KIND_NEW physical extent record with refcount 1 */
static int apfs_insert_new_phys_extent(struct apfs_query *query, u64 bno, u64 blkcnt, u64 dstream_id)
{
	struct apfs_phys_ext_key raw_key;
	struct apfs_phys_ext_val raw_val;
	u64 kind = (u64)APFS_KIND_NEW << APFS_PEXT_KIND_SHIFT;

	apfs_key_set_hdr(APFS_TYPE_EXTENT, bno, &raw_key);
	raw_val.len_and_kind = cpu_to_le64(kind | blkcnt);
	raw_val.owning_obj_id = cpu_to_le64(dstream_id);
	raw_val.refcnt = cpu_to_le32(1);
	return apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val));
}

static int apfs_phys_ext_from_query(struct apfs_query *query, struct apfs_phys_extent *pext);

/**
 * apfs_insert_phys_extent - Create or grow the physical record for an extent
 * @dstream:	data stream info for the extent
 * @extent:	new in-memory file extent
 *
 * Only works for appending to extents, for now. TODO: reference counting.
 * Returns 0 on success or a negative error code in case of failure.
 */
static int apfs_insert_phys_extent(struct apfs_dstream_info *dstream, const struct apfs_file_extent *extent)
{
	struct super_block *sb = dstream->ds_sb;
	struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;
	struct apfs_node *extref_root;
	struct apfs_query *query = NULL;
	struct apfs_phys_extent pext;
	u64 blkcnt = extent->len >> sb->s_blocksize_bits;
	u64 last_bno, new_base, new_blkcnt;
	int ret;

	extref_root = apfs_read_node(sb,
				     le64_to_cpu(vsb_raw->apfs_extentref_tree_oid),
				     APFS_OBJ_PHYSICAL, true /* write */);
	if (IS_ERR(extref_root)) {
		apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid));
		return PTR_ERR(extref_root);
	}
	/* The root may have moved in CoW, record the new object id */
	apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
	vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(extref_root->object.oid);

	query = apfs_alloc_query(extref_root, NULL /* parent */);
	if (!query) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * The cached logical extent may have been split into multiple physical
	 * extents because of clones. If that happens, we want to grow the last
	 * one.
	 */
	last_bno = extent->phys_block_num + blkcnt - 1;
	apfs_init_extent_key(last_bno, &query->key);
	query->flags = APFS_QUERY_EXTENTREF;

	ret = apfs_btree_query(sb, &query);
	if (ret && ret != -ENODATA) {
		apfs_err(sb, "query failed for paddr 0x%llx", last_bno);
		goto out;
	}

	if (ret == -ENODATA) {
		/* This is a fresh new physical extent */
		ret = apfs_insert_new_phys_extent(query, extent->phys_block_num, blkcnt, dstream->ds_id);
		if (ret)
			apfs_err(sb, "insertion failed for paddr 0x%llx", extent->phys_block_num);
		goto out;
	}

	ret = apfs_phys_ext_from_query(query, &pext);
	if (ret) {
		apfs_err(sb, "bad pext record for bno 0x%llx", last_bno);
		goto out;
	}
	if (pext.bno + pext.blkcount <= extent->phys_block_num) {
		/* Also a fresh new physical extent */
		ret = apfs_insert_new_phys_extent(query, extent->phys_block_num, blkcnt, dstream->ds_id);
		if (ret)
			apfs_err(sb, "insertion failed for paddr 0x%llx", extent->phys_block_num);
		goto out;
	}

	/*
	 * There is an existing physical extent that overlaps the new one. The
	 * cache was dirty, so the existing extent can't cover the whole tail.
	 */
	if (pext.bno + pext.blkcount >= extent->phys_block_num + blkcnt) {
		apfs_err(sb, "dirty cache tail covered by existing physical extent 0x%llx-0x%llx", pext.bno, pext.blkcount);
		ret = -EFSCORRUPTED;
		goto out;
	}
	if (pext.refcnt == 1) {
		new_base = pext.bno;
		new_blkcnt = extent->phys_block_num + blkcnt - new_base;
		ret = apfs_extend_phys_extent(query, new_base, new_blkcnt, dstream->ds_id);
		if (ret)
			apfs_err(sb, "update failed for paddr 0x%llx", new_base);
	} else {
		/*
		 * We can't extend this one, because it would extend the other
		 * references as well.
		 */
		new_base = pext.bno + pext.blkcount;
		new_blkcnt = extent->phys_block_num + blkcnt - new_base;
		ret = apfs_insert_new_phys_extent(query, new_base, new_blkcnt, dstream->ds_id);
		if (ret)
			apfs_err(sb, "insertion failed for paddr 0x%llx", new_base);
	}

out:
	apfs_free_query(query);
	apfs_node_free(extref_root);
	return ret;
}

/**
 * apfs_phys_ext_from_query - Read the physical extent record found by a query
 * @query:	the (successful) query that found the record
 * @pext:	on return, the physical extent read
 *
 * Reads the physical extent record into @pext and performs some basic sanity
 * checks as a protection against crafted filesystems. Returns 0 on success or
 * -EFSCORRUPTED otherwise.
 */
static int apfs_phys_ext_from_query(struct apfs_query *query, struct apfs_phys_extent *pext)
{
	struct super_block *sb = query->node->object.sb;
	struct apfs_phys_ext_key *key;
	struct apfs_phys_ext_val *val;
	char *raw = query->node->object.data;

	if (query->len != sizeof(*val) || query->key_len != sizeof(*key)) {
		apfs_err(sb, "bad length of key (%d) or value (%d)", query->key_len, query->len);
		return -EFSCORRUPTED;
	}

	key = (struct apfs_phys_ext_key *)(raw + query->key_off);
	val = (struct apfs_phys_ext_val *)(raw + query->off);

	pext->bno = apfs_cat_cnid(&key->hdr);
	pext->blkcount = le64_to_cpu(val->len_and_kind) & APFS_PEXT_LEN_MASK;
	pext->len = pext->blkcount << sb->s_blocksize_bits;
	pext->refcnt = le32_to_cpu(val->refcnt);
	pext->kind = le64_to_cpu(val->len_and_kind) >> APFS_PEXT_KIND_SHIFT;
	return 0;
}

/**
 * apfs_free_phys_ext - Add all blocks in a physical extent to the free queue
 * @sb:		superblock structure
 * @pext:	physical range to free
 *
 * Returns 0 on success or a negative error code in case of failure.
 */
static int apfs_free_phys_ext(struct super_block *sb, struct apfs_phys_extent *pext)
{
	struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw;

	/* Update the volume's allocation statistics before queueing the blocks */
	apfs_assert_in_transaction(sb, &vsb_raw->apfs_o);
	le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, -pext->blkcount);
	le64_add_cpu(&vsb_raw->apfs_total_blocks_freed, pext->blkcount);

	return apfs_free_queue_insert(sb, pext->bno, pext->blkcount);
}

/**
 * apfs_put_phys_extent - Reduce the reference count for a physical extent
 * @pext:	physical extent data, already read
 * @query:	query that found the extent
 *
 * Returns 0 on success or a negative error code in case of failure.
 */
static int apfs_put_phys_extent(struct apfs_phys_extent *pext, struct apfs_query *query)
{
	struct super_block *sb = query->node->object.sb;
	struct apfs_phys_ext_val *val;
	void *raw;
	int err;

	if (--pext->refcnt == 0) {
		/* Last reference: remove the record and free the blocks */
		err = apfs_btree_remove(query);
		if (err) {
			apfs_err(sb, "removal failed for paddr 0x%llx", pext->bno);
			return err;
		}
		/* Only APFS_KIND_NEW extents own their blocks */
		return pext->kind == APFS_KIND_NEW ? apfs_free_phys_ext(sb, pext) : 0;
	}

	/* Otherwise just write the reduced refcount back in place */
	err = apfs_query_join_transaction(query);
	if (err) {
		apfs_err(sb, "query join failed");
		return err;
	}
	raw = query->node->object.data;
	val = raw + query->off;
	val->refcnt = cpu_to_le32(pext->refcnt);
	return 0;
}

/**
 * apfs_take_phys_extent - Increase the reference count for a physical extent
 * @pext:	physical extent data, already read
 * @query:	query that found the extent
 *
 * Returns 0 on success or a negative error code in case of failure.
++ */ ++static int apfs_take_phys_extent(struct apfs_phys_extent *pext, struct apfs_query *query) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_phys_ext_val *val; ++ void *raw; ++ int err; ++ ++ /* An update extent may be dropped when a reference is taken */ ++ if (++pext->refcnt == 0) ++ return apfs_btree_remove(query); ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(sb, "query join failed"); ++ return err; ++ } ++ raw = query->node->object.data; ++ val = raw + query->off; ++ val->refcnt = cpu_to_le32(pext->refcnt); ++ return 0; ++} ++ ++/** ++ * apfs_set_phys_ext_length - Set new length in a physical extent record's value ++ * @pext: the physical extent record's value ++ * @len: the new length (in blocks) ++ * ++ * Preserves the kind, though I doubt that's the right thing to do in general. ++ */ ++static inline void apfs_set_phys_ext_length(struct apfs_phys_ext_val *pext, u64 len) ++{ ++ u64 len_and_kind = le64_to_cpu(pext->len_and_kind); ++ u64 kind = len_and_kind & APFS_PEXT_KIND_MASK; ++ ++ pext->len_and_kind = cpu_to_le64(kind | len); ++} ++ ++/** ++ * apfs_split_phys_ext - Break a physical extent in two ++ * @query: query pointing to the extent ++ * @div: first physical block number to come after the division ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
 */
static int apfs_split_phys_ext(struct apfs_query *query, u64 div)
{
	struct super_block *sb = query->node->object.sb;
	struct apfs_phys_ext_val *val1;
	struct apfs_phys_ext_key key2;
	struct apfs_phys_ext_val val2;
	struct apfs_phys_extent pextent;
	u64 blkcount1, blkcount2;
	void *raw;
	int err = 0;

	/* Join before touching the node data: the first half is edited in place */
	err = apfs_query_join_transaction(query);
	if (err) {
		apfs_err(sb, "query join failed");
		return err;
	}
	raw = query->node->object.data;

	err = apfs_phys_ext_from_query(query, &pextent);
	if (err) {
		apfs_err(sb, "bad pext record over div 0x%llx", div);
		return err;
	}
	/* val1 points into the node; val2/key2 are copies for the new record */
	val1 = raw + query->off;
	val2 = *(struct apfs_phys_ext_val *)(raw + query->off);
	key2 = *(struct apfs_phys_ext_key *)(raw + query->key_off);

	blkcount1 = div - pextent.bno;
	blkcount2 = pextent.blkcount - blkcount1;

	/* Modify the current extent in place to become the first half */
	apfs_set_phys_ext_length(val1, blkcount1);

	/* Insert the second half right after the first */
	apfs_key_set_hdr(APFS_TYPE_EXTENT, div, &key2);
	apfs_set_phys_ext_length(&val2, blkcount2);
	return apfs_btree_insert(query, &key2, sizeof(key2), &val2, sizeof(val2));
}

/**
 * apfs_create_update_pext - Create a reference update physical extent record
 * @query:	query that searched for the physical extent
 * @extent:	range of physical blocks to update
 * @diff:	reference count change
 *
 * Returns 0 on success or a negative error code in case of failure.
++ */ ++static int apfs_create_update_pext(struct apfs_query *query, const struct apfs_file_extent *extent, u32 diff) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_phys_ext_key key = {0}; ++ struct apfs_phys_ext_val val = {0}; ++ ++ apfs_key_set_hdr(APFS_TYPE_EXTENT, extent->phys_block_num, &key); ++ val.len_and_kind = cpu_to_le64((u64)APFS_KIND_UPDATE << APFS_PEXT_KIND_SHIFT | extent->len >> sb->s_blocksize_bits); ++ val.owning_obj_id = cpu_to_le64(APFS_OWNING_OBJ_ID_INVALID); ++ val.refcnt = cpu_to_le32(diff); ++ return apfs_btree_insert(query, &key, sizeof(key), &val, sizeof(val)); ++} ++ ++/** ++ * apfs_dstream_cache_is_tail - Is the tail of this dstream in its extent cache? ++ * @dstream: dstream to check ++ */ ++static inline bool apfs_dstream_cache_is_tail(struct apfs_dstream_info *dstream) ++{ ++ struct apfs_file_extent *cache = &dstream->ds_cached_ext; ++ ++ /* nx_big_sem provides the locking for the cache here */ ++ return cache->len && (dstream->ds_size <= cache->logical_addr + cache->len); ++} ++ ++/** ++ * apfs_flush_extent_cache - Write the cached extent to the catalog, if dirty ++ * @dstream: data stream to flush ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_flush_extent_cache(struct apfs_dstream_info *dstream) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_file_extent *ext = &dstream->ds_cached_ext; ++ int err; ++ ++ if (!dstream->ds_ext_dirty) ++ return 0; ++ ASSERT(ext->len > 0); ++ ++ err = apfs_update_extent(dstream, ext); ++ if (err) { ++ apfs_err(sb, "extent update failed"); ++ return err; ++ } ++ err = apfs_insert_phys_extent(dstream, ext); ++ if (err) { ++ apfs_err(sb, "pext insertion failed"); ++ return err; ++ } ++ ++ /* ++ * TODO: keep track of the byte and block count through the use of ++ * inode_add_bytes() and inode_set_bytes(). 
This hasn't been done with ++ * care in the rest of the module and it doesn't seem to matter beyond ++ * stat(), so I'm ignoring it for now. ++ */ ++ ++ dstream->ds_ext_dirty = false; ++ return 0; ++} ++#define APFS_FLUSH_EXTENT_CACHE APFS_UPDATE_EXTENTS_MAXOPS ++ ++/** ++ * apfs_create_hole - Create and insert a hole extent for the dstream ++ * @dstream: data stream info ++ * @start: first logical block number for the hole ++ * @end: first logical block number right after the hole ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ * TODO: what happens to the crypto refcount? ++ */ ++static int apfs_create_hole(struct apfs_dstream_info *dstream, u64 start, u64 end) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_file_extent_key raw_key; ++ struct apfs_file_extent_val raw_val; ++ u64 extent_id = dstream->ds_id; ++ int ret; ++ ++ if (start == end) ++ return 0; ++ ++ /* File extent records use addresses, not block numbers */ ++ start <<= sb->s_blocksize_bits; ++ end <<= sb->s_blocksize_bits; ++ ++ apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, extent_id, &raw_key); ++ raw_key.logical_addr = cpu_to_le64(start); ++ raw_val.len_and_flags = cpu_to_le64(end - start); ++ raw_val.phys_block_num = cpu_to_le64(0); /* It's a hole... */ ++ raw_val.crypto_id = cpu_to_le64(apfs_vol_is_encrypted(sb) ? 
extent_id : 0); ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_file_extent_key(extent_id, start, &query->key); ++ query->flags = APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", extent_id, start); ++ goto out; ++ } ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ if (ret) ++ apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", extent_id, start); ++ dstream->ds_sparse_bytes += end - start; ++ ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_zero_dstream_tail - Zero out stale bytes in a data stream's last block ++ * @dstream: data stream info ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_zero_dstream_tail(struct apfs_dstream_info *dstream) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct inode *inode = NULL; ++ struct page *page = NULL; ++ void *fsdata = NULL; ++ int valid_length; ++ int err; ++ ++ /* No stale bytes if no actual content */ ++ if (dstream->ds_size <= dstream->ds_sparse_bytes) ++ return 0; ++ ++ /* No stale tail if the last block is fully used */ ++ valid_length = dstream->ds_size & (sb->s_blocksize - 1); ++ if (valid_length == 0) ++ return 0; ++ ++ inode = dstream->ds_inode; ++ if (!inode) { ++ /* This should never happen, but be safe */ ++ apfs_alert(sb, "attempt to zero the tail of xattr dstream 0x%llx", dstream->ds_id); ++ return -EFSCORRUPTED; ++ } ++ ++ /* This will take care of the CoW and zeroing */ ++ err = __apfs_write_begin(NULL, inode->i_mapping, inode->i_size, 0, 0, &page, &fsdata); ++ if (err) ++ return err; ++ return __apfs_write_end(NULL, inode->i_mapping, inode->i_size, 0, 0, page, fsdata); ++} ++ ++/** ++ * apfs_zero_bh_tail - Zero out stale bytes in a buffer head ++ * @sb: filesystem superblock ++ * @bh: buffer head to zero ++ * 
@length: length of valid bytes to be left alone ++ */ ++static void apfs_zero_bh_tail(struct super_block *sb, struct buffer_head *bh, u64 length) ++{ ++ ASSERT(buffer_trans(bh)); ++ if (length < sb->s_blocksize) ++ memset(bh->b_data + length, 0, sb->s_blocksize - length); ++} ++ ++/** ++ * apfs_range_in_snap - Check if a given block range overlaps a snapshot ++ * @sb: filesystem superblock ++ * @bno: first block in the range ++ * @blkcnt: block count for the range ++ * @in_snap: on return, the result ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_range_in_snap(struct super_block *sb, u64 bno, u64 blkcnt, bool *in_snap) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_node *extref_root = NULL; ++ struct apfs_query *query = NULL; ++ struct apfs_phys_extent pext = {0}; ++ int ret; ++ ++ /* Avoid the tree queries when we don't even have snapshots */ ++ if (vsb_raw->apfs_num_snapshots == 0) { ++ *in_snap = false; ++ return 0; ++ } ++ ++ /* ++ * Now check if the current physical extent tree has an entry for ++ * these blocks ++ */ ++ extref_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_extentref_tree_oid), APFS_OBJ_PHYSICAL, false /* write */); ++ if (IS_ERR(extref_root)) { ++ apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid)); ++ return PTR_ERR(extref_root); ++ } ++ ++ query = apfs_alloc_query(extref_root, NULL /* parent */); ++ if (!query) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ apfs_init_extent_key(bno, &query->key); ++ query->flags = APFS_QUERY_EXTENTREF; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for paddr 0x%llx", bno); ++ goto out; ++ } ++ if (ret == -ENODATA) { ++ *in_snap = false; ++ ret = 0; ++ goto out; ++ } ++ ++ ret = apfs_phys_ext_from_query(query, &pext); ++ if (ret) { ++ apfs_err(sb, "bad pext record for paddr 0x%llx", bno); ++ goto out; ++ } ++ ++ if 
(pext.bno <= bno && pext.bno + pext.blkcount >= bno + blkcnt) { ++ if (pext.kind == APFS_KIND_NEW) { ++ *in_snap = false; ++ goto out; ++ } ++ } ++ ++ /* ++ * I think the file extent could still be covered by two different ++ * physical extents from the current tree, but it's easier to just ++ * assume the worst here. ++ */ ++ *in_snap = true; ++ ++out: ++ apfs_free_query(query); ++ apfs_node_free(extref_root); ++ return ret; ++} ++ ++/** ++ * apfs_dstream_cache_in_snap - Check if the cached extent overlaps a snapshot ++ * @dstream: the data stream to check ++ * @in_snap: on return, the result ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_dstream_cache_in_snap(struct apfs_dstream_info *dstream, bool *in_snap) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_file_extent *cache = NULL; ++ ++ /* All changes to extents get flushed when a snaphot is created */ ++ if (dstream->ds_ext_dirty) { ++ *in_snap = false; ++ return 0; ++ } ++ ++ cache = &dstream->ds_cached_ext; ++ return apfs_range_in_snap(sb, cache->phys_block_num, cache->len >> sb->s_blocksize_bits, in_snap); ++} ++ ++/** ++ * apfs_dstream_get_new_block - Like the get_block_t function, but for dstreams ++ * @dstream: data stream info ++ * @dsblock: logical dstream block to map ++ * @bh_result: buffer head to map (NULL if none) ++ * @bno: if not NULL, the new block number is returned here ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_dstream_get_new_block(struct apfs_dstream_info *dstream, u64 dsblock, struct buffer_head *bh_result, u64 *bno) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_file_extent *cache = NULL; ++ u64 phys_bno, logical_addr, cache_blks, dstream_blks; ++ bool in_snap = true; ++ int err; ++ ++ /* TODO: preallocate tail blocks */ ++ logical_addr = dsblock << sb->s_blocksize_bits; ++ ++ err = apfs_spaceman_allocate_block(sb, &phys_bno, false /* backwards */); ++ if (err) { ++ apfs_err(sb, "block allocation failed"); ++ return err; ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, 1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_alloced, 1); ++ if (bno) ++ *bno = phys_bno; ++ ++ if (bh_result) { ++ apfs_map_bh(bh_result, sb, phys_bno); ++ err = apfs_transaction_join(sb, bh_result); ++ if (err) ++ return err; ++ ++ if (!buffer_uptodate(bh_result)) { ++ /* ++ * Truly new buffers need to be marked as such, to get ++ * zeroed; this also takes care of holes in sparse files ++ */ ++ set_buffer_new(bh_result); ++ } else if (dstream->ds_size > logical_addr) { ++ /* ++ * The last block may have stale data left from a ++ * truncation ++ */ ++ apfs_zero_bh_tail(sb, bh_result, dstream->ds_size - logical_addr); ++ } ++ } ++ ++ dstream_blks = apfs_size_to_blocks(sb, dstream->ds_size); ++ if (dstream_blks < dsblock) { ++ /* ++ * This recurses into apfs_dstream_get_new_block() and dirties ++ * the extent cache, so it must happen before flushing it. 
++ */ ++ err = apfs_zero_dstream_tail(dstream); ++ if (err) { ++ apfs_err(sb, "failed to zero tail for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ } ++ ++ err = apfs_dstream_cache_in_snap(dstream, &in_snap); ++ if (err) ++ return err; ++ ++ cache = &dstream->ds_cached_ext; ++ cache_blks = apfs_size_to_blocks(sb, cache->len); ++ ++ /* TODO: allow dirty caches of several blocks in the middle of a file */ ++ if (!in_snap && apfs_dstream_cache_is_tail(dstream) && ++ logical_addr == cache->logical_addr + cache->len && ++ phys_bno == cache->phys_block_num + cache_blks) { ++ cache->len += sb->s_blocksize; ++ dstream->ds_ext_dirty = true; ++ return 0; ++ } ++ ++ err = apfs_flush_extent_cache(dstream); ++ if (err) { ++ apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ ++ if (dstream_blks < dsblock) { ++ /* ++ * This puts new extents after the reported end of the file, so ++ * it must happen after the flush to avoid conflict with those ++ * extent operations. ++ */ ++ err = apfs_create_hole(dstream, dstream_blks, dsblock); ++ if (err) { ++ apfs_err(sb, "hole creation failed for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ } ++ ++ cache->logical_addr = logical_addr; ++ cache->phys_block_num = phys_bno; ++ cache->len = sb->s_blocksize; ++ dstream->ds_ext_dirty = true; ++ return 0; ++} ++int APFS_GET_NEW_BLOCK_MAXOPS(void) ++{ ++ return APFS_FLUSH_EXTENT_CACHE; ++} ++ ++/** ++ * apfs_dstream_get_new_bno - Allocate a new block inside a dstream ++ * @dstream: data stream info ++ * @dsblock: logical dstream block to allocate ++ * @bno: on return, the new block number ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++int apfs_dstream_get_new_bno(struct apfs_dstream_info *dstream, u64 dsblock, u64 *bno) ++{ ++ return apfs_dstream_get_new_block(dstream, dsblock, NULL /* bh_result */, bno); ++} ++ ++int apfs_get_new_block(struct inode *inode, sector_t iblock, ++ struct buffer_head *bh_result, int create) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ ++ ASSERT(create); ++ return apfs_dstream_get_new_block(&ai->i_dstream, iblock, bh_result, NULL /* bno */); ++} ++ ++/** ++ * apfs_shrink_dstream_last_extent - Shrink last extent of dstream being resized ++ * @dstream: data stream info ++ * @new_size: new size for the whole data stream ++ * ++ * Deletes, shrinks or zeroes the last extent, as needed for the truncation of ++ * the data stream. ++ * ++ * Only works with the last extent, so it needs to be called repeatedly to ++ * complete the truncation. Returns -EAGAIN in that case, or 0 when the process ++ * is complete. Returns other negative error codes in case of failure. ++ */ ++static int apfs_shrink_dstream_last_extent(struct apfs_dstream_info *dstream, loff_t new_size) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_file_extent tail; ++ u64 extent_id = dstream->ds_id; ++ int ret = 0; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_file_extent_key(extent_id, -1, &query->key); ++ query->flags = APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for last extent of id 0x%llx", extent_id); ++ goto out; ++ } ++ ++ if (!apfs_query_found_extent(query)) { ++ /* No more extents, we deleted the whole file already? 
*/ ++ if (new_size) { ++ apfs_err(sb, "missing extent for dstream 0x%llx", extent_id); ++ ret = -EFSCORRUPTED; ++ } else { ++ ret = 0; ++ } ++ goto out; ++ } ++ ++ ret = apfs_extent_from_query(query, &tail); ++ if (ret) { ++ apfs_err(sb, "bad tail extent record on dstream 0x%llx", extent_id); ++ goto out; ++ } ++ ++ if (tail.logical_addr + tail.len < new_size) { ++ apfs_err(sb, "missing extent for dstream 0x%llx", extent_id); ++ ret = -EFSCORRUPTED; /* Tail extent missing */ ++ } else if (tail.logical_addr + tail.len == new_size) { ++ ret = 0; /* Nothing more to be done */ ++ } else if (tail.logical_addr >= new_size) { ++ /* This whole extent needs to go */ ++ ret = apfs_btree_remove(query); ++ if (ret) { ++ apfs_err(sb, "removal failed for id 0x%llx, addr 0x%llx", dstream->ds_id, tail.logical_addr); ++ goto out; ++ } ++ if (apfs_ext_is_hole(&tail)) { ++ dstream->ds_sparse_bytes -= tail.len; ++ } else { ++ ret = apfs_range_put_reference(sb, tail.phys_block_num, tail.len); ++ if (ret) { ++ apfs_err(sb, "failed to put range 0x%llx-0x%llx", tail.phys_block_num, tail.len); ++ goto out; ++ } ++ } ++ ret = apfs_crypto_adj_refcnt(sb, tail.crypto_id, -1); ++ if (ret) { ++ apfs_err(sb, "failed to take crypto id 0x%llx", tail.crypto_id); ++ goto out; ++ } ++ ret = tail.logical_addr == new_size ? 0 : -EAGAIN; ++ } else { ++ /* ++ * The file is being truncated in the middle of this extent. ++ * TODO: preserve the physical tail to be overwritten later. ++ */ ++ new_size = apfs_size_to_blocks(sb, new_size) << sb->s_blocksize_bits; ++ ret = apfs_shrink_extent_tail(query, dstream, new_size); ++ if (ret) ++ apfs_err(sb, "failed to shrink tail of dstream 0x%llx", extent_id); ++ } ++ ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_shrink_dstream - Shrink a data stream's extents to a new length ++ * @dstream: data stream info ++ * @new_size: the new size ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_shrink_dstream(struct apfs_dstream_info *dstream, loff_t new_size) ++{ ++ int ret; ++ ++ do { ++ ret = apfs_shrink_dstream_last_extent(dstream, new_size); ++ } while (ret == -EAGAIN); ++ ++ return ret; ++} ++ ++/** ++ * apfs_truncate - Truncate a data stream's content ++ * @dstream: data stream info ++ * @new_size: the new size ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++int apfs_truncate(struct apfs_dstream_info *dstream, loff_t new_size) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ u64 old_blks, new_blks; ++ struct apfs_file_extent *cache = &dstream->ds_cached_ext; ++ int err; ++ ++ /* TODO: don't write the cached extent if it will be deleted */ ++ err = apfs_flush_extent_cache(dstream); ++ if (err) { ++ apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ dstream->ds_ext_dirty = false; ++ ++ /* TODO: keep the cache valid on truncation */ ++ cache->len = 0; ++ ++ /* "<=", because a partial write may have left extents beyond the end */ ++ if (new_size <= dstream->ds_size) ++ return apfs_shrink_dstream(dstream, new_size); ++ ++ err = apfs_zero_dstream_tail(dstream); ++ if (err) { ++ apfs_err(sb, "failed to zero tail for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ new_blks = apfs_size_to_blocks(sb, new_size); ++ old_blks = apfs_size_to_blocks(sb, dstream->ds_size); ++ return apfs_create_hole(dstream, old_blks, new_blks); ++} ++ ++/** ++ * apfs_dstream_delete_front - Deletes as many leading extents as possible ++ * @sb: filesystem superblock ++ * @ds_id: id for the dstream to delete ++ * ++ * Returns 0 on success, or a negative error code in case of failure, which may ++ * be -ENODATA if there are no more extents, or -EAGAIN if the free queue is ++ * getting too full. 
++ */ ++static int apfs_dstream_delete_front(struct super_block *sb, u64 ds_id) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct apfs_spaceman_free_queue *fq = NULL; ++ struct apfs_query *query = NULL; ++ struct apfs_file_extent head; ++ bool first_match = true; ++ int ret; ++ ++ fq = &sm_raw->sm_fq[APFS_SFQ_MAIN]; ++ if (le64_to_cpu(fq->sfq_count) > TRANSACTION_MAIN_QUEUE_MAX) ++ return -EAGAIN; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_file_extent_key(ds_id, 0, &query->key); ++ query->flags = APFS_QUERY_CAT; ++ ++next_extent: ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for first extent of id 0x%llx", ds_id); ++ goto out; ++ } ++ apfs_query_direct_forward(query); ++ if (!apfs_query_found_extent(query)) { ++ /* ++ * After the original lookup, the query may not be set to the ++ * first extent, but instead to the record that comes right ++ * before. 
++ */ ++ if (first_match) { ++ first_match = false; ++ goto next_extent; ++ } ++ ret = -ENODATA; ++ goto out; ++ } ++ first_match = false; ++ ++ ret = apfs_extent_from_query(query, &head); ++ if (ret) { ++ apfs_err(sb, "bad head extent record on dstream 0x%llx", ds_id); ++ goto out; ++ } ++ ret = apfs_btree_remove(query); ++ if (ret) { ++ apfs_err(sb, "removal failed for id 0x%llx, addr 0x%llx", ds_id, head.logical_addr); ++ goto out; ++ } ++ ++ /* ++ * The official fsck doesn't complain about wrong sparse byte counts ++ * for orphans, so I guess we don't need to update them here ++ */ ++ if (!apfs_ext_is_hole(&head)) { ++ ret = apfs_range_put_reference(sb, head.phys_block_num, head.len); ++ if (ret) { ++ apfs_err(sb, "failed to put range 0x%llx-0x%llx", head.phys_block_num, head.len); ++ goto out; ++ } ++ ret = apfs_crypto_adj_refcnt(sb, head.crypto_id, -1); ++ if (ret) { ++ apfs_err(sb, "failed to take crypto id 0x%llx", head.crypto_id); ++ goto out; ++ } ++ } ++ ++ if (le64_to_cpu(fq->sfq_count) <= TRANSACTION_MAIN_QUEUE_MAX) ++ goto next_extent; ++ ret = -EAGAIN; ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_inode_delete_front - Deletes as many leading extents as possible ++ * @inode: inode to delete ++ * ++ * Tries to delete all extents for @inode, in which case it returns 0. If the ++ * free queue is getting too full, deletes as much as is reasonable and returns ++ * -EAGAIN. May return other negative error codes as well. 
++ */ ++int apfs_inode_delete_front(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_dstream_info *dstream = NULL; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ int ret; ++ ++ if (!ai->i_has_dstream) ++ return 0; ++ ++ dstream = &ai->i_dstream; ++ ret = apfs_flush_extent_cache(dstream); ++ if (ret) { ++ apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id); ++ return ret; ++ } ++ ++ ret = apfs_dstream_delete_front(sb, dstream->ds_id); ++ if (ret == -ENODATA) ++ return 0; ++ return ret; ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) ++loff_t apfs_remap_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, loff_t len, unsigned int remap_flags) ++#else ++int apfs_clone_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, u64 len) ++#endif ++{ ++ struct inode *src_inode = file_inode(src_file); ++ struct inode *dst_inode = file_inode(dst_file); ++ struct apfs_inode_info *src_ai = APFS_I(src_inode); ++ struct apfs_inode_info *dst_ai = APFS_I(dst_inode); ++ struct apfs_dstream_info *src_ds = &src_ai->i_dstream; ++ struct apfs_dstream_info *dst_ds = &dst_ai->i_dstream; ++ struct super_block *sb = src_inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ /* TODO: remember to update the maxops in the future */ ++ struct apfs_max_ops maxops = {0}; ++ const u64 xfield_flags = APFS_INODE_MAINTAIN_DIR_STATS | APFS_INODE_IS_SPARSE | APFS_INODE_HAS_PURGEABLE_FLAGS; ++ int err; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) ++ if (remap_flags & ~(REMAP_FILE_ADVISORY)) ++ return -EINVAL; ++#endif ++ if (src_inode == dst_inode) ++ return -EINVAL; ++ ++ /* We only want to clone whole files, like in the official driver */ ++ if (off != 0 || destoff != 0 || len != 0) ++ return -EINVAL; ++ ++ /* ++ * Clones here work in two steps: first the user creates an empty target ++ * file, and then the user calls the ioctl, which replaces the file 
with ++ * a clone. This is not atomic, of course. ++ */ ++ if (dst_ai->i_has_dstream || dst_ai->i_bsd_flags & APFS_INOBSD_COMPRESSED) { ++ apfs_warn(sb, "clones can only replace freshly created files"); ++ return -EOPNOTSUPP; ++ } ++ if (dst_ai->i_int_flags & xfield_flags) { ++ apfs_warn(sb, "clone can't replace a file that has xfields"); ++ return -EOPNOTSUPP; ++ } ++ ++ if (!src_ai->i_has_dstream) { ++ apfs_warn(sb, "can't clone a file with no dstream"); ++ return -EOPNOTSUPP; ++ } ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ apfs_inode_join_transaction(sb, src_inode); ++ apfs_inode_join_transaction(sb, dst_inode); ++ ++ err = apfs_flush_extent_cache(src_ds); ++ if (err) { ++ apfs_err(sb, "extent cache flush failed for dstream 0x%llx", src_ds->ds_id); ++ goto fail; ++ } ++ err = apfs_dstream_adj_refcnt(src_ds, +1); ++ if (err) { ++ apfs_err(sb, "failed to take dstream id 0x%llx", src_ds->ds_id); ++ goto fail; ++ } ++ src_ds->ds_shared = true; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ dst_inode->i_mtime = dst_inode->i_ctime = current_time(dst_inode); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ dst_inode->i_mtime = inode_set_ctime_current(dst_inode); ++#else ++ inode_set_mtime_to_ts(dst_inode, inode_set_ctime_current(dst_inode)); ++#endif ++ dst_inode->i_size = src_inode->i_size; ++ dst_ai->i_key_class = src_ai->i_key_class; ++ dst_ai->i_int_flags = src_ai->i_int_flags; ++ dst_ai->i_bsd_flags = src_ai->i_bsd_flags; ++ dst_ai->i_has_dstream = true; ++ ++ dst_ds->ds_sb = src_ds->ds_sb; ++ dst_ds->ds_inode = dst_inode; ++ dst_ds->ds_id = src_ds->ds_id; ++ dst_ds->ds_size = src_ds->ds_size; ++ dst_ds->ds_sparse_bytes = src_ds->ds_sparse_bytes; ++ dst_ds->ds_cached_ext = src_ds->ds_cached_ext; ++ dst_ds->ds_ext_dirty = false; ++ dst_ds->ds_shared = true; ++ ++ dst_ai->i_int_flags |= APFS_INODE_WAS_EVER_CLONED | APFS_INODE_WAS_CLONED; ++ src_ai->i_int_flags |= APFS_INODE_WAS_EVER_CLONED; ++ ++ /* ++ * The sparse 
flag is the important one here: if we need it, it will get ++ * set later by apfs_update_inode(), after the xfield gets created. ++ */ ++ dst_ai->i_int_flags &= ~xfield_flags; ++ ++ /* ++ * Commit the transaction to make sure all buffers in the source inode ++ * go through copy-on-write. This is a bit excessive, but I don't expect ++ * clones to be created often enough for it to matter. ++ */ ++ sbi->s_nxi->nx_transaction.t_state |= APFS_NX_TRANS_FORCE_COMMIT; ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return dst_ds->ds_size; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_extent_create_record - Create a logical extent record for a dstream id ++ * @sb: filesystem superblock ++ * @dstream_id: the dstream id ++ * @extent: extent info for the record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_extent_create_record(struct super_block *sb, u64 dstream_id, struct apfs_file_extent *extent) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ struct apfs_file_extent_val raw_val; ++ struct apfs_file_extent_key raw_key; ++ int ret = 0; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_file_extent_key(dstream_id, extent->logical_addr, &query->key); ++ query->flags = APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for id 0x%llx, addr 0x%llx", dstream_id, extent->logical_addr); ++ goto out; ++ } ++ ++ apfs_key_set_hdr(APFS_TYPE_FILE_EXTENT, dstream_id, &raw_key); ++ raw_key.logical_addr = cpu_to_le64(extent->logical_addr); ++ raw_val.len_and_flags = cpu_to_le64(extent->len); ++ raw_val.phys_block_num = cpu_to_le64(extent->phys_block_num); ++ raw_val.crypto_id = cpu_to_le64(apfs_vol_is_encrypted(sb) ? 
dstream_id : 0); /* TODO */ ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ if (ret) ++ apfs_err(sb, "insertion failed for id 0x%llx, addr 0x%llx", dstream_id, extent->logical_addr); ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_put_single_extent - Put a reference to a single extent ++ * @sb: filesystem superblock ++ * @paddr_end: first block after the extent to put ++ * @paddr_min: don't put references before this block ++ * ++ * Puts a reference to the physical extent range that ends in paddr. Sets ++ * @paddr_end to the beginning of the extent, so that the caller can continue ++ * with the previous one. Returns 0 on success, or a negative error code in ++ * case of failure. ++ * ++ * TODO: unify this with apfs_take_single_extent(), they are almost the same. ++ */ ++static int apfs_put_single_extent(struct super_block *sb, u64 *paddr_end, u64 paddr_min) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_node *extref_root = NULL; ++ struct apfs_key key; ++ struct apfs_query *query = NULL; ++ struct apfs_phys_extent prev_ext; ++ u64 prev_start, prev_end; ++ bool cropped_head = false, cropped_tail = false; ++ struct apfs_file_extent tmp = {0}; /* TODO: clean up all the fake extent interfaces? 
*/ ++ int ret; ++ ++ extref_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_extentref_tree_oid), APFS_OBJ_PHYSICAL, true /* write */); ++ if (IS_ERR(extref_root)) { ++ apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid)); ++ return PTR_ERR(extref_root); ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(extref_root->object.oid); ++ ++ apfs_init_extent_key(*paddr_end - 1, &key); ++ ++restart: ++ query = apfs_alloc_query(extref_root, NULL /* parent */); ++ if (!query) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ query->key = key; ++ query->flags = APFS_QUERY_EXTENTREF; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for paddr 0x%llx", *paddr_end - 1); ++ goto out; ++ } ++ ++ if (ret == -ENODATA) { ++ /* The whole range to put is part of a snapshot */ ++ tmp.phys_block_num = paddr_min; ++ tmp.len = (*paddr_end - paddr_min) << sb->s_blocksize_bits; ++ ret = apfs_create_update_pext(query, &tmp, -1); ++ *paddr_end = paddr_min; ++ goto out; ++ } ++ ++ ret = apfs_phys_ext_from_query(query, &prev_ext); ++ if (ret) { ++ apfs_err(sb, "bad pext record over paddr 0x%llx", *paddr_end - 1); ++ goto out; ++ } ++ prev_start = prev_ext.bno; ++ prev_end = prev_ext.bno + prev_ext.blkcount; ++ if (prev_end < *paddr_end) { ++ /* The extent to put is part of a snapshot */ ++ tmp.phys_block_num = MAX(prev_end, paddr_min); ++ tmp.len = (*paddr_end - tmp.phys_block_num) << sb->s_blocksize_bits; ++ ret = apfs_create_update_pext(query, &tmp, -1); ++ *paddr_end = tmp.phys_block_num; ++ goto out; ++ } ++ ++ if ((cropped_tail && prev_end > *paddr_end) || (cropped_head && prev_start < paddr_min)) { ++ /* This should never happen, but be safe */ ++ apfs_alert(sb, "recursion cropping physical extent 0x%llx-0x%llx", prev_start, prev_end); ++ ret = -EFSCORRUPTED; ++ goto out; ++ } ++ ++ if (prev_end > *paddr_end) { ++ ret = 
apfs_split_phys_ext(query, *paddr_end); ++ if (ret) { ++ apfs_err(sb, "failed to split pext at 0x%llx", *paddr_end); ++ goto out; ++ } ++ /* The split may make the query invalid */ ++ apfs_free_query(query); ++ cropped_tail = true; ++ goto restart; ++ } ++ ++ if (prev_start < paddr_min) { ++ ret = apfs_split_phys_ext(query, paddr_min); ++ if (ret) { ++ apfs_err(sb, "failed to split pext at 0x%llx", paddr_min); ++ goto out; ++ } ++ /* The split may make the query invalid */ ++ apfs_free_query(query); ++ cropped_head = true; ++ goto restart; ++ } ++ ++ /* The extent to put already exists */ ++ ret = apfs_put_phys_extent(&prev_ext, query); ++ if (ret) ++ apfs_err(sb, "failed to put pext at 0x%llx", prev_ext.bno); ++ *paddr_end = prev_start; ++ ++out: ++ apfs_free_query(query); ++ apfs_node_free(extref_root); ++ return ret; ++} ++ ++/** ++ * apfs_range_put_reference - Put a reference to a physical range ++ * @sb: filesystem superblock ++ * @paddr: first block of the range ++ * @length: length of the range (in bytes) ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_range_put_reference(struct super_block *sb, u64 paddr, u64 length) ++{ ++ u64 extent_end; ++ int err; ++ ++ ASSERT(paddr); ++ ++ extent_end = paddr + (length >> sb->s_blocksize_bits); ++ while (extent_end > paddr) { ++ err = apfs_put_single_extent(sb, &extent_end, paddr); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_take_single_extent - Take a reference to a single extent ++ * @sb: filesystem superblock ++ * @paddr_end: first block after the extent to take ++ * @paddr_min: don't take references before this block ++ * ++ * Takes a reference to the physical extent range that ends in paddr. Sets ++ * @paddr_end to the beginning of the extent, so that the caller can continue ++ * with the previous one. Returns 0 on success, or a negative error code in ++ * case of failure. 
++ */ ++static int apfs_take_single_extent(struct super_block *sb, u64 *paddr_end, u64 paddr_min) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_node *extref_root = NULL; ++ struct apfs_key key; ++ struct apfs_query *query = NULL; ++ struct apfs_phys_extent prev_ext; ++ u64 prev_start, prev_end; ++ bool cropped_head = false, cropped_tail = false; ++ struct apfs_file_extent tmp = {0}; /* TODO: clean up all the fake extent interfaces? */ ++ int ret; ++ ++ extref_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_extentref_tree_oid), APFS_OBJ_PHYSICAL, true /* write */); ++ if (IS_ERR(extref_root)) { ++ apfs_err(sb, "failed to read extref root 0x%llx", le64_to_cpu(vsb_raw->apfs_extentref_tree_oid)); ++ return PTR_ERR(extref_root); ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(extref_root->object.oid); ++ ++ apfs_init_extent_key(*paddr_end - 1, &key); ++ ++restart: ++ query = apfs_alloc_query(extref_root, NULL /* parent */); ++ if (!query) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ query->key = key; ++ query->flags = APFS_QUERY_EXTENTREF; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for paddr 0x%llx", *paddr_end - 1); ++ goto out; ++ } ++ ++ if (ret == -ENODATA) { ++ /* The whole range to take is part of a snapshot */ ++ tmp.phys_block_num = paddr_min; ++ tmp.len = (*paddr_end - paddr_min) << sb->s_blocksize_bits; ++ ret = apfs_create_update_pext(query, &tmp, +1); ++ *paddr_end = paddr_min; ++ goto out; ++ } ++ ++ ret = apfs_phys_ext_from_query(query, &prev_ext); ++ if (ret) { ++ apfs_err(sb, "bad pext record over paddr 0x%llx", *paddr_end - 1); ++ goto out; ++ } ++ prev_start = prev_ext.bno; ++ prev_end = prev_ext.bno + prev_ext.blkcount; ++ if (prev_end < *paddr_end) { ++ /* The extent to take is part of a snapshot */ ++ tmp.phys_block_num = MAX(prev_end, paddr_min); ++ tmp.len = (*paddr_end - tmp.phys_block_num) << 
sb->s_blocksize_bits; ++ ret = apfs_create_update_pext(query, &tmp, +1); ++ *paddr_end = tmp.phys_block_num; ++ goto out; ++ } ++ ++ if ((cropped_tail && prev_end > *paddr_end) || (cropped_head && prev_start < paddr_min)) { ++ /* This should never happen, but be safe */ ++ apfs_alert(sb, "recursion cropping physical extent 0x%llx-0x%llx", prev_start, prev_end); ++ ret = -EFSCORRUPTED; ++ goto out; ++ } ++ ++ if (prev_end > *paddr_end) { ++ ret = apfs_split_phys_ext(query, *paddr_end); ++ if (ret) { ++ apfs_err(sb, "failed to split pext at 0x%llx", *paddr_end); ++ goto out; ++ } ++ /* The split may make the query invalid */ ++ apfs_free_query(query); ++ cropped_tail = true; ++ goto restart; ++ } ++ ++ if (prev_start < paddr_min) { ++ ret = apfs_split_phys_ext(query, paddr_min); ++ if (ret) { ++ apfs_err(sb, "failed to split pext at 0x%llx", paddr_min); ++ goto out; ++ } ++ /* The split may make the query invalid */ ++ apfs_free_query(query); ++ cropped_head = true; ++ goto restart; ++ } ++ ++ /* The extent to take already exists */ ++ ret = apfs_take_phys_extent(&prev_ext, query); ++ if (ret) ++ apfs_err(sb, "failed to take pext at 0x%llx", prev_ext.bno); ++ *paddr_end = prev_start; ++ ++out: ++ apfs_free_query(query); ++ apfs_node_free(extref_root); ++ return ret; ++} ++ ++/** ++ * apfs_range_take_reference - Take a reference to a physical range ++ * @sb: filesystem superblock ++ * @paddr: first block of the range ++ * @length: length of the range (in bytes) ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_range_take_reference(struct super_block *sb, u64 paddr, u64 length) ++{ ++ u64 extent_end; ++ int err; ++ ++ ASSERT(paddr); ++ ++ extent_end = paddr + (length >> sb->s_blocksize_bits); ++ while (extent_end > paddr) { ++ err = apfs_take_single_extent(sb, &extent_end, paddr); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_clone_single_extent - Make a copy of an extent in a dstream to a new one ++ * @dstream: old dstream ++ * @new_id: id of the new dstream ++ * @log_addr: logical address for the extent ++ * ++ * Duplicates the logical extent, and updates the references to the physical ++ * extents as required. Sets @addr to the end of the extent, so that the caller ++ * can continue in the same place. Returns 0 on success, or a negative error ++ * code in case of failure. ++ */ ++static int apfs_clone_single_extent(struct apfs_dstream_info *dstream, u64 new_id, u64 *log_addr) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_file_extent extent; ++ int err; ++ ++ err = apfs_extent_read(dstream, *log_addr >> sb->s_blocksize_bits, &extent); ++ if (err) { ++ apfs_err(sb, "failed to read an extent to clone for dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ err = apfs_extent_create_record(sb, new_id, &extent); ++ if (err) { ++ apfs_err(sb, "failed to create extent record for clone of dstream 0x%llx", dstream->ds_id); ++ return err; ++ } ++ ++ if (!apfs_ext_is_hole(&extent)) { ++ err = apfs_range_take_reference(sb, extent.phys_block_num, extent.len); ++ if (err) { ++ apfs_err(sb, "failed to take a reference to physical range 0x%llx-0x%llx", extent.phys_block_num, extent.len); ++ return err; ++ } ++ } ++ ++ *log_addr += extent.len; ++ return 0; ++} ++ ++/** ++ * apfs_clone_extents - Make a copy of all extents in a dstream to a new one ++ * @dstream: old dstream ++ * @new_id: id for the new dstream ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++int apfs_clone_extents(struct apfs_dstream_info *dstream, u64 new_id) ++{ ++ u64 next = 0; ++ int err; ++ ++ while (next < dstream->ds_size) { ++ err = apfs_clone_single_extent(dstream, new_id, &next); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_nonsparse_dstream_read - Read from a dstream without holes ++ * @dstream: dstream to read ++ * @buf: destination buffer ++ * @count: exact number of bytes to read ++ * @offset: dstream offset to read from ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_nonsparse_dstream_read(struct apfs_dstream_info *dstream, void *buf, size_t count, u64 offset) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ u64 logical_start_block, logical_end_block, log_bno, blkcnt, idx; ++ struct buffer_head **bhs = NULL; ++ int ret = 0; ++ ++ /* Save myself from thinking about overflow here */ ++ if (count >= APFS_MAX_FILE_SIZE || offset >= APFS_MAX_FILE_SIZE) { ++ apfs_err(sb, "dstream read overflow (0x%llx-0x%llx)", offset, (unsigned long long)count); ++ return -EFBIG; ++ } ++ ++ if (offset + count > dstream->ds_size) { ++ apfs_err(sb, "reading past the end (0x%llx-0x%llx)", offset, (unsigned long long)count); ++ /* No caller is expected to legitimately read out-of-bounds */ ++ return -EFSCORRUPTED; ++ } ++ ++ logical_start_block = offset >> sb->s_blocksize_bits; ++ logical_end_block = (offset + count + sb->s_blocksize - 1) >> sb->s_blocksize_bits; ++ blkcnt = logical_end_block - logical_start_block; ++ bhs = kcalloc(blkcnt, sizeof(*bhs), GFP_KERNEL); ++ if (!bhs) ++ return -ENOMEM; ++ ++ for (log_bno = logical_start_block; log_bno < logical_end_block; log_bno++) { ++ struct buffer_head *bh = NULL; ++ u64 bno = 0; ++ ++ idx = log_bno - logical_start_block; ++ ++ ret = apfs_logic_to_phys_bno(dstream, log_bno, &bno); ++ if (ret) ++ goto out; ++ if (bno == 0) { ++ apfs_err(sb, "nonsparse dstream has a hole"); ++ ret = -EFSCORRUPTED; ++ goto out; ++ } ++ ++#if 
LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ bhs[idx] = __getblk_gfp(APFS_NXI(sb)->nx_bdev, bno, sb->s_blocksize, __GFP_MOVABLE); ++#else ++ bhs[idx] = bdev_getblk(APFS_NXI(sb)->nx_bdev, bno, sb->s_blocksize, __GFP_MOVABLE); ++#endif ++ if (!bhs[idx]) { ++ apfs_err(sb, "failed to map block 0x%llx", bno); ++ ret = -EIO; ++ goto out; ++ } ++ ++ bh = bhs[idx]; ++ if (!buffer_uptodate(bh)) { ++ get_bh(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_read_sync; ++ apfs_submit_bh(REQ_OP_READ, 0, bh); ++ } ++ } ++ ++ for (log_bno = logical_start_block; log_bno < logical_end_block; log_bno++) { ++ int off_in_block, left_in_block; ++ ++ idx = log_bno - logical_start_block; ++ wait_on_buffer(bhs[idx]); ++ if (!buffer_uptodate(bhs[idx])) { ++ apfs_err(sb, "failed to read a block"); ++ ret = -EIO; ++ goto out; ++ } ++ ++ if (log_bno == logical_start_block) ++ off_in_block = offset & (sb->s_blocksize - 1); ++ else ++ off_in_block = 0; ++ ++ if (log_bno == logical_end_block - 1) ++ left_in_block = count + offset - (log_bno << sb->s_blocksize_bits) - off_in_block; ++ else ++ left_in_block = sb->s_blocksize - off_in_block; ++ ++ memcpy(buf, bhs[idx]->b_data + off_in_block, left_in_block); ++ buf += left_in_block; ++ } ++ ++out: ++ if (bhs) { ++ for (idx = 0; idx < blkcnt; idx++) ++ brelse(bhs[idx]); ++ kfree(bhs); ++ } ++ return ret; ++} ++ ++/** ++ * apfs_nonsparse_dstream_preread - Attempt to preread a dstream without holes ++ * @dstream: dstream to preread ++ * ++ * Requests reads for all blocks of a dstream, but doesn't wait for the result. 
++ */ ++void apfs_nonsparse_dstream_preread(struct apfs_dstream_info *dstream) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ u64 logical_end_block, log_bno; ++ ++ logical_end_block = (dstream->ds_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; ++ ++ for (log_bno = 0; log_bno < logical_end_block; log_bno++) { ++ struct buffer_head *bh = NULL; ++ u64 bno = 0; ++ int ret; ++ ++ ret = apfs_logic_to_phys_bno(dstream, log_bno, &bno); ++ if (ret || bno == 0) ++ return; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ bh = __getblk_gfp(APFS_NXI(sb)->nx_bdev, bno, sb->s_blocksize, __GFP_MOVABLE); ++#else ++ bh = bdev_getblk(APFS_NXI(sb)->nx_bdev, bno, sb->s_blocksize, __GFP_MOVABLE); ++#endif ++ if (!bh) ++ return; ++ if (!buffer_uptodate(bh)) { ++ get_bh(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_read_sync; ++ apfs_submit_bh(REQ_OP_READ, 0, bh); ++ } ++ brelse(bh); ++ bh = NULL; ++ } ++} +diff --git a/fs/apfs/file.c b/fs/apfs/file.c +new file mode 100644 +index 000000000..c37d59a02 +--- /dev/null ++++ b/fs/apfs/file.c +@@ -0,0 +1,220 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. 
Fernández ++ */ ++ ++#include "apfs.h" ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++#include ++#endif ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 17, 0) ++typedef int vm_fault_t; ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) ++static vm_fault_t apfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++#else ++static vm_fault_t apfs_page_mkwrite(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++#endif ++ struct page *page = vmf->page; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0) ++ struct folio *folio; ++#endif ++ struct inode *inode = file_inode(vma->vm_file); ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *bh, *head; ++ vm_fault_t ret = VM_FAULT_LOCKED; ++ struct apfs_max_ops maxops; ++ int blkcount = PAGE_SIZE >> inode->i_blkbits; ++ unsigned int blocksize, block_start, len; ++ u64 size; ++ int err = 0; ++ ++ sb_start_pagefault(inode->i_sb); ++ file_update_time(vma->vm_file); ++ ++ /* Placeholder values, I need to get back to this in the future */ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS() + ++ blkcount * APFS_GET_NEW_BLOCK_MAXOPS(); ++ maxops.blks = blkcount; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ goto out; ++ apfs_inode_join_transaction(sb, inode); ++ ++ err = apfs_inode_create_exclusive_dstream(inode); ++ if (err) { ++ apfs_err(sb, "dstream creation failed for ino 0x%llx", apfs_ino(inode)); ++ goto out_abort; ++ } ++ ++ lock_page(page); ++ wait_for_stable_page(page); ++ if (page->mapping != inode->i_mapping) { ++ ret = VM_FAULT_NOPAGE; ++ goto out_unlock; ++ } ++ ++ if (!page_has_buffers(page)) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ create_empty_buffers(page, sb->s_blocksize, 0); ++#else ++ folio = page_folio(page); ++ bh = folio_buffers(folio); ++ if (!bh) ++ bh = create_empty_buffers(folio, sb->s_blocksize, 0); ++#endif ++ } ++ ++ size = i_size_read(inode); ++ if (page->index == size >> PAGE_SHIFT) ++ len = size & ~PAGE_MASK; ++ 
else ++ len = PAGE_SIZE; ++ ++ /* The blocks were read on the fault, mark them as unmapped for CoW */ ++ head = page_buffers(page); ++ blocksize = head->b_size; ++ for (bh = head, block_start = 0; bh != head || !block_start; ++ block_start += blocksize, bh = bh->b_this_page) { ++ if (len > block_start) { ++ /* If it's not a hole, the fault read it already */ ++ ASSERT(!buffer_mapped(bh) || buffer_uptodate(bh)); ++ if (buffer_trans(bh)) ++ continue; ++ clear_buffer_mapped(bh); ++ } ++ } ++ unlock_page(page); /* XXX: race? */ ++ ++ err = block_page_mkwrite(vma, vmf, apfs_get_new_block); ++ if (err) { ++ apfs_err(sb, "mkwrite failed for ino 0x%llx", apfs_ino(inode)); ++ goto out_abort; ++ } ++ set_page_dirty(page); ++ ++ /* An immediate commit would leave the page unlocked */ ++ APFS_SB(sb)->s_nxi->nx_transaction.t_state |= APFS_NX_TRANS_DEFER_COMMIT; ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto out_unlock; ++ goto out; ++ ++out_unlock: ++ unlock_page(page); ++out_abort: ++ apfs_transaction_abort(sb); ++out: ++ if (err) ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ ret = block_page_mkwrite_return(err); ++#else ++ ret = vmf_fs_error(err); ++#endif ++ sb_end_pagefault(inode->i_sb); ++ return ret; ++} ++ ++static const struct vm_operations_struct apfs_file_vm_ops = { ++ .fault = filemap_fault, ++ .map_pages = filemap_map_pages, ++ .page_mkwrite = apfs_page_mkwrite, ++}; ++ ++int apfs_file_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ struct address_space *mapping = file->f_mapping; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ if (!mapping->a_ops->read_folio) ++#else ++ if (!mapping->a_ops->readpage) ++#endif ++ return -ENOEXEC; ++ file_accessed(file); ++ vma->vm_ops = &apfs_file_vm_ops; ++ return 0; ++} ++ ++/* ++ * Just flush the whole transaction for now (TODO), since that's technically ++ * correct and easy to implement. 
++ */ ++int apfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) ++{ ++ struct inode *inode = file->f_mapping->host; ++ struct super_block *sb = inode->i_sb; ++ ++ return apfs_sync_fs(sb, true /* wait */); ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++static ssize_t apfs_copy_file_range(struct file *src_file, loff_t src_off, ++ struct file *dst_file, loff_t dst_off, ++ size_t len, unsigned int flags) ++{ ++ return (splice_copy_file_range(src_file, src_off, ++ dst_file, dst_off, len)); ++} ++#endif ++ ++const struct file_operations apfs_file_operations = { ++ .llseek = generic_file_llseek, ++ .read_iter = generic_file_read_iter, ++ .write_iter = generic_file_write_iter, ++ .mmap = apfs_file_mmap, ++ .open = generic_file_open, ++ .fsync = apfs_fsync, ++ .unlocked_ioctl = apfs_file_ioctl, ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++ .copy_file_range = apfs_copy_file_range, ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) ++ .copy_file_range = generic_copy_file_range, ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) ++ .remap_file_range = apfs_remap_file_range, ++#else ++ .clone_file_range = apfs_clone_file_range, ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) ++ .splice_read = generic_file_splice_read, ++#else ++ .splice_read = filemap_splice_read, ++#endif ++ .splice_write = iter_file_splice_write, ++}; ++ ++#if LINUX_VERSION_CODE == KERNEL_VERSION(5, 3, 0) ++/* ++ * This is needed mainly to test clones with xfstests, so we only support the ++ * kernel version I use during testing. TODO: support all kernel versions. 
++ */ ++int apfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) ++{ ++ return generic_block_fiemap(inode, fieinfo, start, len, apfs_get_block); ++} ++#endif ++ ++const struct inode_operations apfs_file_inode_operations = { ++ .getattr = apfs_getattr, ++ .listxattr = apfs_listxattr, ++ .setattr = apfs_setattr, ++ .update_time = apfs_update_time, ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0) ++ .fileattr_get = apfs_fileattr_get, ++ .fileattr_set = apfs_fileattr_set, ++#endif ++#if LINUX_VERSION_CODE == KERNEL_VERSION(5, 3, 0) ++ .fiemap = apfs_fiemap, ++#endif ++}; +diff --git a/fs/apfs/inode.c b/fs/apfs/inode.c +new file mode 100644 +index 000000000..71a1dca38 +--- /dev/null ++++ b/fs/apfs/inode.c +@@ -0,0 +1,2569 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "apfs.h" ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++#include ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0) ++#include ++#endif ++ ++#define MAX_PFK_LEN 512 ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ ++static int apfs_read_folio(struct file *file, struct folio *folio) ++{ ++ return mpage_read_folio(folio, apfs_get_block); ++} ++ ++#else ++ ++static int apfs_readpage(struct file *file, struct page *page) ++{ ++ return mpage_readpage(page, apfs_get_block); ++} ++ ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) /* Misses mpage_readpages() */ ++ ++static void apfs_readahead(struct readahead_control *rac) ++{ ++ mpage_readahead(rac, apfs_get_block); ++} ++ ++#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) */ ++ ++static int apfs_readpages(struct file *file, struct address_space *mapping, ++ struct list_head *pages, unsigned int nr_pages) ++{ ++ return mpage_readpages(mapping, pages, nr_pages, apfs_get_block); ++} ++ ++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) */ ++ 
++/** ++ * apfs_create_dstream_rec - Create a data stream record ++ * @dstream: data stream info ++ * ++ * Does nothing if the record already exists. TODO: support cloned files. ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_create_dstream_rec(struct apfs_dstream_info *dstream) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_dstream_id_key raw_key; ++ struct apfs_dstream_id_val raw_val; ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_dstream_id_key(dstream->ds_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret != -ENODATA) /* Either an error, or the record already exists */ ++ goto out; ++ ++ apfs_key_set_hdr(APFS_TYPE_DSTREAM_ID, dstream->ds_id, &raw_key); ++ raw_val.refcnt = cpu_to_le32(1); ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ if (ret) { ++ apfs_err(sb, "insertion failed for id 0x%llx", dstream->ds_id); ++ goto out; ++ } ++out: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_CREATE_DSTREAM_REC_MAXOPS 1 ++ ++static int apfs_check_dstream_refcnt(struct inode *inode); ++static int apfs_put_dstream_rec(struct apfs_dstream_info *dstream); ++ ++/** ++ * apfs_inode_create_exclusive_dstream - Make an inode's dstream not shared ++ * @inode: the vfs inode ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++int apfs_inode_create_exclusive_dstream(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct apfs_dstream_info *dstream = &ai->i_dstream; ++ u64 new_id; ++ int err; ++ ++ if (!ai->i_has_dstream || !dstream->ds_shared) ++ return 0; ++ ++ /* ++ * The ds_shared field is not updated when the other user of the ++ * dstream puts it, so it could be a false positive. Check it again ++ * before actually putting the dstream. The double query is wasteful, ++ * but I don't know if it makes sense to optimize this (TODO). ++ */ ++ err = apfs_check_dstream_refcnt(inode); ++ if (err) { ++ apfs_err(sb, "failed to check refcnt for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ if (!dstream->ds_shared) ++ return 0; ++ err = apfs_put_dstream_rec(dstream); ++ if (err) { ++ apfs_err(sb, "failed to put dstream for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ new_id = le64_to_cpu(vsb_raw->apfs_next_obj_id); ++ le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1); ++ ++ err = apfs_clone_extents(dstream, new_id); ++ if (err) { ++ apfs_err(sb, "failed clone extents for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ dstream->ds_id = new_id; ++ err = apfs_create_dstream_rec(dstream); ++ if (err) { ++ apfs_err(sb, "failed to create dstream for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ dstream->ds_shared = false; ++ return 0; ++} ++ ++/** ++ * apfs_inode_create_dstream_rec - Create the data stream record for an inode ++ * @inode: the vfs inode ++ * ++ * Does nothing if the record already exists. TODO: support cloned files. ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_inode_create_dstream_rec(struct inode *inode) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ int err; ++ ++ if (ai->i_has_dstream) ++ return apfs_inode_create_exclusive_dstream(inode); ++ ++ err = apfs_create_dstream_rec(&ai->i_dstream); ++ if (err) ++ return err; ++ ++ ai->i_has_dstream = true; ++ return 0; ++} ++ ++/** ++ * apfs_dstream_adj_refcnt - Adjust dstream record refcount ++ * @dstream: data stream info ++ * @delta: desired change in reference count ++ * ++ * Deletes the record if the reference count goes to zero. Returns 0 on success ++ * or a negative error code in case of failure. ++ */ ++int apfs_dstream_adj_refcnt(struct apfs_dstream_info *dstream, u32 delta) ++{ ++ struct super_block *sb = dstream->ds_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_dstream_id_val raw_val; ++ void *raw = NULL; ++ u32 refcnt; ++ int ret; ++ ++ ASSERT(APFS_I(dstream->ds_inode)->i_has_dstream); ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_dstream_id_key(dstream->ds_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx", dstream->ds_id); ++ if (ret == -ENODATA) ++ ret = -EFSCORRUPTED; ++ goto out; ++ } ++ ++ if (query->len != sizeof(raw_val)) { ++ apfs_err(sb, "bad value length (%d)", query->len); ++ ret = -EFSCORRUPTED; ++ goto out; ++ } ++ raw = query->node->object.data; ++ raw_val = *(struct apfs_dstream_id_val *)(raw + query->off); ++ refcnt = le32_to_cpu(raw_val.refcnt); ++ ++ refcnt += delta; ++ if (refcnt == 0) { ++ ret = apfs_btree_remove(query); ++ if (ret) ++ apfs_err(sb, "removal failed for id 0x%llx", dstream->ds_id); ++ goto out; ++ } ++ ++ raw_val.refcnt = cpu_to_le32(refcnt); ++ ret = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, &raw_val, sizeof(raw_val)); ++ if (ret) ++ apfs_err(sb, 
"update failed for id 0x%llx", dstream->ds_id); ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_put_dstream_rec - Put a reference for a data stream record ++ * @dstream: data stream info ++ * ++ * Deletes the record if the reference count goes to zero. Returns 0 on success ++ * or a negative error code in case of failure. ++ */ ++static int apfs_put_dstream_rec(struct apfs_dstream_info *dstream) ++{ ++ struct apfs_inode_info *ai = APFS_I(dstream->ds_inode); ++ ++ if (!ai->i_has_dstream) ++ return 0; ++ return apfs_dstream_adj_refcnt(dstream, -1); ++} ++ ++/** ++ * apfs_create_crypto_rec - Create the crypto state record for an inode ++ * @inode: the vfs inode ++ * ++ * Does nothing if the record already exists. TODO: support cloned files. ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_create_crypto_rec(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ struct apfs_query *query; ++ struct apfs_crypto_state_key raw_key; ++ int ret; ++ ++ if (inode->i_size || inode->i_blocks) /* Already has a dstream */ ++ return 0; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_crypto_state_key(dstream->ds_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret != -ENODATA) /* Either an error, or the record already exists */ ++ goto out; ++ ++ apfs_key_set_hdr(APFS_TYPE_CRYPTO_STATE, dstream->ds_id, &raw_key); ++ if (sbi->s_dflt_pfk) { ++ struct apfs_crypto_state_val *raw_val = sbi->s_dflt_pfk; ++ unsigned int key_len = le16_to_cpu(raw_val->state.key_len); ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), raw_val, sizeof(*raw_val) + key_len); ++ if (ret) ++ apfs_err(sb, "insertion failed for id 0x%llx", dstream->ds_id); ++ } else { ++ struct 
apfs_crypto_state_val raw_val; ++ ++ raw_val.refcnt = cpu_to_le32(1); ++ raw_val.state.major_version = cpu_to_le16(APFS_WMCS_MAJOR_VERSION); ++ raw_val.state.minor_version = cpu_to_le16(APFS_WMCS_MINOR_VERSION); ++ raw_val.state.cpflags = 0; ++ raw_val.state.persistent_class = cpu_to_le32(APFS_PROTECTION_CLASS_F); ++ raw_val.state.key_os_version = 0; ++ raw_val.state.key_revision = cpu_to_le16(1); ++ raw_val.state.key_len = cpu_to_le16(0); ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ if (ret) ++ apfs_err(sb, "insertion failed for id 0x%llx", dstream->ds_id); ++ } ++out: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_CREATE_CRYPTO_REC_MAXOPS 1 ++ ++/** ++ * apfs_dflt_key_class - Returns default key class for files in volume ++ * @sb: volume superblock ++ */ ++static unsigned int apfs_dflt_key_class(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ ++ if (!sbi->s_dflt_pfk) ++ return APFS_PROTECTION_CLASS_F; ++ ++ return le32_to_cpu(sbi->s_dflt_pfk->state.persistent_class); ++} ++ ++/** ++ * apfs_create_crypto_rec - Adjust crypto state record refcount ++ * @sb: volume superblock ++ * @crypto_id: crypto_id to adjust ++ * @delta: desired change in reference count ++ * ++ * This function is used when adding or removing extents, as each extent holds ++ * a reference to the crypto ID. It should also be used when removing inodes, ++ * and in that case it should also remove the crypto record (TODO). 
++ */ ++int apfs_crypto_adj_refcnt(struct super_block *sb, u64 crypto_id, int delta) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_crypto_state_val *raw_val; ++ char *raw; ++ int ret; ++ ++ if (!crypto_id) ++ return 0; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_crypto_state_key(crypto_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx", crypto_id); ++ goto out; ++ } ++ ++ ret = apfs_query_join_transaction(query); ++ if (ret) { ++ apfs_err(sb, "query join failed"); ++ return ret; ++ } ++ raw = query->node->object.data; ++ raw_val = (void *)raw + query->off; ++ ++ le32_add_cpu(&raw_val->refcnt, delta); ++ ++out: ++ apfs_free_query(query); ++ return ret; ++} ++int APFS_CRYPTO_ADJ_REFCNT_MAXOPS(void) ++{ ++ return 1; ++} ++ ++/** ++ * apfs_crypto_set_key - Modify content of crypto state record ++ * @sb: volume superblock ++ * @crypto_id: crypto_id to modify ++ * @new_val: new crypto state data; new_val->refcnt is overridden ++ * ++ * This function does not alter the inode's default protection class field. ++ * It needs to be done separately if the class changes. 
++ */ ++static int apfs_crypto_set_key(struct super_block *sb, u64 crypto_id, struct apfs_crypto_state_val *new_val) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_crypto_state_val *raw_val; ++ char *raw; ++ int ret; ++ unsigned int pfk_len; ++ ++ if (!crypto_id) ++ return 0; ++ ++ pfk_len = le16_to_cpu(new_val->state.key_len); ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_crypto_state_key(crypto_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx", crypto_id); ++ goto out; ++ } ++ raw = query->node->object.data; ++ raw_val = (void *)raw + query->off; ++ ++ new_val->refcnt = raw_val->refcnt; ++ ++ ret = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + pfk_len); ++ if (ret) ++ apfs_err(sb, "update failed for id 0x%llx", crypto_id); ++ ++out: ++ apfs_free_query(query); ++ return ret; ++} ++#define APFS_CRYPTO_SET_KEY_MAXOPS 1 ++ ++/** ++ * apfs_crypto_get_key - Retrieve content of crypto state record ++ * @sb: volume superblock ++ * @crypto_id: crypto_id to modify ++ * @val: result crypto state data ++ * @max_len: maximum allowed value of val->state.key_len ++ */ ++static int apfs_crypto_get_key(struct super_block *sb, u64 crypto_id, struct apfs_crypto_state_val *val, ++ unsigned int max_len) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_crypto_state_val *raw_val; ++ char *raw; ++ int ret; ++ unsigned int pfk_len; ++ ++ if (!crypto_id) ++ return -ENOENT; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_crypto_state_key(crypto_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) ++ goto out; ++ raw = 
query->node->object.data; ++ raw_val = (void *)raw + query->off; ++ ++ pfk_len = le16_to_cpu(raw_val->state.key_len); ++ if (pfk_len > max_len) { ++ ret = -ENOSPC; ++ goto out; ++ } ++ ++ memcpy(val, raw_val, sizeof(*val) + pfk_len); ++ ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++int __apfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int flags, struct page **pagep, void **fsdata) ++{ ++ struct inode *inode = mapping->host; ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ struct super_block *sb = inode->i_sb; ++ struct page *page; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0) ++ struct folio *folio; ++#endif ++ struct buffer_head *bh, *head; ++ unsigned int blocksize, block_start, block_end, from, to; ++ pgoff_t index = pos >> PAGE_SHIFT; ++ sector_t iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits); ++ loff_t i_blks_end; ++ int err; ++ ++ apfs_inode_join_transaction(sb, inode); ++ ++ err = apfs_inode_create_dstream_rec(inode); ++ if (err) { ++ apfs_err(sb, "failed to create dstream for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ if (apfs_vol_is_encrypted(sb)) { ++ err = apfs_create_crypto_rec(inode); ++ if (err) { ++ apfs_err(sb, "crypto creation failed for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ } ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ flags = memalloc_nofs_save(); ++ page = grab_cache_page_write_begin(mapping, index); ++ memalloc_nofs_restore(flags); ++#else ++ page = grab_cache_page_write_begin(mapping, index, flags | AOP_FLAG_NOFS); ++#endif ++ if (!page) ++ return -ENOMEM; ++ if (!page_has_buffers(page)) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ create_empty_buffers(page, sb->s_blocksize, 0); ++#else ++ folio = page_folio(page); ++ bh = folio_buffers(folio); ++ if (!bh) ++ bh = create_empty_buffers(folio, sb->s_blocksize, 0); ++#endif ++ } ++ ++ /* CoW moves existing blocks, so read them but 
mark them as unmapped */ ++ head = page_buffers(page); ++ blocksize = head->b_size; ++ i_blks_end = (inode->i_size + sb->s_blocksize - 1) >> inode->i_blkbits; ++ i_blks_end <<= inode->i_blkbits; ++ if (i_blks_end >= pos) { ++ from = pos & (PAGE_SIZE - 1); ++ to = from + min(i_blks_end - pos, (loff_t)len); ++ } else { ++ /* TODO: deal with preallocated tail blocks */ ++ from = UINT_MAX; ++ to = 0; ++ } ++ for (bh = head, block_start = 0; bh != head || !block_start; ++ block_start = block_end, bh = bh->b_this_page, ++iblock) { ++ block_end = block_start + blocksize; ++ if (to > block_start && from < block_end) { ++ if (buffer_trans(bh)) ++ continue; ++ if (!buffer_mapped(bh)) { ++ err = __apfs_get_block(dstream, iblock, bh, ++ false /* create */); ++ if (err) { ++ apfs_err(sb, "failed to map block for ino 0x%llx", apfs_ino(inode)); ++ goto out_put_page; ++ } ++ } ++ if (buffer_mapped(bh) && !buffer_uptodate(bh)) { ++ get_bh(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_read_sync; ++ apfs_submit_bh(REQ_OP_READ, 0, bh); ++ wait_on_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ apfs_err(sb, "failed to read block for ino 0x%llx", apfs_ino(inode)); ++ err = -EIO; ++ goto out_put_page; ++ } ++ } ++ clear_buffer_mapped(bh); ++ } ++ } ++ ++ err = __block_write_begin(page, pos, len, apfs_get_new_block); ++ if (err) { ++ apfs_err(sb, "CoW failed in inode 0x%llx", apfs_ino(inode)); ++ goto out_put_page; ++ } ++ ++ *pagep = page; ++ return 0; ++ ++out_put_page: ++ unlock_page(page); ++ put_page(page); ++ return err; ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++static int apfs_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned int len, ++ struct page **pagep, void **fsdata) ++#else ++static int apfs_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned int len, unsigned int flags, ++ struct page **pagep, void **fsdata) ++#endif ++{ ++ struct inode *inode = mapping->host; ++ struct 
super_block *sb = inode->i_sb; ++ int blkcount = (len + sb->s_blocksize - 1) >> inode->i_blkbits; ++ struct apfs_max_ops maxops; ++ int err; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ unsigned int flags = 0; ++#endif ++ ++ if (unlikely(pos >= APFS_MAX_FILE_SIZE)) ++ return -EFBIG; ++ ++ maxops.cat = APFS_CREATE_DSTREAM_REC_MAXOPS + ++ APFS_CREATE_CRYPTO_REC_MAXOPS + ++ APFS_UPDATE_INODE_MAXOPS() + ++ blkcount * APFS_GET_NEW_BLOCK_MAXOPS(); ++ maxops.blks = blkcount; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ err = __apfs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); ++ if (err) ++ goto fail; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++int __apfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, unsigned int copied, struct page *page, void *fsdata) ++{ ++ struct inode *inode = mapping->host; ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ int ret, err; ++ ++ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); ++ dstream->ds_size = i_size_read(inode); ++ if (ret < len && pos + len > inode->i_size) { ++ truncate_pagecache(inode, inode->i_size); ++ err = apfs_truncate(dstream, inode->i_size); ++ if (err) { ++ apfs_err(inode->i_sb, "truncation failed for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ } ++ return ret; ++} ++ ++static int apfs_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned int len, unsigned int copied, ++ struct page *page, void *fsdata) ++{ ++ struct inode *inode = mapping->host; ++ struct super_block *sb = inode->i_sb; ++ struct apfs_nx_transaction *trans = &APFS_NXI(sb)->nx_transaction; ++ int ret, err; ++ ++ ret = __apfs_write_end(file, mapping, pos, len, copied, page, fsdata); ++ if (ret < 0) { ++ err = ret; ++ goto fail; ++ } ++ ++ if ((pos + ret) & (sb->s_blocksize - 1)) ++ trans->t_state |= APFS_NX_TRANS_INCOMPLETE_BLOCK; ++ 
else ++ trans->t_state &= ~APFS_NX_TRANS_INCOMPLETE_BLOCK; ++ ++ err = apfs_transaction_commit(sb); ++ if (!err) ++ return ret; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0) ++static void apfs_noop_invalidatepage(struct page *page, unsigned int offset, unsigned int length) ++#else ++static void apfs_noop_invalidate_folio(struct folio *folio, size_t offset, size_t length) ++#endif ++{ ++} ++ ++/* bmap is not implemented to avoid issues with CoW on swapfiles */ ++static const struct address_space_operations apfs_aops = { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) ++ .dirty_folio = block_dirty_folio, ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0) ++ .set_page_dirty = __set_page_dirty_buffers, ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ .read_folio = apfs_read_folio, ++#else ++ .readpage = apfs_readpage, ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) ++ .readahead = apfs_readahead, ++#else ++ .readpages = apfs_readpages, ++#endif ++ ++ .write_begin = apfs_write_begin, ++ .write_end = apfs_write_end, ++ ++ /* The intention is to keep bhs around until the transaction is over */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 18, 0) ++ .invalidatepage = apfs_noop_invalidatepage, ++#else ++ .invalidate_folio = apfs_noop_invalidate_folio, ++#endif ++}; ++ ++/** ++ * apfs_inode_set_ops - Set up an inode's operations ++ * @inode: vfs inode to set up ++ * @rdev: device id (0 if not a device file) ++ * @compressed: is this a compressed inode? ++ * ++ * For device files, also sets the device id to @rdev. 
++ */ ++static void apfs_inode_set_ops(struct inode *inode, dev_t rdev, bool compressed) ++{ ++ /* A lot of operations still missing, of course */ ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFREG: ++ inode->i_op = &apfs_file_inode_operations; ++ if (compressed) { ++ inode->i_fop = &apfs_compress_file_operations; ++ inode->i_mapping->a_ops = &apfs_compress_aops; ++ } else { ++ inode->i_fop = &apfs_file_operations; ++ inode->i_mapping->a_ops = &apfs_aops; ++ } ++ break; ++ case S_IFDIR: ++ inode->i_op = &apfs_dir_inode_operations; ++ inode->i_fop = &apfs_dir_operations; ++ break; ++ case S_IFLNK: ++ inode->i_op = &apfs_symlink_inode_operations; ++ break; ++ default: ++ inode->i_op = &apfs_special_inode_operations; ++ init_special_inode(inode, inode->i_mode, rdev); ++ break; ++ } ++} ++ ++/** ++ * apfs_inode_from_query - Read the inode found by a successful query ++ * @query: the query that found the record ++ * @inode: vfs inode to be filled with the read data ++ * ++ * Reads the inode record into @inode and performs some basic sanity checks, ++ * mostly as a protection against crafted filesystems. Returns 0 on success ++ * or a negative error code otherwise. 
++ */ ++static int apfs_inode_from_query(struct apfs_query *query, struct inode *inode) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct apfs_dstream_info *dstream = &ai->i_dstream; ++ struct apfs_inode_val *inode_val; ++ char *raw = query->node->object.data; ++ char *xval = NULL; ++ int xlen; ++ u32 rdev = 0, bsd_flags; ++ bool compressed = false; ++ ++ if (query->len < sizeof(*inode_val)) ++ goto corrupted; ++ ++ inode_val = (struct apfs_inode_val *)(raw + query->off); ++ ++ ai->i_parent_id = le64_to_cpu(inode_val->parent_id); ++ dstream->ds_id = le64_to_cpu(inode_val->private_id); ++ inode->i_mode = le16_to_cpu(inode_val->mode); ++ ai->i_key_class = le32_to_cpu(inode_val->default_protection_class); ++ ai->i_int_flags = le64_to_cpu(inode_val->internal_flags); ++ ++ ai->i_saved_uid = le32_to_cpu(inode_val->owner); ++ i_uid_write(inode, ai->i_saved_uid); ++ ai->i_saved_gid = le32_to_cpu(inode_val->group); ++ i_gid_write(inode, ai->i_saved_gid); ++ ++ ai->i_bsd_flags = bsd_flags = le32_to_cpu(inode_val->bsd_flags); ++ if (bsd_flags & APFS_INOBSD_IMMUTABLE) ++ inode->i_flags |= S_IMMUTABLE; ++ if (bsd_flags & APFS_INOBSD_APPEND) ++ inode->i_flags |= S_APPEND; ++ ++ if (!S_ISDIR(inode->i_mode)) { ++ /* ++ * Directory inodes don't store their link count, so to provide ++ * it we would have to actually count the subdirectories. The ++ * HFS/HFS+ modules just leave it at 1, and so do we, for now. 
++ */ ++ set_nlink(inode, le32_to_cpu(inode_val->nlink)); ++ } else { ++ ai->i_nchildren = le32_to_cpu(inode_val->nchildren); ++ } ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode->i_ctime = ns_to_timespec64(le64_to_cpu(inode_val->change_time)); ++#else ++ inode_set_ctime_to_ts(inode, ns_to_timespec64(le64_to_cpu(inode_val->change_time))); ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ inode->i_atime = ns_to_timespec64(le64_to_cpu(inode_val->access_time)); ++ inode->i_mtime = ns_to_timespec64(le64_to_cpu(inode_val->mod_time)); ++#else ++ inode_set_atime_to_ts(inode, ns_to_timespec64(le64_to_cpu(inode_val->access_time))); ++ inode_set_mtime_to_ts(inode, ns_to_timespec64(le64_to_cpu(inode_val->mod_time))); ++#endif ++ ai->i_crtime = ns_to_timespec64(le64_to_cpu(inode_val->create_time)); ++ ++ dstream->ds_size = inode->i_size = inode->i_blocks = 0; ++ ai->i_has_dstream = false; ++ if ((bsd_flags & APFS_INOBSD_COMPRESSED) && !S_ISDIR(inode->i_mode)) { ++ if (!apfs_compress_get_size(inode, &inode->i_size)) { ++ inode->i_blocks = (inode->i_size + 511) >> 9; ++ compressed = true; ++ } ++ } else { ++ xlen = apfs_find_xfield(inode_val->xfields, ++ query->len - sizeof(*inode_val), ++ APFS_INO_EXT_TYPE_DSTREAM, &xval); ++ if (xlen >= sizeof(struct apfs_dstream)) { ++ struct apfs_dstream *dstream_raw = (struct apfs_dstream *)xval; ++ ++ dstream->ds_size = inode->i_size = le64_to_cpu(dstream_raw->size); ++ inode->i_blocks = le64_to_cpu(dstream_raw->alloced_size) >> 9; ++ ai->i_has_dstream = true; ++ } ++ } ++ xval = NULL; ++ ++ /* TODO: move each xfield read to its own function */ ++ dstream->ds_sparse_bytes = 0; ++ xlen = apfs_find_xfield(inode_val->xfields, query->len - sizeof(*inode_val), APFS_INO_EXT_TYPE_SPARSE_BYTES, &xval); ++ if (xlen >= sizeof(__le64)) { ++ __le64 *sparse_bytes_p = (__le64 *)xval; ++ ++ dstream->ds_sparse_bytes = le64_to_cpup(sparse_bytes_p); ++ } ++ xval = NULL; ++ ++ rdev = 0; ++ xlen = 
apfs_find_xfield(inode_val->xfields, ++ query->len - sizeof(*inode_val), ++ APFS_INO_EXT_TYPE_RDEV, &xval); ++ if (xlen >= sizeof(__le32)) { ++ __le32 *rdev_p = (__le32 *)xval; ++ ++ rdev = le32_to_cpup(rdev_p); ++ } ++ ++ apfs_inode_set_ops(inode, rdev, compressed); ++ return 0; ++ ++corrupted: ++ apfs_err(inode->i_sb, "bad inode record for inode 0x%llx", apfs_ino(inode)); ++ return -EFSCORRUPTED; ++} ++ ++/** ++ * apfs_inode_lookup - Lookup an inode record in the catalog b-tree ++ * @inode: vfs inode to lookup ++ * ++ * Runs a catalog query for the apfs_ino(@inode) inode record; returns a pointer ++ * to the query structure on success, or an error pointer in case of failure. ++ */ ++static struct apfs_query *apfs_inode_lookup(const struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return ERR_PTR(-ENOMEM); ++ apfs_init_inode_key(apfs_ino(inode), &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (!ret) ++ return query; ++ ++ /* Don't complain if an orphan is already gone */ ++ if (!current_work() || ret != -ENODATA) ++ apfs_err(sb, "query failed for id 0x%llx", apfs_ino(inode)); ++ apfs_free_query(query); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * apfs_test_inode - Check if the inode matches a 64-bit inode number ++ * @inode: inode to test ++ * @cnid: pointer to the inode number ++ */ ++static int apfs_test_inode(struct inode *inode, void *cnid) ++{ ++ u64 *ino = cnid; ++ ++ return apfs_ino(inode) == *ino; ++} ++ ++/** ++ * apfs_set_inode - Set a 64-bit inode number on the given inode ++ * @inode: inode to set ++ * @cnid: pointer to the inode number ++ */ ++static int apfs_set_inode(struct inode *inode, void *cnid) ++{ ++ apfs_set_ino(inode, *(u64 *)cnid); ++ return 0; ++} ++ ++/** ++ * apfs_iget_locked - Wrapper for 
iget5_locked() ++ * @sb: filesystem superblock ++ * @cnid: 64-bit inode number ++ * ++ * Works the same as iget_locked(), but can handle 64-bit inode numbers on ++ * 32-bit architectures. ++ */ ++static struct inode *apfs_iget_locked(struct super_block *sb, u64 cnid) ++{ ++ return iget5_locked(sb, cnid, apfs_test_inode, apfs_set_inode, &cnid); ++} ++ ++/** ++ * apfs_check_dstream_refcnt - Check if an inode's dstream is shared ++ * @inode: the inode to check ++ * ++ * Sets the value of ds_shared for the inode's dstream. Returns 0 on success, ++ * or a negative error code in case of failure. ++ */ ++static int apfs_check_dstream_refcnt(struct inode *inode) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct apfs_dstream_info *dstream = &ai->i_dstream; ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ struct apfs_dstream_id_val raw_val; ++ void *raw = NULL; ++ u32 refcnt; ++ int ret; ++ ++ if (!ai->i_has_dstream) { ++ dstream->ds_shared = false; ++ return 0; ++ } ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_dstream_id_key(dstream->ds_id, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx", dstream->ds_id); ++ if (ret == -ENODATA) ++ ret = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ if (query->len != sizeof(raw_val)) { ++ ret = -EFSCORRUPTED; ++ goto fail; ++ } ++ raw = query->node->object.data; ++ raw_val = *(struct apfs_dstream_id_val *)(raw + query->off); ++ refcnt = le32_to_cpu(raw_val.refcnt); ++ ++ dstream->ds_shared = refcnt > 1; ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_iget - Populate inode structures with metadata from disk ++ * @sb: filesystem superblock ++ * @cnid: inode number ++ * ++ * Populates the vfs inode and the corresponding apfs_inode_info structure. 
++ * Returns a pointer to the vfs inode in case of success, or an appropriate ++ * error pointer otherwise. ++ */ ++struct inode *apfs_iget(struct super_block *sb, u64 cnid) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct inode *inode; ++ struct apfs_query *query; ++ int err; ++ ++ inode = apfs_iget_locked(sb, cnid); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ if (!(inode->i_state & I_NEW)) ++ return inode; ++ ++ down_read(&nxi->nx_big_sem); ++ query = apfs_inode_lookup(inode); ++ if (IS_ERR(query)) { ++ err = PTR_ERR(query); ++ /* Don't complain if an orphan is already gone */ ++ if (!current_work() || err != -ENODATA) ++ apfs_err(sb, "lookup failed for ino 0x%llx", cnid); ++ goto fail; ++ } ++ err = apfs_inode_from_query(query, inode); ++ apfs_free_query(query); ++ if (err) ++ goto fail; ++ err = apfs_check_dstream_refcnt(inode); ++ if (err) { ++ apfs_err(sb, "refcnt check failed for ino 0x%llx", cnid); ++ goto fail; ++ } ++ up_read(&nxi->nx_big_sem); ++ ++ /* Allow the user to override the ownership */ ++ if (uid_valid(sbi->s_uid)) ++ inode->i_uid = sbi->s_uid; ++ if (gid_valid(sbi->s_gid)) ++ inode->i_gid = sbi->s_gid; ++ ++ /* Inode flags are not important for now, leave them at 0 */ ++ unlock_new_inode(inode); ++ return inode; ++ ++fail: ++ up_read(&nxi->nx_big_sem); ++ iget_failed(inode); ++ return ERR_PTR(err); ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) /* No statx yet... 
*/ ++ ++int apfs_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ struct inode *inode = d_inode(dentry); ++ ++ generic_fillattr(inode, stat); ++ stat->dev = APFS_SB(inode->i_sb)->s_anon_dev; ++ stat->ino = apfs_ino(inode); ++ return 0; ++} ++ ++#else /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */ ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++int apfs_getattr(const struct path *path, struct kstat *stat, ++ u32 request_mask, unsigned int query_flags) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++int apfs_getattr(struct user_namespace *mnt_userns, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) ++#else ++int apfs_getattr(struct mnt_idmap *idmap, ++ const struct path *path, struct kstat *stat, u32 request_mask, ++ unsigned int query_flags) ++#endif ++{ ++ struct inode *inode = d_inode(path->dentry); ++ struct apfs_inode_info *ai = APFS_I(inode); ++ ++ stat->result_mask |= STATX_BTIME; ++ stat->btime = ai->i_crtime; ++ ++ if (ai->i_bsd_flags & APFS_INOBSD_APPEND) ++ stat->attributes |= STATX_ATTR_APPEND; ++ if (ai->i_bsd_flags & APFS_INOBSD_IMMUTABLE) ++ stat->attributes |= STATX_ATTR_IMMUTABLE; ++ if (ai->i_bsd_flags & APFS_INOBSD_NODUMP) ++ stat->attributes |= STATX_ATTR_NODUMP; ++ if (ai->i_bsd_flags & APFS_INOBSD_COMPRESSED) ++ stat->attributes |= STATX_ATTR_COMPRESSED; ++ ++ stat->attributes_mask |= STATX_ATTR_APPEND; ++ stat->attributes_mask |= STATX_ATTR_IMMUTABLE; ++ stat->attributes_mask |= STATX_ATTR_NODUMP; ++ stat->attributes_mask |= STATX_ATTR_COMPRESSED; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ generic_fillattr(inode, stat); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ generic_fillattr(mnt_userns, inode, stat); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ generic_fillattr(idmap, inode, stat); ++#else ++ generic_fillattr(idmap, request_mask, inode, stat); ++#endif ++ ++ stat->dev = APFS_SB(inode->i_sb)->s_anon_dev; 
++ stat->ino = apfs_ino(inode); ++ return 0; ++} ++ ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0) */ ++ ++/** ++ * apfs_build_inode_val - Allocate and initialize the value for an inode record ++ * @inode: vfs inode to record ++ * @qname: filename for primary link ++ * @val_p: on return, a pointer to the new on-disk value structure ++ * ++ * Returns the length of the value, or a negative error code in case of failure. ++ */ ++static int apfs_build_inode_val(struct inode *inode, struct qstr *qname, ++ struct apfs_inode_val **val_p) ++{ ++ struct apfs_inode_val *val; ++ struct apfs_x_field xkey; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0) ++ struct timespec64 ts; ++#endif ++ int total_xlen, val_len; ++ bool is_device = S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode); ++ __le32 rdev; ++ ++ /* The only required xfield is the name, and the id if it's a device */ ++ total_xlen = sizeof(struct apfs_xf_blob); ++ total_xlen += sizeof(xkey) + round_up(qname->len + 1, 8); ++ if (is_device) ++ total_xlen += sizeof(xkey) + round_up(sizeof(rdev), 8); ++ ++ val_len = sizeof(*val) + total_xlen; ++ val = kzalloc(val_len, GFP_KERNEL); ++ if (!val) ++ return -ENOMEM; ++ ++ val->parent_id = cpu_to_le64(APFS_I(inode)->i_parent_id); ++ val->private_id = cpu_to_le64(apfs_ino(inode)); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ val->mod_time = cpu_to_le64(timespec64_to_ns(&inode->i_mtime)); ++#else ++ ts = inode_get_mtime(inode); ++ val->mod_time = cpu_to_le64(timespec64_to_ns(&ts)); ++#endif ++ val->create_time = val->change_time = val->access_time = val->mod_time; ++ ++ if (S_ISDIR(inode->i_mode)) ++ val->nchildren = 0; ++ else ++ val->nlink = cpu_to_le32(1); ++ ++ val->owner = cpu_to_le32(i_uid_read(inode)); ++ val->group = cpu_to_le32(i_gid_read(inode)); ++ val->mode = cpu_to_le16(inode->i_mode); ++ ++ /* The buffer was just allocated: none of these functions should fail */ ++ apfs_init_xfields(val->xfields, total_xlen); ++ xkey.x_type = 
APFS_INO_EXT_TYPE_NAME; ++ xkey.x_flags = APFS_XF_DO_NOT_COPY; ++ xkey.x_size = cpu_to_le16(qname->len + 1); ++ apfs_insert_xfield(val->xfields, total_xlen, &xkey, qname->name); ++ if (is_device) { ++ rdev = cpu_to_le32(inode->i_rdev); ++ xkey.x_type = APFS_INO_EXT_TYPE_RDEV; ++ xkey.x_flags = 0; /* TODO: proper flags here? */ ++ xkey.x_size = cpu_to_le16(sizeof(rdev)); ++ apfs_insert_xfield(val->xfields, total_xlen, &xkey, &rdev); ++ } ++ ++ *val_p = val; ++ return val_len; ++} ++ ++/* ++ * apfs_inode_rename - Update the primary name reported in an inode record ++ * @inode: the in-memory inode ++ * @new_name: name of the new primary link (NULL if unchanged) ++ * @query: the query that found the inode record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_inode_rename(struct inode *inode, char *new_name, ++ struct apfs_query *query) ++{ ++ char *raw = query->node->object.data; ++ struct apfs_inode_val *new_val = NULL; ++ int buflen, namelen; ++ struct apfs_x_field xkey; ++ int xlen; ++ int err; ++ ++ if (!new_name) ++ return 0; ++ ++ namelen = strlen(new_name) + 1; /* Count the null-termination */ ++ buflen = query->len; ++ buflen += sizeof(struct apfs_x_field) + round_up(namelen, 8); ++ new_val = kzalloc(buflen, GFP_KERNEL); ++ if (!new_val) ++ return -ENOMEM; ++ memcpy(new_val, raw + query->off, query->len); ++ ++ /* TODO: can we assume that all inode records have an xfield blob? 
*/ ++ xkey.x_type = APFS_INO_EXT_TYPE_NAME; ++ xkey.x_flags = APFS_XF_DO_NOT_COPY; ++ xkey.x_size = cpu_to_le16(namelen); ++ xlen = apfs_insert_xfield(new_val->xfields, buflen - sizeof(*new_val), ++ &xkey, new_name); ++ if (!xlen) { ++ /* Buffer has enough space, but the metadata claims otherwise */ ++ apfs_err(inode->i_sb, "bad xfields on inode 0x%llx", apfs_ino(inode)); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ /* Just remove the old record and create a new one */ ++ err = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + xlen); ++ if (err) ++ apfs_err(inode->i_sb, "update failed for ino 0x%llx", apfs_ino(inode)); ++ ++fail: ++ kfree(new_val); ++ return err; ++} ++#define APFS_INODE_RENAME_MAXOPS 1 ++ ++/** ++ * apfs_create_dstream_xfield - Create the inode xfield for a new data stream ++ * @inode: the in-memory inode ++ * @query: the query that found the inode record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_create_dstream_xfield(struct inode *inode, ++ struct apfs_query *query) ++{ ++ char *raw = query->node->object.data; ++ struct apfs_inode_val *new_val; ++ struct apfs_dstream dstream_raw = {0}; ++ struct apfs_x_field xkey; ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ int xlen; ++ int buflen; ++ int err; ++ ++ buflen = query->len; ++ buflen += sizeof(struct apfs_x_field) + sizeof(dstream_raw); ++ new_val = kzalloc(buflen, GFP_KERNEL); ++ if (!new_val) ++ return -ENOMEM; ++ memcpy(new_val, raw + query->off, query->len); ++ ++ dstream_raw.size = cpu_to_le64(inode->i_size); ++ dstream_raw.alloced_size = cpu_to_le64(apfs_alloced_size(dstream)); ++ if (apfs_vol_is_encrypted(inode->i_sb)) ++ dstream_raw.default_crypto_id = cpu_to_le64(dstream->ds_id); ++ ++ /* TODO: can we assume that all inode records have an xfield blob? 
*/ ++ xkey.x_type = APFS_INO_EXT_TYPE_DSTREAM; ++ xkey.x_flags = APFS_XF_SYSTEM_FIELD; ++ xkey.x_size = cpu_to_le16(sizeof(dstream_raw)); ++ xlen = apfs_insert_xfield(new_val->xfields, buflen - sizeof(*new_val), ++ &xkey, &dstream_raw); ++ if (!xlen) { ++ /* Buffer has enough space, but the metadata claims otherwise */ ++ apfs_err(inode->i_sb, "bad xfields on inode 0x%llx", apfs_ino(inode)); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ /* Just remove the old record and create a new one */ ++ err = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + xlen); ++ if (err) ++ apfs_err(inode->i_sb, "update failed for ino 0x%llx", apfs_ino(inode)); ++ ++fail: ++ kfree(new_val); ++ return err; ++} ++#define APFS_CREATE_DSTREAM_XFIELD_MAXOPS 1 ++ ++/** ++ * apfs_inode_resize - Update the sizes reported in an inode record ++ * @inode: the in-memory inode ++ * @query: the query that found the inode record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_inode_resize(struct inode *inode, struct apfs_query *query) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ char *raw; ++ struct apfs_inode_val *inode_raw; ++ char *xval; ++ int xlen; ++ int err; ++ ++ /* All dstream records must have a matching xfield, even if empty */ ++ if (!ai->i_has_dstream) ++ return 0; ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(inode->i_sb, "query join failed"); ++ return err; ++ } ++ raw = query->node->object.data; ++ inode_raw = (void *)raw + query->off; ++ ++ xlen = apfs_find_xfield(inode_raw->xfields, ++ query->len - sizeof(*inode_raw), ++ APFS_INO_EXT_TYPE_DSTREAM, &xval); ++ ++ if (xlen) { ++ struct apfs_dstream *dstream; ++ ++ if (xlen != sizeof(*dstream)) { ++ apfs_err(inode->i_sb, "bad xlen (%d) on inode 0x%llx", xlen, apfs_ino(inode)); ++ return -EFSCORRUPTED; ++ } ++ dstream = (struct apfs_dstream *)xval; ++ ++ /* TODO: count bytes read and written */ ++ dstream->size = cpu_to_le64(inode->i_size); ++ dstream->alloced_size = cpu_to_le64(apfs_alloced_size(&ai->i_dstream)); ++ return 0; ++ } ++ /* This inode has no dstream xfield, so we need to create it */ ++ return apfs_create_dstream_xfield(inode, query); ++} ++#define APFS_INODE_RESIZE_MAXOPS (1 + APFS_CREATE_DSTREAM_XFIELD_MAXOPS) ++ ++/** ++ * apfs_create_sparse_xfield - Create an inode xfield to count sparse bytes ++ * @inode: the in-memory inode ++ * @query: the query that found the inode record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_create_sparse_xfield(struct inode *inode, struct apfs_query *query) ++{ ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ char *raw = query->node->object.data; ++ struct apfs_inode_val *new_val; ++ __le64 sparse_bytes; ++ struct apfs_x_field xkey; ++ int xlen; ++ int buflen; ++ int err; ++ ++ buflen = query->len; ++ buflen += sizeof(struct apfs_x_field) + sizeof(sparse_bytes); ++ new_val = kzalloc(buflen, GFP_KERNEL); ++ if (!new_val) ++ return -ENOMEM; ++ memcpy(new_val, raw + query->off, query->len); ++ ++ sparse_bytes = cpu_to_le64(dstream->ds_sparse_bytes); ++ ++ /* TODO: can we assume that all inode records have an xfield blob? */ ++ xkey.x_type = APFS_INO_EXT_TYPE_SPARSE_BYTES; ++ xkey.x_flags = APFS_XF_SYSTEM_FIELD | APFS_XF_CHILDREN_INHERIT; ++ xkey.x_size = cpu_to_le16(sizeof(sparse_bytes)); ++ xlen = apfs_insert_xfield(new_val->xfields, buflen - sizeof(*new_val), &xkey, &sparse_bytes); ++ if (!xlen) { ++ /* Buffer has enough space, but the metadata claims otherwise */ ++ apfs_err(inode->i_sb, "bad xfields on inode 0x%llx", apfs_ino(inode)); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ /* Just remove the old record and create a new one */ ++ err = apfs_btree_replace(query, NULL /* key */, 0 /* key_len */, new_val, sizeof(*new_val) + xlen); ++ if (err) ++ apfs_err(inode->i_sb, "update failed for ino 0x%llx", apfs_ino(inode)); ++ ++fail: ++ kfree(new_val); ++ return err; ++} ++ ++/** ++ * apfs_inode_resize_sparse - Update sparse byte count reported in inode record ++ * @inode: the in-memory inode ++ * @query: the query that found the inode record ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ * ++ * TODO: should the xfield be removed if the count reaches 0? Should the inode ++ * flag change? 
++ */ ++static int apfs_inode_resize_sparse(struct inode *inode, struct apfs_query *query) ++{ ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ char *raw; ++ struct apfs_inode_val *inode_raw; ++ char *xval; ++ int xlen; ++ int err; ++ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(inode->i_sb, "query join failed"); ++ return err; ++ } ++ raw = query->node->object.data; ++ inode_raw = (void *)raw + query->off; ++ ++ xlen = apfs_find_xfield(inode_raw->xfields, ++ query->len - sizeof(*inode_raw), ++ APFS_INO_EXT_TYPE_SPARSE_BYTES, &xval); ++ if (!xlen && !dstream->ds_sparse_bytes) ++ return 0; ++ ++ if (xlen) { ++ __le64 *sparse_bytes_p; ++ ++ if (xlen != sizeof(*sparse_bytes_p)) { ++ apfs_err(inode->i_sb, "bad xlen (%d) on inode 0x%llx", xlen, apfs_ino(inode)); ++ return -EFSCORRUPTED; ++ } ++ sparse_bytes_p = (__le64 *)xval; ++ ++ *sparse_bytes_p = cpu_to_le64(dstream->ds_sparse_bytes); ++ return 0; ++ } ++ return apfs_create_sparse_xfield(inode, query); ++} ++ ++/** ++ * apfs_update_inode - Update an existing inode record ++ * @inode: the modified in-memory inode ++ * @new_name: name of the new primary link (NULL if unchanged) ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++int apfs_update_inode(struct inode *inode, char *new_name) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct apfs_dstream_info *dstream = &ai->i_dstream; ++ struct apfs_query *query; ++ struct apfs_btree_node_phys *node_raw; ++ struct apfs_inode_val *inode_raw; ++ int err; ++ ++ err = apfs_flush_extent_cache(dstream); ++ if (err) { ++ apfs_err(sb, "extent cache flush failed for inode 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ query = apfs_inode_lookup(inode); ++ if (IS_ERR(query)) { ++ apfs_err(sb, "lookup failed for ino 0x%llx", apfs_ino(inode)); ++ return PTR_ERR(query); ++ } ++ ++ /* TODO: copy the record to memory and make all xfield changes there */ ++ err = apfs_inode_rename(inode, new_name, query); ++ if (err) { ++ apfs_err(sb, "rename failed for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ ++ err = apfs_inode_resize(inode, query); ++ if (err) { ++ apfs_err(sb, "resize failed for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ ++ err = apfs_inode_resize_sparse(inode, query); ++ if (err) { ++ apfs_err(sb, "sparse resize failed for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ if (dstream->ds_sparse_bytes) ++ ai->i_int_flags |= APFS_INODE_IS_SPARSE; ++ ++ /* TODO: just use apfs_btree_replace()? 
*/ ++ err = apfs_query_join_transaction(query); ++ if (err) { ++ apfs_err(sb, "query join failed"); ++ goto fail; ++ } ++ node_raw = (void *)query->node->object.data; ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ inode_raw = (void *)node_raw + query->off; ++ ++ inode_raw->parent_id = cpu_to_le64(ai->i_parent_id); ++ inode_raw->private_id = cpu_to_le64(dstream->ds_id); ++ inode_raw->mode = cpu_to_le16(inode->i_mode); ++ inode_raw->owner = cpu_to_le32(i_uid_read(inode)); ++ inode_raw->group = cpu_to_le32(i_gid_read(inode)); ++ inode_raw->default_protection_class = cpu_to_le32(ai->i_key_class); ++ inode_raw->internal_flags = cpu_to_le64(ai->i_int_flags); ++ inode_raw->bsd_flags = cpu_to_le32(ai->i_bsd_flags); ++ ++ /* Don't persist the uid/gid provided by the user on mount */ ++ if (uid_valid(sbi->s_uid)) ++ inode_raw->owner = cpu_to_le32(ai->i_saved_uid); ++ if (gid_valid(sbi->s_gid)) ++ inode_raw->group = cpu_to_le32(ai->i_saved_gid); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode_raw->change_time = cpu_to_le64(timespec64_to_ns(&inode->i_ctime)); ++#else ++ struct timespec64 ictime = inode_get_ctime(inode); ++ inode_raw->change_time = cpu_to_le64(timespec64_to_ns(&ictime)); ++#endif ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ inode_raw->access_time = cpu_to_le64(timespec64_to_ns(&inode->i_atime)); ++ inode_raw->mod_time = cpu_to_le64(timespec64_to_ns(&inode->i_mtime)); ++#else ++ struct timespec64 ts = inode_get_mtime(inode); ++ inode_raw->mod_time = cpu_to_le64(timespec64_to_ns(&ts)); ++ ts = inode_get_atime(inode); ++ inode_raw->access_time = cpu_to_le64(timespec64_to_ns(&ts)); ++#endif ++ inode_raw->create_time = cpu_to_le64(timespec64_to_ns(&ai->i_crtime)); ++ ++ if (S_ISDIR(inode->i_mode)) { ++ inode_raw->nchildren = cpu_to_le32(ai->i_nchildren); ++ } else { ++ /* The remaining link for orphan inodes is not counted */ ++ inode_raw->nlink = cpu_to_le32(inode->i_nlink); ++ } ++ ++fail: ++ apfs_free_query(query); ++ return 
err; ++} ++int APFS_UPDATE_INODE_MAXOPS(void) ++{ ++ return APFS_INODE_RENAME_MAXOPS + APFS_INODE_RESIZE_MAXOPS + 1; ++} ++ ++/** ++ * apfs_delete_inode - Delete an inode record ++ * @inode: the vfs inode to delete ++ * ++ * Returns 0 on success or a negative error code in case of failure, which may ++ * be -EAGAIN if the inode was not deleted in full. ++ */ ++static int apfs_delete_inode(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct apfs_dstream_info *dstream = NULL; ++ struct apfs_query *query; ++ u64 old_dstream_id; ++ int ret; ++ ++ ret = apfs_delete_all_xattrs(inode); ++ if (ret) { ++ apfs_err(sb, "xattr deletion failed for ino 0x%llx", apfs_ino(inode)); ++ return ret; ++ } ++ ++ dstream = &ai->i_dstream; ++ old_dstream_id = dstream->ds_id; ++ ++ /* ++ * This is very wasteful since all the new extents and references will ++ * get deleted right away, but it only affects clones, so I don't see a ++ * big reason to improve it (TODO) ++ */ ++ ret = apfs_inode_create_exclusive_dstream(inode); ++ if (ret) { ++ apfs_err(sb, "dstream creation failed for ino 0x%llx", apfs_ino(inode)); ++ return ret; ++ } ++ ++ /* TODO: what about partial deletion of xattrs? Is that allowed? */ ++ ret = apfs_inode_delete_front(inode); ++ if (ret) { ++ /* ++ * If the inode had too many extents, only the first few get ++ * deleted and the inode remains in the orphan list for now. ++ * I don't know why the deletion starts at the front, but it ++ * seems to be what the official driver does. 
++ */ ++ if (ret != -EAGAIN) { ++ apfs_err(sb, "head deletion failed for ino 0x%llx", apfs_ino(inode)); ++ return ret; ++ } ++ if (dstream->ds_id != old_dstream_id) { ++ ret = apfs_update_inode(inode, NULL /* new_name */); ++ if (ret) { ++ apfs_err(sb, "dstream id update failed for orphan 0x%llx", apfs_ino(inode)); ++ return ret; ++ } ++ } ++ return -EAGAIN; ++ } ++ ++ ret = apfs_put_dstream_rec(dstream); ++ if (ret) { ++ apfs_err(sb, "failed to put dstream for ino 0x%llx", apfs_ino(inode)); ++ return ret; ++ } ++ dstream = NULL; ++ ai->i_has_dstream = false; ++ ++ query = apfs_inode_lookup(inode); ++ if (IS_ERR(query)) { ++ apfs_err(sb, "lookup failed for ino 0x%llx", apfs_ino(inode)); ++ return PTR_ERR(query); ++ } ++ ret = apfs_btree_remove(query); ++ apfs_free_query(query); ++ if (ret) { ++ apfs_err(sb, "removal failed for ino 0x%llx", apfs_ino(inode)); ++ return ret; ++ } ++ ++ ai->i_cleaned = true; ++ return ret; ++} ++#define APFS_DELETE_INODE_MAXOPS 1 ++ ++/** ++ * apfs_clean_single_orphan - Clean the given orphan file ++ * @inode: inode for the file to clean ++ * ++ * Returns 0 on success or a negative error code in case of failure, which may ++ * be -EAGAIN if the file could not be deleted in full. ++ */ ++static int apfs_clean_single_orphan(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops = {0}; /* TODO: rethink this stuff */ ++ u64 ino = apfs_ino(inode); ++ bool eagain = false; ++ int err; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ err = apfs_delete_inode(inode); ++ if (err) { ++ if (err != -EAGAIN) { ++ apfs_err(sb, "failed to delete orphan 0x%llx", ino); ++ goto fail; ++ } ++ eagain = true; ++ } else { ++ err = apfs_delete_orphan_link(inode); ++ if (err) { ++ apfs_err(sb, "failed to unlink orphan 0x%llx", ino); ++ goto fail; ++ } ++ } ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return eagain ? 
-EAGAIN : 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_clean_any_orphan - Pick an orphan and delete as much as reasonable ++ * @sb: filesystem superblock ++ * ++ * Returns 0 on success, or a negative error code in case of failure, which may ++ * be -ENODATA if there are no more orphan files or -EAGAIN if a file could not ++ * be deleted in full. ++ */ ++static int apfs_clean_any_orphan(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct inode *inode = NULL; ++ int err; ++ u64 ino; ++ ++ down_read(&nxi->nx_big_sem); ++ err = apfs_any_orphan_ino(sb, &ino); ++ up_read(&nxi->nx_big_sem); ++ if (err) { ++ if (err == -ENODATA) ++ return -ENODATA; ++ apfs_err(sb, "failed to find orphan inode numbers"); ++ return err; ++ } ++ ++ inode = apfs_iget(sb, ino); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ if (err != -ENODATA) { ++ apfs_err(sb, "iget failed for orphan 0x%llx", ino); ++ return err; ++ } ++ /* ++ * This happens rarely for files with no extents, if we hit a ++ * race with ->evict_inode(). Not a problem: the file is gone. ++ */ ++ apfs_notice(sb, "orphan 0x%llx not found", ino); ++ return 0; ++ } ++ ++ if (atomic_read(&inode->i_count) > 1) ++ goto out; ++ err = apfs_clean_single_orphan(inode); ++ if (err && err != -EAGAIN) { ++ apfs_err(sb, "failed to clean orphan 0x%llx", ino); ++ goto out; ++ } ++out: ++ iput(inode); ++ return err; ++} ++ ++/** ++ * apfs_clean_orphans - Delete as many orphan files as is reasonable ++ * @sb: filesystem superblock ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_clean_orphans(struct super_block *sb) ++{ ++ int ret, i; ++ ++ for (i = 0; i < 100; ++i) { ++ ret = apfs_clean_any_orphan(sb); ++ if (ret) { ++ if (ret == -ENODATA) ++ return 0; ++ if (ret == -EAGAIN) ++ break; ++ apfs_err(sb, "failed to delete an orphan file"); ++ return ret; ++ } ++ } ++ ++ /* ++ * If a file is too big, or if there are too many files, take a break ++ * and continue later. ++ */ ++ if (atomic_read(&sb->s_active) != 0) ++ schedule_work(&APFS_SB(sb)->s_orphan_cleanup_work); ++ return 0; ++} ++ ++void apfs_evict_inode(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ int err; ++ ++ if (is_bad_inode(inode) || inode->i_nlink || ai->i_cleaned) ++ goto out; ++ ++ if (!ai->i_has_dstream || ai->i_dstream.ds_size == 0) { ++ /* For files with no extents, scheduled cleanup wastes time */ ++ err = apfs_clean_single_orphan(inode); ++ if (err) ++ apfs_err(sb, "failed to clean orphan 0x%llx (err:%d)", apfs_ino(inode), err); ++ goto out; ++ } ++ ++ /* ++ * If the inode still has extents then schedule cleanup for the rest ++ * of it. Not during unmount though: completing all cleanup could take ++ * a while so just leave future mounts to handle the orphans. 
++ */ ++ if (atomic_read(&sb->s_active)) ++ schedule_work(&APFS_SB(sb)->s_orphan_cleanup_work); ++out: ++ truncate_inode_pages_final(&inode->i_data); ++ clear_inode(inode); ++} ++ ++void apfs_orphan_cleanup_work(struct work_struct *work) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_sb_info *sbi = NULL; ++ struct inode *priv = NULL; ++ int err; ++ ++ sbi = container_of(work, struct apfs_sb_info, s_orphan_cleanup_work); ++ priv = sbi->s_private_dir; ++ sb = priv->i_sb; ++ ++ if (sb->s_flags & SB_RDONLY) { ++ apfs_alert(sb, "attempt to flush orphans in read-only mount"); ++ return; ++ } ++ ++ err = apfs_clean_orphans(sb); ++ if (err) ++ apfs_err(sb, "orphan cleanup failed (err:%d)", err); ++} ++ ++/** ++ * apfs_insert_inode_locked - Wrapper for insert_inode_locked4() ++ * @inode: vfs inode to insert in cache ++ * ++ * Works the same as insert_inode_locked(), but can handle 64-bit inode numbers ++ * on 32-bit architectures. ++ */ ++static int apfs_insert_inode_locked(struct inode *inode) ++{ ++ u64 cnid = apfs_ino(inode); ++ ++ return insert_inode_locked4(inode, cnid, apfs_test_inode, &cnid); ++} ++ ++/** ++ * apfs_new_inode - Create a new in-memory inode ++ * @dir: parent inode ++ * @mode: mode bits for the new inode ++ * @rdev: device id (0 if not a device file) ++ * ++ * Returns a pointer to the new vfs inode on success, or an error pointer in ++ * case of failure. 
++ */ ++struct inode *apfs_new_inode(struct inode *dir, umode_t mode, dev_t rdev) ++{ ++ struct super_block *sb = dir->i_sb; ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct inode *inode; ++ struct apfs_inode_info *ai; ++ struct apfs_dstream_info *dstream; ++ u64 cnid; ++ struct timespec64 now; ++ ++ /* Updating on-disk structures here is odd, but it works for now */ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ ++ inode = new_inode(sb); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ ai = APFS_I(inode); ++ dstream = &ai->i_dstream; ++ ++ cnid = le64_to_cpu(vsb_raw->apfs_next_obj_id); ++ le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1); ++ apfs_set_ino(inode, cnid); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ inode_init_owner(inode, dir, mode); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ inode_init_owner(&init_user_ns, inode, dir, mode); ++#else ++ inode_init_owner(&nop_mnt_idmap, inode, dir, mode); ++#endif ++ ++ ai->i_saved_uid = i_uid_read(inode); ++ ai->i_saved_gid = i_gid_read(inode); ++ ai->i_parent_id = apfs_ino(dir); ++ set_nlink(inode, 1); ++ ai->i_nchildren = 0; ++ if (apfs_vol_is_encrypted(sb) && S_ISREG(mode)) ++ ai->i_key_class = apfs_dflt_key_class(sb); ++ else ++ ai->i_key_class = 0; ++ ai->i_int_flags = APFS_INODE_NO_RSRC_FORK; ++ ai->i_bsd_flags = 0; ++ ++ ai->i_has_dstream = false; ++ dstream->ds_id = cnid; ++ dstream->ds_size = 0; ++ dstream->ds_sparse_bytes = 0; ++ dstream->ds_shared = false; ++ ++ now = current_time(inode); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode->i_atime = inode->i_mtime = inode->i_ctime = ai->i_crtime = now; ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ inode_set_ctime_to_ts(inode, now); ++ inode->i_atime = inode->i_mtime = ai->i_crtime = now; ++#else ++ ai->i_crtime = simple_inode_init_ts(inode); ++#endif ++ vsb_raw->apfs_last_mod_time = cpu_to_le64(timespec64_to_ns(&now)); ++ ++ if (S_ISREG(mode)) ++ le64_add_cpu(&vsb_raw->apfs_num_files, 1); 
++ else if (S_ISDIR(mode)) ++ le64_add_cpu(&vsb_raw->apfs_num_directories, 1); ++ else if (S_ISLNK(mode)) ++ le64_add_cpu(&vsb_raw->apfs_num_symlinks, 1); ++ else ++ le64_add_cpu(&vsb_raw->apfs_num_other_fsobjects, 1); ++ ++ if (apfs_insert_inode_locked(inode)) { ++ /* The inode number should have been free, but wasn't */ ++ apfs_err(sb, "next obj_id (0x%llx) not free", cnid); ++ make_bad_inode(inode); ++ iput(inode); ++ return ERR_PTR(-EFSCORRUPTED); ++ } ++ ++ /* No need to dirty the inode, we'll write it to disk right away */ ++ apfs_inode_set_ops(inode, rdev, false /* compressed */); ++ return inode; ++} ++ ++/** ++ * apfs_create_inode_rec - Create an inode record in the catalog b-tree ++ * @sb: filesystem superblock ++ * @inode: vfs inode to record ++ * @dentry: dentry for primary link ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_create_inode_rec(struct super_block *sb, struct inode *inode, ++ struct dentry *dentry) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_inode_key raw_key; ++ struct apfs_inode_val *raw_val; ++ int val_len; ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_inode_key(apfs_ino(inode), &query->key); ++ query->flags |= APFS_QUERY_CAT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret && ret != -ENODATA) { ++ apfs_err(sb, "query failed for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ ++ apfs_key_set_hdr(APFS_TYPE_INODE, apfs_ino(inode), &raw_key); ++ ++ val_len = apfs_build_inode_val(inode, &dentry->d_name, &raw_val); ++ if (val_len < 0) { ++ ret = val_len; ++ goto fail; ++ } ++ ++ ret = apfs_btree_insert(query, &raw_key, sizeof(raw_key), raw_val, val_len); ++ if (ret) ++ apfs_err(sb, "insertion failed for ino 0x%llx", apfs_ino(inode)); ++ kfree(raw_val); ++ ++fail: ++ apfs_free_query(query); ++ return ret; ++} ++int APFS_CREATE_INODE_REC_MAXOPS(void) ++{ ++ return 
1; ++} ++ ++/** ++ * apfs_setsize - Change the size of a regular file ++ * @inode: the vfs inode ++ * @new_size: the new size ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_setsize(struct inode *inode, loff_t new_size) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ int err; ++ ++ if (new_size == inode->i_size) ++ return 0; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode->i_mtime = inode->i_ctime = current_time(inode); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) ++ inode->i_mtime = inode_set_ctime_current(inode); ++#else ++ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ++#endif ++ ++ err = apfs_inode_create_dstream_rec(inode); ++ if (err) { ++ apfs_err(sb, "failed to create dstream for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ /* Must be called before i_size is changed */ ++ err = apfs_truncate(dstream, new_size); ++ if (err) { ++ apfs_err(sb, "truncation failed for ino 0x%llx", apfs_ino(inode)); ++ return err; ++ } ++ ++ truncate_setsize(inode, new_size); ++ dstream->ds_size = i_size_read(inode); ++ return 0; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++int apfs_setattr(struct dentry *dentry, struct iattr *iattr) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++int apfs_setattr(struct user_namespace *mnt_userns, ++ struct dentry *dentry, struct iattr *iattr) ++#else ++int apfs_setattr(struct mnt_idmap *idmap, ++ struct dentry *dentry, struct iattr *iattr) ++#endif ++{ ++ struct inode *inode = d_inode(dentry); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ bool resizing = S_ISREG(inode->i_mode) && (iattr->ia_valid & ATTR_SIZE); ++ int err; ++ ++ if (resizing && iattr->ia_size > APFS_MAX_FILE_SIZE) ++ return -EFBIG; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ err = setattr_prepare(dentry, iattr); ++#elif LINUX_VERSION_CODE < 
KERNEL_VERSION(6, 3, 0) ++ err = setattr_prepare(&init_user_ns, dentry, iattr); ++#else ++ err = setattr_prepare(&nop_mnt_idmap, dentry, iattr); ++#endif ++ if (err) ++ return err; ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ ++ /* TODO: figure out why ->write_inode() isn't firing */ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ apfs_inode_join_transaction(sb, inode); ++ ++ if (resizing) { ++ err = apfs_setsize(inode, iattr->ia_size); ++ if (err) { ++ apfs_err(sb, "setsize failed for ino 0x%llx", apfs_ino(inode)); ++ goto fail; ++ } ++ } ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ setattr_copy(inode, iattr); ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ setattr_copy(&init_user_ns, inode, iattr); ++#else ++ setattr_copy(&nop_mnt_idmap, inode, iattr); ++#endif ++ ++ mark_inode_dirty(inode); ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/* TODO: this only seems to be necessary because ->write_inode() isn't firing */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++int apfs_update_time(struct inode *inode, struct timespec64 *time, int flags) ++#else ++int apfs_update_time(struct inode *inode, int flags) ++#endif ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ apfs_inode_join_transaction(sb, inode); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ generic_update_time(inode, time, flags); ++#else ++ generic_update_time(inode, flags); ++#endif ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++static int apfs_ioc_set_dflt_pfk(struct file *file, void __user *user_pfk) ++{ ++ struct inode *inode = file_inode(file); 
++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_wrapped_crypto_state pfk_hdr; ++ struct apfs_crypto_state_val *pfk; ++ unsigned int key_len; ++ ++ if (__copy_from_user(&pfk_hdr, user_pfk, sizeof(pfk_hdr))) ++ return -EFAULT; ++ key_len = le16_to_cpu(pfk_hdr.key_len); ++ if (key_len > MAX_PFK_LEN) ++ return -EFBIG; ++ pfk = kmalloc(sizeof(*pfk) + key_len, GFP_KERNEL); ++ if (!pfk) ++ return -ENOMEM; ++ if (__copy_from_user(&pfk->state, user_pfk, sizeof(pfk_hdr) + key_len)) { ++ kfree(pfk); ++ return -EFAULT; ++ } ++ pfk->refcnt = cpu_to_le32(1); ++ ++ down_write(&nxi->nx_big_sem); ++ ++ if (sbi->s_dflt_pfk) ++ kfree(sbi->s_dflt_pfk); ++ sbi->s_dflt_pfk = pfk; ++ ++ up_write(&nxi->nx_big_sem); ++ ++ return 0; ++} ++ ++static int apfs_ioc_set_dir_class(struct file *file, u32 __user *user_class) ++{ ++ struct inode *inode = file_inode(file); ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ u32 class; ++ int err; ++ ++ if (get_user(class, user_class)) ++ return -EFAULT; ++ ++ ai->i_key_class = class; ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ apfs_inode_join_transaction(sb, inode); ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++static int apfs_ioc_set_pfk(struct file *file, void __user *user_pfk) ++{ ++ struct inode *inode = file_inode(file); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_wrapped_crypto_state pfk_hdr; ++ struct apfs_crypto_state_val *pfk; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ struct apfs_dstream_info *dstream = &ai->i_dstream; ++ struct apfs_max_ops maxops; ++ unsigned int key_len, key_class; ++ int err; ++ ++ if (__copy_from_user(&pfk_hdr, user_pfk, sizeof(pfk_hdr))) ++ 
return -EFAULT; ++ key_len = le16_to_cpu(pfk_hdr.key_len); ++ if (key_len > MAX_PFK_LEN) ++ return -EFBIG; ++ pfk = kmalloc(sizeof(*pfk) + key_len, GFP_KERNEL); ++ if (!pfk) ++ return -ENOMEM; ++ if (__copy_from_user(&pfk->state, user_pfk, sizeof(pfk_hdr) + key_len)) { ++ kfree(pfk); ++ return -EFAULT; ++ } ++ pfk->refcnt = cpu_to_le32(1); ++ ++ maxops.cat = APFS_CRYPTO_SET_KEY_MAXOPS + APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) { ++ kfree(pfk); ++ return err; ++ } ++ ++ err = apfs_crypto_set_key(sb, dstream->ds_id, pfk); ++ if (err) ++ goto fail; ++ ++ key_class = le32_to_cpu(pfk_hdr.persistent_class); ++ if (ai->i_key_class != key_class) { ++ ai->i_key_class = key_class; ++ apfs_inode_join_transaction(sb, inode); ++ } ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ kfree(pfk); ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ kfree(pfk); ++ return err; ++} ++ ++static int apfs_ioc_get_class(struct file *file, u32 __user *user_class) ++{ ++ struct inode *inode = file_inode(file); ++ struct apfs_inode_info *ai = APFS_I(inode); ++ u32 class; ++ ++ class = ai->i_key_class; ++ if (put_user(class, user_class)) ++ return -EFAULT; ++ return 0; ++} ++ ++static int apfs_ioc_get_pfk(struct file *file, void __user *user_pfk) ++{ ++ struct inode *inode = file_inode(file); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_wrapped_crypto_state pfk_hdr; ++ struct apfs_crypto_state_val *pfk; ++ unsigned int max_len, key_len; ++ struct apfs_dstream_info *dstream = &APFS_I(inode)->i_dstream; ++ int err; ++ ++ if (__copy_from_user(&pfk_hdr, user_pfk, sizeof(pfk_hdr))) ++ return -EFAULT; ++ max_len = le16_to_cpu(pfk_hdr.key_len); ++ if (max_len > MAX_PFK_LEN) ++ return -EFBIG; ++ pfk = kmalloc(sizeof(*pfk) + max_len, GFP_KERNEL); ++ if (!pfk) ++ return -ENOMEM; ++ ++ down_read(&nxi->nx_big_sem); ++ ++ err = apfs_crypto_get_key(sb, 
dstream->ds_id, pfk, max_len); ++ if (err) ++ goto fail; ++ ++ up_read(&nxi->nx_big_sem); ++ ++ key_len = le16_to_cpu(pfk->state.key_len); ++ if (__copy_to_user(user_pfk, &pfk->state, sizeof(pfk_hdr) + key_len)) { ++ kfree(pfk); ++ return -EFAULT; ++ } ++ ++ kfree(pfk); ++ return 0; ++ ++fail: ++ up_read(&nxi->nx_big_sem); ++ kfree(pfk); ++ return err; ++} ++ ++/* ++ * Older kernels have no vfs_ioc_setflags_prepare(), so don't implement the ++ * SETFLAGS/GETFLAGS ioctls there. It should be easy to fix, but it's not ++ * really needed at all. Be careful with this macro check, because it nests ++ * over a few others. ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) ++ ++/** ++ * apfs_getflags - Read an inode's bsd flags in FS_IOC_GETFLAGS format ++ * @inode: the vfs inode ++ */ ++static unsigned int apfs_getflags(struct inode *inode) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ unsigned int flags = 0; ++ ++ if (ai->i_bsd_flags & APFS_INOBSD_APPEND) ++ flags |= FS_APPEND_FL; ++ if (ai->i_bsd_flags & APFS_INOBSD_IMMUTABLE) ++ flags |= FS_IMMUTABLE_FL; ++ if (ai->i_bsd_flags & APFS_INOBSD_NODUMP) ++ flags |= FS_NODUMP_FL; ++ return flags; ++} ++ ++/** ++ * apfs_setflags - Set an inode's bsd flags ++ * @inode: the vfs inode ++ * @flags: flags to set, in FS_IOC_SETFLAGS format ++ */ ++static void apfs_setflags(struct inode *inode, unsigned int flags) ++{ ++ struct apfs_inode_info *ai = APFS_I(inode); ++ unsigned int i_flags = 0; ++ ++ if (flags & FS_APPEND_FL) { ++ ai->i_bsd_flags |= APFS_INOBSD_APPEND; ++ i_flags |= S_APPEND; ++ } else { ++ ai->i_bsd_flags &= ~APFS_INOBSD_APPEND; ++ } ++ ++ if (flags & FS_IMMUTABLE_FL) { ++ ai->i_bsd_flags |= APFS_INOBSD_IMMUTABLE; ++ i_flags |= S_IMMUTABLE; ++ } else { ++ ai->i_bsd_flags &= ~APFS_INOBSD_IMMUTABLE; ++ } ++ ++ if (flags & FS_NODUMP_FL) ++ ai->i_bsd_flags |= APFS_INOBSD_NODUMP; ++ else ++ ai->i_bsd_flags &= ~APFS_INOBSD_NODUMP; ++ ++ inode_set_flags(inode, i_flags, S_IMMUTABLE | S_APPEND); ++} ++ ++#if 
LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) ++ ++/** ++ * apfs_ioc_getflags - Ioctl handler for FS_IOC_GETFLAGS ++ * @file: affected file ++ * @arg: ioctl argument ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_ioc_getflags(struct file *file, int __user *arg) ++{ ++ unsigned int flags = apfs_getflags(file_inode(file)); ++ ++ return put_user(flags, arg); ++} ++ ++/** ++ * apfs_do_ioc_setflags - Actual work for apfs_ioc_setflags(), after preparation ++ * @inode: affected vfs inode ++ * @newflags: inode flags to set, in FS_IOC_SETFLAGS format ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_do_ioc_setflags(struct inode *inode, unsigned int newflags) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ unsigned int oldflags; ++ int err; ++ ++ lockdep_assert_held_write(&inode->i_rwsem); ++ ++ oldflags = apfs_getflags(inode); ++ err = vfs_ioc_setflags_prepare(inode, oldflags, newflags); ++ if (err) ++ return err; ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ apfs_inode_join_transaction(sb, inode); ++ apfs_setflags(inode, newflags); ++ inode->i_ctime = current_time(inode); ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_ioc_setflags - Ioctl handler for FS_IOC_SETFLAGS ++ * @file: affected file ++ * @arg: ioctl argument ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_ioc_setflags(struct file *file, int __user *arg) ++{ ++ struct inode *inode = file_inode(file); ++ struct super_block *sb = inode->i_sb; ++ unsigned int newflags; ++ int err; ++ ++ if (sb->s_flags & SB_RDONLY) ++ return -EROFS; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ if (!inode_owner_or_capable(inode)) ++#else ++ if (!inode_owner_or_capable(&init_user_ns, inode)) ++#endif ++ return -EPERM; ++ ++ if (get_user(newflags, arg)) ++ return -EFAULT; ++ ++ if (newflags & ~(FS_APPEND_FL | FS_IMMUTABLE_FL | FS_NODUMP_FL)) ++ return -EOPNOTSUPP; ++ ++ err = mnt_want_write_file(file); ++ if (err) ++ return err; ++ ++ inode_lock(inode); ++ err = apfs_do_ioc_setflags(inode, newflags); ++ inode_unlock(inode); ++ ++ mnt_drop_write_file(file); ++ return err; ++} ++ ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ ++int apfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) ++{ ++ unsigned int flags = apfs_getflags(d_inode(dentry)); ++ ++ fileattr_fill_flags(fa, flags); ++ return 0; ++} ++ ++int apfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa) ++{ ++ struct inode *inode = d_inode(dentry); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ if (sb->s_flags & SB_RDONLY) ++ return -EROFS; ++ ++ if (fa->flags & ~(FS_APPEND_FL | FS_IMMUTABLE_FL | FS_NODUMP_FL)) ++ return -EOPNOTSUPP; ++ if (fileattr_has_fsx(fa)) ++ return -EOPNOTSUPP; ++ ++ lockdep_assert_held_write(&inode->i_rwsem); ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ apfs_inode_join_transaction(sb, inode); ++ apfs_setflags(inode, fa->flags); ++ inode->i_ctime = current_time(inode); ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(6, 3, 0) */ ++ ++int apfs_fileattr_get(struct dentry 
*dentry, struct fileattr *fa) ++{ ++ unsigned int flags = apfs_getflags(d_inode(dentry)); ++ ++ fileattr_fill_flags(fa, flags); ++ return 0; ++} ++ ++int apfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) ++{ ++ struct inode *inode = d_inode(dentry); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ if (sb->s_flags & SB_RDONLY) ++ return -EROFS; ++ ++ if (fa->flags & ~(FS_APPEND_FL | FS_IMMUTABLE_FL | FS_NODUMP_FL)) ++ return -EOPNOTSUPP; ++ if (fileattr_has_fsx(fa)) ++ return -EOPNOTSUPP; ++ ++ lockdep_assert_held_write(&inode->i_rwsem); ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ apfs_inode_join_transaction(sb, inode); ++ apfs_setflags(inode, fa->flags); ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0) ++ inode->i_ctime = current_time(inode); ++#else ++ inode_set_ctime_current(inode); ++#endif ++ ++ err = apfs_transaction_commit(sb); ++ if (err) ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) */ ++ ++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) */ ++ ++long apfs_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ ++ switch (cmd) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) ++ case FS_IOC_GETFLAGS: ++ return apfs_ioc_getflags(file, argp); ++ case FS_IOC_SETFLAGS: ++ return apfs_ioc_setflags(file, argp); ++#endif ++ case APFS_IOC_SET_DFLT_PFK: ++ return apfs_ioc_set_dflt_pfk(file, argp); ++ case APFS_IOC_SET_DIR_CLASS: ++ return apfs_ioc_set_dir_class(file, argp); ++ case APFS_IOC_GET_CLASS: ++ return apfs_ioc_get_class(file, argp); ++ case APFS_IOC_TAKE_SNAPSHOT: ++ return apfs_ioc_take_snapshot(file, argp); ++ default: ++ return -ENOTTY; ++ } ++} ++ ++long apfs_file_ioctl(struct file *file, 
unsigned int cmd, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ ++ switch (cmd) { ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 3, 0) ++ case FS_IOC_GETFLAGS: ++ return apfs_ioc_getflags(file, argp); ++ case FS_IOC_SETFLAGS: ++ return apfs_ioc_setflags(file, argp); ++#endif ++ case APFS_IOC_SET_PFK: ++ return apfs_ioc_set_pfk(file, argp); ++ case APFS_IOC_GET_CLASS: ++ return apfs_ioc_get_class(file, argp); ++ case APFS_IOC_GET_PFK: ++ return apfs_ioc_get_pfk(file, argp); ++ default: ++ return -ENOTTY; ++ } ++} +diff --git a/fs/apfs/key.c b/fs/apfs/key.c +new file mode 100644 +index 000000000..eb38cd2c1 +--- /dev/null ++++ b/fs/apfs/key.c +@@ -0,0 +1,334 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include "apfs.h" ++#include "unicode.h" ++ ++/** ++ * apfs_filename_cmp - Normalize and compare two APFS filenames ++ * @sb: filesystem superblock ++ * @name1: first name to compare ++ * @len1: length of @name1 ++ * @name2: second name to compare ++ * @len2: length of the @name2 ++ * ++ * Returns 0 if @name1 and @name2 are equal, or non-zero otherwise. ++ */ ++int apfs_filename_cmp(struct super_block *sb, ++ const char *name1, unsigned int len1, ++ const char *name2, unsigned int len2) ++{ ++ struct apfs_unicursor cursor1, cursor2; ++ bool case_fold = apfs_is_case_insensitive(sb); ++ ++ if (!apfs_is_normalization_insensitive(sb)) { ++ if (len1 != len2) ++ return -1; ++ return memcmp(name1, name2, len1); ++ } ++ ++ apfs_init_unicursor(&cursor1, name1, len1); ++ apfs_init_unicursor(&cursor2, name2, len2); ++ ++ while (1) { ++ unicode_t uni1, uni2; ++ ++ uni1 = apfs_normalize_next(&cursor1, case_fold); ++ uni2 = apfs_normalize_next(&cursor2, case_fold); ++ ++ if (uni1 != uni2) ++ return uni1 < uni2 ? 
-1 : 1; ++ if (!uni1) ++ return 0; ++ } ++} ++ ++/** ++ * apfs_keycmp - Compare two keys ++ * @k1: first key to compare ++ * @k2: second key to compare ++ * ++ * returns 0 if @k1 and @k2 are equal ++ * < 0 if @k1 comes before @k2 in the btree ++ * > 0 if @k1 comes after @k2 in the btree ++ */ ++int apfs_keycmp(struct apfs_key *k1, struct apfs_key *k2) ++{ ++ if (k1->id != k2->id) ++ return k1->id < k2->id ? -1 : 1; ++ if (k1->type != k2->type) ++ return k1->type < k2->type ? -1 : 1; ++ if (k1->number != k2->number) ++ return k1->number < k2->number ? -1 : 1; ++ if (!k1->name || !k2->name) ++ return 0; ++ ++ /* Normalization seems to be ignored here, even for directory records */ ++ return strcmp(k1->name, k2->name); ++} ++ ++/** ++ * apfs_read_cat_key - Parse an on-disk catalog key ++ * @raw: pointer to the raw key ++ * @size: size of the raw key ++ * @key: apfs_key structure to store the result ++ * @hashed: are the directory records hashed? ++ * ++ * Returns 0 on success, or a negative error code otherwise. 
++ */ ++int apfs_read_cat_key(void *raw, int size, struct apfs_key *key, bool hashed) ++{ ++ if (size < sizeof(struct apfs_key_header)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->id = apfs_cat_cnid((struct apfs_key_header *)raw); ++ key->type = apfs_cat_type((struct apfs_key_header *)raw); ++ ++ switch (key->type) { ++ case APFS_TYPE_DIR_REC: ++ if (hashed) { ++ if (size < sizeof(struct apfs_drec_hashed_key) + 1 || ++ *((char *)raw + size - 1) != 0) { ++ /* Filename must have NULL-termination */ ++ apfs_err(NULL, "invalid drec key (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ /* Name length is not used in key comparisons, only the hash */ ++ key->number = le32_to_cpu( ++ ((struct apfs_drec_hashed_key *)raw)->name_len_and_hash) & ++ APFS_DREC_HASH_MASK; ++ key->name = ((struct apfs_drec_hashed_key *)raw)->name; ++ } else { ++ if (size < sizeof(struct apfs_drec_key) + 1 || ++ *((char *)raw + size - 1) != 0) { ++ /* Filename must have NULL-termination */ ++ apfs_err(NULL, "invalid drec key (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ /* There's no hash */ ++ key->number = 0; ++ key->name = ((struct apfs_drec_key *)raw)->name; ++ } ++ break; ++ case APFS_TYPE_XATTR: ++ if (size < sizeof(struct apfs_xattr_key) + 1 || ++ *((char *)raw + size - 1) != 0) { ++ /* xattr name must have NULL-termination */ ++ apfs_err(NULL, "invalid xattr key (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->number = 0; ++ key->name = ((struct apfs_xattr_key *)raw)->name; ++ break; ++ case APFS_TYPE_FILE_EXTENT: ++ if (size != sizeof(struct apfs_file_extent_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->number = le64_to_cpu( ++ ((struct apfs_file_extent_key *)raw)->logical_addr); ++ key->name = NULL; ++ break; ++ case APFS_TYPE_SIBLING_LINK: ++ if (size != sizeof(struct apfs_sibling_link_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->number = 
le64_to_cpu( ++ ((struct apfs_sibling_link_key *)raw)->sibling_id); ++ key->name = NULL; ++ break; ++ default: ++ key->number = 0; ++ key->name = NULL; ++ break; ++ } ++ ++ return 0; ++} ++ ++int apfs_read_fext_key(void *raw, int size, struct apfs_key *key) ++{ ++ struct apfs_fext_tree_key *raw_key; ++ ++ if (size != sizeof(*raw_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ raw_key = raw; ++ ++ key->id = le64_to_cpu(raw_key->private_id); ++ key->type = 0; ++ key->number = le64_to_cpu(raw_key->logical_addr); ++ key->name = NULL; ++ return 0; ++} ++ ++/** ++ * apfs_read_free_queue_key - Parse an on-disk free queue key ++ * @raw: pointer to the raw key ++ * @size: size of the raw key ++ * @key: apfs_key structure to store the result ++ * ++ * Returns 0 on success, or a negative error code otherwise. ++ */ ++int apfs_read_free_queue_key(void *raw, int size, struct apfs_key *key) ++{ ++ struct apfs_spaceman_free_queue_key *raw_key; ++ ++ if (size < sizeof(struct apfs_spaceman_free_queue_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ raw_key = raw; ++ ++ key->id = le64_to_cpu(raw_key->sfqk_xid); ++ key->type = 0; ++ key->number = le64_to_cpu(raw_key->sfqk_paddr); ++ key->name = NULL; ++ ++ return 0; ++} ++ ++/** ++ * apfs_read_omap_key - Parse an on-disk object map key ++ * @raw: pointer to the raw key ++ * @size: size of the raw key ++ * @key: apfs_key structure to store the result ++ * ++ * Returns 0 on success, or a negative error code otherwise. 
++ */ ++int apfs_read_omap_key(void *raw, int size, struct apfs_key *key) ++{ ++ if (size < sizeof(struct apfs_omap_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ ++ key->id = le64_to_cpu(((struct apfs_omap_key *)raw)->ok_oid); ++ key->type = 0; ++ key->number = le64_to_cpu(((struct apfs_omap_key *)raw)->ok_xid); ++ key->name = NULL; ++ ++ return 0; ++} ++ ++/** ++ * apfs_read_extentref_key - Parse an on-disk extent reference tree key ++ * @raw: pointer to the raw key ++ * @size: size of the raw key ++ * @key: apfs_key structure to store the result ++ * ++ * Returns 0 on success, or a negative error code otherwise. ++ */ ++int apfs_read_extentref_key(void *raw, int size, struct apfs_key *key) ++{ ++ if (size != sizeof(struct apfs_phys_ext_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->id = apfs_cat_cnid((struct apfs_key_header *)raw); ++ key->type = apfs_cat_type((struct apfs_key_header *)raw); ++ key->number = 0; ++ key->name = NULL; ++ return 0; ++} ++ ++int apfs_read_snap_meta_key(void *raw, int size, struct apfs_key *key) ++{ ++ if (size < sizeof(struct apfs_key_header)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->id = apfs_cat_cnid((struct apfs_key_header *)raw); ++ key->type = apfs_cat_type((struct apfs_key_header *)raw); ++ key->number = 0; ++ ++ switch (key->type) { ++ case APFS_TYPE_SNAP_METADATA: ++ if (size != sizeof(struct apfs_snap_metadata_key)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->name = NULL; ++ return 0; ++ case APFS_TYPE_SNAP_NAME: ++ if (size < sizeof(struct apfs_snap_name_key) + 1 || *((char *)raw + size - 1) != 0) { ++ /* snapshot name must have NULL-termination */ ++ apfs_err(NULL, "invalid snap name key (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ key->name = ((struct apfs_snap_name_key *)raw)->name; ++ return 0; ++ default: ++ return -EFSCORRUPTED; ++ } ++} 
++ ++int apfs_read_omap_snap_key(void *raw, int size, struct apfs_key *key) ++{ ++ __le64 *xid = NULL; ++ ++ if (size != sizeof(*xid)) { ++ apfs_err(NULL, "bad key length (%d)", size); ++ return -EFSCORRUPTED; ++ } ++ xid = raw; ++ ++ key->id = le64_to_cpup(xid); ++ key->number = 0; ++ key->name = NULL; ++ key->type = 0; ++ return 0; ++} ++ ++/** ++ * apfs_init_drec_key - Initialize an in-memory key for a dentry query ++ * @sb: filesystem superblock ++ * @ino: inode number of the parent directory ++ * @name: filename (NULL for a multiple query) ++ * @name_len: filename length (0 if NULL) ++ * @key: apfs_key structure to initialize ++ */ ++void apfs_init_drec_key(struct super_block *sb, u64 ino, const char *name, ++ unsigned int name_len, struct apfs_key *key) ++{ ++ struct apfs_unicursor cursor; ++ bool case_fold = apfs_is_case_insensitive(sb); ++ u32 hash = 0xFFFFFFFF; ++ ++ key->id = ino; ++ key->type = APFS_TYPE_DIR_REC; ++ if (!apfs_is_normalization_insensitive(sb)) { ++ key->name = name; ++ key->number = 0; ++ return; ++ } ++ ++ /* To respect normalization, queries can only consider the hash */ ++ key->name = NULL; ++ ++ if (!name) { ++ key->number = 0; ++ return; ++ } ++ ++ apfs_init_unicursor(&cursor, name, name_len); ++ ++ while (1) { ++ unicode_t utf32; ++ ++ utf32 = apfs_normalize_next(&cursor, case_fold); ++ if (!utf32) ++ break; ++ ++ hash = crc32c(hash, &utf32, sizeof(utf32)); ++ } ++ ++ /* The filename length doesn't matter, so it's left as zero */ ++ key->number = hash << APFS_DREC_HASH_SHIFT; ++} +diff --git a/fs/apfs/libzbitmap.c b/fs/apfs/libzbitmap.c +new file mode 100644 +index 000000000..74799d8b1 +--- /dev/null ++++ b/fs/apfs/libzbitmap.c +@@ -0,0 +1,444 @@ ++// SPDX-License-Identifier: GPL-2.0+ OR MIT ++/* ++ * Copyright (C) 2022 Corellium LLC ++ * ++ * Author: Ernesto A. Fernández ++ * ++ * Ported from libzbitmap (https://github.com/eafer/libzbitmap). Only the ++ * decompression code is included. 
++ */ ++ ++#include ++#include ++#include "libzbitmap.h" ++ ++#define MIN(x, y) ((x) > (y) ? (y) : (x)) ++ ++#define ZBM_MAGIC "ZBM\x09" ++#define ZBM_MAGIC_SZ 4 ++ ++#define ZBM_MAX_DECMP_CHUNK_SIZE 0x8000 ++#define ZBM_MAX_DECMP_CHUNK_SIZE_BITS 15 ++ ++struct uint24 { ++ uint8_t low; ++ uint8_t mid; ++ uint8_t hig; ++}; ++ ++/* This header is shared by both compressed and decompressed chunks */ ++struct zbm_chunk_hdr { ++ struct uint24 len; /* Length of the chunk */ ++ struct uint24 decmp_len; /* Length of the chunk after decompression */ ++}; ++ ++/* The full header for compressed chunks */ ++struct zbm_cmp_chunk_hdr { ++ /* Shared with decompressed chunks */ ++ struct zbm_chunk_hdr hdr; ++ ++ /* Offset for each of the three metadata areas */ ++ struct uint24 meta_off_1; ++ struct uint24 meta_off_2; ++ struct uint24 meta_off_3; ++}; ++ ++/* Pointer to a half-byte */ ++struct nybl_ptr { ++ uint8_t *addr; /* Address of the byte */ ++ int nibble; /* Which of the two nibbles? */ ++}; ++ ++/* 0-2 and 0xf are not real bitmap indexes */ ++#define ZBM_BITMAP_COUNT (16 - 1 - 3) ++#define ZBM_BITMAP_BASE 3 ++#define ZBM_BITMAP_BYTECNT 17 ++#define ZBM_MAX_PERIOD_BYTECNT 2 ++ ++struct zbm_bmap { ++ uint8_t bitmap; /* The bitmap */ ++ uint8_t period_bytecnt; /* Read this many bytes to get the new period */ ++}; ++ ++struct zbm_state { ++ /* Updated during a chunk read */ ++ uint8_t *dest; /* Write the next byte here */ ++ size_t dest_left; /* Room left in destination buffer */ ++ uint32_t written; /* Bytes written so far for current chunk */ ++ uint16_t period; /* Repetition period for decompression, in bytes */ ++ ++ /* Updated right before a chunk read */ ++ const uint8_t *src_end; /* End of current chunk */ ++ uint32_t len; /* Length of the chunk */ ++ uint32_t decmp_len; /* Expected chunk length after decompression */ ++ ++ /* Updated after a chunk read */ ++ const uint8_t *src; /* Start of buffer, or current chunk if any */ ++ size_t src_left; /* Room left in the 
source buffer */ ++ size_t prewritten; /* Bytes written for previous chunks */ ++ ++ /* Current position in data and metadata areas for this chunk */ ++ const uint8_t *data; ++ const uint8_t *meta_1; ++ const uint8_t *meta_2; ++ struct nybl_ptr meta_3; ++ ++ /* Array of bitmaps for the current chunk */ ++ struct zbm_bmap bitmaps[ZBM_BITMAP_COUNT]; ++}; ++ ++static int zbm_check_magic(struct zbm_state *state) ++{ ++ if(state->src_left < ZBM_MAGIC_SZ) ++ return -EINVAL; ++ ++ if(memcmp(state->src, ZBM_MAGIC, ZBM_MAGIC_SZ)) ++ return -EINVAL; ++ ++ state->src += ZBM_MAGIC_SZ; ++ state->src_left -= ZBM_MAGIC_SZ; ++ return 0; ++} ++ ++static uint32_t zbm_u24_to_u32(struct uint24 n) ++{ ++ uint32_t res; ++ ++ res = n.hig; ++ res <<= 8; ++ res += n.mid; ++ res <<= 8; ++ res += n.low; ++ return res; ++} ++ ++/* Some chunks just have regular uncompressed data, but with a header */ ++static int zbm_chunk_is_uncompressed(struct zbm_state *state) ++{ ++ return state->len == state->decmp_len + sizeof(struct zbm_chunk_hdr); ++} ++ ++static int zbm_handle_uncompressed_chunk(struct zbm_state *state) ++{ ++ state->meta_1 = state->meta_2 = NULL; ++ state->meta_3.addr = NULL; ++ state->meta_3.nibble = 0; ++ state->data = state->src + sizeof(struct zbm_chunk_hdr); ++ memcpy(state->dest, state->data, state->decmp_len); ++ ++ state->dest += state->decmp_len; ++ state->dest_left -= state->decmp_len; ++ state->written = state->decmp_len; ++ return 0; ++} ++ ++static int zbm_read_nibble(struct nybl_ptr *nybl, const uint8_t *limit, uint8_t *result) ++{ ++ if(nybl->addr >= limit) ++ return -EINVAL; ++ ++ if(nybl->nibble == 0) { ++ *result = *nybl->addr & 0xf; ++ nybl->nibble = 1; ++ } else { ++ *result = (*nybl->addr >> 4) & 0xf; ++ nybl->nibble = 0; ++ ++nybl->addr; ++ } ++ return 0; ++} ++ ++static void zbm_rewind_nibble(struct nybl_ptr *nybl) ++{ ++ if(nybl->nibble == 0) { ++ nybl->nibble = 1; ++ --nybl->addr; ++ } else { ++ nybl->nibble = 0; ++ } ++} ++ ++static int 
zbm_apply_bitmap(struct zbm_state *state, struct zbm_bmap *bitmap) ++{ ++ int i; ++ ++ /* The periods are stored in the first metadata area */ ++ if(bitmap->period_bytecnt) { ++ state->period = 0; ++ for(i = 0; i < bitmap->period_bytecnt; ++i) { ++ if(state->meta_1 >= state->src_end) ++ return -EINVAL; ++ state->period |= *state->meta_1 << i * 8; ++ ++state->meta_1; ++ } ++ } ++ if(state->period == 0) ++ return -EINVAL; ++ ++ for(i = 0; i < 8; ++i) { ++ if(state->written == state->decmp_len) ++ break; ++ if(bitmap->bitmap & 1 << i) { ++ if(state->data >= state->src_end) ++ return -EINVAL; ++ *state->dest = *state->data; ++ ++state->data; ++ } else { ++ if(state->prewritten + state->written < state->period) ++ return -EINVAL; ++ *state->dest = *(state->dest - state->period); ++ } ++ ++state->dest; ++ --state->dest_left; ++ ++state->written; ++ } ++ ++ return 0; ++} ++ ++static int zbm_apply_bitmap_number(struct zbm_state *state, uint8_t bmp_num) ++{ ++ struct zbm_bmap next = {0}; ++ ++ /* Not a valid bitmap number (it signals a repetition) */ ++ if(bmp_num == 0xf) ++ return -EINVAL; ++ ++ /* An actual index in the bitmap array */ ++ if(bmp_num > ZBM_MAX_PERIOD_BYTECNT) ++ return zbm_apply_bitmap(state, &state->bitmaps[bmp_num - ZBM_BITMAP_BASE]); ++ ++ /* For 0-2, use the next bitmap in the second metadata area */ ++ if(state->meta_2 >= state->src_end) ++ return -EINVAL; ++ next.bitmap = *state->meta_2; ++ next.period_bytecnt = bmp_num; ++ ++state->meta_2; ++ return zbm_apply_bitmap(state, &next); ++} ++ ++/* Find out how many times we need to repeat the current bitmap operation */ ++static int zbm_read_repetition_count(struct zbm_state *state, uint16_t *repeat) ++{ ++ uint8_t nibble; ++ uint16_t total; ++ int err; ++ ++ /* Don't confuse the trailing bitmaps with a repetition count */ ++ if(state->decmp_len - state->written <= 8) { ++ *repeat = 1; ++ return 0; ++ } ++ ++ err = zbm_read_nibble(&state->meta_3, state->src_end, &nibble); ++ if(err) ++ return err; ++ ++
if(nibble != 0xf) { ++ /* No repetition count: the previous bitmap number gets applied once */ ++ zbm_rewind_nibble(&state->meta_3); ++ *repeat = 1; ++ return 0; ++ } ++ ++ /* ++ * Under this scheme, repeating a bitmap number 3 times wouldn't save any ++ * space, so the repetition count starts from 4. ++ */ ++ total = 4; ++ while(nibble == 0xf) { ++ err = zbm_read_nibble(&state->meta_3, state->src_end, &nibble); ++ if(err) ++ return err; ++ total += nibble; ++ if(total < nibble) ++ return -EINVAL; ++ } ++ ++ *repeat = total; ++ return 0; ++} ++ ++static int zbm_decompress_single_bitmap(struct zbm_state *state) ++{ ++ uint8_t bmp_num; ++ uint16_t repeat; ++ int i; ++ int err; ++ ++ /* The current nibble is the offset of the next bitmap to apply */ ++ err = zbm_read_nibble(&state->meta_3, state->src_end, &bmp_num); ++ if(err) ++ return err; ++ ++ err = zbm_read_repetition_count(state, &repeat); ++ if(err) ++ return err; ++ ++ for(i = 0; i < repeat; ++i) { ++ err = zbm_apply_bitmap_number(state, bmp_num); ++ if(err) ++ return err; ++ } ++ return 0; ++} ++ ++/* Pointer to a bit */ ++struct bit_ptr { ++ uint8_t *addr; /* Address of the byte */ ++ int offset; /* Bit number */ ++}; ++ ++/* This function does not perform boundary checks, the caller must do it */ ++static int zbm_read_single_bit(struct bit_ptr *bit) ++{ ++ int res = *bit->addr >> bit->offset & 1; ++ ++ ++bit->offset; ++ if(bit->offset != 8) ++ return res; ++ bit->offset = 0; ++ ++bit->addr; ++ return res; ++} ++ ++static int zbm_read_single_bitmap(struct bit_ptr *bit, const uint8_t *limit, struct zbm_bmap *result) ++{ ++ int i; ++ ++ result->bitmap = 0; ++ result->period_bytecnt = 0; ++ ++ /* The bitmap itself */ ++ for(i = 0; i < 8; ++i) { ++ if(bit->addr >= limit) ++ return -EINVAL; ++ result->bitmap |= zbm_read_single_bit(bit) << i; ++ } ++ ++ /* ++ * The two trailing bits tell us how many bytes to read for the next ++ * repetition period ++ */ ++ for(i = 0; i < 2; ++i) { ++ if(bit->addr >= limit) ++ 
return -EINVAL; ++ result->period_bytecnt |= zbm_read_single_bit(bit) << i; ++ } ++ ++ return 0; ++} ++ ++static int zbm_read_bitmaps(struct zbm_state *state) ++{ ++ struct bit_ptr bmap = {0}; ++ int err, i; ++ ++ if(state->len < ZBM_BITMAP_BYTECNT) ++ return -EINVAL; ++ ++ bmap.addr = (uint8_t *)state->src_end - ZBM_BITMAP_BYTECNT; ++ bmap.offset = 0; ++ ++ for(i = 0; i < ZBM_BITMAP_COUNT; ++i) { ++ err = zbm_read_single_bitmap(&bmap, state->src_end, &state->bitmaps[i]); ++ if(err) ++ return err; ++ if(state->bitmaps[i].period_bytecnt > ZBM_MAX_PERIOD_BYTECNT) ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int zbm_handle_compressed_chunk(struct zbm_state *state) ++{ ++ const struct zbm_cmp_chunk_hdr *hdr = NULL; ++ uint32_t meta_off_1, meta_off_2, meta_off_3; ++ int err; ++ ++ state->written = 0; ++ state->period = 8; ++ ++ if(state->len < sizeof(*hdr)) ++ return -EINVAL; ++ hdr = (struct zbm_cmp_chunk_hdr *)state->src; ++ state->data = state->src + sizeof(*hdr); ++ ++ meta_off_1 = zbm_u24_to_u32(hdr->meta_off_1); ++ meta_off_2 = zbm_u24_to_u32(hdr->meta_off_2); ++ meta_off_3 = zbm_u24_to_u32(hdr->meta_off_3); ++ if(meta_off_1 >= state->len || meta_off_2 >= state->len || meta_off_3 >= state->len) ++ return -EINVAL; ++ state->meta_1 = state->src + meta_off_1; ++ state->meta_2 = state->src + meta_off_2; ++ state->meta_3.addr = (uint8_t *)state->src + meta_off_3; ++ state->meta_3.nibble = 0; ++ ++ err = zbm_read_bitmaps(state); ++ if(err) ++ return err; ++ ++ while(state->written < state->decmp_len) { ++ err = zbm_decompress_single_bitmap(state); ++ if(err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static int zbm_handle_chunk(struct zbm_state *state) ++{ ++ const struct zbm_chunk_hdr *decmp_hdr = NULL; ++ ++ if(state->src_left < sizeof(*decmp_hdr)) ++ return -EINVAL; ++ decmp_hdr = (struct zbm_chunk_hdr *)state->src; ++ ++ state->len = zbm_u24_to_u32(decmp_hdr->len); ++ if(state->len > state->src_left) ++ return -EINVAL; ++ state->src_end = state->src + 
state->len; ++ ++ state->decmp_len = zbm_u24_to_u32(decmp_hdr->decmp_len); ++ if(state->decmp_len > ZBM_MAX_DECMP_CHUNK_SIZE) ++ return -EINVAL; ++ if(!state->dest) /* We just wanted the length, so we are done */ ++ return 0; ++ if(state->decmp_len > state->dest_left) ++ return -ERANGE; ++ ++ if(zbm_chunk_is_uncompressed(state)) ++ return zbm_handle_uncompressed_chunk(state); ++ ++ return zbm_handle_compressed_chunk(state); ++} ++ ++int zbm_decompress(void *dest, size_t dest_size, const void *src, size_t src_size, size_t *out_len) ++{ ++ struct zbm_state state = {0}; ++ int err; ++ ++ state.src = src; ++ state.src_left = src_size; ++ state.dest = dest; ++ state.dest_left = dest_size; ++ state.prewritten = 0; ++ ++ err = zbm_check_magic(&state); ++ if(err) ++ return err; ++ ++ /* The final chunk has zero decompressed length */ ++ do { ++ err = zbm_handle_chunk(&state); ++ if(err) ++ return err; ++ state.src += state.len; ++ state.src_left -= state.len; ++ state.prewritten += state.decmp_len; ++ } while(state.decmp_len != 0); ++ ++ *out_len = state.prewritten; ++ return 0; ++} +diff --git a/fs/apfs/libzbitmap.h b/fs/apfs/libzbitmap.h +new file mode 100644 +index 000000000..5188d00d3 +--- /dev/null ++++ b/fs/apfs/libzbitmap.h +@@ -0,0 +1,31 @@ ++/* SPDX-License-Identifier: GPL-2.0+ OR MIT */ ++/* ++ * Copyright (c) 2022 Corellium LLC ++ * ++ * Author: Ernesto A. Fernández ++ * ++ * Ported from libzbitmap (https://github.com/eafer/libzbitmap). Only the ++ * decompression code is included. ++ */ ++ ++#ifndef _LIBZBITMAP_H ++#define _LIBZBITMAP_H ++ ++#include ++#include ++ ++/** ++ * zbm_decompress - Decompress an LZBITMAP buffer ++ * @dest: destination buffer (may be NULL) ++ * @dest_size: size of the destination buffer ++ * @src: source buffer ++ * @src_size: size of the source buffer ++ * @out_len: on return, the length of the decompressed output ++ * ++ * May be called with a NULL destination buffer to retrieve the expected length ++ * of the decompressed data. 
Returns 0 on success, or a negative error code in ++ * case of failure. ++ */ ++int zbm_decompress(void *dest, size_t dest_size, const void *src, size_t src_size, size_t *out_len); ++ ++#endif /* _LIBZBITMAP_H */ +diff --git a/fs/apfs/lzfse/lzfse.h b/fs/apfs/lzfse/lzfse.h +new file mode 100644 +index 000000000..75a14c4fd +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse.h +@@ -0,0 +1,136 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++*/ ++ ++#ifndef LZFSE_H ++#define LZFSE_H ++ ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#if defined(_MSC_VER) && !defined(__clang__) ++# define __attribute__(X) ++# pragma warning(disable : 4068) ++#endif ++ ++#if defined(LZFSE_DLL) ++# if defined(_WIN32) || defined(__CYGWIN__) ++# if defined(LZFSE_DLL_EXPORTS) ++# define LZFSE_API __declspec(dllexport) ++# else ++# define LZFSE_API __declspec(dllimport) ++# endif ++# endif ++#endif ++ ++#if !defined(LZFSE_API) ++# if __GNUC__ >= 4 ++# define LZFSE_API __attribute__((visibility("default"))) ++# else ++# define LZFSE_API ++# endif ++#endif ++ ++/*! @abstract Get the required scratch buffer size to compress using LZFSE. */ ++size_t lzfse_encode_scratch_size(void); ++ ++/*! @abstract Compress a buffer using LZFSE. ++ * ++ * @param dst_buffer ++ * Pointer to the first byte of the destination buffer. ++ * ++ * @param dst_size ++ * Size of the destination buffer in bytes. ++ * ++ * @param src_buffer ++ * Pointer to the first byte of the source buffer. ++ * ++ * @param src_size ++ * Size of the source buffer in bytes. ++ * ++ * @param scratch_buffer ++ * If non-NULL, a pointer to scratch space for the routine to use as workspace; ++ * the routine may use up to lzfse_encode_scratch_size( ) bytes of workspace ++ * during its operation, and will not perform any internal allocations. If ++ * NULL, the routine may allocate its own memory to use during operation via ++ * a single call to malloc( ), and will release it by calling free( ) prior ++ * to returning. For most use, passing NULL is perfectly satisfactory, but if ++ * you require strict control over allocation, you will want to pass an ++ * explicit scratch buffer. ++ * ++ * @return ++ * The number of bytes written to the destination buffer if the input is ++ * successfully compressed. 
If the input cannot be compressed to fit into ++ * the provided buffer, or an error occurs, zero is returned, and the ++ * contents of dst_buffer are unspecified. */ ++size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, ++ size_t dst_size, ++ const uint8_t *__restrict src_buffer, ++ size_t src_size, ++ void *__restrict scratch_buffer); ++ ++/*! @abstract Get the required scratch buffer size to decompress using LZFSE. */ ++size_t lzfse_decode_scratch_size(void); ++ ++/*! @abstract Decompress a buffer using LZFSE. ++ * ++ * @param dst_buffer ++ * Pointer to the first byte of the destination buffer. ++ * ++ * @param dst_size ++ * Size of the destination buffer in bytes. ++ * ++ * @param src_buffer ++ * Pointer to the first byte of the source buffer. ++ * ++ * @param src_size ++ * Size of the source buffer in bytes. ++ * ++ * @param scratch_buffer ++ * If non-NULL, a pointer to scratch space for the routine to use as workspace; ++ * the routine may use up to lzfse_decode_scratch_size( ) bytes of workspace ++ * during its operation, and will not perform any internal allocations. If ++ * NULL, the routine may allocate its own memory to use during operation via ++ * a single call to malloc( ), and will release it by calling free( ) prior ++ * to returning. For most use, passing NULL is perfectly satisfactory, but if ++ * you require strict control over allocation, you will want to pass an ++ * explicit scratch buffer. ++ * ++ * @return ++ * The number of bytes written to the destination buffer if the input is ++ * successfully decompressed. If there is not enough space in the destination ++ * buffer to hold the entire expanded output, only the first dst_size bytes ++ * will be written to the buffer and dst_size is returned. Note that this ++ * behavior differs from that of lzfse_encode_buffer. 
*/ ++size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, ++ size_t dst_size, ++ const uint8_t *__restrict src_buffer, ++ size_t src_size, ++ void *__restrict scratch_buffer); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LZFSE_H */ +diff --git a/fs/apfs/lzfse/lzfse_decode.c b/fs/apfs/lzfse/lzfse_decode.c +new file mode 100644 +index 000000000..c3b4f3727 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_decode.c +@@ -0,0 +1,74 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++*/ ++ ++// LZFSE decode API ++ ++#include ++#include "lzfse.h" ++#include "lzfse_internal.h" ++ ++size_t lzfse_decode_scratch_size(void) { return sizeof(lzfse_decoder_state); } ++ ++static size_t lzfse_decode_buffer_with_scratch(uint8_t *__restrict dst_buffer, ++ size_t dst_size, const uint8_t *__restrict src_buffer, ++ size_t src_size, void *__restrict scratch_buffer) { ++ int status; ++ lzfse_decoder_state *s = (lzfse_decoder_state *)scratch_buffer; ++ memset(s, 0x00, sizeof(*s)); ++ ++ // Initialize state ++ s->src = src_buffer; ++ s->src_begin = src_buffer; ++ s->src_end = s->src + src_size; ++ s->dst = dst_buffer; ++ s->dst_begin = dst_buffer; ++ s->dst_end = dst_buffer + dst_size; ++ ++ // Decode ++ status = lzfse_decode(s); ++ if (status == LZFSE_STATUS_DST_FULL) ++ return dst_size; ++ if (status != LZFSE_STATUS_OK) ++ return 0; // failed ++ return (size_t)(s->dst - dst_buffer); // bytes written ++} ++ ++size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, ++ const uint8_t *__restrict src_buffer, ++ size_t src_size, void *__restrict scratch_buffer) { ++ int has_malloc = 0; ++ size_t ret = 0; ++ ++ // Deal with the possible NULL pointer ++ if (scratch_buffer == NULL) { ++ // +1 in case scratch size could be zero ++ scratch_buffer = kmalloc(lzfse_decode_scratch_size() + 1, GFP_KERNEL); ++ has_malloc = 1; ++ } ++ if (scratch_buffer == NULL) ++ return 0; ++ ret = lzfse_decode_buffer_with_scratch(dst_buffer, ++ dst_size, src_buffer, ++ src_size, scratch_buffer); ++ if (has_malloc) ++ kfree(scratch_buffer); ++ return ret; ++} +diff --git a/fs/apfs/lzfse/lzfse_decode_base.c b/fs/apfs/lzfse/lzfse_decode_base.c +new file mode 100644 +index 000000000..3f3bfe348 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_decode_base.c +@@ -0,0 +1,652 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. 
++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++#include "lzfse_internal.h" ++#include "lzvn_decode_base.h" ++ ++/*! @abstract Decode an entry value from next bits of stream. ++ * Return \p value, and set \p *nbits to the number of bits to consume ++ * (starting with LSB). 
*/ ++static inline int lzfse_decode_v1_freq_value(uint32_t bits, int *nbits) { ++ static const int8_t lzfse_freq_nbits_table[32] = { ++ 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14, ++ 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14}; ++ static const int8_t lzfse_freq_value_table[32] = { ++ 0, 2, 1, 4, 0, 3, 1, -1, 0, 2, 1, 5, 0, 3, 1, -1, ++ 0, 2, 1, 6, 0, 3, 1, -1, 0, 2, 1, 7, 0, 3, 1, -1}; ++ ++ uint32_t b = bits & 31; // lower 5 bits ++ int n = lzfse_freq_nbits_table[b]; ++ *nbits = n; ++ ++ // Special cases for > 5 bits encoding ++ if (n == 8) ++ return 8 + ((bits >> 4) & 0xf); ++ if (n == 14) ++ return 24 + ((bits >> 4) & 0x3ff); ++ ++ // <= 5 bits encoding from table ++ return lzfse_freq_value_table[b]; ++} ++ ++/*! @abstract Extracts up to 32 bits from a 64-bit field beginning at ++ * \p offset, and zero-extends them to a \p uint32_t. ++ * ++ * If we number the bits of \p v from 0 (least significant) to 63 (most ++ * significant), the result is bits \p offset to \p offset+nbits-1. */ ++static inline uint32_t get_field(uint64_t v, int offset, int nbits) { ++ if (nbits == 32) ++ return (uint32_t)(v >> offset); ++ return (uint32_t)((v >> offset) & ((1 << nbits) - 1)); ++} ++ ++/*! @abstract Return \c header_size field from a \c lzfse_compressed_block_header_v2. */ ++static inline uint32_t ++lzfse_decode_v2_header_size(const lzfse_compressed_block_header_v2 *in) { ++ return get_field(in->packed_fields[2], 0, 32); ++} ++ ++/*! @abstract Decode all fields from a \c lzfse_compressed_block_header_v2 to a ++ * \c lzfse_compressed_block_header_v1. ++ * @return 0 on success. ++ * @return -1 on failure. 
*/ ++static inline int lzfse_decode_v1(lzfse_compressed_block_header_v1 *out, ++ const lzfse_compressed_block_header_v2 *in) { ++ uint64_t v0; ++ uint64_t v1; ++ uint64_t v2; ++ uint16_t *dst = NULL; ++ const uint8_t *src = NULL; ++ const uint8_t *src_end = NULL; ++ uint32_t accum = 0; ++ int accum_nbits = 0; ++ int nbits = 0; ++ int i; ++ ++ // Clear all fields ++ memset(out, 0x00, sizeof(lzfse_compressed_block_header_v1)); ++ ++ v0 = in->packed_fields[0]; ++ v1 = in->packed_fields[1]; ++ v2 = in->packed_fields[2]; ++ ++ out->magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; ++ out->n_raw_bytes = in->n_raw_bytes; ++ ++ // Literal state ++ out->n_literals = get_field(v0, 0, 20); ++ out->n_literal_payload_bytes = get_field(v0, 20, 20); ++ out->literal_bits = (int)get_field(v0, 60, 3) - 7; ++ out->literal_state[0] = get_field(v1, 0, 10); ++ out->literal_state[1] = get_field(v1, 10, 10); ++ out->literal_state[2] = get_field(v1, 20, 10); ++ out->literal_state[3] = get_field(v1, 30, 10); ++ ++ // L,M,D state ++ out->n_matches = get_field(v0, 40, 20); ++ out->n_lmd_payload_bytes = get_field(v1, 40, 20); ++ out->lmd_bits = (int)get_field(v1, 60, 3) - 7; ++ out->l_state = get_field(v2, 32, 10); ++ out->m_state = get_field(v2, 42, 10); ++ out->d_state = get_field(v2, 52, 10); ++ ++ // Total payload size ++ out->n_payload_bytes = ++ out->n_literal_payload_bytes + out->n_lmd_payload_bytes; ++ ++ // Freq tables ++ dst = &(out->l_freq[0]); ++ src = &(in->freq[0]); ++ src_end = ++ (const uint8_t *)in + get_field(v2, 0, 32); // first byte after header ++ accum = 0; ++ accum_nbits = 0; ++ ++ // No freq tables? 
++ if (src_end == src) ++ return 0; // OK, freq tables were omitted ++ ++ for (i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + ++ LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; ++ i++) { ++ // Refill accum, one byte at a time, until we reach end of header, or accum ++ // is full ++ while (src < src_end && accum_nbits + 8 <= 32) { ++ accum |= (uint32_t)(*src) << accum_nbits; ++ accum_nbits += 8; ++ src++; ++ } ++ ++ // Decode and store value ++ nbits = 0; ++ dst[i] = lzfse_decode_v1_freq_value(accum, &nbits); ++ ++ if (nbits > accum_nbits) ++ return -1; // failed ++ ++ // Consume nbits bits ++ accum >>= nbits; ++ accum_nbits -= nbits; ++ } ++ ++ if (accum_nbits >= 8 || src != src_end) ++ return -1; // we need to end up exactly at the end of header, with less than ++ // 8 bits in accumulator ++ ++ return 0; ++} ++ ++static inline void copy(uint8_t *dst, const uint8_t *src, size_t length) { ++ const uint8_t *dst_end = dst + length; ++ do { ++ copy8(dst, src); ++ dst += 8; ++ src += 8; ++ } while (dst < dst_end); ++} ++ ++static int lzfse_decode_lmd(lzfse_decoder_state *s) { ++ lzfse_compressed_block_decoder_state *bs = &(s->compressed_lzfse_block_state); ++ fse_state l_state = bs->l_state; ++ fse_state m_state = bs->m_state; ++ fse_state d_state = bs->d_state; ++ fse_in_stream in = bs->lmd_in_stream; ++ const uint8_t *src_start = s->src_begin; ++ const uint8_t *src = s->src + bs->lmd_in_buf; ++ const uint8_t *lit = bs->current_literal; ++ uint8_t *dst = s->dst; ++ uint32_t symbols = bs->n_matches; ++ int32_t L = bs->l_value; ++ int32_t M = bs->m_value; ++ int32_t D = bs->d_value; ++ int32_t new_d; ++ ++ // Number of bytes remaining in the destination buffer, minus 32 to ++ // provide a margin of safety for using overlarge copies on the fast path. ++ // This is a signed quantity, and may go negative when we are close to the ++ // end of the buffer. That's OK; we're careful about how we handle it ++ // in the slow-and-careful match execution path. 
++ ptrdiff_t remaining_bytes = s->dst_end - dst - 32; ++ ++ // If L or M is non-zero, that means that we have already started decoding ++ // this block, and that we needed to interrupt decoding to get more space ++ // from the caller. There's a pending L, M, D triplet that we weren't ++ // able to completely process. Jump ahead to finish executing that symbol ++ // before decoding new values. ++ if (L || M) ++ goto ExecuteMatch; ++ ++ while (symbols > 0) { ++ int res; ++ // Decode the next L, M, D symbol from the input stream. ++ res = fse_in_flush(&in, &src, src_start); ++ if (res) { ++ return LZFSE_STATUS_ERROR; ++ } ++ L = fse_value_decode(&l_state, bs->l_decoder, &in); ++ if ((lit + L) >= (bs->literals + LZFSE_LITERALS_PER_BLOCK + 64)) { ++ return LZFSE_STATUS_ERROR; ++ } ++ res = fse_in_flush2(&in, &src, src_start); ++ if (res) { ++ return LZFSE_STATUS_ERROR; ++ } ++ M = fse_value_decode(&m_state, bs->m_decoder, &in); ++ res = fse_in_flush2(&in, &src, src_start); ++ if (res) { ++ return LZFSE_STATUS_ERROR; ++ } ++ new_d = fse_value_decode(&d_state, bs->d_decoder, &in); ++ D = new_d ? new_d : D; ++ symbols--; ++ ++ ExecuteMatch: ++ // Error if D is out of range, so that we avoid passing through ++ // uninitialized data or accesssing memory out of the destination ++ // buffer. ++ if ((uint32_t)D > dst + L - s->dst_begin) ++ return LZFSE_STATUS_ERROR; ++ ++ if (L + M <= remaining_bytes) { ++ size_t i; ++ // If we have plenty of space remaining, we can copy the literal ++ // and match with 16- and 32-byte operations, without worrying ++ // about writing off the end of the buffer. ++ remaining_bytes -= L + M; ++ copy(dst, lit, L); ++ dst += L; ++ lit += L; ++ // For the match, we have two paths; a fast copy by 16-bytes if ++ // the match distance is large enough to allow it, and a more ++ // careful path that applies a permutation to account for the ++ // possible overlap between source and destination if the distance ++ // is small. 
++ if (D >= 8 || D >= M) ++ copy(dst, dst - D, M); ++ else ++ for (i = 0; i < M; i++) ++ dst[i] = dst[i - D]; ++ dst += M; ++ } ++ ++ else { ++ // Otherwise, we are very close to the end of the destination ++ // buffer, so we cannot use wide copies that slop off the end ++ // of the region that we are copying to. First, we restore ++ // the true length remaining, rather than the sham value we've ++ // been using so far. ++ remaining_bytes += 32; ++ // Now, we process the literal. Either there's space for it ++ // or there isn't; if there is, we copy the whole thing and ++ // update all the pointers and lengths to reflect the copy. ++ if (L <= remaining_bytes) { ++ size_t i; ++ for (i = 0; i < L; i++) ++ dst[i] = lit[i]; ++ dst += L; ++ lit += L; ++ remaining_bytes -= L; ++ L = 0; ++ } ++ // There isn't enough space to fit the whole literal. Copy as ++ // much of it as we can, update the pointers and the value of ++ // L, and report that the destination buffer is full. Note that ++ // we always write right up to the end of the destination buffer. ++ else { ++ size_t i; ++ for (i = 0; i < remaining_bytes; i++) ++ dst[i] = lit[i]; ++ dst += remaining_bytes; ++ lit += remaining_bytes; ++ L -= remaining_bytes; ++ goto DestinationBufferIsFull; ++ } ++ // The match goes just like the literal does. We copy as much as ++ // we can byte-by-byte, and if we reach the end of the buffer ++ // before finishing, we return to the caller indicating that ++ // the buffer is full. ++ if (M <= remaining_bytes) { ++ size_t i; ++ for (i = 0; i < M; i++) ++ dst[i] = dst[i - D]; ++ dst += M; ++ remaining_bytes -= M; ++ M = 0; ++ (void)M; // no dead store warning ++ // We don't need to update M = 0, because there's no partial ++ // symbol to continue executing. Either we're at the end of ++ // the block, in which case we will never need to resume with ++ // this state, or we're going to decode another L, M, D set, ++ // which will overwrite M anyway. 
++ // ++ // But we still set M = 0, to maintain the post-condition. ++ } else { ++ size_t i; ++ for (i = 0; i < remaining_bytes; i++) ++ dst[i] = dst[i - D]; ++ dst += remaining_bytes; ++ M -= remaining_bytes; ++ DestinationBufferIsFull: ++ // Because we want to be able to resume decoding where we've left ++ // off (even in the middle of a literal or match), we need to ++ // update all of the block state fields with the current values ++ // so that we can resume execution from this point once the ++ // caller has given us more space to write into. ++ bs->l_value = L; ++ bs->m_value = M; ++ bs->d_value = D; ++ bs->l_state = l_state; ++ bs->m_state = m_state; ++ bs->d_state = d_state; ++ bs->lmd_in_stream = in; ++ bs->n_matches = symbols; ++ bs->lmd_in_buf = (uint32_t)(src - s->src); ++ bs->current_literal = lit; ++ s->dst = dst; ++ return LZFSE_STATUS_DST_FULL; ++ } ++ // Restore the "sham" decremented value of remaining_bytes and ++ // continue to the next L, M, D triple. We'll just be back in ++ // the careful path again, but this only happens at the very end ++ // of the buffer, so a little minor inefficiency here is a good ++ // tradeoff for simpler code. ++ remaining_bytes -= 32; ++ } ++ } ++ // Because we've finished with the whole block, we don't need to update ++ // any of the blockstate fields; they will not be used again. We just ++ // update the destination pointer in the state object and return. ++ s->dst = dst; ++ return LZFSE_STATUS_OK; ++} ++ ++int lzfse_decode(lzfse_decoder_state *s) { ++ while (1) { ++ // Are we inside a block? 
++ switch (s->block_magic) { ++ case LZFSE_NO_BLOCK_MAGIC: { ++ uint32_t magic; ++ // We need at least 4 bytes of magic number to identify next block ++ if (s->src + 4 > s->src_end) ++ return LZFSE_STATUS_SRC_EMPTY; // SRC truncated ++ magic = load4(s->src); ++ ++ if (magic == LZFSE_ENDOFSTREAM_BLOCK_MAGIC) { ++ s->src += 4; ++ s->end_of_stream = 1; ++ return LZFSE_STATUS_OK; // done ++ } ++ ++ if (magic == LZFSE_UNCOMPRESSED_BLOCK_MAGIC) { ++ uncompressed_block_decoder_state *bs = NULL; ++ if (s->src + sizeof(uncompressed_block_header) > s->src_end) ++ return LZFSE_STATUS_SRC_EMPTY; // SRC truncated ++ // Setup state for uncompressed block ++ bs = &(s->uncompressed_block_state); ++ bs->n_raw_bytes = ++ load4(s->src + offsetof(uncompressed_block_header, n_raw_bytes)); ++ s->src += sizeof(uncompressed_block_header); ++ s->block_magic = magic; ++ break; ++ } ++ ++ if (magic == LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC) { ++ lzvn_compressed_block_decoder_state *bs = NULL; ++ if (s->src + sizeof(lzvn_compressed_block_header) > s->src_end) ++ return LZFSE_STATUS_SRC_EMPTY; // SRC truncated ++ // Setup state for compressed LZVN block ++ bs = &(s->compressed_lzvn_block_state); ++ bs->n_raw_bytes = ++ load4(s->src + offsetof(lzvn_compressed_block_header, n_raw_bytes)); ++ bs->n_payload_bytes = load4( ++ s->src + offsetof(lzvn_compressed_block_header, n_payload_bytes)); ++ bs->d_prev = 0; ++ s->src += sizeof(lzvn_compressed_block_header); ++ s->block_magic = magic; ++ break; ++ } ++ ++ if (magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC || ++ magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { ++ lzfse_compressed_block_header_v1 header1; ++ size_t header_size = 0; ++ lzfse_compressed_block_decoder_state *bs = NULL; ++ ++ // Decode compressed headers ++ if (magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { ++ const lzfse_compressed_block_header_v2 *header2; ++ int decodeStatus; ++ // Check we have the fixed part of the structure ++ if (s->src + offsetof(lzfse_compressed_block_header_v2, freq) > s->src_end) 
++ return LZFSE_STATUS_SRC_EMPTY; // SRC truncated ++ ++ // Get size, and check we have the entire structure ++ header2 = (const lzfse_compressed_block_header_v2 *)s->src; // not aligned, OK ++ header_size = lzfse_decode_v2_header_size(header2); ++ if (s->src + header_size > s->src_end) ++ return LZFSE_STATUS_SRC_EMPTY; // SRC truncated ++ decodeStatus = lzfse_decode_v1(&header1, header2); ++ if (decodeStatus != 0) ++ return LZFSE_STATUS_ERROR; // failed ++ } else { ++ if (s->src + sizeof(lzfse_compressed_block_header_v1) > s->src_end) ++ return LZFSE_STATUS_SRC_EMPTY; // SRC truncated ++ memcpy(&header1, s->src, sizeof(lzfse_compressed_block_header_v1)); ++ header_size = sizeof(lzfse_compressed_block_header_v1); ++ } ++ ++ // We require the header + entire encoded block to be present in SRC ++ // during the entire block decoding. ++ // This can be relaxed somehow, if it becomes a limiting factor, at the ++ // price of a more complex state maintenance. ++ // For DST, we can't easily require space for the entire decoded block, ++ // because it may expand to something very very large. 
++ if (s->src + header_size + header1.n_literal_payload_bytes + ++ header1.n_lmd_payload_bytes > ++ s->src_end) ++ return LZFSE_STATUS_SRC_EMPTY; // need all encoded block ++ ++ // Sanity checks ++ if (lzfse_check_block_header_v1(&header1) != 0) { ++ return LZFSE_STATUS_ERROR; ++ } ++ ++ // Skip header ++ s->src += header_size; ++ ++ // Setup state for compressed V1 block from header ++ bs = &(s->compressed_lzfse_block_state); ++ bs->n_lmd_payload_bytes = header1.n_lmd_payload_bytes; ++ bs->n_matches = header1.n_matches; ++ fse_init_decoder_table(LZFSE_ENCODE_LITERAL_STATES, ++ LZFSE_ENCODE_LITERAL_SYMBOLS, ++ header1.literal_freq, bs->literal_decoder); ++ fse_init_value_decoder_table( ++ LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, header1.l_freq, ++ l_extra_bits, l_base_value, bs->l_decoder); ++ fse_init_value_decoder_table( ++ LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, header1.m_freq, ++ m_extra_bits, m_base_value, bs->m_decoder); ++ fse_init_value_decoder_table( ++ LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, header1.d_freq, ++ d_extra_bits, d_base_value, bs->d_decoder); ++ ++ // Decode literals ++ { ++ fse_in_stream in; ++ const uint8_t *buf_start = s->src_begin; ++ const uint8_t *buf; ++ fse_state state0; ++ fse_state state1; ++ fse_state state2; ++ fse_state state3; ++ uint32_t i; ++ ++ s->src += header1.n_literal_payload_bytes; // skip literal payload ++ buf = s->src; // read bits backwards from the end ++ if (fse_in_init(&in, header1.literal_bits, &buf, buf_start) != 0) ++ return LZFSE_STATUS_ERROR; ++ ++ state0 = header1.literal_state[0]; ++ state1 = header1.literal_state[1]; ++ state2 = header1.literal_state[2]; ++ state3 = header1.literal_state[3]; ++ ++ for (i = 0; i < header1.n_literals; i += 4) // n_literals is multiple of 4 ++ { ++#if FSE_IOSTREAM_64 ++ if (fse_in_flush(&in, &buf, buf_start) != 0) ++ return LZFSE_STATUS_ERROR; // [57, 64] bits ++ bs->literals[i + 0] = ++ fse_decode(&state0, bs->literal_decoder, &in); // 10b max ++ 
bs->literals[i + 1] = ++ fse_decode(&state1, bs->literal_decoder, &in); // 10b max ++ bs->literals[i + 2] = ++ fse_decode(&state2, bs->literal_decoder, &in); // 10b max ++ bs->literals[i + 3] = ++ fse_decode(&state3, bs->literal_decoder, &in); // 10b max ++#else ++ if (fse_in_flush(&in, &buf, buf_start) != 0) ++ return LZFSE_STATUS_ERROR; // [25, 23] bits ++ bs->literals[i + 0] = ++ fse_decode(&state0, bs->literal_decoder, &in); // 10b max ++ bs->literals[i + 1] = ++ fse_decode(&state1, bs->literal_decoder, &in); // 10b max ++ if (fse_in_flush(&in, &buf, buf_start) != 0) ++ return LZFSE_STATUS_ERROR; // [25, 23] bits ++ bs->literals[i + 2] = ++ fse_decode(&state2, bs->literal_decoder, &in); // 10b max ++ bs->literals[i + 3] = ++ fse_decode(&state3, bs->literal_decoder, &in); // 10b max ++#endif ++ } ++ ++ bs->current_literal = bs->literals; ++ } // literals ++ ++ // SRC is not incremented to skip the LMD payload, since we need it ++ // during block decode. ++ // We will increment SRC at the end of the block only after this point. ++ ++ // Initialize the L,M,D decode stream, do not start decoding matches ++ // yet, and store decoder state ++ { ++ fse_in_stream in; ++ // read bits backwards from the end ++ const uint8_t *buf = s->src + header1.n_lmd_payload_bytes; ++ if (fse_in_init(&in, header1.lmd_bits, &buf, s->src) != 0) ++ return LZFSE_STATUS_ERROR; ++ ++ bs->l_state = header1.l_state; ++ bs->m_state = header1.m_state; ++ bs->d_state = header1.d_state; ++ bs->lmd_in_buf = (uint32_t)(buf - s->src); ++ bs->l_value = bs->m_value = 0; ++ // Initialize D to an illegal value so we can't erroneously use ++ // an uninitialized "previous" value. 
++ bs->d_value = -1; ++ bs->lmd_in_stream = in; ++ } ++ ++ s->block_magic = magic; ++ break; ++ } ++ ++ // Here we have an invalid magic number ++ return LZFSE_STATUS_ERROR; ++ } // LZFSE_NO_BLOCK_MAGIC ++ ++ case LZFSE_UNCOMPRESSED_BLOCK_MAGIC: { ++ uncompressed_block_decoder_state *bs = &(s->uncompressed_block_state); ++ ++ // Compute the size (in bytes) of the data that we will actually copy. ++ // This size is minimum(bs->n_raw_bytes, space in src, space in dst). ++ ++ uint32_t copy_size = bs->n_raw_bytes; // bytes left to copy ++ size_t src_space, dst_space; ++ if (copy_size == 0) { ++ s->block_magic = 0; ++ break; ++ } // end of block ++ ++ if (s->src_end <= s->src) ++ return LZFSE_STATUS_SRC_EMPTY; // need more SRC data ++ src_space = s->src_end - s->src; ++ if (copy_size > src_space) ++ copy_size = (uint32_t)src_space; // limit to SRC data (> 0) ++ ++ if (s->dst_end <= s->dst) ++ return LZFSE_STATUS_DST_FULL; // need more DST capacity ++ dst_space = s->dst_end - s->dst; ++ if (copy_size > dst_space) ++ copy_size = (uint32_t)dst_space; // limit to DST capacity (> 0) ++ ++ // Now that we know that the copy size is bounded to the source and ++ // dest buffers, go ahead and copy the data. 
++ // We always have copy_size > 0 here ++ memcpy(s->dst, s->src, copy_size); ++ s->src += copy_size; ++ s->dst += copy_size; ++ bs->n_raw_bytes -= copy_size; ++ ++ break; ++ } // LZFSE_UNCOMPRESSED_BLOCK_MAGIC ++ ++ case LZFSE_COMPRESSEDV1_BLOCK_MAGIC: ++ case LZFSE_COMPRESSEDV2_BLOCK_MAGIC: { ++ int status; ++ lzfse_compressed_block_decoder_state *bs = ++ &(s->compressed_lzfse_block_state); ++ // Require the entire LMD payload to be in SRC ++ if (s->src_end <= s->src || ++ bs->n_lmd_payload_bytes > (size_t)(s->src_end - s->src)) ++ return LZFSE_STATUS_SRC_EMPTY; ++ ++ status = lzfse_decode_lmd(s); ++ if (status != LZFSE_STATUS_OK) ++ return status; ++ ++ s->block_magic = LZFSE_NO_BLOCK_MAGIC; ++ s->src += bs->n_lmd_payload_bytes; // to next block ++ break; ++ } // LZFSE_COMPRESSEDV1_BLOCK_MAGIC || LZFSE_COMPRESSEDV2_BLOCK_MAGIC ++ ++ case LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC: { ++ lzvn_compressed_block_decoder_state *bs = ++ &(s->compressed_lzvn_block_state); ++ lzvn_decoder_state dstate; ++ size_t src_used, dst_used; ++ if (bs->n_payload_bytes > 0 && s->src_end <= s->src) ++ return LZFSE_STATUS_SRC_EMPTY; // need more SRC data ++ ++ // Init LZVN decoder state ++ memset(&dstate, 0x00, sizeof(dstate)); ++ dstate.src = s->src; ++ dstate.src_end = s->src_end; ++ if (dstate.src_end - s->src > bs->n_payload_bytes) ++ dstate.src_end = s->src + bs->n_payload_bytes; // limit to payload bytes ++ dstate.dst_begin = s->dst_begin; ++ dstate.dst = s->dst; ++ dstate.dst_end = s->dst_end; ++ if (dstate.dst_end - s->dst > bs->n_raw_bytes) ++ dstate.dst_end = s->dst + bs->n_raw_bytes; // limit to raw bytes ++ dstate.d_prev = bs->d_prev; ++ dstate.end_of_stream = 0; ++ ++ // Run LZVN decoder ++ lzvn_decode(&dstate); ++ ++ // Update our state ++ src_used = dstate.src - s->src; ++ dst_used = dstate.dst - s->dst; ++ if (src_used > bs->n_payload_bytes || dst_used > bs->n_raw_bytes) ++ return LZFSE_STATUS_ERROR; // sanity check ++ s->src = dstate.src; ++ s->dst = dstate.dst; ++ 
bs->n_payload_bytes -= (uint32_t)src_used; ++ bs->n_raw_bytes -= (uint32_t)dst_used; ++ bs->d_prev = (uint32_t)dstate.d_prev; ++ ++ // Test end of block ++ if (bs->n_payload_bytes == 0 && bs->n_raw_bytes == 0 && ++ dstate.end_of_stream) { ++ s->block_magic = 0; ++ break; ++ } // block done ++ ++ // Check for invalid state ++ if (bs->n_payload_bytes == 0 || bs->n_raw_bytes == 0 || ++ dstate.end_of_stream) ++ return LZFSE_STATUS_ERROR; ++ ++ // Here, block is not done and state is valid, so we need more space in dst. ++ return LZFSE_STATUS_DST_FULL; ++ } ++ ++ default: ++ return LZFSE_STATUS_ERROR; // invalid magic ++ ++ } // switch magic ++ ++ } // block loop ++ ++ return LZFSE_STATUS_OK; ++} +diff --git a/fs/apfs/lzfse/lzfse_encode.c b/fs/apfs/lzfse/lzfse_encode.c +new file mode 100644 +index 000000000..f0742ec09 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_encode.c +@@ -0,0 +1,163 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++// LZFSE encode API ++ ++#include "lzfse.h" ++#include "lzfse_internal.h" ++ ++size_t lzfse_encode_scratch_size() { ++ size_t s1 = sizeof(lzfse_encoder_state); ++ size_t s2 = lzvn_encode_scratch_size(); ++ return (s1 > s2) ? s1 : s2; // max(lzfse,lzvn) ++} ++ ++size_t lzfse_encode_buffer_with_scratch(uint8_t *__restrict dst_buffer, ++ size_t dst_size, const uint8_t *__restrict src_buffer, ++ size_t src_size, void *__restrict scratch_buffer) { ++ const size_t original_size = src_size; ++ ++ // If input is really really small, go directly to uncompressed buffer ++ // (because LZVN will refuse to encode it, and we will report a failure) ++ if (src_size < LZVN_ENCODE_MIN_SRC_SIZE) ++ goto try_uncompressed; ++ ++ // If input is too small, try encoding with LZVN ++ if (src_size < LZFSE_ENCODE_LZVN_THRESHOLD) { ++ // need header + end-of-stream marker ++ size_t extra_size = 4 + sizeof(lzvn_compressed_block_header); ++ if (dst_size <= extra_size) ++ goto try_uncompressed; // DST is really too small, give up ++ ++ size_t sz = lzvn_encode_buffer( ++ dst_buffer + sizeof(lzvn_compressed_block_header), ++ dst_size - extra_size, src_buffer, src_size, scratch_buffer); ++ if (sz == 0 || sz >= src_size) ++ goto try_uncompressed; // failed, or no compression, fall back to ++ // uncompressed block ++ ++ // If we could encode, setup header and end-of-stream marker (we left room ++ // for them, no need to test) ++ lzvn_compressed_block_header header; ++ 
header.magic = LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC; ++ header.n_raw_bytes = (uint32_t)src_size; ++ header.n_payload_bytes = (uint32_t)sz; ++ memcpy(dst_buffer, &header, sizeof(header)); ++ store4(dst_buffer + sizeof(lzvn_compressed_block_header) + sz, ++ LZFSE_ENDOFSTREAM_BLOCK_MAGIC); ++ ++ return sz + extra_size; ++ } ++ ++ // Try encoding with LZFSE ++ { ++ lzfse_encoder_state *state = scratch_buffer; ++ memset(state, 0x00, sizeof *state); ++ if (lzfse_encode_init(state) != LZFSE_STATUS_OK) ++ goto try_uncompressed; ++ state->dst = dst_buffer; ++ state->dst_begin = dst_buffer; ++ state->dst_end = &dst_buffer[dst_size]; ++ state->src = src_buffer; ++ state->src_encode_i = 0; ++ ++ if (src_size >= 0xffffffffU) { ++ // lzfse only uses 32 bits for offsets internally, so if the input ++ // buffer is really huge, we need to process it in smaller chunks. ++ // Note that we switch over to this path for sizes much smaller ++ // 2GB because it's actually faster to change algorithms well before ++ // it's necessary for correctness. ++ // The first chunk, we just process normally. ++ const lzfse_offset encoder_block_size = 262144; ++ state->src_end = encoder_block_size; ++ if (lzfse_encode_base(state) != LZFSE_STATUS_OK) ++ goto try_uncompressed; ++ src_size -= encoder_block_size; ++ while (src_size >= encoder_block_size) { ++ // All subsequent chunks require a translation to keep the offsets ++ // from getting too big. Note that we are always going from ++ // encoder_block_size up to 2*encoder_block_size so that the ++ // offsets remain positive (as opposed to resetting to zero and ++ // having negative offsets). ++ state->src_end = 2 * encoder_block_size; ++ if (lzfse_encode_base(state) != LZFSE_STATUS_OK) ++ goto try_uncompressed; ++ lzfse_encode_translate(state, encoder_block_size); ++ src_size -= encoder_block_size; ++ } ++ // Set the end for the final chunk. 
++ state->src_end = encoder_block_size + (lzfse_offset)src_size; ++ } ++ // If the source buffer is small enough to use 32-bit offsets, we simply ++ // encode the whole thing in a single chunk. ++ else ++ state->src_end = (lzfse_offset)src_size; ++ // This is either the trailing chunk (if the source file is huge), or ++ // the whole source file. ++ if (lzfse_encode_base(state) != LZFSE_STATUS_OK) ++ goto try_uncompressed; ++ if (lzfse_encode_finish(state) != LZFSE_STATUS_OK) ++ goto try_uncompressed; ++ // No error occured, return compressed size. ++ return state->dst - dst_buffer; ++ } ++ ++try_uncompressed: ++ // Compression failed for some reason. If we can fit the data into the ++ // output buffer uncompressed, go ahead and do that instead. ++ if (original_size + 12 <= dst_size && original_size < INT32_MAX) { ++ uncompressed_block_header header = {.magic = LZFSE_UNCOMPRESSED_BLOCK_MAGIC, ++ .n_raw_bytes = (uint32_t)src_size}; ++ uint8_t *dst_end = dst_buffer; ++ memcpy(dst_end, &header, sizeof header); ++ dst_end += sizeof header; ++ memcpy(dst_end, src_buffer, original_size); ++ dst_end += original_size; ++ store4(dst_end, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); ++ dst_end += 4; ++ return dst_end - dst_buffer; ++ } ++ ++ // Otherwise, there's nothing we can do, so return zero. 
++ return 0; ++} ++ ++size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, ++ const uint8_t *__restrict src_buffer, ++ size_t src_size, void *__restrict scratch_buffer) { ++ int has_malloc = 0; ++ size_t ret = 0; ++ ++ // Deal with the possible NULL pointer ++ if (scratch_buffer == NULL) { ++ // +1 in case scratch size could be zero ++ scratch_buffer = malloc(lzfse_encode_scratch_size() + 1); ++ has_malloc = 1; ++ } ++ if (scratch_buffer == NULL) ++ return 0; ++ ret = lzfse_encode_buffer_with_scratch(dst_buffer, ++ dst_size, src_buffer, ++ src_size, scratch_buffer); ++ if (has_malloc) ++ free(scratch_buffer); ++ return ret; ++} +diff --git a/fs/apfs/lzfse/lzfse_encode_base.c b/fs/apfs/lzfse/lzfse_encode_base.c +new file mode 100644 +index 000000000..a813fbabc +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_encode_base.c +@@ -0,0 +1,826 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++// LZFSE encoder ++ ++#include "lzfse_internal.h" ++#include "lzfse_encode_tables.h" ++ ++/*! @abstract Get hash in range [0, LZFSE_ENCODE_HASH_VALUES-1] from 4 bytes in X. */ ++static inline uint32_t hashX(uint32_t x) { ++ return (x * 2654435761U) >> ++ (32 - LZFSE_ENCODE_HASH_BITS); // Knuth multiplicative hash ++} ++ ++/*! @abstract Return value with all 0 except nbits<=32 unsigned bits from V ++ * at bit offset OFFSET. ++ * V is assumed to fit on nbits bits. */ ++static inline uint64_t setField(uint32_t v, int offset, int nbits) { ++ return ((uint64_t)v << (uint64_t)offset); ++} ++ ++/*! @abstract Encode all fields, except freq, from a ++ * lzfse_compressed_block_header_v1 to a lzfse_compressed_block_header_v2. ++ * All but the header_size and freq fields of the output are modified. 
*/ ++static inline void ++lzfse_encode_v1_state(lzfse_compressed_block_header_v2 *out, ++ const lzfse_compressed_block_header_v1 *in) { ++ out->magic = LZFSE_COMPRESSEDV2_BLOCK_MAGIC; ++ out->n_raw_bytes = in->n_raw_bytes; ++ ++ // Literal state ++ out->packed_fields[0] = setField(in->n_literals, 0, 20) | ++ setField(in->n_literal_payload_bytes, 20, 20) | ++ setField(in->n_matches, 40, 20) | ++ setField(7 + in->literal_bits, 60, 3); ++ out->packed_fields[1] = setField(in->literal_state[0], 0, 10) | ++ setField(in->literal_state[1], 10, 10) | ++ setField(in->literal_state[2], 20, 10) | ++ setField(in->literal_state[3], 30, 10) | ++ setField(in->n_lmd_payload_bytes, 40, 20) | ++ setField(7 + in->lmd_bits, 60, 3); ++ out->packed_fields[2] = out->packed_fields[2] // header_size already stored in v[2] ++ | setField(in->l_state, 32, 10) | setField(in->m_state, 42, 10) | ++ setField(in->d_state, 52, 10); ++} ++ ++/*! @abstract Encode an entry value in a freq table. Return bits, and sets ++ * *nbits to the number of bits to serialize. */ ++static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) { ++ // Fixed Huffman code, bits are read from LSB. ++ // Note that we rely on the position of the first '0' bit providing the number ++ // of bits. ++ switch (value) { ++ case 0: ++ *nbits = 2; ++ return 0; // 0.0 ++ case 1: ++ *nbits = 2; ++ return 2; // 1.0 ++ case 2: ++ *nbits = 3; ++ return 1; // 0.01 ++ case 3: ++ *nbits = 3; ++ return 5; // 1.01 ++ case 4: ++ *nbits = 5; ++ return 3; // 00.011 ++ case 5: ++ *nbits = 5; ++ return 11; // 01.011 ++ case 6: ++ *nbits = 5; ++ return 19; // 10.011 ++ case 7: ++ *nbits = 5; ++ return 27; // 11.011 ++ default: ++ break; ++ } ++ if (value < 24) { ++ *nbits = 8; // 4+4 ++ return 7 + ((value - 8) << 4); // xxxx.0111 ++ } ++ // 24..1047 ++ *nbits = 14; // 4+10 ++ return ((value - 24) << 4) + 15; // xxxxxxxxxx.1111 ++} ++ ++/*! 
@abstract Encode all tables from a lzfse_compressed_block_header_v1 ++ * to a lzfse_compressed_block_header_v2. ++ * Only the header_size and freq fields of the output are modified. ++ * @return Size of the lzfse_compressed_block_header_v2 */ ++static inline size_t ++lzfse_encode_v1_freq_table(lzfse_compressed_block_header_v2 *out, ++ const lzfse_compressed_block_header_v1 *in) { ++ uint32_t accum = 0; ++ int accum_nbits = 0; ++ const uint16_t *src = &(in->l_freq[0]); // first value of first table (struct ++ // will not be modified, so this code ++ // will remain valid) ++ uint8_t *dst = &(out->freq[0]); ++ for (int i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + ++ LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; ++ i++) { ++ // Encode one value to accum ++ int nbits = 0; ++ uint32_t bits = lzfse_encode_v1_freq_value(src[i], &nbits); ++ accum |= bits << accum_nbits; ++ accum_nbits += nbits; ++ ++ // Store bytes from accum to output buffer ++ while (accum_nbits >= 8) { ++ *dst = (uint8_t)(accum & 0xff); ++ accum >>= 8; ++ accum_nbits -= 8; ++ dst++; ++ } ++ } ++ // Store final byte if needed ++ if (accum_nbits > 0) { ++ *dst = (uint8_t)(accum & 0xff); ++ dst++; ++ } ++ ++ // Return final size of out ++ uint32_t header_size = (uint32_t)(dst - (uint8_t *)out); ++ out->packed_fields[0] = 0; ++ out->packed_fields[1] = 0; ++ out->packed_fields[2] = setField(header_size, 0, 32); ++ ++ return header_size; ++} ++ ++// We need to limit forward match length to make sure it won't split into a too ++// large number of LMD. ++// The limit itself is quite large, so it doesn't really impact compression ++// ratio. ++// The matches may still be expanded backwards by a few bytes, so the final ++// length may be greater than this limit, which is OK. ++#define LZFSE_ENCODE_MAX_MATCH_LENGTH (100 * LZFSE_ENCODE_MAX_M_VALUE) ++ ++// =============================================================== ++// Encoder back end ++ ++/*! 
@abstract Encode matches stored in STATE into a compressed/uncompressed block. ++ * @return LZFSE_STATUS_OK on success. ++ * @return LZFSE_STATUS_DST_FULL and restore initial state if output buffer is ++ * full. */ ++static int lzfse_encode_matches(lzfse_encoder_state *s) { ++ if (s->n_literals == 0 && s->n_matches == 0) ++ return LZFSE_STATUS_OK; // nothing to store, OK ++ ++ uint32_t l_occ[LZFSE_ENCODE_L_SYMBOLS]; ++ uint32_t m_occ[LZFSE_ENCODE_M_SYMBOLS]; ++ uint32_t d_occ[LZFSE_ENCODE_D_SYMBOLS]; ++ uint32_t literal_occ[LZFSE_ENCODE_LITERAL_SYMBOLS]; ++ fse_encoder_entry l_encoder[LZFSE_ENCODE_L_SYMBOLS]; ++ fse_encoder_entry m_encoder[LZFSE_ENCODE_M_SYMBOLS]; ++ fse_encoder_entry d_encoder[LZFSE_ENCODE_D_SYMBOLS]; ++ fse_encoder_entry literal_encoder[LZFSE_ENCODE_LITERAL_SYMBOLS]; ++ int ok = 1; ++ lzfse_compressed_block_header_v1 header1 = {0}; ++ lzfse_compressed_block_header_v2 *header2 = 0; ++ ++ // Keep initial state to be able to restore it if DST full ++ uint8_t *dst0 = s->dst; ++ uint32_t n_literals0 = s->n_literals; ++ ++ // Add 0x00 literals until n_literals multiple of 4, since we encode 4 ++ // interleaved literal streams. 
++ while (s->n_literals & 3) { ++ uint32_t n = s->n_literals++; ++ s->literals[n] = 0; ++ } ++ ++ // Encode previous distance ++ uint32_t d_prev = 0; ++ for (uint32_t i = 0; i < s->n_matches; i++) { ++ uint32_t d = s->d_values[i]; ++ if (d == d_prev) ++ s->d_values[i] = 0; ++ else ++ d_prev = d; ++ } ++ ++ // Clear occurrence tables ++ memset(l_occ, 0, sizeof(l_occ)); ++ memset(m_occ, 0, sizeof(m_occ)); ++ memset(d_occ, 0, sizeof(d_occ)); ++ memset(literal_occ, 0, sizeof(literal_occ)); ++ ++ // Update occurrence tables in all 4 streams (L,M,D,literals) ++ uint32_t l_sum = 0; ++ uint32_t m_sum = 0; ++ for (uint32_t i = 0; i < s->n_matches; i++) { ++ uint32_t l = s->l_values[i]; ++ l_sum += l; ++ l_occ[l_base_from_value(l)]++; ++ } ++ for (uint32_t i = 0; i < s->n_matches; i++) { ++ uint32_t m = s->m_values[i]; ++ m_sum += m; ++ m_occ[m_base_from_value(m)]++; ++ } ++ for (uint32_t i = 0; i < s->n_matches; i++) ++ d_occ[d_base_from_value(s->d_values[i])]++; ++ for (uint32_t i = 0; i < s->n_literals; i++) ++ literal_occ[s->literals[i]]++; ++ ++ // Make sure we have enough room for a _full_ V2 header ++ if (s->dst + sizeof(lzfse_compressed_block_header_v2) > s->dst_end) { ++ ok = 0; ++ goto END; ++ } ++ header2 = (lzfse_compressed_block_header_v2 *)(s->dst); ++ ++ // Setup header V1 ++ header1.magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; ++ header1.n_raw_bytes = m_sum + l_sum; ++ header1.n_matches = s->n_matches; ++ header1.n_literals = s->n_literals; ++ ++ // Normalize occurrence tables to freq tables ++ fse_normalize_freq(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, l_occ, ++ header1.l_freq); ++ fse_normalize_freq(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, m_occ, ++ header1.m_freq); ++ fse_normalize_freq(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, d_occ, ++ header1.d_freq); ++ fse_normalize_freq(LZFSE_ENCODE_LITERAL_STATES, LZFSE_ENCODE_LITERAL_SYMBOLS, ++ literal_occ, header1.literal_freq); ++ ++ // Compress freq tables to V2 header, and get actual size of V2 
header ++ s->dst += lzfse_encode_v1_freq_table(header2, &header1); ++ ++ // Initialize encoder tables from freq tables ++ fse_init_encoder_table(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, ++ header1.l_freq, l_encoder); ++ fse_init_encoder_table(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, ++ header1.m_freq, m_encoder); ++ fse_init_encoder_table(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, ++ header1.d_freq, d_encoder); ++ fse_init_encoder_table(LZFSE_ENCODE_LITERAL_STATES, ++ LZFSE_ENCODE_LITERAL_SYMBOLS, header1.literal_freq, ++ literal_encoder); ++ ++ // Encode literals ++ { ++ fse_out_stream out; ++ fse_out_init(&out); ++ fse_state state0, state1, state2, state3; ++ state0 = state1 = state2 = state3 = 0; ++ ++ uint8_t *buf = s->dst; ++ uint32_t i = s->n_literals; // I multiple of 4 ++ // We encode starting from the last literal so we can decode starting from ++ // the first ++ while (i > 0) { ++ if (buf + 16 > s->dst_end) { ++ ok = 0; ++ goto END; ++ } // out full ++ i -= 4; ++ fse_encode(&state3, literal_encoder, &out, s->literals[i + 3]); // 10b ++ fse_encode(&state2, literal_encoder, &out, s->literals[i + 2]); // 10b ++#if !FSE_IOSTREAM_64 ++ fse_out_flush(&out, &buf); ++#endif ++ fse_encode(&state1, literal_encoder, &out, s->literals[i + 1]); // 10b ++ fse_encode(&state0, literal_encoder, &out, s->literals[i + 0]); // 10b ++ fse_out_flush(&out, &buf); ++ } ++ fse_out_finish(&out, &buf); ++ ++ // Update header with final encoder state ++ header1.literal_bits = out.accum_nbits; // [-7, 0] ++ header1.n_literal_payload_bytes = (uint32_t)(buf - s->dst); ++ header1.literal_state[0] = state0; ++ header1.literal_state[1] = state1; ++ header1.literal_state[2] = state2; ++ header1.literal_state[3] = state3; ++ ++ // Update state ++ s->dst = buf; ++ ++ } // literals ++ ++ // Encode L,M,D ++ { ++ fse_out_stream out; ++ fse_out_init(&out); ++ fse_state l_state, m_state, d_state; ++ l_state = m_state = d_state = 0; ++ ++ uint8_t *buf = s->dst; ++ uint32_t i = 
s->n_matches; ++ ++ // Add 8 padding bytes to the L,M,D payload ++ if (buf + 8 > s->dst_end) { ++ ok = 0; ++ goto END; ++ } // out full ++ store8(buf, 0); ++ buf += 8; ++ ++ // We encode starting from the last match so we can decode starting from the ++ // first ++ while (i > 0) { ++ if (buf + 16 > s->dst_end) { ++ ok = 0; ++ goto END; ++ } // out full ++ i -= 1; ++ ++ // D requires 23b max ++ int32_t d_value = s->d_values[i]; ++ uint8_t d_symbol = d_base_from_value(d_value); ++ int32_t d_nbits = d_extra_bits[d_symbol]; ++ int32_t d_bits = d_value - d_base_value[d_symbol]; ++ fse_out_push(&out, d_nbits, d_bits); ++ fse_encode(&d_state, d_encoder, &out, d_symbol); ++#if !FSE_IOSTREAM_64 ++ fse_out_flush(&out, &buf); ++#endif ++ ++ // M requires 17b max ++ int32_t m_value = s->m_values[i]; ++ uint8_t m_symbol = m_base_from_value(m_value); ++ int32_t m_nbits = m_extra_bits[m_symbol]; ++ int32_t m_bits = m_value - m_base_value[m_symbol]; ++ fse_out_push(&out, m_nbits, m_bits); ++ fse_encode(&m_state, m_encoder, &out, m_symbol); ++#if !FSE_IOSTREAM_64 ++ fse_out_flush(&out, &buf); ++#endif ++ ++ // L requires 14b max ++ int32_t l_value = s->l_values[i]; ++ uint8_t l_symbol = l_base_from_value(l_value); ++ int32_t l_nbits = l_extra_bits[l_symbol]; ++ int32_t l_bits = l_value - l_base_value[l_symbol]; ++ fse_out_push(&out, l_nbits, l_bits); ++ fse_encode(&l_state, l_encoder, &out, l_symbol); ++ fse_out_flush(&out, &buf); ++ } ++ fse_out_finish(&out, &buf); ++ ++ // Update header with final encoder state ++ header1.n_lmd_payload_bytes = (uint32_t)(buf - s->dst); ++ header1.lmd_bits = out.accum_nbits; // [-7, 0] ++ header1.l_state = l_state; ++ header1.m_state = m_state; ++ header1.d_state = d_state; ++ ++ // Update state ++ s->dst = buf; ++ ++ } // L,M,D ++ ++ // Final state update, here we had enough space in DST, and are not going to ++ // revert state ++ s->n_literals = 0; ++ s->n_matches = 0; ++ ++ // Final payload size ++ header1.n_payload_bytes = ++ 
header1.n_literal_payload_bytes + header1.n_lmd_payload_bytes; ++ ++ // Encode state info in V2 header (we previously encoded the tables, now we ++ // set the other fields) ++ lzfse_encode_v1_state(header2, &header1); ++ ++END: ++ if (!ok) { ++ // Revert state, DST was full ++ ++ // Revert the d_prev encoding ++ uint32_t d_prev = 0; ++ for (uint32_t i = 0; i < s->n_matches; i++) { ++ uint32_t d = s->d_values[i]; ++ if (d == 0) ++ s->d_values[i] = d_prev; ++ else ++ d_prev = d; ++ } ++ ++ // Revert literal count ++ s->n_literals = n_literals0; ++ ++ // Revert DST ++ s->dst = dst0; ++ ++ return LZFSE_STATUS_DST_FULL; // DST full ++ } ++ ++ return LZFSE_STATUS_OK; ++} ++ ++/*! @abstract Push a L,M,D match into the STATE. ++ * @return LZFSE_STATUS_OK if OK. ++ * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of ++ * the buffers is full. In that case the state is not modified. */ ++static inline int lzfse_push_lmd(lzfse_encoder_state *s, uint32_t L, ++ uint32_t M, uint32_t D) { ++ // Check if we have enough space to push the match (we add some margin to copy ++ // literals faster here, and round final count later) ++ if (s->n_matches + 1 + 8 > LZFSE_MATCHES_PER_BLOCK) ++ return LZFSE_STATUS_DST_FULL; // state full ++ if (s->n_literals + L + 16 > LZFSE_LITERALS_PER_BLOCK) ++ return LZFSE_STATUS_DST_FULL; // state full ++ ++ // Store match ++ uint32_t n = s->n_matches++; ++ s->l_values[n] = L; ++ s->m_values[n] = M; ++ s->d_values[n] = D; ++ ++ // Store literals ++ uint8_t *dst = s->literals + s->n_literals; ++ const uint8_t *src = s->src + s->src_literal; ++ uint8_t *dst_end = dst + L; ++ if (s->src_literal + L + 16 > s->src_end) { ++ // Careful at the end of SRC, we can't read 16 bytes ++ if (L > 0) ++ memcpy(dst, src, L); ++ } else { ++ copy16(dst, src); ++ dst += 16; ++ src += 16; ++ while (dst < dst_end) { ++ copy16(dst, src); ++ dst += 16; ++ src += 16; ++ } ++ } ++ s->n_literals += L; ++ ++ // Update state ++ s->src_literal += L + M; ++ ++ 
return LZFSE_STATUS_OK; ++} ++ ++/*! @abstract Split MATCH into one or more L,M,D parts, and push to STATE. ++ * @return LZFSE_STATUS_OK if OK. ++ * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of the ++ * buffers is full. In that case the state is not modified. */ ++static int lzfse_push_match(lzfse_encoder_state *s, const lzfse_match *match) { ++ // Save the initial n_matches, n_literals, src_literal ++ uint32_t n_matches0 = s->n_matches; ++ uint32_t n_literals0 = s->n_literals; ++ lzfse_offset src_literals0 = s->src_literal; ++ ++ // L,M,D ++ uint32_t L = (uint32_t)(match->pos - s->src_literal); // literal count ++ uint32_t M = match->length; // match length ++ uint32_t D = (uint32_t)(match->pos - match->ref); // match distance ++ int ok = 1; ++ ++ // Split L if too large ++ while (L > LZFSE_ENCODE_MAX_L_VALUE) { ++ if (lzfse_push_lmd(s, LZFSE_ENCODE_MAX_L_VALUE, 0, 1) != 0) { ++ ok = 0; ++ goto END; ++ } // take D=1 because most frequent, but not actually used ++ L -= LZFSE_ENCODE_MAX_L_VALUE; ++ } ++ ++ // Split if M too large ++ while (M > LZFSE_ENCODE_MAX_M_VALUE) { ++ if (lzfse_push_lmd(s, L, LZFSE_ENCODE_MAX_M_VALUE, D) != 0) { ++ ok = 0; ++ goto END; ++ } ++ L = 0; ++ M -= LZFSE_ENCODE_MAX_M_VALUE; ++ } ++ ++ // L,M in range ++ if (L > 0 || M > 0) { ++ if (lzfse_push_lmd(s, L, M, D) != 0) { ++ ok = 0; ++ goto END; ++ } ++ L = M = 0; ++ (void)L; ++ (void)M; // dead stores ++ } ++ ++END: ++ if (!ok) { ++ // Revert state ++ s->n_matches = n_matches0; ++ s->n_literals = n_literals0; ++ s->src_literal = src_literals0; ++ ++ return LZFSE_STATUS_DST_FULL; // state tables full ++ } ++ ++ return LZFSE_STATUS_OK; // OK ++} ++ ++/*! @abstract Backend: add MATCH to state S. Encode block if necessary, when ++ * state is full. ++ * @return LZFSE_STATUS_OK if OK. ++ * @return LZFSE_STATUS_DST_FULL if the match can't be added, meaning one of the ++ * buffers is full. In that case the state is not modified. 
*/ ++static int lzfse_backend_match(lzfse_encoder_state *s, ++ const lzfse_match *match) { ++ // Try to push the match in state ++ if (lzfse_push_match(s, match) == LZFSE_STATUS_OK) ++ return LZFSE_STATUS_OK; // OK, match added to state ++ ++ // Here state tables are full, try to emit block ++ if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) ++ return LZFSE_STATUS_DST_FULL; // DST full, match not added ++ ++ // Here block has been emitted, re-try to push the match in state ++ return lzfse_push_match(s, match); ++} ++ ++/*! @abstract Backend: add L literals to state S. Encode block if necessary, ++ * when state is full. ++ * @return LZFSE_STATUS_OK if OK. ++ * @return LZFSE_STATUS_DST_FULL if the literals can't be added, meaning one of ++ * the buffers is full. In that case the state is not modified. */ ++static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) { ++ // Create a fake match with M=0, D=1 ++ lzfse_match match; ++ lzfse_offset pos = s->src_literal + L; ++ match.pos = pos; ++ match.ref = match.pos - 1; ++ match.length = 0; ++ return lzfse_backend_match(s, &match); ++} ++ ++/*! @abstract Backend: flush final block, and emit end of stream ++ * @return LZFSE_STATUS_OK if OK. ++ * @return LZFSE_STATUS_DST_FULL if either the final block, or the end-of-stream ++ * can't be added, meaning one of the buffers is full. If the block was emitted, ++ * the state is updated to reflect this. Otherwise, it is left unchanged. 
*/ ++static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) { ++ // Final match triggers write, otherwise emit blocks when we have enough ++ // matches stored ++ if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) ++ return LZFSE_STATUS_DST_FULL; // DST full ++ ++ // Emit end-of-stream block ++ if (s->dst + 4 > s->dst_end) ++ return LZFSE_STATUS_DST_FULL; // DST full ++ store4(s->dst, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); ++ s->dst += 4; ++ ++ return LZFSE_STATUS_OK; // OK ++} ++ ++// =============================================================== ++// Encoder state management ++ ++/*! @abstract Initialize state: ++ * @code ++ * - hash table with all invalid pos, and value 0. ++ * - pending match to NO_MATCH. ++ * - src_literal to 0. ++ * - d_prev to 0. ++ @endcode ++ * @return LZFSE_STATUS_OK */ ++int lzfse_encode_init(lzfse_encoder_state *s) { ++ const lzfse_match NO_MATCH = {0}; ++ lzfse_history_set line; ++ for (int i = 0; i < LZFSE_ENCODE_HASH_WIDTH; i++) { ++ line.pos[i] = -4 * LZFSE_ENCODE_MAX_D_VALUE; // invalid pos ++ line.value[i] = 0; ++ } ++ // Fill table ++ for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) ++ s->history_table[i] = line; ++ s->pending = NO_MATCH; ++ s->src_literal = 0; ++ ++ return LZFSE_STATUS_OK; // OK ++} ++ ++/*! @abstract Translate state \p src forward by \p delta > 0. ++ * Offsets in \p src are updated backwards to point to the same positions. 
++ * @return LZFSE_STATUS_OK */ ++int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta) { ++ if (delta == 0) ++ return LZFSE_STATUS_OK; // OK ++ ++ // SRC ++ s->src += delta; ++ ++ // Offsets in SRC ++ s->src_end -= delta; ++ s->src_encode_i -= delta; ++ s->src_encode_end -= delta; ++ s->src_literal -= delta; ++ ++ // Pending match ++ s->pending.pos -= delta; ++ s->pending.ref -= delta; ++ ++ // history_table positions, translated, and clamped to invalid pos ++ int32_t invalidPos = -4 * LZFSE_ENCODE_MAX_D_VALUE; ++ for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) { ++ int32_t *p = &(s->history_table[i].pos[0]); ++ for (int j = 0; j < LZFSE_ENCODE_HASH_WIDTH; j++) { ++ lzfse_offset newPos = p[j] - delta; // translate ++ p[j] = (int32_t)((newPos < invalidPos) ? invalidPos : newPos); // clamp ++ } ++ } ++ ++ return LZFSE_STATUS_OK; // OK ++} ++ ++// =============================================================== ++// Encoder front end ++ ++int lzfse_encode_base(lzfse_encoder_state *s) { ++ lzfse_history_set *history_table = s->history_table; ++ lzfse_history_set *hashLine = 0; ++ lzfse_history_set newH; ++ const lzfse_match NO_MATCH = {0}; ++ int ok = 1; ++ ++ memset(&newH, 0x00, sizeof(newH)); ++ ++ // 8 byte padding at end of buffer ++ s->src_encode_end = s->src_end - 8; ++ for (; s->src_encode_i < s->src_encode_end; s->src_encode_i++) { ++ lzfse_offset pos = s->src_encode_i; // pos >= 0 ++ ++ // Load 4 byte value and get hash line ++ uint32_t x = load4(s->src + pos); ++ hashLine = history_table + hashX(x); ++ lzfse_history_set h = *hashLine; ++ ++ // Prepare next hash line (component 0 is the most recent) to prepare new ++ // entries (stored later) ++ { ++ newH.pos[0] = (int32_t)pos; ++ for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) ++ newH.pos[k + 1] = h.pos[k]; ++ newH.value[0] = x; ++ for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) ++ newH.value[k + 1] = h.value[k]; ++ } ++ ++ // Do not look for a match if we are still covered by 
a previous match ++ if (pos < s->src_literal) ++ goto END_POS; ++ ++ // Search best incoming match ++ lzfse_match incoming = {.pos = pos, .ref = 0, .length = 0}; ++ ++ // Check for matches. We consider matches of length >= 4 only. ++ for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH; k++) { ++ uint32_t d = h.value[k] ^ x; ++ if (d) ++ continue; // no 4 byte match ++ int32_t ref = h.pos[k]; ++ if (ref + LZFSE_ENCODE_MAX_D_VALUE < pos) ++ continue; // too far ++ ++ const uint8_t *src_ref = s->src + ref; ++ const uint8_t *src_pos = s->src + pos; ++ uint32_t length = 4; ++ uint32_t maxLength = ++ (uint32_t)(s->src_end - pos - 8); // ensure we don't hit the end of SRC ++ while (length < maxLength) { ++ uint64_t d = load8(src_ref + length) ^ load8(src_pos + length); ++ if (d == 0) { ++ length += 8; ++ continue; ++ } ++ ++ length += ++ (__builtin_ctzll(d) >> 3); // ctzll must be called only with D != 0 ++ break; ++ } ++ if (length > incoming.length) { ++ incoming.length = length; ++ incoming.ref = ref; ++ } // keep if longer ++ } ++ ++ // No incoming match? ++ if (incoming.length == 0) { ++ // We may still want to emit some literals here, to not lag too far behind ++ // the current search point, and avoid ++ // ending up with a literal block not fitting in the state. ++ lzfse_offset n_literals = pos - s->src_literal; ++ // The threshold here should be larger than a couple of MAX_L_VALUE, and ++ // much smaller than LITERALS_PER_BLOCK ++ if (n_literals > 8 * LZFSE_ENCODE_MAX_L_VALUE) { ++ // Here, we need to consume some literals. 
Emit pending match if there ++ // is one ++ if (s->pending.length > 0) { ++ if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { ++ ok = 0; ++ goto END; ++ } ++ s->pending = NO_MATCH; ++ } else { ++ // No pending match, emit a full LZFSE_ENCODE_MAX_L_VALUE block of ++ // literals ++ if (lzfse_backend_literals(s, LZFSE_ENCODE_MAX_L_VALUE) != ++ LZFSE_STATUS_OK) { ++ ok = 0; ++ goto END; ++ } ++ } ++ } ++ goto END_POS; // no incoming match ++ } ++ ++ // Limit match length (it may still be expanded backwards, but this is ++ // bounded by the limit on literals we tested before) ++ if (incoming.length > LZFSE_ENCODE_MAX_MATCH_LENGTH) { ++ incoming.length = LZFSE_ENCODE_MAX_MATCH_LENGTH; ++ } ++ ++ // Expand backwards (since this is expensive, we do this for the best match ++ // only) ++ while (incoming.pos > s->src_literal && incoming.ref > 0 && ++ s->src[incoming.ref - 1] == s->src[incoming.pos - 1]) { ++ incoming.pos--; ++ incoming.ref--; ++ } ++ incoming.length += pos - incoming.pos; // update length after expansion ++ ++ // Match filtering heuristic (from LZVN). INCOMING is always defined here. 
++ ++ // Incoming is 'good', emit incoming ++ if (incoming.length >= LZFSE_ENCODE_GOOD_MATCH) { ++ if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) { ++ ok = 0; ++ goto END; ++ } ++ s->pending = NO_MATCH; ++ goto END_POS; ++ } ++ ++ // No pending, keep incoming ++ if (s->pending.length == 0) { ++ s->pending = incoming; ++ goto END_POS; ++ } ++ ++ // No overlap, emit pending, keep incoming ++ if (s->pending.pos + s->pending.length <= incoming.pos) { ++ if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { ++ ok = 0; ++ goto END; ++ } ++ s->pending = incoming; ++ goto END_POS; ++ } ++ ++ // Overlap: emit longest ++ if (incoming.length > s->pending.length) { ++ if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) { ++ ok = 0; ++ goto END; ++ } ++ } else { ++ if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { ++ ok = 0; ++ goto END; ++ } ++ } ++ s->pending = NO_MATCH; ++ ++ END_POS: ++ // We are done with this src_encode_i. ++ // Update state now (s->pending has already been updated). ++ *hashLine = newH; ++ } ++ ++END: ++ return ok ? 
LZFSE_STATUS_OK : LZFSE_STATUS_DST_FULL; ++} ++ ++int lzfse_encode_finish(lzfse_encoder_state *s) { ++ const lzfse_match NO_MATCH = {0}; ++ ++ // Emit pending match ++ if (s->pending.length > 0) { ++ if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) ++ return LZFSE_STATUS_DST_FULL; ++ s->pending = NO_MATCH; ++ } ++ ++ // Emit final literals if any ++ lzfse_offset L = s->src_end - s->src_literal; ++ if (L > 0) { ++ if (lzfse_backend_literals(s, L) != LZFSE_STATUS_OK) ++ return LZFSE_STATUS_DST_FULL; ++ } ++ ++ // Emit all matches, and end-of-stream block ++ if (lzfse_backend_end_of_stream(s) != LZFSE_STATUS_OK) ++ return LZFSE_STATUS_DST_FULL; ++ ++ return LZFSE_STATUS_OK; ++} +diff --git a/fs/apfs/lzfse/lzfse_encode_tables.h b/fs/apfs/lzfse/lzfse_encode_tables.h +new file mode 100644 +index 000000000..81c9c7069 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_encode_tables.h +@@ -0,0 +1,218 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++#ifndef LZFSE_ENCODE_TABLES_H ++#define LZFSE_ENCODE_TABLES_H ++ ++#if defined(_MSC_VER) && !defined(__clang__) ++# define inline __inline ++#endif ++ ++static inline uint8_t l_base_from_value(int32_t value) { ++ static const uint8_t sym[LZFSE_ENCODE_MAX_L_VALUE + 1] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, ++ 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19}; ++ return sym[value]; ++} ++static inline uint8_t m_base_from_value(int32_t value) { ++ static const uint8_t sym[LZFSE_ENCODE_MAX_M_VALUE + 1] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, ++ 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, ++ 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, ++ 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, ++ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, ++ 19, 19}; ++ return sym[value]; ++} ++ ++static inline uint8_t d_base_from_value(int32_t value) { ++ static const uint8_t sym[64 * 4] = { ++ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, ++ 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, ++ 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, ++ 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 17, 18, 19, 20, 20, 21, 21, ++ 22, 22, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, ++ 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, ++ 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, ++ 32, 32, 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 40, ++ 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, ++ 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, ++ 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 50, 51, 52, 52, ++ 53, 53, 54, 54, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, ++ 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, ++ 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, ++ 0, 0, 0, 0}; ++ int index = 0; ++ int in_range_k; ++ in_range_k = (value >= 0 && value < 60); ++ index |= (((value - 0) >> 0) + 0) & -in_range_k; ++ in_range_k = (value >= 60 && value < 1020); ++ index |= (((value - 60) >> 4) + 64) & -in_range_k; ++ in_range_k = (value >= 1020 && value < 16380); ++ index |= (((value - 1020) >> 8) + 128) & -in_range_k; ++ in_range_k = (value >= 16380 && value < 262140); ++ index |= (((value - 16380) >> 12) + 192) & -in_range_k; ++ return sym[index & 255]; ++} ++ ++#endif // LZFSE_ENCODE_TABLES_H +diff --git a/fs/apfs/lzfse/lzfse_fse.c b/fs/apfs/lzfse/lzfse_fse.c +new file mode 100644 +index 000000000..2bf37a621 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_fse.c +@@ -0,0 +1,217 @@ ++/* ++Copyright 
(c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++#include "lzfse_internal.h" ++ ++// Initialize encoder table T[NSYMBOLS]. ++// NSTATES = sum FREQ[i] is the number of states (a power of 2) ++// NSYMBOLS is the number of symbols. ++// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] ++// >= 0. ++// Some symbols may have a 0 frequency. In that case, they should not be ++// present in the data. 
++void fse_init_encoder_table(int nstates, int nsymbols, ++ const uint16_t *__restrict freq, ++ fse_encoder_entry *__restrict t) { ++ int offset = 0; // current offset ++ int n_clz = __builtin_clz(nstates); ++ int i; ++ for (i = 0; i < nsymbols; i++) { ++ int f = (int)freq[i]; ++ int k; ++ if (f == 0) ++ continue; // skip this symbol, no occurrences ++ k = __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<> k)); ++ t[i].delta1 = (int16_t)(offset - f + (nstates >> (k - 1))); ++ offset += f; ++ } ++} ++ ++// Initialize decoder table T[NSTATES]. ++// NSTATES = sum FREQ[i] is the number of states (a power of 2) ++// NSYMBOLS is the number of symbols. ++// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] ++// >= 0. ++// Some symbols may have a 0 frequency. In that case, they should not be ++// present in the data. ++int fse_init_decoder_table(int nstates, int nsymbols, ++ const uint16_t *__restrict freq, ++ int32_t *__restrict t) { ++ int n_clz = __builtin_clz(nstates); ++ int sum_of_freq = 0; ++ int i, j0, j; ++ for (i = 0; i < nsymbols; i++) { ++ int f = (int)freq[i]; ++ int k; ++ if (f == 0) ++ continue; // skip this symbol, no occurrences ++ ++ sum_of_freq += f; ++ ++ if (sum_of_freq > nstates) { ++ return -1; ++ } ++ ++ k = __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<> k) - f; ++ ++ // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F ++ for (j = 0; j < f; j++) { ++ fse_decoder_entry e; ++ ++ e.symbol = (uint8_t)i; ++ if (j < j0) { ++ e.k = (int8_t)k; ++ e.delta = (int16_t)(((f + j) << k) - nstates); ++ } else { ++ e.k = (int8_t)(k - 1); ++ e.delta = (int16_t)((j - j0) << (k - 1)); ++ } ++ ++ memcpy(t, &e, sizeof(e)); ++ t++; ++ } ++ } ++ ++ return 0; // OK ++} ++ ++// Initialize value decoder table T[NSTATES]. ++// NSTATES = sum FREQ[i] is the number of states (a power of 2) ++// NSYMBOLS is the number of symbols. 
++// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] ++// >= 0. ++// SYMBOL_VBITS[NSYMBOLS] and SYMBOLS_VBASE[NSYMBOLS] are the number of value ++// bits to read and the base value for each symbol. ++// Some symbols may have a 0 frequency. In that case, they should not be ++// present in the data. ++void fse_init_value_decoder_table(int nstates, int nsymbols, ++ const uint16_t *__restrict freq, ++ const uint8_t *__restrict symbol_vbits, ++ const int32_t *__restrict symbol_vbase, ++ fse_value_decoder_entry *__restrict t) { ++ int n_clz = __builtin_clz(nstates); ++ int i; ++ for (i = 0; i < nsymbols; i++) { ++ fse_value_decoder_entry ei = {0}; ++ int f = (int)freq[i]; ++ int k, j0, j; ++ if (f == 0) ++ continue; // skip this symbol, no occurrences ++ ++ k = __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<> k) - f; ++ ++ ei.value_bits = symbol_vbits[i]; ++ ei.vbase = symbol_vbase[i]; ++ ++ // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F ++ for (j = 0; j < f; j++) { ++ fse_value_decoder_entry e = ei; ++ ++ if (j < j0) { ++ e.total_bits = (uint8_t)k + e.value_bits; ++ e.delta = (int16_t)(((f + j) << k) - nstates); ++ } else { ++ e.total_bits = (uint8_t)(k - 1) + e.value_bits; ++ e.delta = (int16_t)((j - j0) << (k - 1)); ++ } ++ ++ memcpy(t, &e, 8); ++ t++; ++ } ++ } ++} ++ ++// Remove states from symbols until the correct number of states is used. ++static void fse_adjust_freqs(uint16_t *freq, int overrun, int nsymbols) { ++ int shift; ++ for (shift = 3; overrun != 0; shift--) { ++ int sym; ++ for (sym = 0; sym < nsymbols; sym++) { ++ if (freq[sym] > 1) { ++ int n = (freq[sym] - 1) >> shift; ++ if (n > overrun) ++ n = overrun; ++ freq[sym] -= n; ++ overrun -= n; ++ if (overrun == 0) ++ break; ++ } ++ } ++ } ++} ++ ++// Normalize a table T[NSYMBOLS] of occurrences to FREQ[NSYMBOLS]. 
++void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, ++ uint16_t *__restrict freq) { ++ uint32_t s_count = 0; ++ int remaining = nstates; // must be signed; this may become < 0 ++ int max_freq = 0; ++ int max_freq_sym = 0; ++ int shift = __builtin_clz(nstates) - 1; ++ uint32_t highprec_step; ++ int i; ++ ++ // Compute the total number of symbol occurrences ++ for (i = 0; i < nsymbols; i++) ++ s_count += t[i]; ++ ++ if (s_count == 0) ++ highprec_step = 0; // no symbols used ++ else ++ highprec_step = ((uint32_t)1 << 31) / s_count; ++ ++ for (i = 0; i < nsymbols; i++) { ++ ++ // Rescale the occurrence count to get the normalized frequency. ++ // Round up if the fractional part is >= 0.5; otherwise round down. ++ // For efficiency, we do this calculation using integer arithmetic. ++ int f = (((t[i] * highprec_step) >> shift) + 1) >> 1; ++ ++ // If a symbol was used, it must be given a nonzero normalized frequency. ++ if (f == 0 && t[i] != 0) ++ f = 1; ++ ++ freq[i] = f; ++ remaining -= f; ++ ++ // Remember the maximum frequency and which symbol had it. ++ if (f > max_freq) { ++ max_freq = f; ++ max_freq_sym = i; ++ } ++ } ++ ++ // If there remain states to be assigned, then just assign them to the most ++ // frequent symbol. Alternatively, if we assigned more states than were ++ // actually available, then either remove states from the most frequent symbol ++ // (for minor overruns) or use the slower adjustment algorithm (for major ++ // overruns). ++ if (-remaining < (max_freq >> 2)) { ++ freq[max_freq_sym] += remaining; ++ } else { ++ fse_adjust_freqs(freq, -remaining, nsymbols); ++ } ++} +diff --git a/fs/apfs/lzfse/lzfse_fse.h b/fs/apfs/lzfse/lzfse_fse.h +new file mode 100644 +index 000000000..58dd724b9 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_fse.h +@@ -0,0 +1,606 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. 
++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++// Finite state entropy coding (FSE) ++// This is an implementation of the tANS algorithm described by Jarek Duda, ++// we use the more descriptive name "Finite State Entropy". ++ ++#pragma once ++ ++#include ++#include ++ ++// Select between 32/64-bit I/O streams for FSE. Note that the FSE stream ++// size need not match the word size of the machine, but in practice you ++// want to use 64b streams on 64b systems for better performance. 
++#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) ++#define FSE_IOSTREAM_64 1 ++#else ++#define FSE_IOSTREAM_64 0 ++#endif ++ ++#if defined(_MSC_VER) && !defined(__clang__) ++# define FSE_INLINE __forceinline ++# define inline __inline ++# pragma warning(disable : 4068) // warning C4068: unknown pragma ++#else ++# define FSE_INLINE static inline __attribute__((__always_inline__)) ++#endif ++ ++// MARK: - Bit utils ++ ++/*! @abstract Signed type used to represent bit count. */ ++typedef int32_t fse_bit_count; ++ ++/*! @abstract Unsigned type used to represent FSE state. */ ++typedef uint16_t fse_state; ++ ++// Mask the NBITS lsb of X. 0 <= NBITS < 64 ++static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) { ++ static const uint64_t mtable[65] = { ++ 0x0000000000000000LLU, 0x0000000000000001LLU, 0x0000000000000003LLU, ++ 0x0000000000000007LLU, 0x000000000000000fLLU, 0x000000000000001fLLU, ++ 0x000000000000003fLLU, 0x000000000000007fLLU, 0x00000000000000ffLLU, ++ 0x00000000000001ffLLU, 0x00000000000003ffLLU, 0x00000000000007ffLLU, ++ 0x0000000000000fffLLU, 0x0000000000001fffLLU, 0x0000000000003fffLLU, ++ 0x0000000000007fffLLU, 0x000000000000ffffLLU, 0x000000000001ffffLLU, ++ 0x000000000003ffffLLU, 0x000000000007ffffLLU, 0x00000000000fffffLLU, ++ 0x00000000001fffffLLU, 0x00000000003fffffLLU, 0x00000000007fffffLLU, ++ 0x0000000000ffffffLLU, 0x0000000001ffffffLLU, 0x0000000003ffffffLLU, ++ 0x0000000007ffffffLLU, 0x000000000fffffffLLU, 0x000000001fffffffLLU, ++ 0x000000003fffffffLLU, 0x000000007fffffffLLU, 0x00000000ffffffffLLU, ++ 0x00000001ffffffffLLU, 0x00000003ffffffffLLU, 0x00000007ffffffffLLU, ++ 0x0000000fffffffffLLU, 0x0000001fffffffffLLU, 0x0000003fffffffffLLU, ++ 0x0000007fffffffffLLU, 0x000000ffffffffffLLU, 0x000001ffffffffffLLU, ++ 0x000003ffffffffffLLU, 0x000007ffffffffffLLU, 0x00000fffffffffffLLU, ++ 0x00001fffffffffffLLU, 0x00003fffffffffffLLU, 0x00007fffffffffffLLU, ++ 0x0000ffffffffffffLLU, 0x0001ffffffffffffLLU, 
0x0003ffffffffffffLLU, ++ 0x0007ffffffffffffLLU, 0x000fffffffffffffLLU, 0x001fffffffffffffLLU, ++ 0x003fffffffffffffLLU, 0x007fffffffffffffLLU, 0x00ffffffffffffffLLU, ++ 0x01ffffffffffffffLLU, 0x03ffffffffffffffLLU, 0x07ffffffffffffffLLU, ++ 0x0fffffffffffffffLLU, 0x1fffffffffffffffLLU, 0x3fffffffffffffffLLU, ++ 0x7fffffffffffffffLLU, 0xffffffffffffffffLLU, ++ }; ++ return x & mtable[nbits]; ++} ++ ++// Mask the NBITS lsb of X. 0 <= NBITS < 32 ++static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) { ++ static const uint32_t mtable[33] = { ++ 0x0000000000000000U, 0x0000000000000001U, 0x0000000000000003U, ++ 0x0000000000000007U, 0x000000000000000fU, 0x000000000000001fU, ++ 0x000000000000003fU, 0x000000000000007fU, 0x00000000000000ffU, ++ 0x00000000000001ffU, 0x00000000000003ffU, 0x00000000000007ffU, ++ 0x0000000000000fffU, 0x0000000000001fffU, 0x0000000000003fffU, ++ 0x0000000000007fffU, 0x000000000000ffffU, 0x000000000001ffffU, ++ 0x000000000003ffffU, 0x000000000007ffffU, 0x00000000000fffffU, ++ 0x00000000001fffffU, 0x00000000003fffffU, 0x00000000007fffffU, ++ 0x0000000000ffffffU, 0x0000000001ffffffU, 0x0000000003ffffffU, ++ 0x0000000007ffffffU, 0x000000000fffffffU, 0x000000001fffffffU, ++ 0x000000003fffffffU, 0x000000007fffffffU, 0x00000000ffffffffU, ++ }; ++ return x & mtable[nbits]; ++} ++ ++/*! @abstract Select \c nbits at index \c start from \c x. ++ * 0 <= start <= start+nbits <= 64 */ ++FSE_INLINE uint64_t fse_extract_bits64(uint64_t x, fse_bit_count start, ++ fse_bit_count nbits) { ++#if defined(__GNUC__) ++ // If START and NBITS are constants, map to bit-field extraction instructions ++ if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) ++ return (x >> start) & ((1LLU << nbits) - 1LLU); ++#endif ++ ++ // Otherwise, shift and mask ++ return fse_mask_lsb64(x >> start, nbits); ++} ++ ++/*! @abstract Select \c nbits at index \c start from \c x. 
++ * 0 <= start <= start+nbits <= 32 */ ++FSE_INLINE uint32_t fse_extract_bits32(uint32_t x, fse_bit_count start, ++ fse_bit_count nbits) { ++#if defined(__GNUC__) ++ // If START and NBITS are constants, map to bit-field extraction instructions ++ if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) ++ return (x >> start) & ((1U << nbits) - 1U); ++#endif ++ ++ // Otherwise, shift and mask ++ return fse_mask_lsb32(x >> start, nbits); ++} ++ ++// MARK: - Bit stream ++ ++// I/O streams ++// The streams can be shared between several FSE encoders/decoders, which is why ++// they are not in the state struct ++ ++/*! @abstract Output stream, 64-bit accum. */ ++typedef struct { ++ uint64_t accum; // Output bits ++ fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 ++} fse_out_stream64; ++ ++/*! @abstract Output stream, 32-bit accum. */ ++typedef struct { ++ uint32_t accum; // Output bits ++ fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 ++} fse_out_stream32; ++ ++/*! @abstract Object representing an input stream. */ ++typedef struct { ++ uint64_t accum; // Input bits ++ fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 ++} fse_in_stream64; ++ ++/*! @abstract Object representing an input stream. */ ++typedef struct { ++ uint32_t accum; // Input bits ++ fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 ++} fse_in_stream32; ++ ++/*! @abstract Initialize an output stream object. */ ++FSE_INLINE void fse_out_init64(fse_out_stream64 *s) { ++ s->accum = 0; ++ s->accum_nbits = 0; ++} ++ ++/*! @abstract Initialize an output stream object. */ ++FSE_INLINE void fse_out_init32(fse_out_stream32 *s) { ++ s->accum = 0; ++ s->accum_nbits = 0; ++} ++ ++/*! @abstract Write full bytes from the accumulator to output buffer, ensuring ++ * accum_nbits is in [0, 7]. ++ * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all ++ * cases. 
++ * @note *pbuf is incremented by the number of written bytes. */ ++FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) { ++ fse_bit_count nbits = ++ s->accum_nbits & -8; // number of bits written, multiple of 8 ++ ++ // Write 8 bytes of current accumulator ++ memcpy(*pbuf, &(s->accum), 8); ++ *pbuf += (nbits >> 3); // bytes ++ ++ // Update state ++ s->accum >>= nbits; // remove nbits ++ s->accum_nbits -= nbits; ++} ++ ++/*! @abstract Write full bytes from the accumulator to output buffer, ensuring ++ * accum_nbits is in [0, 7]. ++ * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all ++ * cases. ++ * @note *pbuf is incremented by the number of written bytes. */ ++FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) { ++ fse_bit_count nbits = ++ s->accum_nbits & -8; // number of bits written, multiple of 8 ++ ++ // Write 4 bytes of current accumulator ++ memcpy(*pbuf, &(s->accum), 4); ++ *pbuf += (nbits >> 3); // bytes ++ ++ // Update state ++ s->accum >>= nbits; // remove nbits ++ s->accum_nbits -= nbits; ++} ++ ++/*! @abstract Write the last bytes from the accumulator to output buffer, ++ * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed. ++ * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all ++ * cases. ++ * @note *pbuf is incremented by the number of written bytes. */ ++FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) { ++ fse_bit_count nbits = ++ (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8 ++ ++ // Write 8 bytes of current accumulator ++ memcpy(*pbuf, &(s->accum), 8); ++ *pbuf += (nbits >> 3); // bytes ++ ++ // Update state ++ s->accum = 0; // remove nbits ++ s->accum_nbits -= nbits; ++} ++ ++/*! @abstract Write the last bytes from the accumulator to output buffer, ++ * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed. 
++ * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all ++ * cases. ++ * @note *pbuf is incremented by the number of written bytes. */ ++FSE_INLINE void fse_out_finish32(fse_out_stream32 *s, uint8_t **pbuf) { ++ fse_bit_count nbits = ++ (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8 ++ ++ // Write 8 bytes of current accumulator ++ memcpy(*pbuf, &(s->accum), 4); ++ *pbuf += (nbits >> 3); // bytes ++ ++ // Update state ++ s->accum = 0; // remove nbits ++ s->accum_nbits -= nbits; ++} ++ ++/*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: ++ * 0 <= b < 2^n, and N + s->accum_nbits <= 64. ++ * @note The caller must ensure out_flush is called \b before the accumulator ++ * overflows to more than 64 bits. */ ++FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n, ++ uint64_t b) { ++ s->accum |= b << s->accum_nbits; ++ s->accum_nbits += n; ++} ++ ++/*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: ++ * 0 <= n < 2^n, and n + s->accum_nbits <= 32. ++ * @note The caller must ensure out_flush is called \b before the accumulator ++ * overflows to more than 32 bits. */ ++FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n, ++ uint32_t b) { ++ s->accum |= b << s->accum_nbits; ++ s->accum_nbits += n; ++} ++ ++#define DEBUG_CHECK_INPUT_STREAM_PARAMETERS ++ ++/*! @abstract Initialize the fse input stream so that accum holds between 56 ++ * and 63 bits. We never want to have 64 bits in the stream, because that allows ++ * us to avoid a special case in the fse_in_pull function (eliminating an ++ * unpredictable branch), while not requiring any additional fse_flush ++ * operations. This is why we have the special case for n == 0 (in which case ++ * we want to load only 7 bytes instead of 8). 
*/ ++FSE_INLINE int fse_in_checked_init64(fse_in_stream64 *s, fse_bit_count n, ++ const uint8_t **pbuf, ++ const uint8_t *buf_start) { ++ if (n) { ++ if (*pbuf < buf_start + 8) ++ return -1; // out of range ++ *pbuf -= 8; ++ memcpy(&(s->accum), *pbuf, 8); ++ s->accum_nbits = n + 64; ++ } else { ++ if (*pbuf < buf_start + 7) ++ return -1; // out of range ++ *pbuf -= 7; ++ memcpy(&(s->accum), *pbuf, 7); ++ s->accum &= 0xffffffffffffff; ++ s->accum_nbits = n + 56; ++ } ++ ++ if ((s->accum_nbits < 56 || s->accum_nbits >= 64) || ++ ((s->accum >> s->accum_nbits) != 0)) { ++ return -1; // the incoming input is wrong (encoder should have zeroed the ++ // upper bits) ++ } ++ ++ return 0; // OK ++} ++ ++/*! @abstract Identical to previous function, but for 32-bit operation ++ * (resulting bit count is between 24 and 31 bits). */ ++FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n, ++ const uint8_t **pbuf, ++ const uint8_t *buf_start) { ++ if (n) { ++ if (*pbuf < buf_start + 4) ++ return -1; // out of range ++ *pbuf -= 4; ++ memcpy(&(s->accum), *pbuf, 4); ++ s->accum_nbits = n + 32; ++ } else { ++ if (*pbuf < buf_start + 3) ++ return -1; // out of range ++ *pbuf -= 3; ++ memcpy(&(s->accum), *pbuf, 3); ++ s->accum &= 0xffffff; ++ s->accum_nbits = n + 24; ++ } ++ ++ if ((s->accum_nbits < 24 || s->accum_nbits >= 32) || ++ ((s->accum >> s->accum_nbits) != 0)) { ++ return -1; // the incoming input is wrong (encoder should have zeroed the ++ // upper bits) ++ } ++ ++ return 0; // OK ++} ++ ++/*! @abstract Read in new bytes from buffer to ensure that we have a full ++ * complement of bits in the stream object (again, between 56 and 63 bits). ++ * checking the new value of \c *pbuf remains >= \c buf_start. ++ * @return 0 if OK. ++ * @return -1 on failure. */ ++FSE_INLINE int fse_in_checked_flush64(fse_in_stream64 *s, const uint8_t **pbuf, ++ const uint8_t *buf_start) { ++ // Get number of bits to add to bring us into the desired range. 
++ fse_bit_count nbits = (63 - s->accum_nbits) & -8; ++ // Convert bits to bytes and decrement buffer address, then load new data. ++ const uint8_t *buf = (*pbuf) - (nbits >> 3); ++ uint64_t incoming; ++ if (buf < buf_start) { ++ return -1; // out of range ++ } ++ *pbuf = buf; ++ memcpy(&incoming, buf, 8); ++ // Update the state object and verify its validity (in DEBUG). ++ s->accum = (s->accum << nbits) | fse_mask_lsb64(incoming, nbits); ++ s->accum_nbits += nbits; ++ DEBUG_CHECK_INPUT_STREAM_PARAMETERS ++ return 0; // OK ++} ++ ++/*! @abstract Identical to previous function (but again, we're only filling ++ * a 32-bit field with between 24 and 31 bits). */ ++FSE_INLINE int fse_in_checked_flush32(fse_in_stream32 *s, const uint8_t **pbuf, ++ const uint8_t *buf_start) { ++ // Get number of bits to add to bring us into the desired range. ++ fse_bit_count nbits = (31 - s->accum_nbits) & -8; ++ ++ if (nbits > 0) { ++ // Convert bits to bytes and decrement buffer address, then load new data. ++ const uint8_t *buf = (*pbuf) - (nbits >> 3); ++ uint32_t incoming; ++ if (buf < buf_start) { ++ return -1; // out of range ++ } ++ ++ *pbuf = buf; ++ ++ incoming = *((uint32_t *)buf); ++ ++ // Update the state object and verify its validity (in DEBUG). ++ s->accum = (s->accum << nbits) | fse_mask_lsb32(incoming, nbits); ++ s->accum_nbits += nbits; ++ } ++ DEBUG_CHECK_INPUT_STREAM_PARAMETERS ++ return 0; // OK ++} ++ ++/*! @abstract Pull n bits out of the fse stream object. */ ++FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) { ++ uint64_t result; ++ s->accum_nbits -= n; ++ result = s->accum >> s->accum_nbits; ++ s->accum = fse_mask_lsb64(s->accum, s->accum_nbits); ++ return result; ++} ++ ++/*! @abstract Pull n bits out of the fse stream object. 
*/ ++FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) { ++ uint32_t result; ++ s->accum_nbits -= n; ++ result = s->accum >> s->accum_nbits; ++ s->accum = fse_mask_lsb32(s->accum, s->accum_nbits); ++ return result; ++} ++ ++// MARK: - Encode/Decode ++ ++// Map to 32/64-bit implementations and types for I/O ++#if FSE_IOSTREAM_64 ++ ++typedef uint64_t fse_bits; ++typedef fse_out_stream64 fse_out_stream; ++typedef fse_in_stream64 fse_in_stream; ++#define fse_mask_lsb fse_mask_lsb64 ++#define fse_extract_bits fse_extract_bits64 ++#define fse_out_init fse_out_init64 ++#define fse_out_flush fse_out_flush64 ++#define fse_out_finish fse_out_finish64 ++#define fse_out_push fse_out_push64 ++#define fse_in_init fse_in_checked_init64 ++#define fse_in_checked_init fse_in_checked_init64 ++#define fse_in_flush fse_in_checked_flush64 ++#define fse_in_checked_flush fse_in_checked_flush64 ++#define fse_in_flush2(_unused, _parameters, _unused2) 0 /* nothing */ ++#define fse_in_checked_flush2(_unused, _parameters) /* nothing */ ++#define fse_in_pull fse_in_pull64 ++ ++#else ++ ++typedef uint32_t fse_bits; ++typedef fse_out_stream32 fse_out_stream; ++typedef fse_in_stream32 fse_in_stream; ++#define fse_mask_lsb fse_mask_lsb32 ++#define fse_extract_bits fse_extract_bits32 ++#define fse_out_init fse_out_init32 ++#define fse_out_flush fse_out_flush32 ++#define fse_out_finish fse_out_finish32 ++#define fse_out_push fse_out_push32 ++#define fse_in_init fse_in_checked_init32 ++#define fse_in_checked_init fse_in_checked_init32 ++#define fse_in_flush fse_in_checked_flush32 ++#define fse_in_checked_flush fse_in_checked_flush32 ++#define fse_in_flush2 fse_in_checked_flush32 ++#define fse_in_checked_flush2 fse_in_checked_flush32 ++#define fse_in_pull fse_in_pull32 ++ ++#endif ++ ++/*! @abstract Entry for one symbol in the encoder table (64b). */ ++typedef struct { ++ int16_t s0; // First state requiring a K-bit shift ++ int16_t k; // States S >= S0 are shifted K bits. 
States S < S0 are ++ // shifted K-1 bits ++ int16_t delta0; // Relative increment used to compute next state if S >= S0 ++ int16_t delta1; // Relative increment used to compute next state if S < S0 ++} fse_encoder_entry; ++ ++/*! @abstract Entry for one state in the decoder table (32b). */ ++typedef struct { // DO NOT REORDER THE FIELDS ++ int8_t k; // Number of bits to read ++ uint8_t symbol; // Emitted symbol ++ int16_t delta; // Signed increment used to compute next state (+bias) ++} fse_decoder_entry; ++ ++/*! @abstract Entry for one state in the value decoder table (64b). */ ++typedef struct { // DO NOT REORDER THE FIELDS ++ uint8_t total_bits; // state bits + extra value bits = shift for next decode ++ uint8_t value_bits; // extra value bits ++ int16_t delta; // state base (delta) ++ int32_t vbase; // value base ++} fse_value_decoder_entry; ++ ++/*! @abstract Encode SYMBOL using the encoder table, and update \c *pstate, ++ * \c out. ++ * @note The caller must ensure we have enough bits available in the output ++ * stream accumulator. */ ++FSE_INLINE void fse_encode(fse_state *__restrict pstate, ++ const fse_encoder_entry *__restrict encoder_table, ++ fse_out_stream *__restrict out, uint8_t symbol) { ++ int s = *pstate; ++ fse_encoder_entry e = encoder_table[symbol]; ++ int s0 = e.s0; ++ int k = e.k; ++ int delta0 = e.delta0; ++ int delta1 = e.delta1; ++ ++ // Number of bits to write ++ int hi = s >= s0; ++ fse_bit_count nbits = hi ? k : (k - 1); ++ fse_state delta = hi ? delta0 : delta1; ++ ++ // Write lower NBITS of state ++ fse_bits b = fse_mask_lsb(s, nbits); ++ fse_out_push(out, nbits, b); ++ ++ // Update state with remaining bits and delta ++ *pstate = delta + (s >> nbits); ++} ++ ++/*! @abstract Decode and return symbol using the decoder table, and update ++ * \c *pstate, \c in. ++ * @note The caller must ensure we have enough bits available in the input ++ * stream accumulator. 
*/ ++FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate, ++ const int32_t *__restrict decoder_table, ++ fse_in_stream *__restrict in) { ++ int32_t e = decoder_table[*pstate]; ++ ++ // Update state from K bits of input + DELTA ++ *pstate = (fse_state)(e >> 16) + (fse_state)fse_in_pull(in, e & 0xff); ++ ++ // Return the symbol for this state ++ return fse_extract_bits(e, 8, 8); // symbol ++} ++ ++/*! @abstract Decode and return value using the decoder table, and update \c ++ * *pstate, \c in. ++ * \c value_decoder_table[nstates] ++ * @note The caller must ensure we have enough bits available in the input ++ * stream accumulator. */ ++FSE_INLINE int32_t ++fse_value_decode(fse_state *__restrict pstate, ++ const fse_value_decoder_entry *value_decoder_table, ++ fse_in_stream *__restrict in) { ++ fse_value_decoder_entry entry = value_decoder_table[*pstate]; ++ uint32_t state_and_value_bits = (uint32_t)fse_in_pull(in, entry.total_bits); ++ *pstate = ++ (fse_state)(entry.delta + (state_and_value_bits >> entry.value_bits)); ++ return (int32_t)(entry.vbase + ++ fse_mask_lsb(state_and_value_bits, entry.value_bits)); ++} ++ ++// MARK: - Tables ++ ++// IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder ++// tables shall be initialized with the same parameters, including the ++// FREQ[NSYMBOL] array. ++// ++ ++/*! @abstract Sanity check on frequency table, verify sum of \c freq ++ * is <= \c number_of_states. */ ++FSE_INLINE int fse_check_freq(const uint16_t *freq_table, ++ const size_t table_size, ++ const size_t number_of_states) { ++ size_t sum_of_freq = 0; ++ int i; ++ for (i = 0; i < table_size; i++) { ++ sum_of_freq += freq_table[i]; ++ } ++ return (sum_of_freq > number_of_states) ? -1 : 0; ++} ++ ++/*! @abstract Initialize encoder table \c t[nsymbols]. ++ * ++ * @param nstates ++ * sum \c freq[i]; the number of states (a power of 2). ++ * ++ * @param nsymbols ++ * the number of symbols. 
++ * ++ * @param freq[nsymbols] ++ * is a normalized histogram of symbol frequencies, with \c freq[i] >= 0. ++ * Some symbols may have a 0 frequency. In that case they should not be ++ * present in the data. ++ */ ++void fse_init_encoder_table(int nstates, int nsymbols, ++ const uint16_t *__restrict freq, ++ fse_encoder_entry *__restrict t); ++ ++/*! @abstract Initialize decoder table \c t[nstates]. ++ * ++ * @param nstates ++ * sum \c freq[i]; the number of states (a power of 2). ++ * ++ * @param nsymbols ++ * the number of symbols. ++ * ++ * @param feq[nsymbols] ++ * a normalized histogram of symbol frequencies, with \c freq[i] >= 0. ++ * Some symbols may have a 0 frequency. In that case they should not be ++ * present in the data. ++ * ++ * @return 0 if OK. ++ * @return -1 on failure. ++ */ ++int fse_init_decoder_table(int nstates, int nsymbols, ++ const uint16_t *__restrict freq, ++ int32_t *__restrict t); ++ ++/*! @abstract Initialize value decoder table \c t[nstates]. ++ * ++ * @param nstates ++ * sum \cfreq[i]; the number of states (a power of 2). ++ * ++ * @param nsymbols ++ * the number of symbols. ++ * ++ * @param freq[nsymbols] ++ * a normalized histogram of symbol frequencies, with \c freq[i] >= 0. ++ * \c symbol_vbits[nsymbols] and \c symbol_vbase[nsymbols] are the number of ++ * value bits to read and the base value for each symbol. ++ * Some symbols may have a 0 frequency. In that case they should not be ++ * present in the data. ++ */ ++void fse_init_value_decoder_table(int nstates, int nsymbols, ++ const uint16_t *__restrict freq, ++ const uint8_t *__restrict symbol_vbits, ++ const int32_t *__restrict symbol_vbase, ++ fse_value_decoder_entry *__restrict t); ++ ++/*! @abstract Normalize a table \c t[nsymbols] of occurrences to ++ * \c freq[nsymbols]. 
*/ ++void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, ++ uint16_t *__restrict freq); +diff --git a/fs/apfs/lzfse/lzfse_internal.h b/fs/apfs/lzfse/lzfse_internal.h +new file mode 100644 +index 000000000..24d4d93d0 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_internal.h +@@ -0,0 +1,612 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++*/ ++ ++#ifndef LZFSE_INTERNAL_H ++#define LZFSE_INTERNAL_H ++ ++// Unlike the tunable parameters defined in lzfse_tunables.h, you probably ++// should not modify the values defined in this header. Doing so will either ++// break the compressor, or result in a compressed data format that is ++// incompatible. ++ ++#include "lzfse_fse.h" ++#include "lzfse_tunables.h" ++#include ++#include ++ ++#if defined(_MSC_VER) && !defined(__clang__) ++# define LZFSE_INLINE __forceinline ++# define __builtin_expect(X, Y) (X) ++# define __attribute__(X) ++# pragma warning(disable : 4068) // warning C4068: unknown pragma ++#else ++# define LZFSE_INLINE static inline __attribute__((__always_inline__)) ++#endif ++ ++// Implement GCC bit scan builtins for MSVC ++#if defined(_MSC_VER) && !defined(__clang__) ++#include ++ ++LZFSE_INLINE int __builtin_clz(unsigned int val) { ++ unsigned long r = 0; ++ if (_BitScanReverse(&r, val)) { ++ return 31 - r; ++ } ++ return 32; ++} ++ ++LZFSE_INLINE int __builtin_ctzl(unsigned long val) { ++ unsigned long r = 0; ++ if (_BitScanForward(&r, val)) { ++ return r; ++ } ++ return 32; ++} ++ ++LZFSE_INLINE int __builtin_ctzll(uint64_t val) { ++ unsigned long r = 0; ++#if defined(_M_AMD64) || defined(_M_ARM) ++ if (_BitScanForward64(&r, val)) { ++ return r; ++ } ++#else ++ if (_BitScanForward(&r, (uint32_t)val)) { ++ return r; ++ } ++ if (_BitScanForward(&r, (uint32_t)(val >> 32))) { ++ return 32 + r; ++ } ++#endif ++ return 64; ++} ++#endif ++ ++// Throughout LZFSE we refer to "L", "M" and "D"; these will always appear as ++// a triplet, and represent a "usual" LZ-style literal and match pair. "L" ++// is the number of literal bytes, "M" is the number of match bytes, and "D" ++// is the match "distance"; the distance in bytes between the current pointer ++// and the start of the match. 
++#define LZFSE_ENCODE_HASH_VALUES (1 << LZFSE_ENCODE_HASH_BITS) ++#define LZFSE_ENCODE_L_SYMBOLS 20 ++#define LZFSE_ENCODE_M_SYMBOLS 20 ++#define LZFSE_ENCODE_D_SYMBOLS 64 ++#define LZFSE_ENCODE_LITERAL_SYMBOLS 256 ++#define LZFSE_ENCODE_L_STATES 64 ++#define LZFSE_ENCODE_M_STATES 64 ++#define LZFSE_ENCODE_D_STATES 256 ++#define LZFSE_ENCODE_LITERAL_STATES 1024 ++#define LZFSE_MATCHES_PER_BLOCK 10000 ++#define LZFSE_LITERALS_PER_BLOCK (4 * LZFSE_MATCHES_PER_BLOCK) ++#define LZFSE_DECODE_LITERALS_PER_BLOCK (4 * LZFSE_DECODE_MATCHES_PER_BLOCK) ++ ++// LZFSE internal status. These values are used by internal LZFSE routines ++// as return codes. There should not be any good reason to change their ++// values; it is plausible that additional codes might be added in the ++// future. ++#define LZFSE_STATUS_OK 0 ++#define LZFSE_STATUS_SRC_EMPTY -1 ++#define LZFSE_STATUS_DST_FULL -2 ++#define LZFSE_STATUS_ERROR -3 ++ ++// Type representing an offset between elements in a buffer. On 64-bit ++// systems, this is stored in a 64-bit container to avoid extra sign- ++// extension operations in addressing arithmetic, but the value is always ++// representable as a 32-bit signed value in LZFSE's usage. ++#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) ++typedef int64_t lzfse_offset; ++#else ++typedef int32_t lzfse_offset; ++#endif ++ ++typedef uint64_t uintmax_t; ++ ++/*! @abstract History table set. Each line of the history table represents a set ++ * of candidate match locations, each of which begins with four bytes with the ++ * same hash. The table contains not only the positions, but also the first ++ * four bytes at each position. This doubles the memory footprint of the ++ * table, but allows us to quickly eliminate false-positive matches without ++ * doing any pointer chasing and without pulling in any additional cachelines. ++ * This provides a large performance win in practice. 
*/ ++typedef struct { ++ int32_t pos[LZFSE_ENCODE_HASH_WIDTH]; ++ uint32_t value[LZFSE_ENCODE_HASH_WIDTH]; ++} lzfse_history_set; ++ ++/*! @abstract An lzfse match is a sequence of bytes in the source buffer that ++ * exactly matches an earlier (but possibly overlapping) sequence of bytes in ++ * the same buffer. ++ * @code ++ * exeMPLARYexaMPLE ++ * | | | ||-|--- lzfse_match2.length=3 ++ * | | | ||----- lzfse_match2.pos ++ * | | |-|------ lzfse_match1.length=3 ++ * | | |-------- lzfse_match1.pos ++ * | |-------------- lzfse_match2.ref ++ * |----------------- lzfse_match1.ref ++ * @endcode ++ */ ++typedef struct { ++ // Offset of the first byte in the match. ++ lzfse_offset pos; ++ // First byte of the source -- the earlier location in the buffer with the ++ // same contents. ++ lzfse_offset ref; ++ // Length of the match. ++ uint32_t length; ++} lzfse_match; ++ ++// MARK: - Encoder and Decoder state objects ++ ++/*! @abstract Encoder state object. */ ++typedef struct { ++ // Pointer to first byte of the source buffer. ++ const uint8_t *src; ++ // Length of the source buffer in bytes. Note that this is not a size_t, ++ // but rather lzfse_offset, which is a signed type. The largest ++ // representable buffer is 2GB, but arbitrarily large buffers may be ++ // handled by repeatedly calling the encoder function and "translating" ++ // the state between calls. When doing this, it is beneficial to use ++ // blocks smaller than 2GB in order to maintain residency in the last-level ++ // cache. Consult the implementation of lzfse_encode_buffer for details. ++ lzfse_offset src_end; ++ // Offset of the first byte of the next literal to encode in the source ++ // buffer. ++ lzfse_offset src_literal; ++ // Offset of the byte currently being checked for a match. ++ lzfse_offset src_encode_i; ++ // The last byte offset to consider for a match. In some uses it makes ++ // sense to use a smaller offset than src_end. 
++ lzfse_offset src_encode_end; ++ // Pointer to the next byte to be written in the destination buffer. ++ uint8_t *dst; ++ // Pointer to the first byte of the destination buffer. ++ uint8_t *dst_begin; ++ // Pointer to one byte past the end of the destination buffer. ++ uint8_t *dst_end; ++ // Pending match; will be emitted unless a better match is found. ++ lzfse_match pending; ++ // The number of matches written so far. Note that there is no problem in ++ // using a 32-bit field for this quantity, because the state already limits ++ // us to at most 2GB of data; there cannot possibly be more matches than ++ // there are bytes in the input. ++ uint32_t n_matches; ++ // The number of literals written so far. ++ uint32_t n_literals; ++ // Lengths of found literals. ++ uint32_t l_values[LZFSE_MATCHES_PER_BLOCK]; ++ // Lengths of found matches. ++ uint32_t m_values[LZFSE_MATCHES_PER_BLOCK]; ++ // Distances of found matches. ++ uint32_t d_values[LZFSE_MATCHES_PER_BLOCK]; ++ // Concatenated literal bytes. ++ uint8_t literals[LZFSE_LITERALS_PER_BLOCK]; ++ // History table used to search for matches. Each entry of the table ++ // corresponds to a group of four byte sequences in the input stream ++ // that hash to the same value. ++ lzfse_history_set history_table[LZFSE_ENCODE_HASH_VALUES]; ++} lzfse_encoder_state; ++ ++/*! @abstract Decoder state object for lzfse compressed blocks. */ ++typedef struct { ++ // Number of matches remaining in the block. ++ uint32_t n_matches; ++ // Number of bytes used to encode L, M, D triplets for the block. ++ uint32_t n_lmd_payload_bytes; ++ // Pointer to the next literal to emit. ++ const uint8_t *current_literal; ++ // L, M, D triplet for the match currently being emitted. This is used only ++ // if we need to restart after reaching the end of the destination buffer in ++ // the middle of a literal or match. ++ int32_t l_value, m_value, d_value; ++ // FSE stream object. 
++ fse_in_stream lmd_in_stream; ++ // Offset of L,M,D encoding in the input buffer. Because we read through an ++ // FSE stream *backwards* while decoding, this is decremented as we move ++ // through a block. ++ uint32_t lmd_in_buf; ++ // The current state of the L, M, and D FSE decoders. ++ uint16_t l_state, m_state, d_state; ++ // Internal FSE decoder tables for the current block. These have ++ // alignment forced to 8 bytes to guarantee that a single state's ++ // entry cannot span two cachelines. ++ fse_value_decoder_entry l_decoder[LZFSE_ENCODE_L_STATES] __attribute__((__aligned__(8))); ++ fse_value_decoder_entry m_decoder[LZFSE_ENCODE_M_STATES] __attribute__((__aligned__(8))); ++ fse_value_decoder_entry d_decoder[LZFSE_ENCODE_D_STATES] __attribute__((__aligned__(8))); ++ int32_t literal_decoder[LZFSE_ENCODE_LITERAL_STATES]; ++ // The literal stream for the block, plus padding to allow for faster copy ++ // operations. ++ uint8_t literals[LZFSE_LITERALS_PER_BLOCK + 64]; ++} lzfse_compressed_block_decoder_state; ++ ++// Decoder state object for uncompressed blocks. ++typedef struct { uint32_t n_raw_bytes; } uncompressed_block_decoder_state; ++ ++/*! @abstract Decoder state object for lzvn-compressed blocks. */ ++typedef struct { ++ uint32_t n_raw_bytes; ++ uint32_t n_payload_bytes; ++ uint32_t d_prev; ++} lzvn_compressed_block_decoder_state; ++ ++/*! @abstract Decoder state object. */ ++typedef struct { ++ // Pointer to next byte to read from source buffer (this is advanced as we ++ // decode; src_begin describe the buffer and do not change). ++ const uint8_t *src; ++ // Pointer to first byte of source buffer. ++ const uint8_t *src_begin; ++ // Pointer to one byte past the end of the source buffer. ++ const uint8_t *src_end; ++ // Pointer to the next byte to write to destination buffer (this is advanced ++ // as we decode; dst_begin and dst_end describe the buffer and do not change). ++ uint8_t *dst; ++ // Pointer to first byte of destination buffer. 
++ uint8_t *dst_begin; ++ // Pointer to one byte past the end of the destination buffer. ++ uint8_t *dst_end; ++ // 1 if we have reached the end of the stream, 0 otherwise. ++ int end_of_stream; ++ // magic number of the current block if we are within a block, ++ // LZFSE_NO_BLOCK_MAGIC otherwise. ++ uint32_t block_magic; ++ lzfse_compressed_block_decoder_state compressed_lzfse_block_state; ++ lzvn_compressed_block_decoder_state compressed_lzvn_block_state; ++ uncompressed_block_decoder_state uncompressed_block_state; ++} lzfse_decoder_state; ++ ++// MARK: - Block header objects ++ ++#define LZFSE_NO_BLOCK_MAGIC 0x00000000 // 0 (invalid) ++#define LZFSE_ENDOFSTREAM_BLOCK_MAGIC 0x24787662 // bvx$ (end of stream) ++#define LZFSE_UNCOMPRESSED_BLOCK_MAGIC 0x2d787662 // bvx- (raw data) ++#define LZFSE_COMPRESSEDV1_BLOCK_MAGIC 0x31787662 // bvx1 (lzfse compressed, uncompressed tables) ++#define LZFSE_COMPRESSEDV2_BLOCK_MAGIC 0x32787662 // bvx2 (lzfse compressed, compressed tables) ++#define LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC 0x6e787662 // bvxn (lzvn compressed) ++ ++/*! @abstract Uncompressed block header in encoder stream. */ ++typedef struct { ++ // Magic number, always LZFSE_UNCOMPRESSED_BLOCK_MAGIC. ++ uint32_t magic; ++ // Number of raw bytes in block. ++ uint32_t n_raw_bytes; ++} uncompressed_block_header; ++ ++/*! @abstract Compressed block header with uncompressed tables. */ ++typedef struct { ++ // Magic number, always LZFSE_COMPRESSEDV1_BLOCK_MAGIC. ++ uint32_t magic; ++ // Number of decoded (output) bytes in block. ++ uint32_t n_raw_bytes; ++ // Number of encoded (source) bytes in block. ++ uint32_t n_payload_bytes; ++ // Number of literal bytes output by block (*not* the number of literals). ++ uint32_t n_literals; ++ // Number of matches in block (which is also the number of literals). ++ uint32_t n_matches; ++ // Number of bytes used to encode literals. ++ uint32_t n_literal_payload_bytes; ++ // Number of bytes used to encode matches. 
++ uint32_t n_lmd_payload_bytes; ++ ++ // Final encoder states for the block, which will be the initial states for ++ // the decoder: ++ // Final accum_nbits for literals stream. ++ int32_t literal_bits; ++ // There are four interleaved streams of literals, so there are four final ++ // states. ++ uint16_t literal_state[4]; ++ // accum_nbits for the l, m, d stream. ++ int32_t lmd_bits; ++ // Final L (literal length) state. ++ uint16_t l_state; ++ // Final M (match length) state. ++ uint16_t m_state; ++ // Final D (match distance) state. ++ uint16_t d_state; ++ ++ // Normalized frequency tables for each stream. Sum of values in each ++ // array is the number of states. ++ uint16_t l_freq[LZFSE_ENCODE_L_SYMBOLS]; ++ uint16_t m_freq[LZFSE_ENCODE_M_SYMBOLS]; ++ uint16_t d_freq[LZFSE_ENCODE_D_SYMBOLS]; ++ uint16_t literal_freq[LZFSE_ENCODE_LITERAL_SYMBOLS]; ++} lzfse_compressed_block_header_v1; ++ ++/*! @abstract Compressed block header with compressed tables. Note that because ++ * freq[] is compressed, the structure-as-stored-in-the-stream is *truncated*; ++ * we only store the used bytes of freq[]. This means that some extra care must ++ * be taken when reading one of these headers from the stream. */ ++typedef struct { ++ // Magic number, always LZFSE_COMPRESSEDV2_BLOCK_MAGIC. ++ uint32_t magic; ++ // Number of decoded (output) bytes in block. ++ uint32_t n_raw_bytes; ++ // The fields n_payload_bytes ... 
d_state from the ++ // lzfse_compressed_block_header_v1 object are packed into three 64-bit ++ // fields in the compressed header, as follows: ++ // ++ // offset bits value ++ // 0 20 n_literals ++ // 20 20 n_literal_payload_bytes ++ // 40 20 n_matches ++ // 60 3 literal_bits ++ // 63 1 --- unused --- ++ // ++ // 0 10 literal_state[0] ++ // 10 10 literal_state[1] ++ // 20 10 literal_state[2] ++ // 30 10 literal_state[3] ++ // 40 20 n_lmd_payload_bytes ++ // 60 3 lmd_bits ++ // 63 1 --- unused --- ++ // ++ // 0 32 header_size (total header size in bytes; this does not ++ // correspond to a field in the uncompressed header version, ++ // but is required; we wouldn't know the size of the ++ // compresssed header otherwise. ++ // 32 10 l_state ++ // 42 10 m_state ++ // 52 10 d_state ++ // 62 2 --- unused --- ++ uint64_t packed_fields[3]; ++ // Variable size freq tables, using a Huffman-style fixed encoding. ++ // Size allocated here is an upper bound (all values stored on 16 bits). ++ uint8_t freq[2 * (LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + ++ LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS)]; ++} __attribute__((__packed__, __aligned__(1))) ++lzfse_compressed_block_header_v2; ++ ++/*! @abstract LZVN compressed block header. */ ++typedef struct { ++ // Magic number, always LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC. ++ uint32_t magic; ++ // Number of decoded (output) bytes. ++ uint32_t n_raw_bytes; ++ // Number of encoded (source) bytes. ++ uint32_t n_payload_bytes; ++} lzvn_compressed_block_header; ++ ++// MARK: - LZFSE encode/decode interfaces ++ ++int lzfse_encode_init(lzfse_encoder_state *s); ++int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta); ++int lzfse_encode_base(lzfse_encoder_state *s); ++int lzfse_encode_finish(lzfse_encoder_state *s); ++int lzfse_decode(lzfse_decoder_state *s); ++ ++// MARK: - LZVN encode/decode interfaces ++ ++// Minimum source buffer size for compression. 
Smaller buffers will not be ++// compressed; the lzvn encoder will simply return. ++#define LZVN_ENCODE_MIN_SRC_SIZE ((size_t)8) ++ ++// Maximum source buffer size for compression. Larger buffers will be ++// compressed partially. ++#define LZVN_ENCODE_MAX_SRC_SIZE ((size_t)0xffffffffU) ++ ++// Minimum destination buffer size for compression. No compression will take ++// place if smaller. ++#define LZVN_ENCODE_MIN_DST_SIZE ((size_t)8) ++ ++size_t lzvn_decode_scratch_size(void); ++size_t lzvn_encode_scratch_size(void); ++size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size, ++ const void *__restrict src, size_t src_size, ++ void *__restrict work); ++size_t lzvn_decode_buffer(void *__restrict dst, size_t dst_size, ++ const void *__restrict src, size_t src_size); ++ ++/*! @abstract Signed offset in buffers, stored on either 32 or 64 bits. */ ++#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) ++typedef int64_t lzvn_offset; ++#else ++typedef int32_t lzvn_offset; ++#endif ++ ++// MARK: - LZFSE utility functions ++ ++/*! @abstract Load bytes from memory location SRC. */ ++LZFSE_INLINE uint16_t load2(const void *ptr) { ++ uint16_t data; ++ memcpy(&data, ptr, sizeof data); ++ return data; ++} ++ ++LZFSE_INLINE uint32_t load4(const void *ptr) { ++ uint32_t data; ++ memcpy(&data, ptr, sizeof data); ++ return data; ++} ++ ++LZFSE_INLINE uint64_t load8(const void *ptr) { ++ uint64_t data; ++ memcpy(&data, ptr, sizeof data); ++ return data; ++} ++ ++/*! @abstract Store bytes to memory location DST. */ ++LZFSE_INLINE void store2(void *ptr, uint16_t data) { ++ memcpy(ptr, &data, sizeof data); ++} ++ ++LZFSE_INLINE void store4(void *ptr, uint32_t data) { ++ memcpy(ptr, &data, sizeof data); ++} ++ ++LZFSE_INLINE void store8(void *ptr, uint64_t data) { ++ memcpy(ptr, &data, sizeof data); ++} ++ ++/*! @abstract Load+store bytes from locations SRC to DST. Not intended for use ++ * with overlapping buffers. 
Note that for LZ-style compression, you need ++ * copies to behave like naive memcpy( ) implementations do, splatting the ++ * leading sequence if the buffers overlap. This copy does not do that, so ++ * should not be used with overlapping buffers. */ ++LZFSE_INLINE void copy8(void *dst, const void *src) { store8(dst, load8(src)); } ++LZFSE_INLINE void copy16(void *dst, const void *src) { ++ uint64_t m0 = load8(src); ++ uint64_t m1 = load8((const unsigned char *)src + 8); ++ store8(dst, m0); ++ store8((unsigned char *)dst + 8, m1); ++} ++ ++// =============================================================== ++// Bitfield Operations ++ ++/*! @abstract Extracts \p width bits from \p container, starting with \p lsb; if ++ * we view \p container as a bit array, we extract \c container[lsb:lsb+width]. */ ++LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb, ++ unsigned width) { ++ static const size_t container_width = sizeof container * 8; ++ if (width == container_width) ++ return container; ++ return (container >> lsb) & (((uintmax_t)1 << width) - 1); ++} ++ ++/*! @abstract Inserts \p width bits from \p data into \p container, starting with \p lsb. ++ * Viewed as bit arrays, the operations is: ++ * @code ++ * container[:lsb] is unchanged ++ * container[lsb:lsb+width] <-- data[0:width] ++ * container[lsb+width:] is unchanged ++ * @endcode ++ */ ++LZFSE_INLINE uintmax_t insert(uintmax_t container, uintmax_t data, unsigned lsb, ++ unsigned width) { ++ static const size_t container_width = sizeof container * 8; ++ uintmax_t mask; ++ if (width == container_width) ++ return container; ++ mask = ((uintmax_t)1 << width) - 1; ++ return (container & ~(mask << lsb)) | (data & mask) << lsb; ++} ++ ++/*! @abstract Perform sanity checks on the values of lzfse_compressed_block_header_v1. ++ * Test that the field values are in the allowed limits, verify that the ++ * frequency tables sum to value less than total number of states. ++ * @return 0 if all tests passed. 
++ * @return negative error code with 1 bit set for each failed test. */ ++LZFSE_INLINE int lzfse_check_block_header_v1( ++ const lzfse_compressed_block_header_v1 *header) { ++ int tests_results = 0; ++ uint16_t literal_state[4]; ++ int res; ++ tests_results = ++ tests_results | ++ ((header->magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC) ? 0 : (1 << 0)); ++ tests_results = ++ tests_results | ++ ((header->n_literals <= LZFSE_LITERALS_PER_BLOCK) ? 0 : (1 << 1)); ++ tests_results = ++ tests_results | ++ ((header->n_matches <= LZFSE_MATCHES_PER_BLOCK) ? 0 : (1 << 2)); ++ ++ memcpy(literal_state, header->literal_state, sizeof(uint16_t) * 4); ++ ++ tests_results = ++ tests_results | ++ ((literal_state[0] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 3)); ++ tests_results = ++ tests_results | ++ ((literal_state[1] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 4)); ++ tests_results = ++ tests_results | ++ ((literal_state[2] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 5)); ++ tests_results = ++ tests_results | ++ ((literal_state[3] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 6)); ++ ++ tests_results = tests_results | ++ ((header->l_state < LZFSE_ENCODE_L_STATES) ? 0 : (1 << 7)); ++ tests_results = tests_results | ++ ((header->m_state < LZFSE_ENCODE_M_STATES) ? 0 : (1 << 8)); ++ tests_results = tests_results | ++ ((header->d_state < LZFSE_ENCODE_D_STATES) ? 0 : (1 << 9)); ++ ++ res = fse_check_freq(header->l_freq, LZFSE_ENCODE_L_SYMBOLS, ++ LZFSE_ENCODE_L_STATES); ++ tests_results = tests_results | ((res == 0) ? 0 : (1 << 10)); ++ res = fse_check_freq(header->m_freq, LZFSE_ENCODE_M_SYMBOLS, ++ LZFSE_ENCODE_M_STATES); ++ tests_results = tests_results | ((res == 0) ? 0 : (1 << 11)); ++ res = fse_check_freq(header->d_freq, LZFSE_ENCODE_D_SYMBOLS, ++ LZFSE_ENCODE_D_STATES); ++ tests_results = tests_results | ((res == 0) ? 0 : (1 << 12)); ++ res = fse_check_freq(header->literal_freq, LZFSE_ENCODE_LITERAL_SYMBOLS, ++ LZFSE_ENCODE_LITERAL_STATES); ++ tests_results = tests_results | ((res == 0) ? 
0 : (1 << 13)); ++ ++ if (tests_results) { ++ return tests_results | 0x80000000; // each 1 bit is a test that failed ++ // (except for the sign bit) ++ } ++ ++ return 0; // OK ++} ++ ++// MARK: - L, M, D encoding constants for LZFSE ++ ++// Largest encodable L (literal length), M (match length) and D (match ++// distance) values. ++#define LZFSE_ENCODE_MAX_L_VALUE 315 ++#define LZFSE_ENCODE_MAX_M_VALUE 2359 ++#define LZFSE_ENCODE_MAX_D_VALUE 262139 ++ ++/*! @abstract The L, M, D data streams are all encoded as a "base" value, which is ++ * FSE-encoded, and an "extra bits" value, which is the difference between ++ * value and base, and is simply represented as a raw bit value (because it ++ * is the low-order bits of a larger number, not much entropy can be ++ * extracted from these bits by more complex encoding schemes). The following ++ * tables represent the number of low-order bits to encode separately and the ++ * base values for each of L, M, and D. ++ * ++ * @note The inverse tables for mapping the other way are significantly larger. ++ * Those tables have been split out to lzfse_encode_tables.h in order to keep ++ * this file relatively small. 
*/ ++static const uint8_t l_extra_bits[LZFSE_ENCODE_L_SYMBOLS] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8 ++}; ++static const int32_t l_base_value[LZFSE_ENCODE_L_SYMBOLS] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60 ++}; ++static const uint8_t m_extra_bits[LZFSE_ENCODE_M_SYMBOLS] = { ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11 ++}; ++static const int32_t m_base_value[LZFSE_ENCODE_M_SYMBOLS] = { ++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312 ++}; ++static const uint8_t d_extra_bits[LZFSE_ENCODE_D_SYMBOLS] = { ++ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, ++ 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, ++ 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, ++ 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15 ++}; ++static const int32_t d_base_value[LZFSE_ENCODE_D_SYMBOLS] = { ++ 0, 1, 2, 3, 4, 6, 8, 10, 12, 16, ++ 20, 24, 28, 36, 44, 52, 60, 76, 92, 108, ++ 124, 156, 188, 220, 252, 316, 380, 444, 508, 636, ++ 764, 892, 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580, ++ 4092, 5116, 6140, 7164, 8188, 10236, 12284, 14332, 16380, 20476, ++ 24572, 28668, 32764, 40956, 49148, 57340, 65532, 81916, 98300, 114684, ++ 131068, 163836, 196604, 229372 ++}; ++ ++#endif // LZFSE_INTERNAL_H +diff --git a/fs/apfs/lzfse/lzfse_main.c b/fs/apfs/lzfse/lzfse_main.c +new file mode 100644 +index 000000000..dd4df99a7 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_main.c +@@ -0,0 +1,336 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++*/ ++ ++// LZFSE command line tool ++ ++#if !defined(_POSIX_C_SOURCE) || (_POSIX_C_SOURCE < 200112L) ++# undef _POSIX_C_SOURCE ++# define _POSIX_C_SOURCE 200112L ++#endif ++ ++#if defined(_MSC_VER) ++# if !defined(_CRT_NONSTDC_NO_DEPRECATE) ++# define _CRT_NONSTDC_NO_DEPRECATE ++# endif ++# if !defined(_CRT_SECURE_NO_WARNINGS) ++# define _CRT_SECURE_NO_WARNINGS ++# endif ++# if !defined(__clang__) ++# define inline __inline ++# endif ++#endif ++ ++#include "lzfse.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#if defined(_MSC_VER) ++# include ++# include ++#else ++# include ++# include ++#endif ++ ++// Same as realloc(x,s), except x is freed when realloc fails ++static inline void *lzfse_reallocf(void *x, size_t s) { ++ void *y = realloc(x, s); ++ if (y == 0) { ++ free(x); ++ return 0; ++ } ++ return y; ++} ++ ++static double get_time() { ++#if defined(_MSC_VER) ++ LARGE_INTEGER count, freq; ++ if (QueryPerformanceFrequency(&freq) && QueryPerformanceCounter(&count)) { ++ return (double)count.QuadPart / (double)freq.QuadPart; ++ } ++ return 1.0e-3 * (double)GetTickCount(); ++#else ++ struct timeval tv; ++ if (gettimeofday(&tv, 0) != 0) { ++ perror("gettimeofday"); ++ exit(1); ++ } ++ return (double)tv.tv_sec + 1.0e-6 * (double)tv.tv_usec; ++#endif ++} ++ ++//-------------------- ++ ++enum { LZFSE_ENCODE = 0, LZFSE_DECODE }; ++ ++void usage(int argc, char **argv) { ++ fprintf( ++ stderr, ++ "Usage: %s -encode|-decode [-i input_file] [-o output_file] [-h] [-v]\n", ++ argv[0]); ++} ++ ++#define USAGE(argc, argv) \ ++ do { \ ++ usage(argc, argv); \ ++ exit(0); \ ++ } while (0) ++#define USAGE_MSG(argc, argv, ...) 
\ ++ do { \ ++ usage(argc, argv); \ ++ fprintf(stderr, __VA_ARGS__); \ ++ exit(1); \ ++ } while (0) ++ ++int main(int argc, char **argv) { ++ const char *in_file = 0; // stdin ++ const char *out_file = 0; // stdout ++ int op = -1; // invalid op ++ int verbosity = 0; // quiet ++ ++ // Parse options ++ for (int i = 1; i < argc;) { ++ // no args ++ const char *a = argv[i++]; ++ if (strcmp(a, "-h") == 0) ++ USAGE(argc, argv); ++ if (strcmp(a, "-v") == 0) { ++ verbosity++; ++ continue; ++ } ++ if (strcmp(a, "-encode") == 0) { ++ op = LZFSE_ENCODE; ++ continue; ++ } ++ if (strcmp(a, "-decode") == 0) { ++ op = LZFSE_DECODE; ++ continue; ++ } ++ ++ // one arg ++ const char **arg_var = 0; ++ if (strcmp(a, "-i") == 0 && in_file == 0) ++ arg_var = &in_file; ++ else if (strcmp(a, "-o") == 0 && out_file == 0) ++ arg_var = &out_file; ++ if (arg_var != 0) { ++ // Flag is recognized. Check if there is an argument. ++ if (i == argc) ++ USAGE_MSG(argc, argv, "Error: Missing arg after %s\n", a); ++ *arg_var = argv[i++]; ++ continue; ++ } ++ ++ USAGE_MSG(argc, argv, "Error: invalid flag %s\n", a); ++ } ++ if (op < 0) ++ USAGE_MSG(argc, argv, "Error: -encode|-decode required\n"); ++ ++ // Info ++ if (verbosity > 0) { ++ if (op == LZFSE_ENCODE) ++ fprintf(stderr, "LZFSE encode\n"); ++ if (op == LZFSE_DECODE) ++ fprintf(stderr, "LZFSE decode\n"); ++ fprintf(stderr, "Input: %s\n", in_file ? in_file : "stdin"); ++ fprintf(stderr, "Output: %s\n", out_file ? 
out_file : "stdout"); ++ } ++ ++ // Load input ++ size_t in_allocated = 0; // allocated in IN ++ size_t in_size = 0; // used in IN ++ uint8_t *in = 0; // input buffer ++ int in_fd = -1; // input file desc ++ ++ if (in_file != 0) { ++ // If we have a file name, open it, and allocate the exact input size ++ struct stat st; ++#if defined(_WIN32) ++ in_fd = open(in_file, O_RDONLY | O_BINARY); ++#else ++ in_fd = open(in_file, O_RDONLY); ++#endif ++ if (in_fd < 0) { ++ perror(in_file); ++ exit(1); ++ } ++ if (fstat(in_fd, &st) != 0) { ++ perror(in_file); ++ exit(1); ++ } ++ if (st.st_size > SIZE_MAX) { ++ fprintf(stderr, "File is too large\n"); ++ exit(1); ++ } ++ in_allocated = (size_t)st.st_size; ++ } else { ++ // Otherwise, read from stdin, and allocate to 1 MB, grow as needed ++ in_allocated = 1 << 20; ++ in_fd = 0; ++#if defined(_WIN32) ++ if (setmode(in_fd, O_BINARY) == -1) { ++ perror("setmode"); ++ exit(1); ++ } ++#endif ++ } ++ in = (uint8_t *)malloc(in_allocated); ++ if (in == 0) { ++ perror("malloc"); ++ exit(1); ++ } ++ ++ while (1) { ++ // re-alloc if needed ++ if (in_size == in_allocated) { ++ if (in_allocated < (100 << 20)) ++ in_allocated <<= 1; // double it ++ else ++ in_allocated += (100 << 20); // or add 100 MB if already large ++ in = lzfse_reallocf(in, in_allocated); ++ if (in == 0) { ++ perror("malloc"); ++ exit(1); ++ } ++ } ++ ++ ptrdiff_t r = read(in_fd, in + in_size, in_allocated - in_size); ++ if (r < 0) { ++ perror("read"); ++ exit(1); ++ } ++ if (r == 0) ++ break; // end of file ++ in_size += (size_t)r; ++ } ++ ++ if (in_file != 0) { ++ close(in_fd); ++ in_fd = -1; ++ } ++ ++ // Size info ++ if (verbosity > 0) { ++ fprintf(stderr, "Input size: %zu B\n", in_size); ++ } ++ ++ // Encode/decode ++ // Compute size for result buffer; we assume here that encode shrinks size, ++ // and that decode grows by no more than 4x. These are reasonable common- ++ // case guidelines, but are not formally guaranteed to be satisfied. 
++ size_t out_allocated = (op == LZFSE_ENCODE) ? in_size : (4 * in_size); ++ size_t out_size = 0; ++ size_t aux_allocated = (op == LZFSE_ENCODE) ? lzfse_encode_scratch_size() ++ : lzfse_decode_scratch_size(); ++ void *aux = aux_allocated ? malloc(aux_allocated) : 0; ++ if (aux_allocated != 0 && aux == 0) { ++ perror("malloc"); ++ exit(1); ++ } ++ uint8_t *out = (uint8_t *)malloc(out_allocated); ++ if (out == 0) { ++ perror("malloc"); ++ exit(1); ++ } ++ ++ double c0 = get_time(); ++ while (1) { ++ if (op == LZFSE_ENCODE) ++ out_size = lzfse_encode_buffer(out, out_allocated, in, in_size, aux); ++ else ++ out_size = lzfse_decode_buffer(out, out_allocated, in, in_size, aux); ++ ++ // If output buffer was too small, grow and retry. ++ if (out_size == 0 || (op == LZFSE_DECODE && out_size == out_allocated)) { ++ if (verbosity > 0) ++ fprintf(stderr, "Output buffer was too small, increasing size...\n"); ++ out_allocated <<= 1; ++ out = (uint8_t *)lzfse_reallocf(out, out_allocated); ++ if (out == 0) { ++ perror("malloc"); ++ exit(1); ++ } ++ continue; ++ } ++ ++ break; ++ } ++ double c1 = get_time(); ++ ++ if (verbosity > 0) { ++ fprintf(stderr, "Output size: %zu B\n", out_size); ++ size_t raw_size = (op == LZFSE_ENCODE) ? in_size : out_size; ++ size_t compressed_size = (op == LZFSE_ENCODE) ? 
out_size : in_size; ++ fprintf(stderr, "Compression ratio: %.3f\n", ++ (double)raw_size / (double)compressed_size); ++ double ns_per_byte = 1.0e9 * (c1 - c0) / (double)raw_size; ++ double mb_per_s = (double)raw_size / 1024.0 / 1024.0 / (c1 - c0); ++ fprintf(stderr, "Speed: %.2f ns/B, %.2f MB/s\n",ns_per_byte,mb_per_s); ++ } ++ ++ // Write output ++ int out_fd = -1; ++ if (out_file) { ++#if defined(_WIN32) ++ out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, ++ S_IWRITE); ++#else ++ out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644); ++#endif ++ if (out_fd < 0) { ++ perror(out_file); ++ exit(1); ++ } ++ } else { ++ out_fd = 1; // stdout ++#if defined(_WIN32) ++ if (setmode(out_fd, O_BINARY) == -1) { ++ perror("setmode"); ++ exit(1); ++ } ++#endif ++ } ++ for (size_t out_pos = 0; out_pos < out_size;) { ++ ptrdiff_t w = write(out_fd, out + out_pos, out_size - out_pos); ++ if (w < 0) { ++ perror("write"); ++ exit(1); ++ } ++ if (w == 0) { ++ fprintf(stderr, "Failed to write to output file\n"); ++ exit(1); ++ } ++ out_pos += (size_t)w; ++ } ++ if (out_file != 0) { ++ close(out_fd); ++ out_fd = -1; ++ } ++ ++ free(in); ++ free(out); ++ free(aux); ++ return 0; // OK ++} +diff --git a/fs/apfs/lzfse/lzfse_tunables.h b/fs/apfs/lzfse/lzfse_tunables.h +new file mode 100644 +index 000000000..a2a327528 +--- /dev/null ++++ b/fs/apfs/lzfse/lzfse_tunables.h +@@ -0,0 +1,60 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. 
Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++#ifndef LZFSE_TUNABLES_H ++#define LZFSE_TUNABLES_H ++ ++// Parameters controlling details of the LZ-style match search. These values ++// may be modified to fine tune compression ratio vs. encoding speed, while ++// keeping the compressed format compatible with LZFSE. Note that ++// modifying them will also change the amount of work space required by ++// the encoder. The values here are those used in the compression library ++// on iOS and OS X. ++ ++// Number of bits for hash function to produce. Should be in the range ++// [10, 16]. Larger values reduce the number of false-positive found during ++// the match search, and expand the history table, which may allow additional ++// matches to be found, generally improving the achieved compression ratio. ++// Larger values also increase the workspace size, and make it less likely ++// that the history table will be present in cache, which reduces performance. 
++#define LZFSE_ENCODE_HASH_BITS 14 ++ ++// Number of positions to store for each line in the history table. May ++// be either 4 or 8. Using 8 doubles the size of the history table, which ++// increases the chance of finding matches (thus improving compression ratio), ++// but also increases the workspace size. ++#define LZFSE_ENCODE_HASH_WIDTH 4 ++ ++// Match length in bytes to cause immediate emission. Generally speaking, ++// LZFSE maintains multiple candidate matches and waits to decide which match ++// to emit until more information is available. When a match exceeds this ++// threshold, it is emitted immediately. Thus, smaller values may give ++// somewhat better performance, and larger values may give somewhat better ++// compression ratios. ++#define LZFSE_ENCODE_GOOD_MATCH 40 ++ ++// When the source buffer is very small, LZFSE doesn't compress as well as ++// some simpler algorithms. To maintain reasonable compression for these ++// cases, we transition to use LZVN instead if the size of the source buffer ++// is below this threshold. ++#define LZFSE_ENCODE_LZVN_THRESHOLD 4096 ++ ++#endif // LZFSE_TUNABLES_H +diff --git a/fs/apfs/lzfse/lzvn_decode_base.c b/fs/apfs/lzfse/lzvn_decode_base.c +new file mode 100644 +index 000000000..77e419994 +--- /dev/null ++++ b/fs/apfs/lzfse/lzvn_decode_base.c +@@ -0,0 +1,721 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. 
Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++// LZVN low-level decoder ++ ++#include "lzvn_decode_base.h" ++ ++#if !defined(HAVE_LABELS_AS_VALUES) ++# if defined(__GNUC__) || defined(__clang__) ++# define HAVE_LABELS_AS_VALUES 1 ++# else ++# define HAVE_LABELS_AS_VALUES 0 ++# endif ++#endif ++ ++// Both the source and destination buffers are represented by a pointer and ++// a length; they are *always* updated in concert using this macro; however ++// many bytes the pointer is advanced, the length is decremented by the same ++// amount. Thus, pointer + length always points to the byte one past the end ++// of the buffer. 
++#define PTR_LEN_INC(_pointer, _length, _increment) \ ++ (_pointer += _increment, _length -= _increment) ++ ++// Update state with current positions and distance, corresponding to the ++// beginning of an instruction in both streams ++#define UPDATE_GOOD \ ++ (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D) ++ ++void lzvn_decode(lzvn_decoder_state *state) { ++#if HAVE_LABELS_AS_VALUES ++ // Jump table for all instructions ++ static const void *opc_tbl[256] = { ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, ++ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&med_d, &&med_d, &&med_d, &&med_d, 
&&med_d, &&med_d, &&med_d, &&med_d, ++ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, ++ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, ++ &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, ++ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, ++ &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, ++ &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, ++ &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, ++ &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, ++ &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m}; ++#endif ++ size_t src_len = state->src_end - state->src; ++ size_t dst_len = state->dst_end - state->dst; ++ const unsigned char *src_ptr = state->src; ++ unsigned char *dst_ptr = state->dst; ++ size_t D = state->d_prev; ++ size_t M; ++ size_t L; ++ size_t opc_len; ++ unsigned char opc; ++ uint16_t opc23; ++ ++ if (src_len == 0 || dst_len == 0) ++ return; // empty buffer ++ ++ // Do we have a partially expanded match saved in state? ++ if (state->L != 0 || state->M != 0) { ++ L = state->L; ++ M = state->M; ++ D = state->D; ++ opc_len = 0; // we already skipped the op ++ state->L = state->M = state->D = 0; ++ if (M == 0) ++ goto copy_literal; ++ if (L == 0) ++ goto copy_match; ++ goto copy_literal_and_match; ++ } ++ ++ opc = src_ptr[0]; ++ ++#if HAVE_LABELS_AS_VALUES ++ goto *opc_tbl[opc]; ++#else ++ for (;;) { ++ switch (opc) { ++#endif ++// =============================================================== ++// These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a ++// literal and a match. 
The bulk of their implementations are shared; ++// each label here only does the work of setting the opcode length (not ++// including any literal bytes), and extracting the literal length, match ++// length, and match distance (except in pre_d). They then jump into the ++// shared implementation to actually output the literal and match bytes. ++// ++// No error checking happens in the first stage, except for ensuring that ++// the source has enough length to represent the full opcode before ++// reading past the first byte. ++sml_d: ++#if !HAVE_LABELS_AS_VALUES ++ case 0: ++ case 1: ++ case 2: ++ case 3: ++ case 4: ++ case 5: ++ case 8: ++ case 9: ++ case 10: ++ case 11: ++ case 12: ++ case 13: ++ case 16: ++ case 17: ++ case 18: ++ case 19: ++ case 20: ++ case 21: ++ case 24: ++ case 25: ++ case 26: ++ case 27: ++ case 28: ++ case 29: ++ case 32: ++ case 33: ++ case 34: ++ case 35: ++ case 36: ++ case 37: ++ case 40: ++ case 41: ++ case 42: ++ case 43: ++ case 44: ++ case 45: ++ case 48: ++ case 49: ++ case 50: ++ case 51: ++ case 52: ++ case 53: ++ case 56: ++ case 57: ++ case 58: ++ case 59: ++ case 60: ++ case 61: ++ case 64: ++ case 65: ++ case 66: ++ case 67: ++ case 68: ++ case 69: ++ case 72: ++ case 73: ++ case 74: ++ case 75: ++ case 76: ++ case 77: ++ case 80: ++ case 81: ++ case 82: ++ case 83: ++ case 84: ++ case 85: ++ case 88: ++ case 89: ++ case 90: ++ case 91: ++ case 92: ++ case 93: ++ case 96: ++ case 97: ++ case 98: ++ case 99: ++ case 100: ++ case 101: ++ case 104: ++ case 105: ++ case 106: ++ case 107: ++ case 108: ++ case 109: ++ case 128: ++ case 129: ++ case 130: ++ case 131: ++ case 132: ++ case 133: ++ case 136: ++ case 137: ++ case 138: ++ case 139: ++ case 140: ++ case 141: ++ case 144: ++ case 145: ++ case 146: ++ case 147: ++ case 148: ++ case 149: ++ case 152: ++ case 153: ++ case 154: ++ case 155: ++ case 156: ++ case 157: ++ case 192: ++ case 193: ++ case 194: ++ case 195: ++ case 196: ++ case 197: ++ case 200: ++ case 201: ++ 
case 202: ++ case 203: ++ case 204: ++ case 205: ++#endif ++ UPDATE_GOOD; ++ // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL ++ // where the length of literal (0-3 bytes) is encoded by the high 2 bits of ++ // the first byte. We first extract the literal length so we know how long ++ // the opcode is, then check that the source can hold both this opcode and ++ // at least one byte of the next (because any valid input stream must be ++ // terminated with an eos token). ++ opc_len = 2; ++ L = (size_t)extract(opc, 6, 2); ++ M = (size_t)extract(opc, 3, 3) + 3; ++ // We need to ensure that the source buffer is long enough that we can ++ // safely read this entire opcode, the literal that follows, and the first ++ // byte of the next opcode. Once we satisfy this requirement, we can ++ // safely unpack the match distance. A check similar to this one is ++ // present in all the opcode implementations. ++ if (src_len <= opc_len + L) ++ return; // source truncated ++ D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1]; ++ goto copy_literal_and_match; ++ ++med_d: ++#if !HAVE_LABELS_AS_VALUES ++ case 160: ++ case 161: ++ case 162: ++ case 163: ++ case 164: ++ case 165: ++ case 166: ++ case 167: ++ case 168: ++ case 169: ++ case 170: ++ case 171: ++ case 172: ++ case 173: ++ case 174: ++ case 175: ++ case 176: ++ case 177: ++ case 178: ++ case 179: ++ case 180: ++ case 181: ++ case 182: ++ case 183: ++ case 184: ++ case 185: ++ case 186: ++ case 187: ++ case 188: ++ case 189: ++ case 190: ++ case 191: ++#endif ++ UPDATE_GOOD; ++ // "medium distance": This is a minor variant of the "small distance" ++ // encoding, where we will now use two extra bytes instead of one to encode ++ // the restof the match length and distance. This allows an extra two bits ++ // for the match length, and an extra three bits for the match distance. The ++ // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL. 
++ opc_len = 3; ++ L = (size_t)extract(opc, 3, 2); ++ if (src_len <= opc_len + L) ++ return; // source truncated ++ opc23 = load2(&src_ptr[1]); ++ M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3); ++ D = (size_t)extract(opc23, 2, 14); ++ goto copy_literal_and_match; ++ ++lrg_d: ++#if !HAVE_LABELS_AS_VALUES ++ case 7: ++ case 15: ++ case 23: ++ case 31: ++ case 39: ++ case 47: ++ case 55: ++ case 63: ++ case 71: ++ case 79: ++ case 87: ++ case 95: ++ case 103: ++ case 111: ++ case 135: ++ case 143: ++ case 151: ++ case 159: ++ case 199: ++ case 207: ++#endif ++ UPDATE_GOOD; ++ // "large distance": This is another variant of the "small distance" ++ // encoding, where we will now use two extra bytes to encode the match ++ // distance, which allows distances up to 65535 to be represented. The full ++ // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL. ++ opc_len = 3; ++ L = (size_t)extract(opc, 6, 2); ++ M = (size_t)extract(opc, 3, 3) + 3; ++ if (src_len <= opc_len + L) ++ return; // source truncated ++ D = load2(&src_ptr[1]); ++ goto copy_literal_and_match; ++ ++pre_d: ++#if !HAVE_LABELS_AS_VALUES ++ case 70: ++ case 78: ++ case 86: ++ case 94: ++ case 102: ++ case 110: ++ case 134: ++ case 142: ++ case 150: ++ case 158: ++ case 198: ++ case 206: ++#endif ++ UPDATE_GOOD; ++ // "previous distance": This opcode has the structure LLMMM110, where the ++ // length of the literal (0-3 bytes) is encoded by the high 2 bits of the ++ // first byte. We first extract the literal length so we know how long ++ // the opcode is, then check that the source can hold both this opcode and ++ // at least one byte of the next (because any valid input stream must be ++ // terminated with an eos token). 
++ opc_len = 1; ++ L = (size_t)extract(opc, 6, 2); ++ M = (size_t)extract(opc, 3, 3) + 3; ++ if (src_len <= opc_len + L) ++ return; // source truncated ++ goto copy_literal_and_match; ++ ++copy_literal_and_match: ++ // Common implementation of writing data for opcodes that have both a ++ // literal and a match. We begin by advancing the source pointer past ++ // the opcode, so that it points at the first literal byte (if L ++ // is non-zero; otherwise it points at the next opcode). ++ PTR_LEN_INC(src_ptr, src_len, opc_len); ++ // Now we copy the literal from the source pointer to the destination. ++ if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) { ++ // The literal is 0-3 bytes; if we are not near the end of the buffer, ++ // we can safely just do a 4 byte copy (which is guaranteed to cover ++ // the complete literal, and may include some other bytes as well). ++ store4(dst_ptr, load4(src_ptr)); ++ } else if (L <= dst_len) { ++ // We are too close to the end of either the input or output stream ++ // to be able to safely use a four-byte copy, but we will not exhaust ++ // either stream (we already know that the source will not be ++ // exhausted from checks in the individual opcode implementations, ++ // and we just tested that dst_len > L). Thus, we need to do a ++ // byte-by-byte copy of the literal. This is slow, but it can only ever ++ // happen near the very end of a buffer, so it is not an important case to ++ // optimize. 
++ size_t i; ++ for (i = 0; i < L; ++i) ++ dst_ptr[i] = src_ptr[i]; ++ } else { ++ // Destination truncated: fill DST, and store partial match ++ ++ // Copy partial literal ++ size_t i; ++ for (i = 0; i < dst_len; ++i) ++ dst_ptr[i] = src_ptr[i]; ++ // Save state ++ state->src = src_ptr + dst_len; ++ state->dst = dst_ptr + dst_len; ++ state->L = L - dst_len; ++ state->M = M; ++ state->D = D; ++ return; // destination truncated ++ } ++ // Having completed the copy of the literal, we advance both the source ++ // and destination pointers by the number of literal bytes. ++ PTR_LEN_INC(dst_ptr, dst_len, L); ++ PTR_LEN_INC(src_ptr, src_len, L); ++ // Check if the match distance is valid; matches must not reference ++ // bytes that preceed the start of the output buffer, nor can the match ++ // distance be zero. ++ if (D > dst_ptr - state->dst_begin || D == 0) ++ goto invalid_match_distance; ++copy_match: ++ // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep ++ // in mind that we may have D < M, in which case the source and destination ++ // windows overlap in the copy. The semantics of the match copy are *not* ++ // those of memmove( ); if the buffers overlap it needs to behave as though ++ // we were copying byte-by-byte in increasing address order. If, for example, ++ // D is 1, the copy operation is equivalent to: ++ // ++ // memset(dst_ptr, dst_ptr[-1], M); ++ // ++ // i.e. it splats the previous byte. This means that we need to be very ++ // careful about using wide loads or stores to perform the copy operation. ++ if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) { ++ // We are not near the end of the buffer, and the match distance ++ // is at least eight. Thus, we can safely loop using eight byte ++ // copies. The last of these may slop over the intended end of ++ // the match, but this is OK because we know we have a safety bound ++ // away from the end of the destination buffer. 
++ size_t i; ++ for (i = 0; i < M; i += 8) ++ store8(&dst_ptr[i], load8(&dst_ptr[i - D])); ++ } else if (M <= dst_len) { ++ // Either the match distance is too small, or we are too close to ++ // the end of the buffer to safely use eight byte copies. Fall back ++ // on a simple byte-by-byte implementation. ++ size_t i; ++ for (i = 0; i < M; ++i) ++ dst_ptr[i] = dst_ptr[i - D]; ++ } else { ++ // Destination truncated: fill DST, and store partial match ++ ++ // Copy partial match ++ size_t i; ++ for (i = 0; i < dst_len; ++i) ++ dst_ptr[i] = dst_ptr[i - D]; ++ // Save state ++ state->src = src_ptr; ++ state->dst = dst_ptr + dst_len; ++ state->L = 0; ++ state->M = M - dst_len; ++ state->D = D; ++ return; // destination truncated ++ } ++ // Update the destination pointer and length to account for the bytes ++ // written by the match, then load the next opcode byte and branch to ++ // the appropriate implementation. ++ PTR_LEN_INC(dst_ptr, dst_len, M); ++ opc = src_ptr[0]; ++#if HAVE_LABELS_AS_VALUES ++ goto *opc_tbl[opc]; ++#else ++ break; ++#endif ++ ++// =============================================================== ++// Opcodes representing only a match (no literal). ++// These two opcodes (lrg_m and sml_m) encode only a match. The match ++// distance is carried over from the previous opcode, so all they need ++// to encode is the match length. We are able to reuse the match copy ++// sequence from the literal and match opcodes to perform the actual ++// copy implementation. ++sml_m: ++#if !HAVE_LABELS_AS_VALUES ++ case 241: ++ case 242: ++ case 243: ++ case 244: ++ case 245: ++ case 246: ++ case 247: ++ case 248: ++ case 249: ++ case 250: ++ case 251: ++ case 252: ++ case 253: ++ case 254: ++ case 255: ++#endif ++ UPDATE_GOOD; ++ // "small match": This opcode has no literal, and uses the previous match ++ // distance (i.e. it encodes only the match length), in a single byte as ++ // 1111MMMM. 
++ opc_len = 1; ++ if (src_len <= opc_len) ++ return; // source truncated ++ M = (size_t)extract(opc, 0, 4); ++ PTR_LEN_INC(src_ptr, src_len, opc_len); ++ goto copy_match; ++ ++lrg_m: ++#if !HAVE_LABELS_AS_VALUES ++ case 240: ++#endif ++ UPDATE_GOOD; ++ // "large match": This opcode has no literal, and uses the previous match ++ // distance (i.e. it encodes only the match length). It is encoded in two ++ // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can ++ // be represented by sml_m, there is an implicit bias of 16 on the match ++ // length; the representable values are [16,271]. ++ opc_len = 2; ++ if (src_len <= opc_len) ++ return; // source truncated ++ M = src_ptr[1] + 16; ++ PTR_LEN_INC(src_ptr, src_len, opc_len); ++ goto copy_match; ++ ++// =============================================================== ++// Opcodes representing only a literal (no match). ++// These two opcodes (lrg_l and sml_l) encode only a literal. There is no ++// match length or match distance to worry about (but we need to *not* ++// touch D, as it must be preserved between opcodes). ++sml_l: ++#if !HAVE_LABELS_AS_VALUES ++ case 225: ++ case 226: ++ case 227: ++ case 228: ++ case 229: ++ case 230: ++ case 231: ++ case 232: ++ case 233: ++ case 234: ++ case 235: ++ case 236: ++ case 237: ++ case 238: ++ case 239: ++#endif ++ UPDATE_GOOD; ++ // "small literal": This opcode has no match, and encodes only a literal ++ // of length up to 15 bytes. The format is 1110LLLL LITERAL. ++ opc_len = 1; ++ L = (size_t)extract(opc, 0, 4); ++ goto copy_literal; ++ ++lrg_l: ++#if !HAVE_LABELS_AS_VALUES ++ case 224: ++#endif ++ UPDATE_GOOD; ++ // "large literal": This opcode has no match, and uses the previous match ++ // distance (i.e. it encodes only the match length). It is encoded in two ++ // bytes as 11100000 LLLLLLLL LITERAL. 
Because literals smaller than 16 ++ // bytes can be represented by sml_l, there is an implicit bias of 16 on ++ // the literal length; the representable values are [16,271]. ++ opc_len = 2; ++ if (src_len <= 2) ++ return; // source truncated ++ L = src_ptr[1] + 16; ++ goto copy_literal; ++ ++copy_literal: ++ // Check that the source buffer is large enough to hold the complete ++ // literal and at least the first byte of the next opcode. If so, advance ++ // the source pointer to point to the first byte of the literal and adjust ++ // the source length accordingly. ++ if (src_len <= opc_len + L) ++ return; // source truncated ++ PTR_LEN_INC(src_ptr, src_len, opc_len); ++ // Now we copy the literal from the source pointer to the destination. ++ if (dst_len >= L + 7 && src_len >= L + 7) { ++ // We are not near the end of the source or destination buffers; thus ++ // we can safely copy the literal using wide copies, without worrying ++ // about reading or writing past the end of either buffer. ++ size_t i; ++ for (i = 0; i < L; i += 8) ++ store8(&dst_ptr[i], load8(&src_ptr[i])); ++ } else if (L <= dst_len) { ++ // We are too close to the end of either the input or output stream ++ // to be able to safely use an eight-byte copy. Instead we copy the ++ // literal byte-by-byte. ++ size_t i; ++ for (i = 0; i < L; ++i) ++ dst_ptr[i] = src_ptr[i]; ++ } else { ++ // Destination truncated: fill DST, and store partial match ++ ++ // Copy partial literal ++ size_t i; ++ for (i = 0; i < dst_len; ++i) ++ dst_ptr[i] = src_ptr[i]; ++ // Save state ++ state->src = src_ptr + dst_len; ++ state->dst = dst_ptr + dst_len; ++ state->L = L - dst_len; ++ state->M = 0; ++ state->D = D; ++ return; // destination truncated ++ } ++ // Having completed the copy of the literal, we advance both the source ++ // and destination pointers by the number of literal bytes. 
++ PTR_LEN_INC(dst_ptr, dst_len, L); ++ PTR_LEN_INC(src_ptr, src_len, L); ++ // Load the first byte of the next opcode, and jump to its implementation. ++ opc = src_ptr[0]; ++#if HAVE_LABELS_AS_VALUES ++ goto *opc_tbl[opc]; ++#else ++ break; ++#endif ++ ++// =============================================================== ++// Other opcodes ++nop: ++#if !HAVE_LABELS_AS_VALUES ++ case 14: ++ case 22: ++#endif ++ UPDATE_GOOD; ++ opc_len = 1; ++ if (src_len <= opc_len) ++ return; // source truncated ++ PTR_LEN_INC(src_ptr, src_len, opc_len); ++ opc = src_ptr[0]; ++#if HAVE_LABELS_AS_VALUES ++ goto *opc_tbl[opc]; ++#else ++ break; ++#endif ++ ++eos: ++#if !HAVE_LABELS_AS_VALUES ++ case 6: ++#endif ++ opc_len = 8; ++ if (src_len < opc_len) ++ return; // source truncated (here we don't need an extra byte for next op ++ // code) ++ PTR_LEN_INC(src_ptr, src_len, opc_len); ++ state->end_of_stream = 1; ++ UPDATE_GOOD; ++ return; // end-of-stream ++ ++// =============================================================== ++// Return on error ++udef: ++#if !HAVE_LABELS_AS_VALUES ++ case 30: ++ case 38: ++ case 46: ++ case 54: ++ case 62: ++ case 112: ++ case 113: ++ case 114: ++ case 115: ++ case 116: ++ case 117: ++ case 118: ++ case 119: ++ case 120: ++ case 121: ++ case 122: ++ case 123: ++ case 124: ++ case 125: ++ case 126: ++ case 127: ++ case 208: ++ case 209: ++ case 210: ++ case 211: ++ case 212: ++ case 213: ++ case 214: ++ case 215: ++ case 216: ++ case 217: ++ case 218: ++ case 219: ++ case 220: ++ case 221: ++ case 222: ++ case 223: ++#endif ++invalid_match_distance: ++ ++ return; // we already updated state ++#if !HAVE_LABELS_AS_VALUES ++ } ++ } ++#endif ++} +diff --git a/fs/apfs/lzfse/lzvn_decode_base.h b/fs/apfs/lzfse/lzvn_decode_base.h +new file mode 100644 +index 000000000..1262eaded +--- /dev/null ++++ b/fs/apfs/lzfse/lzvn_decode_base.h +@@ -0,0 +1,68 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. 
++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++// LZVN low-level decoder (v2) ++// Functions in the low-level API should switch to these at some point. ++// Apr 2014 ++ ++#ifndef LZVN_DECODE_BASE_H ++#define LZVN_DECODE_BASE_H ++ ++#include "lzfse_internal.h" ++ ++/*! @abstract Base decoder state. 
*/ ++typedef struct { ++ ++ // Decoder I/O ++ ++ // Next byte to read in source buffer ++ const unsigned char *src; ++ // Next byte after source buffer ++ const unsigned char *src_end; ++ ++ // Next byte to write in destination buffer (by decoder) ++ unsigned char *dst; ++ // Valid range for destination buffer is [dst_begin, dst_end - 1] ++ unsigned char *dst_begin; ++ unsigned char *dst_end; ++ // Next byte to read in destination buffer (modified by caller) ++ unsigned char *dst_current; ++ ++ // Decoder state ++ ++ // Partially expanded match, or 0,0,0. ++ // In that case, src points to the next literal to copy, or the next op-code ++ // if L==0. ++ size_t L, M, D; ++ ++ // Distance for last emitted match, or 0 ++ lzvn_offset d_prev; ++ ++ // Did we decode end-of-stream? ++ int end_of_stream; ++ ++} lzvn_decoder_state; ++ ++/*! @abstract Decode source to destination. ++ * Updates \p state (src,dst,d_prev). */ ++void lzvn_decode(lzvn_decoder_state *state); ++ ++#endif // LZVN_DECODE_BASE_H +diff --git a/fs/apfs/lzfse/lzvn_encode_base.c b/fs/apfs/lzfse/lzvn_encode_base.c +new file mode 100644 +index 000000000..c86b55114 +--- /dev/null ++++ b/fs/apfs/lzfse/lzvn_encode_base.c +@@ -0,0 +1,593 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. 
++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++ ++// LZVN low-level encoder ++ ++#include "lzvn_encode_base.h" ++ ++#if defined(_MSC_VER) && !defined(__clang__) ++# define restrict __restrict ++#endif ++ ++// =============================================================== ++// Coarse/fine copy, non overlapping buffers ++ ++/*! @abstract Copy at least \p nbytes bytes from \p src to \p dst, by blocks ++ * of 8 bytes (may go beyond range). No overlap. ++ * @return \p dst + \p nbytes. */ ++static inline unsigned char *lzvn_copy64(unsigned char *restrict dst, ++ const unsigned char *restrict src, ++ size_t nbytes) { ++ for (size_t i = 0; i < nbytes; i += 8) ++ store8(dst + i, load8(src + i)); ++ return dst + nbytes; ++} ++ ++/*! @abstract Copy exactly \p nbytes bytes from \p src to \p dst (respects range). ++ * No overlap. ++ * @return \p dst + \p nbytes. */ ++static inline unsigned char *lzvn_copy8(unsigned char *restrict dst, ++ const unsigned char *restrict src, ++ size_t nbytes) { ++ for (size_t i = 0; i < nbytes; i++) ++ dst[i] = src[i]; ++ return dst + nbytes; ++} ++ ++/*! @abstract Emit (L,0,0) instructions (final literal). ++ * We read at most \p L bytes from \p p. ++ * @param p input stream ++ * @param q1 the first byte after the output buffer. 
++ * @return pointer to the next output, <= \p q1. ++ * @return \p q1 if output is full. In that case, output will be partially invalid. ++ */ ++static inline unsigned char *emit_literal(const unsigned char *p, ++ unsigned char *q, unsigned char *q1, ++ size_t L) { ++ size_t x; ++ while (L > 15) { ++ x = L < 271 ? L : 271; ++ if (q + x + 10 >= q1) ++ goto OUT_FULL; ++ store2(q, 0xE0 + ((x - 16) << 8)); ++ q += 2; ++ L -= x; ++ q = lzvn_copy8(q, p, x); ++ p += x; ++ } ++ if (L > 0) { ++ if (q + L + 10 >= q1) ++ goto OUT_FULL; ++ *q++ = 0xE0 + L; // 1110LLLL ++ q = lzvn_copy8(q, p, L); ++ } ++ return q; ++ ++OUT_FULL: ++ return q1; ++} ++ ++/*! @abstract Emit (L,M,D) instructions. M>=3. ++ * @param p input stream pointing to the beginning of the literal. We read at ++ * most \p L+4 bytes from \p p. ++ * @param q1 the first byte after the output buffer. ++ * @return pointer to the next output, <= \p q1. ++ * @return \p q1 if output is full. In that case, output will be partially invalid. ++ */ ++static inline unsigned char *emit(const unsigned char *p, unsigned char *q, ++ unsigned char *q1, size_t L, size_t M, ++ size_t D, size_t D_prev) { ++ size_t x; ++ while (L > 15) { ++ x = L < 271 ? L : 271; ++ if (q + x + 10 >= q1) ++ goto OUT_FULL; ++ store2(q, 0xE0 + ((x - 16) << 8)); ++ q += 2; ++ L -= x; ++ q = lzvn_copy64(q, p, x); ++ p += x; ++ } ++ if (L > 3) { ++ if (q + L + 10 >= q1) ++ goto OUT_FULL; ++ *q++ = 0xE0 + L; // 1110LLLL ++ q = lzvn_copy64(q, p, L); ++ p += L; ++ L = 0; ++ } ++ x = M <= 10 - 2 * L ? M : 10 - 2 * L; // x = min(10-2*L,M) ++ M -= x; ++ x -= 3; // M = (x+3) + M' max value for x is 7-2*L ++ ++ // Here L<4 literals remaining, we read them here ++ uint32_t literal = load4(p); ++ // P is not accessed after this point ++ ++ // Relaxed capacity test covering all cases ++ if (q + 8 >= q1) ++ goto OUT_FULL; ++ ++ if (D == D_prev) { ++ if (L == 0) { ++ *q++ = 0xF0 + (x + 3); // XM! 
++ } else { ++ *q++ = (L << 6) + (x << 3) + 6; // LLxxx110 ++ } ++ store4(q, literal); ++ q += L; ++ } else if (D < 2048 - 2 * 256) { ++ // Short dist D>>8 in 0..5 ++ *q++ = (D >> 8) + (L << 6) + (x << 3); // LLxxxDDD ++ *q++ = D & 0xFF; ++ store4(q, literal); ++ q += L; ++ } else if (D >= (1 << 14) || M == 0 || (x + 3) + M > 34) { ++ // Long dist ++ *q++ = (L << 6) + (x << 3) + 7; ++ store2(q, D); ++ q += 2; ++ store4(q, literal); ++ q += L; ++ } else { ++ // Medium distance ++ x += M; ++ M = 0; ++ *q++ = 0xA0 + (x >> 2) + (L << 3); ++ store2(q, D << 2 | (x & 3)); ++ q += 2; ++ store4(q, literal); ++ q += L; ++ } ++ ++ // Issue remaining match ++ while (M > 15) { ++ if (q + 2 >= q1) ++ goto OUT_FULL; ++ x = M < 271 ? M : 271; ++ store2(q, 0xf0 + ((x - 16) << 8)); ++ q += 2; ++ M -= x; ++ } ++ if (M > 0) { ++ if (q + 1 >= q1) ++ goto OUT_FULL; ++ *q++ = 0xF0 + M; // M = 0..15 ++ } ++ ++ return q; ++ ++OUT_FULL: ++ return q1; ++} ++ ++// =============================================================== ++// Conversions ++ ++/*! @abstract Return 32-bit value to store for offset x. */ ++static inline int32_t offset_to_s32(lzvn_offset x) { return (int32_t)x; } ++ ++/*! @abstract Get offset from 32-bit stored value x. */ ++static inline lzvn_offset offset_from_s32(int32_t x) { return (lzvn_offset)x; } ++ ++// =============================================================== ++// Hash and Matching ++ ++/*! @abstract Get hash in range \c [0,LZVN_ENCODE_HASH_VALUES-1] from 3 bytes in i. */ ++static inline uint32_t hash3i(uint32_t i) { ++ i &= 0xffffff; // truncate to 24-bit input (slightly increases compression ratio) ++ uint32_t h = (i * (1 + (1 << 6) + (1 << 12))) >> 12; ++ return h & (LZVN_ENCODE_HASH_VALUES - 1); ++} ++ ++/*! @abstract Return the number [0, 4] of zero bytes in \p x, starting from the ++ * least significant byte. */ ++static inline lzvn_offset trailing_zero_bytes(uint32_t x) { ++ return (x == 0) ? 4 : (__builtin_ctzl(x) >> 3); ++} ++ ++/*! 
@abstract Return the number [0, 4] of matching chars between values at ++ * \p src+i and \p src+j, starting from the least significant byte. ++ * Assumes we can read 4 chars from each position. */ ++static inline lzvn_offset nmatch4(const unsigned char *src, lzvn_offset i, ++ lzvn_offset j) { ++ uint32_t vi = load4(src + i); ++ uint32_t vj = load4(src + j); ++ return trailing_zero_bytes(vi ^ vj); ++} ++ ++/*! @abstract Check if l_begin, m_begin, m0_begin (m0_begin < m_begin) can be ++ * expanded to a match of length at least 3. ++ * @param m_begin new string to match. ++ * @param m0_begin candidate old string. ++ * @param src source buffer, with valid indices src_begin <= i < src_end. ++ * (src_begin may be <0) ++ * @return If a match can be found, return 1 and set all \p match fields, ++ * otherwise return 0. ++ * @note \p *match should be 0 before the call. */ ++static inline int lzvn_find_match(const unsigned char *src, ++ lzvn_offset src_begin, ++ lzvn_offset src_end, lzvn_offset l_begin, ++ lzvn_offset m0_begin, lzvn_offset m_begin, ++ lzvn_match_info *match) { ++ lzvn_offset n = nmatch4(src, m_begin, m0_begin); ++ if (n < 3) ++ return 0; // no match ++ ++ lzvn_offset D = m_begin - m0_begin; // actual distance ++ if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE) ++ return 0; // distance out of range ++ ++ // Expand forward ++ lzvn_offset m_end = m_begin + n; ++ while (n == 4 && m_end + 4 < src_end) { ++ n = nmatch4(src, m_end, m_end - D); ++ m_end += n; ++ } ++ ++ // Expand backwards over literal ++ while (m0_begin > src_begin && m_begin > l_begin && ++ src[m_begin - 1] == src[m0_begin - 1]) { ++ m0_begin--; ++ m_begin--; ++ } ++ ++ // OK, we keep it, update MATCH ++ lzvn_offset M = m_end - m_begin; // match length ++ match->m_begin = m_begin; ++ match->m_end = m_end; ++ match->K = M - ((D < 0x600) ? 2 : 3); ++ match->M = M; ++ match->D = D; ++ ++ return 1; // OK ++} ++ ++/*! @abstract Same as lzvn_find_match, but we already know that N bytes do ++ * match (N<=4). 
*/ ++static inline int lzvn_find_matchN(const unsigned char *src, ++ lzvn_offset src_begin, ++ lzvn_offset src_end, lzvn_offset l_begin, ++ lzvn_offset m0_begin, lzvn_offset m_begin, ++ lzvn_offset n, lzvn_match_info *match) { ++ // We can skip the first comparison on 4 bytes ++ if (n < 3) ++ return 0; // no match ++ ++ lzvn_offset D = m_begin - m0_begin; // actual distance ++ if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE) ++ return 0; // distance out of range ++ ++ // Expand forward ++ lzvn_offset m_end = m_begin + n; ++ while (n == 4 && m_end + 4 < src_end) { ++ n = nmatch4(src, m_end, m_end - D); ++ m_end += n; ++ } ++ ++ // Expand backwards over literal ++ while (m0_begin > src_begin && m_begin > l_begin && ++ src[m_begin - 1] == src[m0_begin - 1]) { ++ m0_begin--; ++ m_begin--; ++ } ++ ++ // OK, we keep it, update MATCH ++ lzvn_offset M = m_end - m_begin; // match length ++ match->m_begin = m_begin; ++ match->m_end = m_end; ++ match->K = M - ((D < 0x600) ? 2 : 3); ++ match->M = M; ++ match->D = D; ++ ++ return 1; // OK ++} ++ ++// =============================================================== ++// Encoder Backend ++ ++/*! @abstract Emit a match and update state. ++ * @return number of bytes written to \p dst. May be 0 if there is no more space ++ * in \p dst to emit the match. 
*/ ++static inline lzvn_offset lzvn_emit_match(lzvn_encoder_state *state, ++ lzvn_match_info match) { ++ size_t L = (size_t)(match.m_begin - state->src_literal); // literal count ++ size_t M = (size_t)match.M; // match length ++ size_t D = (size_t)match.D; // match distance ++ size_t D_prev = (size_t)state->d_prev; // previously emitted match distance ++ unsigned char *dst = emit(state->src + state->src_literal, state->dst, ++ state->dst_end, L, M, D, D_prev); ++ // Check if DST is full ++ if (dst >= state->dst_end) { ++ return 0; // FULL ++ } ++ ++ // Update state ++ lzvn_offset dst_used = dst - state->dst; ++ state->d_prev = match.D; ++ state->dst = dst; ++ state->src_literal = match.m_end; ++ return dst_used; ++} ++ ++/*! @abstract Emit a n-bytes literal and update state. ++ * @return number of bytes written to \p dst. May be 0 if there is no more space ++ * in \p dst to emit the literal. */ ++static inline lzvn_offset lzvn_emit_literal(lzvn_encoder_state *state, ++ lzvn_offset n) { ++ size_t L = (size_t)n; ++ unsigned char *dst = emit_literal(state->src + state->src_literal, state->dst, ++ state->dst_end, L); ++ // Check if DST is full ++ if (dst >= state->dst_end) ++ return 0; // FULL ++ ++ // Update state ++ lzvn_offset dst_used = dst - state->dst; ++ state->dst = dst; ++ state->src_literal += n; ++ return dst_used; ++} ++ ++/*! @abstract Emit end-of-stream and update state. ++ * @return number of bytes written to \p dst. May be 0 if there is no more space ++ * in \p dst to emit the instruction. */ ++static inline lzvn_offset lzvn_emit_end_of_stream(lzvn_encoder_state *state) { ++ // Do we have 8 byte in dst? ++ if (state->dst_end < state->dst + 8) ++ return 0; // FULL ++ ++ // Insert end marker and update state ++ store8(state->dst, 0x06); // end-of-stream command ++ state->dst += 8; ++ return 8; // dst_used ++} ++ ++// =============================================================== ++// Encoder Functions ++ ++/*! 
@abstract Initialize encoder table in \p state, uses current I/O parameters. */ ++static inline void lzvn_init_table(lzvn_encoder_state *state) { ++ lzvn_offset index = -LZVN_ENCODE_MAX_DISTANCE; // max match distance ++ if (index < state->src_begin) ++ index = state->src_begin; ++ uint32_t value = load4(state->src + index); ++ ++ lzvn_encode_entry_type e; ++ for (int i = 0; i < 4; i++) { ++ e.indices[i] = offset_to_s32(index); ++ e.values[i] = value; ++ } ++ for (int u = 0; u < LZVN_ENCODE_HASH_VALUES; u++) ++ state->table[u] = e; // fill entire table ++} ++ ++void lzvn_encode(lzvn_encoder_state *state) { ++ const lzvn_match_info NO_MATCH = {0}; ++ ++ for (; state->src_current < state->src_current_end; state->src_current++) { ++ // Get 4 bytes at src_current ++ uint32_t vi = load4(state->src + state->src_current); ++ ++ // Compute new hash H at position I, and push value into position table ++ int h = hash3i(vi); // index of first entry ++ ++ // Read table entries for H ++ lzvn_encode_entry_type e = state->table[h]; ++ ++ // Update entry with index=current and value=vi ++ lzvn_encode_entry_type updated_e; // rotate values, so we will replace the oldest ++ updated_e.indices[0] = offset_to_s32(state->src_current); ++ updated_e.indices[1] = e.indices[0]; ++ updated_e.indices[2] = e.indices[1]; ++ updated_e.indices[3] = e.indices[2]; ++ updated_e.values[0] = vi; ++ updated_e.values[1] = e.values[0]; ++ updated_e.values[2] = e.values[1]; ++ updated_e.values[3] = e.values[2]; ++ ++ // Do not check matches if still in previously emitted match ++ if (state->src_current < state->src_literal) ++ goto after_emit; ++ ++// Update best with candidate if better ++#define UPDATE(best, candidate) \ ++ do { \ ++ if (candidate.K > best.K || \ ++ ((candidate.K == best.K) && (candidate.m_end > best.m_end + 1))) { \ ++ best = candidate; \ ++ } \ ++ } while (0) ++// Check candidate. Keep if better. 
++#define CHECK_CANDIDATE(ik, nk) \ ++ do { \ ++ lzvn_match_info m1; \ ++ if (lzvn_find_matchN(state->src, state->src_begin, state->src_end, \ ++ state->src_literal, ik, state->src_current, nk, &m1)) { \ ++ UPDATE(incoming, m1); \ ++ } \ ++ } while (0) ++// Emit match M. Return if we don't have enough space in the destination buffer ++#define EMIT_MATCH(m) \ ++ do { \ ++ if (lzvn_emit_match(state, m) == 0) \ ++ return; \ ++ } while (0) ++// Emit literal of length L. Return if we don't have enough space in the ++// destination buffer ++#define EMIT_LITERAL(l) \ ++ do { \ ++ if (lzvn_emit_literal(state, l) == 0) \ ++ return; \ ++ } while (0) ++ ++ lzvn_match_info incoming = NO_MATCH; ++ ++ // Check candidates in order (closest first) ++ uint32_t diffs[4]; ++ for (int k = 0; k < 4; k++) ++ diffs[k] = e.values[k] ^ vi; // XOR, 0 if equal ++ lzvn_offset ik; // index ++ lzvn_offset nk; // match byte count ++ ++ // The values stored in e.xyzw are 32-bit signed indices, extended to signed ++ // type lzvn_offset ++ ik = offset_from_s32(e.indices[0]); ++ nk = trailing_zero_bytes(diffs[0]); ++ CHECK_CANDIDATE(ik, nk); ++ ik = offset_from_s32(e.indices[1]); ++ nk = trailing_zero_bytes(diffs[1]); ++ CHECK_CANDIDATE(ik, nk); ++ ik = offset_from_s32(e.indices[2]); ++ nk = trailing_zero_bytes(diffs[2]); ++ CHECK_CANDIDATE(ik, nk); ++ ik = offset_from_s32(e.indices[3]); ++ nk = trailing_zero_bytes(diffs[3]); ++ CHECK_CANDIDATE(ik, nk); ++ ++ // Check candidate at previous distance ++ if (state->d_prev != 0) { ++ lzvn_match_info m1; ++ if (lzvn_find_match(state->src, state->src_begin, state->src_end, ++ state->src_literal, state->src_current - state->d_prev, ++ state->src_current, &m1)) { ++ m1.K = m1.M - 1; // fix K for D_prev ++ UPDATE(incoming, m1); ++ } ++ } ++ ++ // Here we have the best candidate in incoming, may be NO_MATCH ++ ++ // If no incoming match, and literal backlog becomes too high, emit pending ++ // match, or literals if there is no pending match ++ if (incoming.M 
== 0) { ++ if (state->src_current - state->src_literal >= ++ LZVN_ENCODE_MAX_LITERAL_BACKLOG) // at this point, we always have ++ // current >= literal ++ { ++ if (state->pending.M != 0) { ++ EMIT_MATCH(state->pending); ++ state->pending = NO_MATCH; ++ } else { ++ EMIT_LITERAL(271); // emit long literal (271 is the longest literal size we allow) ++ } ++ } ++ goto after_emit; ++ } ++ ++ if (state->pending.M == 0) { ++ // NOTE. Here, we can also emit incoming right away. It will make the ++ // encoder 1.5x faster, at a cost of ~10% lower compression ratio: ++ // EMIT_MATCH(incoming); ++ // state->pending = NO_MATCH; ++ ++ // No pending match, emit nothing, keep incoming ++ state->pending = incoming; ++ } else { ++ // Here we have both incoming and pending ++ if (state->pending.m_end <= incoming.m_begin) { ++ // No overlap: emit pending, keep incoming ++ EMIT_MATCH(state->pending); ++ state->pending = incoming; ++ } else { ++ // If pending is better, emit pending and discard incoming. ++ // Otherwise, emit incoming and discard pending. ++ if (incoming.K > state->pending.K) ++ state->pending = incoming; ++ EMIT_MATCH(state->pending); ++ state->pending = NO_MATCH; ++ } ++ } ++ ++ after_emit: ++ ++ // We commit state changes only after we tried to emit instructions, so we ++ // can restart in the same state in case dst was full and we quit the loop. ++ state->table[h] = updated_e; ++ ++ } // i loop ++ ++ // Do not emit pending match here. We do it only at the end of stream. ++} ++ ++// =============================================================== ++// API entry points ++ ++size_t lzvn_encode_scratch_size(void) { return LZVN_ENCODE_WORK_SIZE; } ++ ++static size_t lzvn_encode_partial(void *__restrict dst, size_t dst_size, ++ const void *__restrict src, size_t src_size, ++ size_t *src_used, void *__restrict work) { ++ // Min size checks to avoid accessing memory outside buffers. 
++ if (dst_size < LZVN_ENCODE_MIN_DST_SIZE) { ++ *src_used = 0; ++ return 0; ++ } ++ // Max input size check (limit to offsets on uint32_t). ++ if (src_size > LZVN_ENCODE_MAX_SRC_SIZE) { ++ src_size = LZVN_ENCODE_MAX_SRC_SIZE; ++ } ++ ++ // Setup encoder state ++ lzvn_encoder_state state; ++ memset(&state, 0, sizeof(state)); ++ ++ state.src = src; ++ state.src_begin = 0; ++ state.src_end = (lzvn_offset)src_size; ++ state.src_literal = 0; ++ state.src_current = 0; ++ state.dst = dst; ++ state.dst_begin = dst; ++ state.dst_end = (unsigned char *)dst + dst_size - 8; // reserve 8 bytes for end-of-stream ++ state.table = work; ++ ++ // Do not encode if the input buffer is too small. We'll emit a literal instead. ++ if (src_size >= LZVN_ENCODE_MIN_SRC_SIZE) { ++ ++ state.src_current_end = (lzvn_offset)src_size - LZVN_ENCODE_MIN_MARGIN; ++ lzvn_init_table(&state); ++ lzvn_encode(&state); ++ ++ } ++ ++ // No need to test the return value: src_literal will not be updated on failure, ++ // and we will fail later. ++ lzvn_emit_literal(&state, state.src_end - state.src_literal); ++ ++ // Restore original size, so end-of-stream always succeeds, and emit it ++ state.dst_end = (unsigned char *)dst + dst_size; ++ lzvn_emit_end_of_stream(&state); ++ ++ *src_used = state.src_literal; ++ return (size_t)(state.dst - state.dst_begin); ++} ++ ++size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size, ++ const void *__restrict src, size_t src_size, ++ void *__restrict work) { ++ size_t src_used = 0; ++ size_t dst_used = ++ lzvn_encode_partial(dst, dst_size, src, src_size, &src_used, work); ++ if (src_used != src_size) ++ return 0; // could not encode entire input stream = fail ++ return dst_used; // return encoded size ++} +diff --git a/fs/apfs/lzfse/lzvn_encode_base.h b/fs/apfs/lzfse/lzvn_encode_base.h +new file mode 100644 +index 000000000..308cd4f72 +--- /dev/null ++++ b/fs/apfs/lzfse/lzvn_encode_base.h +@@ -0,0 +1,116 @@ ++/* ++Copyright (c) 2015-2016, Apple Inc. 
All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ++ ++1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ++ ++2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer ++ in the documentation and/or other materials provided with the distribution. ++ ++3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++*/ ++ ++// LZVN low-level encoder ++ ++#ifndef LZVN_ENCODE_BASE_H ++#define LZVN_ENCODE_BASE_H ++ ++#include "lzfse_internal.h" ++ ++// =============================================================== ++// Types and Constants ++ ++#define LZVN_ENCODE_HASH_BITS \ ++ 14 // number of bits returned by the hash function [10, 16] ++#define LZVN_ENCODE_OFFSETS_PER_HASH \ ++ 4 // stored offsets stack for each hash value, MUST be 4 ++#define LZVN_ENCODE_HASH_VALUES \ ++ (1 << LZVN_ENCODE_HASH_BITS) // number of entries in hash table ++#define LZVN_ENCODE_MAX_DISTANCE \ ++ 0xffff // max match distance we can represent with LZVN encoding, MUST be ++ // 0xFFFF ++#define LZVN_ENCODE_MIN_MARGIN \ ++ 8 // min number of bytes required between current and end during encoding, ++ // MUST be >= 8 ++#define LZVN_ENCODE_MAX_LITERAL_BACKLOG \ ++ 400 // if the number of pending literals exceeds this size, emit a long ++ // literal, MUST be >= 271 ++ ++/*! @abstract Type of table entry. */ ++typedef struct { ++ int32_t indices[4]; // signed indices in source buffer ++ uint32_t values[4]; // corresponding 32-bit values ++} lzvn_encode_entry_type; ++ ++// Work size ++#define LZVN_ENCODE_WORK_SIZE \ ++ (LZVN_ENCODE_HASH_VALUES * sizeof(lzvn_encode_entry_type)) ++ ++/*! @abstract Match */ ++typedef struct { ++ lzvn_offset m_begin; // beginning of match, current position ++ lzvn_offset m_end; // end of match, this is where the next literal would begin ++ // if we emit the entire match ++ lzvn_offset M; // match length M: m_end - m_begin ++ lzvn_offset D; // match distance D ++ lzvn_offset K; // match gain: M - distance storage (L not included) ++} lzvn_match_info; ++ ++// =============================================================== ++// Internal encoder state ++ ++/*! @abstract Base encoder state and I/O. */ ++typedef struct { ++ ++ // Encoder I/O ++ ++ // Source buffer ++ const unsigned char *src; ++ // Valid range in source buffer: we can access src[i] for src_begin <= i < ++ // src_end. 
src_begin may be negative. ++ lzvn_offset src_begin; ++ lzvn_offset src_end; ++ // Next byte to process in source buffer ++ lzvn_offset src_current; ++ // Next byte after the last byte to process in source buffer. We MUST have: ++ // src_current_end + 8 <= src_end. ++ lzvn_offset src_current_end; ++ // Next byte to encode in source buffer, may be before or after src_current. ++ lzvn_offset src_literal; ++ ++ // Next byte to write in destination buffer ++ unsigned char *dst; ++ // Valid range in destination buffer: [dst_begin, dst_end - 1] ++ unsigned char *dst_begin; ++ unsigned char *dst_end; ++ ++ // Encoder state ++ ++ // Pending match ++ lzvn_match_info pending; ++ ++ // Distance for last emitted match, or 0 ++ lzvn_offset d_prev; ++ ++ // Hash table used to find matches. Stores LZVN_ENCODE_OFFSETS_PER_HASH 32-bit ++ // signed indices in the source buffer, and the corresponding 4-byte values. ++ // The number of entries in the table is LZVN_ENCODE_HASH_VALUES. ++ lzvn_encode_entry_type *table; ++ ++} lzvn_encoder_state; ++ ++/*! @abstract Encode source to destination. ++ * Update \p state. ++ * The call ensures \c src_literal is never left too far behind \c src_current. */ ++void lzvn_encode(lzvn_encoder_state *state); ++ ++#endif // LZVN_ENCODE_BASE_H +diff --git a/fs/apfs/message.c b/fs/apfs/message.c +new file mode 100644 +index 000000000..fdd74ccb8 +--- /dev/null ++++ b/fs/apfs/message.c +@@ -0,0 +1,29 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include "apfs.h" ++ ++void apfs_msg(struct super_block *sb, const char *prefix, const char *func, int line, const char *fmt, ...) ++{ ++ char *sb_id = NULL; ++ struct va_format vaf; ++ va_list args; ++ ++ va_start(args, fmt); ++ ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ /* The superblock is not available to all callers */ ++ sb_id = sb ? 
sb->s_id : "?"; ++ ++ if (func) ++ printk("%sAPFS (%s): %pV (%s:%d)\n", prefix, sb_id, &vaf, func, line); ++ else ++ printk("%sAPFS (%s): %pV\n", prefix, sb_id, &vaf); ++ ++ va_end(args); ++} +diff --git a/fs/apfs/namei.c b/fs/apfs/namei.c +new file mode 100644 +index 000000000..e17638863 +--- /dev/null ++++ b/fs/apfs/namei.c +@@ -0,0 +1,142 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include "apfs.h" ++#include "unicode.h" ++ ++static struct dentry *apfs_lookup(struct inode *dir, struct dentry *dentry, ++ unsigned int flags) ++{ ++ struct inode *inode = NULL; ++ u64 ino = 0; ++ int err; ++ ++ if (dentry->d_name.len > APFS_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ err = apfs_inode_by_name(dir, &dentry->d_name, &ino); ++ if (err && err != -ENODATA) { ++ apfs_err(dir->i_sb, "inode lookup by name failed"); ++ return ERR_PTR(err); ++ } ++ ++ if (!err) { ++ inode = apfs_iget(dir->i_sb, ino); ++ if (IS_ERR(inode)) { ++ apfs_err(dir->i_sb, "iget failed"); ++ return ERR_CAST(inode); ++ } ++ } ++ ++ return d_splice_alias(inode, dentry); ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++static int apfs_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++static int apfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, const char *symname) ++#else ++static int apfs_symlink(struct mnt_idmap *idmap, struct inode *dir, ++ struct dentry *dentry, const char *symname) ++#endif ++{ ++ /* Symlink permissions don't mean anything and their value is fixed */ ++ return apfs_mkany(dir, dentry, S_IFLNK | 0x1ed, 0 /* rdev */, symname); ++} ++ ++const struct inode_operations apfs_dir_inode_operations = { ++ .create = apfs_create, ++ .lookup = apfs_lookup, ++ .link = apfs_link, ++ .unlink = apfs_unlink, ++ .symlink = apfs_symlink, ++ .mkdir = apfs_mkdir, ++ .rmdir = apfs_rmdir, ++ .mknod 
= apfs_mknod, ++ .rename = apfs_rename, ++ .getattr = apfs_getattr, ++ .listxattr = apfs_listxattr, ++ .setattr = apfs_setattr, ++ .update_time = apfs_update_time, ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 13, 0) ++ .fileattr_get = apfs_fileattr_get, ++ .fileattr_set = apfs_fileattr_set, ++#endif ++}; ++ ++const struct inode_operations apfs_special_inode_operations = { ++ .getattr = apfs_getattr, ++ .listxattr = apfs_listxattr, ++ .setattr = apfs_setattr, ++ .update_time = apfs_update_time, ++}; ++ ++static int apfs_dentry_hash(const struct dentry *dir, struct qstr *child) ++{ ++ struct apfs_unicursor cursor; ++ unsigned long hash; ++ bool case_fold = apfs_is_case_insensitive(dir->d_sb); ++ ++ if (!apfs_is_normalization_insensitive(dir->d_sb)) ++ return 0; ++ ++ apfs_init_unicursor(&cursor, child->name, child->len); ++ hash = init_name_hash(dir); ++ ++ while (1) { ++ int i; ++ unicode_t utf32; ++ ++ utf32 = apfs_normalize_next(&cursor, case_fold); ++ if (!utf32) ++ break; ++ ++ /* Hash the unicode character one byte at a time */ ++ for (i = 0; i < 4; ++i) { ++ hash = partial_name_hash((u8)utf32, hash); ++ utf32 = utf32 >> 8; ++ } ++ } ++ child->hash = end_name_hash(hash); ++ ++ /* TODO: return error instead of truncating invalid UTF-8? */ ++ return 0; ++} ++ ++static int apfs_dentry_compare(const struct dentry *dentry, unsigned int len, ++ const char *str, const struct qstr *name) ++{ ++ return apfs_filename_cmp(dentry->d_sb, name->name, name->len, str, len); ++} ++ ++static int apfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) ++{ ++ struct super_block *sb = dentry->d_sb; ++ ++ if (flags & LOOKUP_RCU) ++ return -ECHILD; ++ ++ /* ++ * If we want to create a link with a name that normalizes to the same ++ * as an existing negative dentry, then we first need to invalidate the ++ * dentry; otherwise it would keep the existing name. 
++ */ ++ if (d_really_is_positive(dentry)) ++ return 1; ++ if (!apfs_is_case_insensitive(sb) && !apfs_is_normalization_insensitive(sb)) ++ return 1; ++ if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) ++ return 0; ++ return 1; ++} ++ ++const struct dentry_operations apfs_dentry_operations = { ++ .d_revalidate = apfs_dentry_revalidate, ++ .d_hash = apfs_dentry_hash, ++ .d_compare = apfs_dentry_compare, ++}; +diff --git a/fs/apfs/node.c b/fs/apfs/node.c +new file mode 100644 +index 000000000..2104177a3 +--- /dev/null ++++ b/fs/apfs/node.c +@@ -0,0 +1,2069 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_node_is_valid - Check basic sanity of the node index ++ * @sb: filesystem superblock ++ * @node: node to check ++ * ++ * Verifies that the node index fits in a single block, and that the number ++ * of records fits in the index. Without this check a crafted filesystem could ++ * pretend to have too many records, and calls to apfs_node_locate_key() and ++ * apfs_node_locate_data() would read beyond the limits of the node. ++ */ ++static bool apfs_node_is_valid(struct super_block *sb, ++ struct apfs_node *node) ++{ ++ u32 records = node->records; ++ int index_size = node->key - sizeof(struct apfs_btree_node_phys); ++ int entry_size; ++ ++ if (node->key > sb->s_blocksize) ++ return false; ++ ++ entry_size = (apfs_node_has_fixed_kv_size(node)) ? 
++ sizeof(struct apfs_kvoff) : sizeof(struct apfs_kvloc); ++ ++ /* Coarse bound to prevent multiplication overflow in final check */ ++ if (records > 1 << 16) ++ return false; ++ ++ return records * entry_size <= index_size; ++} ++ ++void apfs_node_free(struct apfs_node *node) ++{ ++ struct apfs_object *obj = NULL; ++ ++ if (!node) ++ return; ++ obj = &node->object; ++ ++ if (obj->o_bh) { ++ brelse(obj->o_bh); ++ obj->o_bh = NULL; ++ } else if (!obj->ephemeral) { ++ /* Ephemeral data always remains in memory */ ++ kfree(obj->data); ++ } ++ obj->data = NULL; ++ ++ kfree(node); ++} ++ ++/** ++ * apfs_read_node - Read a node header from disk ++ * @sb: filesystem superblock ++ * @oid: object id for the node ++ * @storage: storage type for the node object ++ * @write: request write access? ++ * ++ * Returns ERR_PTR in case of failure, otherwise return a pointer to the ++ * resulting apfs_node structure with the initial reference taken. ++ * ++ * For now we assume the node has not been read before. 
++ */ ++struct apfs_node *apfs_read_node(struct super_block *sb, u64 oid, u32 storage, ++ bool write) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct buffer_head *bh = NULL; ++ struct apfs_ephemeral_object_info *eph_info = NULL; ++ struct apfs_btree_node_phys *raw = NULL; ++ struct apfs_node *node = NULL; ++ struct apfs_nloc *free_head = NULL; ++ u64 bno; ++ int err; ++ ++ switch (storage) { ++ case APFS_OBJ_VIRTUAL: ++ /* All virtual nodes are inside a volume, at least for now */ ++ err = apfs_omap_lookup_block(sb, sbi->s_omap, oid, &bno, write); ++ if (err) { ++ apfs_err(sb, "omap lookup failed for oid 0x%llx", oid); ++ return ERR_PTR(err); ++ } ++ /* CoW has already been done, don't worry about snapshots */ ++ bh = apfs_read_object_block(sb, bno, write, false /* preserve */); ++ if (IS_ERR(bh)) { ++ apfs_err(sb, "object read failed for bno 0x%llx", bno); ++ return (void *)bh; ++ } ++ bno = bh->b_blocknr; ++ raw = (struct apfs_btree_node_phys *)bh->b_data; ++ break; ++ case APFS_OBJ_PHYSICAL: ++ bh = apfs_read_object_block(sb, oid, write, false /* preserve */); ++ if (IS_ERR(bh)) { ++ apfs_err(sb, "object read failed for bno 0x%llx", oid); ++ return (void *)bh; ++ } ++ bno = oid = bh->b_blocknr; ++ raw = (struct apfs_btree_node_phys *)bh->b_data; ++ break; ++ case APFS_OBJ_EPHEMERAL: ++ /* Ephemeral objects are already in memory */ ++ eph_info = apfs_ephemeral_object_lookup(sb, oid); ++ if (IS_ERR(eph_info)) { ++ apfs_err(sb, "no ephemeral node for oid 0x%llx", oid); ++ return (void *)eph_info; ++ } ++ if (eph_info->size != sb->s_blocksize) { ++ apfs_err(sb, "unsupported size for ephemeral node (%u)", eph_info->size); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ bno = 0; /* In memory, so meaningless */ ++ raw = eph_info->object; ++ /* Only for consistency, will happen again on commit */ ++ if (write) ++ raw->btn_o.o_xid = cpu_to_le64(nxi->nx_xid); ++ break; ++ default: ++ apfs_alert(sb, "invalid storage type %u - 
bug!", storage); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ node = kmalloc(sizeof(*node), GFP_KERNEL); ++ if (!node) { ++ brelse(bh); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ node->tree_type = le32_to_cpu(raw->btn_o.o_subtype); ++ node->flags = le16_to_cpu(raw->btn_flags); ++ node->records = le32_to_cpu(raw->btn_nkeys); ++ node->key = sizeof(*raw) + le16_to_cpu(raw->btn_table_space.off) ++ + le16_to_cpu(raw->btn_table_space.len); ++ node->free = node->key + le16_to_cpu(raw->btn_free_space.off); ++ node->data = node->free + le16_to_cpu(raw->btn_free_space.len); ++ ++ free_head = &raw->btn_key_free_list; ++ node->key_free_list_len = le16_to_cpu(free_head->len); ++ free_head = &raw->btn_val_free_list; ++ node->val_free_list_len = le16_to_cpu(free_head->len); ++ ++ node->object.sb = sb; ++ node->object.block_nr = bno; ++ node->object.oid = oid; ++ node->object.o_bh = bh; ++ node->object.data = (char *)raw; ++ node->object.ephemeral = !bh; ++ ++ /* Ephemeral objects already got checked on mount */ ++ if (!node->object.ephemeral && nxi->nx_flags & APFS_CHECK_NODES && !apfs_obj_verify_csum(sb, bh)) { ++ /* TODO: don't check this twice for virtual/physical objects */ ++ apfs_err(sb, "bad checksum for node in block 0x%llx", (unsigned long long)bno); ++ apfs_node_free(node); ++ return ERR_PTR(-EFSBADCRC); ++ } ++ if (!apfs_node_is_valid(sb, node)) { ++ apfs_err(sb, "bad node in block 0x%llx", (unsigned long long)bno); ++ apfs_node_free(node); ++ return ERR_PTR(-EFSCORRUPTED); ++ } ++ ++ return node; ++} ++ ++/** ++ * apfs_node_min_table_size - Return the minimum size for a node's toc ++ * @sb: superblock structure ++ * @type: tree type for the node ++ * @flags: flags for the node ++ */ ++static int apfs_node_min_table_size(struct super_block *sb, u32 type, u16 flags) ++{ ++ bool leaf = flags & APFS_BTNODE_LEAF; ++ int key_size, val_size, toc_size; ++ int space, count; ++ ++ /* Preallocate the whole table for trees with fixed key/value sizes */ ++ switch (type) { ++ case 
APFS_OBJECT_TYPE_OMAP: ++ key_size = sizeof(struct apfs_omap_key); ++ val_size = leaf ? sizeof(struct apfs_omap_val) : sizeof(__le64); ++ toc_size = sizeof(struct apfs_kvoff); ++ break; ++ case APFS_OBJECT_TYPE_SPACEMAN_FREE_QUEUE: ++ key_size = sizeof(struct apfs_spaceman_free_queue_key); ++ val_size = sizeof(__le64); /* We assume no ghosts here */ ++ toc_size = sizeof(struct apfs_kvoff); ++ break; ++ case APFS_OBJECT_TYPE_OMAP_SNAPSHOT: ++ key_size = sizeof(__le64); ++ val_size = leaf ? sizeof(struct apfs_omap_snapshot) : sizeof(__le64); ++ toc_size = sizeof(struct apfs_kvoff); ++ break; ++ case APFS_OBJECT_TYPE_FEXT_TREE: ++ key_size = sizeof(struct apfs_fext_tree_key); ++ val_size = leaf ? sizeof(struct apfs_fext_tree_val) : sizeof(__le64); ++ toc_size = sizeof(struct apfs_kvoff); ++ break; ++ default: ++ /* Make room for one record at least */ ++ toc_size = sizeof(struct apfs_kvloc); ++ return APFS_BTREE_TOC_ENTRY_INCREMENT * toc_size; ++ } ++ ++ /* The footer of root nodes is ignored for some reason */ ++ space = sb->s_blocksize - sizeof(struct apfs_btree_node_phys); ++ count = space / (key_size + val_size + toc_size); ++ return count * toc_size; ++} ++ ++/** ++ * apfs_set_empty_btree_info - Set the info footer for an empty b-tree node ++ * @sb: filesystem superblock ++ * @info: pointer to the on-disk info footer ++ * @subtype: subtype of the root node, i.e., tree type ++ * ++ * For now only supports the extent reference tree. 
++ */ ++static void apfs_set_empty_btree_info(struct super_block *sb, struct apfs_btree_info *info, u32 subtype) ++{ ++ u32 flags; ++ ++ ASSERT(subtype == APFS_OBJECT_TYPE_BLOCKREFTREE || subtype == APFS_OBJECT_TYPE_OMAP_SNAPSHOT); ++ ++ memset(info, 0, sizeof(*info)); ++ ++ flags = APFS_BTREE_PHYSICAL; ++ if (subtype == APFS_OBJECT_TYPE_BLOCKREFTREE) ++ flags |= APFS_BTREE_KV_NONALIGNED; ++ ++ info->bt_fixed.bt_flags = cpu_to_le32(flags); ++ info->bt_fixed.bt_node_size = cpu_to_le32(sb->s_blocksize); ++ info->bt_key_count = 0; ++ info->bt_node_count = cpu_to_le64(1); /* Only one node: the root */ ++ if (subtype == APFS_OBJECT_TYPE_BLOCKREFTREE) ++ return; ++ ++ info->bt_fixed.bt_key_size = cpu_to_le32(8); ++ info->bt_longest_key = info->bt_fixed.bt_key_size; ++ info->bt_fixed.bt_val_size = cpu_to_le32(sizeof(struct apfs_omap_snapshot)); ++ info->bt_longest_val = info->bt_fixed.bt_val_size; ++} ++ ++/** ++ * apfs_make_empty_btree_root - Make an empty root for a b-tree ++ * @sb: filesystem superblock ++ * @subtype: subtype of the root node, i.e., tree type ++ * @oid: on return, the root's object id ++ * ++ * For now only supports the extent reference tree and an omap's snapshot tree. ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_make_empty_btree_root(struct super_block *sb, u32 subtype, u64 *oid) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_btree_node_phys *root = NULL; ++ struct buffer_head *bh = NULL; ++ u64 bno; ++ u16 flags; ++ int toc_len, free_len, head_len, info_len; ++ int err; ++ ++ ASSERT(subtype == APFS_OBJECT_TYPE_BLOCKREFTREE || subtype == APFS_OBJECT_TYPE_OMAP_SNAPSHOT); ++ ++ err = apfs_spaceman_allocate_block(sb, &bno, true /* backwards */); ++ if (err) { ++ apfs_err(sb, "block allocation failed"); ++ return err; ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, 1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_alloced, 1); ++ ++ bh = apfs_getblk(sb, bno); ++ if (!bh) ++ return -EIO; ++ root = (void *)bh->b_data; ++ err = apfs_transaction_join(sb, bh); ++ if (err) ++ goto fail; ++ set_buffer_csum(bh); ++ ++ flags = APFS_BTNODE_ROOT | APFS_BTNODE_LEAF; ++ if (subtype == APFS_OBJECT_TYPE_OMAP_SNAPSHOT) ++ flags |= APFS_BTNODE_FIXED_KV_SIZE; ++ root->btn_flags = cpu_to_le16(flags); ++ ++ toc_len = apfs_node_min_table_size(sb, subtype, flags); ++ head_len = sizeof(*root); ++ info_len = sizeof(struct apfs_btree_info); ++ free_len = sb->s_blocksize - head_len - toc_len - info_len; ++ ++ root->btn_level = 0; /* Root */ ++ ++ /* No keys and no values, so this is straightforward */ ++ root->btn_nkeys = 0; ++ root->btn_table_space.off = 0; ++ root->btn_table_space.len = cpu_to_le16(toc_len); ++ root->btn_free_space.off = 0; ++ root->btn_free_space.len = cpu_to_le16(free_len); ++ ++ /* No fragmentation */ ++ root->btn_key_free_list.off = cpu_to_le16(APFS_BTOFF_INVALID); ++ root->btn_key_free_list.len = 0; ++ root->btn_val_free_list.off = cpu_to_le16(APFS_BTOFF_INVALID); ++ root->btn_val_free_list.len = 0; ++ ++ apfs_set_empty_btree_info(sb, (void *)root + sb->s_blocksize - info_len, subtype); ++ ++ root->btn_o.o_oid = cpu_to_le64(bno); ++ root->btn_o.o_xid = 
cpu_to_le64(APFS_NXI(sb)->nx_xid); ++ root->btn_o.o_type = cpu_to_le32(APFS_OBJECT_TYPE_BTREE | APFS_OBJ_PHYSICAL); ++ root->btn_o.o_subtype = cpu_to_le32(subtype); ++ ++ *oid = bno; ++ err = 0; ++fail: ++ root = NULL; ++ brelse(bh); ++ bh = NULL; ++ return err; ++} ++ ++/** ++ * apfs_create_node - Allocates a new nonroot b-tree node on disk ++ * @sb: filesystem superblock ++ * @storage: storage type for the node object ++ * ++ * On success returns a pointer to the new in-memory node structure; the object ++ * header is initialized, and the node fields are given reasonable defaults. ++ * On failure, returns an error pointer. ++ */ ++static struct apfs_node *apfs_create_node(struct super_block *sb, u32 storage) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_superblock *msb_raw = nxi->nx_raw; ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct apfs_ephemeral_object_info *eph_info = NULL; ++ struct apfs_node *node = NULL; ++ struct buffer_head *bh = NULL; ++ struct apfs_btree_node_phys *raw = NULL; ++ u64 bno, oid; ++ int err; ++ ++ switch (storage) { ++ case APFS_OBJ_VIRTUAL: ++ err = apfs_spaceman_allocate_block(sb, &bno, true /* backwards */); ++ if (err) { ++ apfs_err(sb, "block allocation failed"); ++ return ERR_PTR(err); ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, 1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_alloced, 1); ++ ++ oid = le64_to_cpu(msb_raw->nx_next_oid); ++ le64_add_cpu(&msb_raw->nx_next_oid, 1); ++ err = apfs_create_omap_rec(sb, oid, bno); ++ if (err) { ++ apfs_err(sb, "omap rec creation failed (0x%llx-0x%llx)", oid, bno); ++ return ERR_PTR(err); ++ } ++ break; ++ case APFS_OBJ_PHYSICAL: ++ err = apfs_spaceman_allocate_block(sb, &bno, true /* backwards */); ++ if (err) { ++ apfs_err(sb, "block allocation failed"); ++ return ERR_PTR(err); ++ } ++ /* We don't write to the container's omap */ ++ 
apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, 1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_alloced, 1); ++ oid = bno; ++ break; ++ case APFS_OBJ_EPHEMERAL: ++ if (nxi->nx_eph_count >= APFS_EPHEMERAL_LIST_LIMIT) { ++ apfs_err(sb, "creating too many ephemeral objects?"); ++ return ERR_PTR(-EOPNOTSUPP); ++ } ++ eph_info = &nxi->nx_eph_list[nxi->nx_eph_count++]; ++ eph_info->object = kzalloc(sb->s_blocksize, GFP_KERNEL); ++ if (!eph_info->object) ++ return ERR_PTR(-ENOMEM); ++ eph_info->size = sb->s_blocksize; ++ oid = eph_info->oid = le64_to_cpu(msb_raw->nx_next_oid); ++ le64_add_cpu(&msb_raw->nx_next_oid, 1); ++ break; ++ default: ++ apfs_alert(sb, "invalid storage type %u - bug!", storage); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ if (storage == APFS_OBJ_EPHEMERAL) { ++ bh = NULL; ++ bno = 0; ++ raw = eph_info->object; ++ } else { ++ bh = apfs_getblk(sb, bno); ++ if (!bh) ++ return ERR_PTR(-EIO); ++ bno = bh->b_blocknr; ++ raw = (void *)bh->b_data; ++ err = apfs_transaction_join(sb, bh); ++ if (err) ++ goto fail; ++ set_buffer_csum(bh); ++ } ++ ++ /* Set most of the object header, but the subtype is up to the caller */ ++ raw->btn_o.o_oid = cpu_to_le64(oid); ++ raw->btn_o.o_xid = cpu_to_le64(nxi->nx_xid); ++ raw->btn_o.o_type = cpu_to_le32(storage | APFS_OBJECT_TYPE_BTREE_NODE); ++ raw->btn_o.o_subtype = 0; ++ ++ /* The caller is expected to change most node fields */ ++ raw->btn_flags = 0; ++ raw->btn_level = 0; ++ raw->btn_nkeys = 0; ++ raw->btn_table_space.off = 0; /* Put the toc right after the header */ ++ raw->btn_table_space.len = 0; ++ raw->btn_free_space.off = 0; ++ raw->btn_free_space.len = cpu_to_le16(sb->s_blocksize - sizeof(*raw)); ++ raw->btn_key_free_list.off = cpu_to_le16(APFS_BTOFF_INVALID); ++ raw->btn_key_free_list.len = 0; ++ raw->btn_val_free_list.off = cpu_to_le16(APFS_BTOFF_INVALID); ++ raw->btn_val_free_list.len = 0; ++ ++ node = kmalloc(sizeof(*node), GFP_KERNEL); ++ if (!node) { ++ err = 
-ENOMEM; ++ goto fail; ++ } ++ ++ node->object.sb = sb; ++ node->object.block_nr = bno; ++ node->object.oid = oid; ++ node->object.o_bh = bh; ++ node->object.data = (char *)raw; ++ node->object.ephemeral = !bh; ++ return node; ++ ++fail: ++ if (storage == APFS_OBJ_EPHEMERAL) ++ kfree(raw); ++ else ++ brelse(bh); ++ raw = NULL; ++ bh = NULL; ++ return ERR_PTR(err); ++} ++ ++/** ++ * apfs_delete_node - Deletes a nonroot node from disk ++ * @node: node to delete ++ * @type: tree type for the query that found the node ++ * ++ * Does nothing to the in-memory node structure. Returns 0 on success, or a ++ * negative error code in case of failure. ++ */ ++int apfs_delete_node(struct apfs_node *node, int type) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_superblock *vsb_raw; ++ u64 oid = node->object.oid; ++ u64 bno = node->object.block_nr; ++ struct apfs_ephemeral_object_info *eph_info = NULL, *eph_info_end = NULL; ++ int err; ++ ++ switch (type) { ++ case APFS_QUERY_CAT: ++ err = apfs_free_queue_insert(sb, bno, 1); ++ if (err) { ++ apfs_err(sb, "free queue insertion failed for 0x%llx", bno); ++ return err; ++ } ++ err = apfs_delete_omap_rec(sb, oid); ++ if (err) { ++ apfs_err(sb, "omap rec deletion failed (0x%llx)", oid); ++ return err; ++ } ++ vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, -1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_freed, 1); ++ return 0; ++ case APFS_QUERY_OMAP: ++ case APFS_QUERY_EXTENTREF: ++ case APFS_QUERY_SNAP_META: ++ err = apfs_free_queue_insert(sb, bno, 1); ++ if (err) { ++ apfs_err(sb, "free queue insertion failed for 0x%llx", bno); ++ return err; ++ } ++ /* We don't write to the container's omap */ ++ vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, -1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_freed, 
1); ++ return 0; ++ case APFS_QUERY_FREE_QUEUE: ++ eph_info_end = &nxi->nx_eph_list[nxi->nx_eph_count]; ++ eph_info = apfs_ephemeral_object_lookup(sb, node->object.oid); ++ if (IS_ERR(eph_info)) { ++ apfs_alert(sb, "can't find ephemeral object to delete"); ++ return PTR_ERR(eph_info); ++ } ++ kfree(eph_info->object); ++ eph_info->object = NULL; ++ memmove(eph_info, eph_info + 1, (char *)eph_info_end - (char *)(eph_info + 1)); ++ eph_info_end->object = NULL; ++ --nxi->nx_eph_count; ++ return 0; ++ default: ++ apfs_alert(sb, "new query type must implement node deletion (%d)", type); ++ return -EOPNOTSUPP; ++ } ++} ++ ++/** ++ * apfs_update_node - Update an existing node header ++ * @node: the modified in-memory node ++ */ ++void apfs_update_node(struct apfs_node *node) ++{ ++ struct super_block *sb = node->object.sb; ++ struct buffer_head *bh = node->object.o_bh; ++ struct apfs_btree_node_phys *raw = (void *)node->object.data; ++ struct apfs_nloc *free_head; ++ u32 tflags, type; ++ int toc_off; ++ ++ apfs_assert_in_transaction(sb, &raw->btn_o); ++ ++ raw->btn_o.o_oid = cpu_to_le64(node->object.oid); ++ ++ /* The node may no longer be a root, so update the object type */ ++ tflags = le32_to_cpu(raw->btn_o.o_type) & APFS_OBJECT_TYPE_FLAGS_MASK; ++ type = (node->flags & APFS_BTNODE_ROOT) ? 
APFS_OBJECT_TYPE_BTREE : ++ APFS_OBJECT_TYPE_BTREE_NODE; ++ raw->btn_o.o_type = cpu_to_le32(type | tflags); ++ raw->btn_o.o_subtype = cpu_to_le32(node->tree_type); ++ ++ raw->btn_flags = cpu_to_le16(node->flags); ++ raw->btn_nkeys = cpu_to_le32(node->records); ++ ++ toc_off = sizeof(*raw) + le16_to_cpu(raw->btn_table_space.off); ++ raw->btn_table_space.len = cpu_to_le16(node->key - toc_off); ++ raw->btn_free_space.off = cpu_to_le16(node->free - node->key); ++ raw->btn_free_space.len = cpu_to_le16(node->data - node->free); ++ ++ /* Reset the lists on zero length, a defragmentation is taking place */ ++ free_head = &raw->btn_key_free_list; ++ free_head->len = cpu_to_le16(node->key_free_list_len); ++ if (!free_head->len) ++ free_head->off = cpu_to_le16(APFS_BTOFF_INVALID); ++ free_head = &raw->btn_val_free_list; ++ free_head->len = cpu_to_le16(node->val_free_list_len); ++ if (!free_head->len) ++ free_head->off = cpu_to_le16(APFS_BTOFF_INVALID); ++ ++ if (bh) { ++ ASSERT(buffer_trans(bh)); ++ ASSERT(buffer_csum(bh)); ++ } ++} ++ ++/** ++ * apfs_node_locate_key - Locate the key of a node record ++ * @node: node to be searched ++ * @index: number of the entry to locate ++ * @off: on return will hold the offset in the block ++ * ++ * Returns the length of the key, or 0 in case of failure. The function checks ++ * that this length fits within the block; callers must use the returned value ++ * to make sure they never operate outside its bounds. 
++ */ ++int apfs_node_locate_key(struct apfs_node *node, int index, int *off) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *raw; ++ int len; ++ ++ if (index >= node->records) { ++ apfs_err(sb, "index out of bounds (%d of %d)", index, node->records); ++ return 0; ++ } ++ ++ raw = (struct apfs_btree_node_phys *)node->object.data; ++ if (apfs_node_has_fixed_kv_size(node)) { ++ struct apfs_kvoff *entry; ++ ++ entry = (struct apfs_kvoff *)raw->btn_data + index; ++ ++ /* TODO: it would be cleaner to read this stuff from disk */ ++ if (node->tree_type == APFS_OBJECT_TYPE_OMAP_SNAPSHOT) ++ len = 8; ++ else ++ len = 16; ++ ++ /* Translate offset in key area to offset in block */ ++ *off = node->key + le16_to_cpu(entry->k); ++ } else { ++ /* These node types have variable length keys and data */ ++ struct apfs_kvloc *entry; ++ ++ entry = (struct apfs_kvloc *)raw->btn_data + index; ++ len = le16_to_cpu(entry->k.len); ++ /* Translate offset in key area to offset in block */ ++ *off = node->key + le16_to_cpu(entry->k.off); ++ } ++ ++ if (*off + len > sb->s_blocksize) { ++ apfs_err(sb, "key out of bounds (%d-%d)", *off, len); ++ return 0; ++ } ++ return len; ++} ++ ++/** ++ * apfs_node_locate_data - Locate the data of a node record ++ * @node: node to be searched ++ * @index: number of the entry to locate ++ * @off: on return will hold the offset in the block ++ * ++ * Returns the length of the data, which may be 0 in case of corruption or if ++ * the record is a ghost. The function checks that this length fits within the ++ * block; callers must use the returned value to make sure they never operate ++ * outside its bounds. 
++ */ ++static int apfs_node_locate_data(struct apfs_node *node, int index, int *off) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *raw; ++ int len; ++ ++ if (index >= node->records) { ++ apfs_err(sb, "index out of bounds (%d of %d)", index, node->records); ++ return 0; ++ } ++ ++ raw = (struct apfs_btree_node_phys *)node->object.data; ++ if (apfs_node_has_fixed_kv_size(node)) { ++ /* These node types have fixed length keys and data */ ++ struct apfs_kvoff *entry; ++ ++ entry = (struct apfs_kvoff *)raw->btn_data + index; ++ if (node->tree_type == APFS_OBJECT_TYPE_SPACEMAN_FREE_QUEUE) { ++ /* A free-space queue record may have no value */ ++ if (le16_to_cpu(entry->v) == APFS_BTOFF_INVALID) { ++ *off = 0; ++ return 0; ++ } ++ len = 8; ++ } else { ++ /* This is an omap or omap snapshots node */ ++ len = apfs_node_is_leaf(node) ? 16 : 8; ++ } ++ /* ++ * Data offsets are counted backwards from the end of the ++ * block, or from the beginning of the footer when it exists ++ */ ++ if (apfs_node_is_root(node)) /* has footer */ ++ *off = sb->s_blocksize - sizeof(struct apfs_btree_info) ++ - le16_to_cpu(entry->v); ++ else ++ *off = sb->s_blocksize - le16_to_cpu(entry->v); ++ } else { ++ /* These node types have variable length keys and data */ ++ struct apfs_kvloc *entry; ++ ++ entry = (struct apfs_kvloc *)raw->btn_data + index; ++ len = le16_to_cpu(entry->v.len); ++ /* ++ * Data offsets are counted backwards from the end of the ++ * block, or from the beginning of the footer when it exists ++ */ ++ if (apfs_node_is_root(node)) /* has footer */ ++ *off = sb->s_blocksize - sizeof(struct apfs_btree_info) ++ - le16_to_cpu(entry->v.off); ++ else ++ *off = sb->s_blocksize - le16_to_cpu(entry->v.off); ++ } ++ ++ if (*off < 0 || *off + len > sb->s_blocksize) { ++ apfs_err(sb, "value out of bounds (%d-%d)", *off, len); ++ return 0; ++ } ++ return len; ++} ++ ++/** ++ * apfs_create_toc_entry - Create the table-of-contents entry for a record ++ * 
@query: query pointing to the record ++ * ++ * Creates a toc entry for the record at index @query->index and increases ++ * @node->records. The caller must ensure enough space in the table. ++ */ ++static void apfs_create_toc_entry(struct apfs_query *query) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *raw = (void *)node->object.data; ++ int value_end; ++ int recs = node->records; ++ int index = query->index; ++ ++ value_end = sb->s_blocksize; ++ if (apfs_node_is_root(node)) ++ value_end -= sizeof(struct apfs_btree_info); ++ ++ if (apfs_node_has_fixed_kv_size(node)) { ++ struct apfs_kvoff *kvoff; ++ ++ kvoff = (struct apfs_kvoff *)raw->btn_data + query->index; ++ memmove(kvoff + 1, kvoff, (recs - index) * sizeof(*kvoff)); ++ ++ if (!query->len) /* Ghost record */ ++ kvoff->v = cpu_to_le16(APFS_BTOFF_INVALID); ++ else ++ kvoff->v = cpu_to_le16(value_end - query->off); ++ kvoff->k = cpu_to_le16(query->key_off - node->key); ++ } else { ++ struct apfs_kvloc *kvloc; ++ ++ kvloc = (struct apfs_kvloc *)raw->btn_data + query->index; ++ memmove(kvloc + 1, kvloc, (recs - index) * sizeof(*kvloc)); ++ ++ kvloc->v.off = cpu_to_le16(value_end - query->off); ++ kvloc->v.len = cpu_to_le16(query->len); ++ kvloc->k.off = cpu_to_le16(query->key_off - node->key); ++ kvloc->k.len = cpu_to_le16(query->key_len); ++ } ++ node->records++; ++} ++ ++/** ++ * apfs_key_from_query - Read the current key from a query structure ++ * @query: the query, with @query->key_off and @query->key_len already set ++ * @key: return parameter for the key ++ * ++ * Reads the key into @key and performs some basic sanity checks as a ++ * protection against crafted filesystems. Returns 0 on success or a ++ * negative error code otherwise. 
++ */ ++static int apfs_key_from_query(struct apfs_query *query, struct apfs_key *key) ++{ ++ struct super_block *sb = query->node->object.sb; ++ char *raw = query->node->object.data; ++ void *raw_key = (void *)(raw + query->key_off); ++ bool hashed; ++ int err = 0; ++ ++ switch (query->flags & APFS_QUERY_TREE_MASK) { ++ case APFS_QUERY_CAT: ++ hashed = apfs_is_normalization_insensitive(sb); ++ err = apfs_read_cat_key(raw_key, query->key_len, key, hashed); ++ break; ++ case APFS_QUERY_OMAP: ++ err = apfs_read_omap_key(raw_key, query->key_len, key); ++ break; ++ case APFS_QUERY_FREE_QUEUE: ++ err = apfs_read_free_queue_key(raw_key, query->key_len, key); ++ break; ++ case APFS_QUERY_EXTENTREF: ++ err = apfs_read_extentref_key(raw_key, query->key_len, key); ++ break; ++ case APFS_QUERY_FEXT: ++ err = apfs_read_fext_key(raw_key, query->key_len, key); ++ break; ++ case APFS_QUERY_SNAP_META: ++ err = apfs_read_snap_meta_key(raw_key, query->key_len, key); ++ break; ++ case APFS_QUERY_OMAP_SNAP: ++ err = apfs_read_omap_snap_key(raw_key, query->key_len, key); ++ break; ++ default: ++ apfs_alert(sb, "new query type must implement key reads (%d)", query->flags & APFS_QUERY_TREE_MASK); ++ err = -EOPNOTSUPP; ++ break; ++ } ++ if (err) ++ apfs_err(sb, "bad node key in block 0x%llx", query->node->object.block_nr); ++ ++ /* A multiple query must ignore some of these fields */ ++ if (query->flags & APFS_QUERY_ANY_NAME) ++ key->name = NULL; ++ if (query->flags & APFS_QUERY_ANY_NUMBER) ++ key->number = 0; ++ ++ return err; ++} ++ ++/** ++ * apfs_node_prev - Find the previous record in the current node ++ * @sb: filesystem superblock ++ * @query: query in execution ++ * ++ * Returns 0 on success, -EAGAIN if the previous record is in another node, ++ * -ENODATA if no more records exist, or another negative error code in case ++ * of failure. 
++ *
++ * The meaning of "next" and "previous" is reversed here, because regular
++ * multiple queries always start with the final record, and then they go
++ * backwards. TODO: consider renaming this for clarity.
++ */
++static int apfs_node_prev(struct super_block *sb, struct apfs_query *query)
++{
++	struct apfs_node *node = query->node;
++
++	if (query->index + 1 == node->records) {
++		/* The next record may be in another node */
++		return -EAGAIN;
++	}
++	++query->index;
++
++	query->key_len = apfs_node_locate_key(node, query->index, &query->key_off);
++	if (query->key_len == 0) {
++		apfs_err(sb, "bad key for index %d", query->index);
++		return -EFSCORRUPTED;
++	}
++	query->len = apfs_node_locate_data(node, query->index, &query->off);
++	if (query->len == 0) {
++		apfs_err(sb, "bad value for index %d", query->index);
++		return -EFSCORRUPTED;
++	}
++	return 0;
++}
++
++/**
++ * apfs_node_next - Find the next matching record in the current node
++ * @sb:		filesystem superblock
++ * @query:	multiple query in execution
++ *
++ * Returns 0 on success, -EAGAIN if the next record is in another node,
++ * -ENODATA if no more matching records exist, or another negative error
++ * code in case of failure.
++ */ ++static int apfs_node_next(struct super_block *sb, struct apfs_query *query) ++{ ++ struct apfs_node *node = query->node; ++ struct apfs_key curr_key; ++ int cmp, err; ++ ++ if (query->flags & APFS_QUERY_DONE) ++ /* Nothing left to search; the query failed */ ++ return -ENODATA; ++ ++ if (!query->index) /* The next record may be in another node */ ++ return -EAGAIN; ++ --query->index; ++ ++ query->key_len = apfs_node_locate_key(node, query->index, ++ &query->key_off); ++ err = apfs_key_from_query(query, &curr_key); ++ if (err) { ++ apfs_err(sb, "bad key for index %d", query->index); ++ return err; ++ } ++ ++ cmp = apfs_keycmp(&curr_key, &query->key); ++ ++ if (cmp > 0) { ++ apfs_err(sb, "records are out of order"); ++ return -EFSCORRUPTED; ++ } ++ ++ if (cmp != 0 && apfs_node_is_leaf(node) && ++ query->flags & APFS_QUERY_EXACT) ++ return -ENODATA; ++ ++ query->len = apfs_node_locate_data(node, query->index, &query->off); ++ if (query->len == 0) { ++ apfs_err(sb, "bad value for index %d", query->index); ++ return -EFSCORRUPTED; ++ } ++ ++ if (cmp != 0) { ++ /* ++ * This is the last entry that can be relevant in this node. ++ * Keep searching the children, but don't return to this level. ++ */ ++ query->flags |= APFS_QUERY_DONE; ++ } ++ ++ return 0; ++} ++ ++/** ++ * apfs_node_query - Execute a query on a single node ++ * @sb: filesystem superblock ++ * @query: the query to execute ++ * ++ * The search will start at index @query->index, looking for the key that comes ++ * right before @query->key, according to the order given by apfs_keycmp(). ++ * ++ * The @query->index will be updated to the last index checked. This is ++ * important when searching for multiple entries, since the query may need ++ * to remember where it was on this level. If we are done with this node, the ++ * query will be flagged as APFS_QUERY_DONE, and the search will end in failure ++ * as soon as we return to this level. 
The function may also return -EAGAIN, ++ * to signal that the search should go on in a different branch. ++ * ++ * On success returns 0; the offset of the data within the block will be saved ++ * in @query->off, and its length in @query->len. The function checks that this ++ * length fits within the block; callers must use the returned value to make ++ * sure they never operate outside its bounds. ++ * ++ * -ENODATA will be returned if no appropriate entry was found, -EFSCORRUPTED ++ * in case of corruption. ++ */ ++int apfs_node_query(struct super_block *sb, struct apfs_query *query) ++{ ++ struct apfs_node *node = query->node; ++ int left, right; ++ int cmp; ++ int err; ++ ++ if (query->flags & APFS_QUERY_PREV) ++ return apfs_node_prev(sb, query); ++ if (query->flags & APFS_QUERY_NEXT) ++ return apfs_node_next(sb, query); ++ ++ /* Search by bisection */ ++ cmp = 1; ++ left = 0; ++ do { ++ struct apfs_key curr_key; ++ ++ if (cmp > 0) { ++ right = query->index - 1; ++ if (right < left) { ++ query->index = -1; ++ return -ENODATA; ++ } ++ query->index = (left + right) / 2; ++ } else { ++ left = query->index; ++ query->index = DIV_ROUND_UP(left + right, 2); ++ } ++ ++ query->key_len = apfs_node_locate_key(node, query->index, ++ &query->key_off); ++ err = apfs_key_from_query(query, &curr_key); ++ if (err) { ++ apfs_err(sb, "bad key for index %d", query->index); ++ return err; ++ } ++ ++ cmp = apfs_keycmp(&curr_key, &query->key); ++ if (cmp == 0 && !(query->flags & APFS_QUERY_MULTIPLE)) ++ break; ++ } while (left != right); ++ ++ if (cmp > 0) { ++ query->index = -1; ++ return -ENODATA; ++ } ++ ++ if (cmp != 0 && apfs_node_is_leaf(query->node) && ++ query->flags & APFS_QUERY_EXACT) ++ return -ENODATA; ++ ++ if (query->flags & APFS_QUERY_MULTIPLE) { ++ if (cmp != 0) /* Last relevant entry in level */ ++ query->flags |= APFS_QUERY_DONE; ++ query->flags |= APFS_QUERY_NEXT; ++ } ++ ++ query->len = apfs_node_locate_data(node, query->index, &query->off); ++ return 0; ++} ++ 
++/** ++ * apfs_node_query_first - Find the first record in a node ++ * @query: on return this query points to the record ++ */ ++void apfs_node_query_first(struct apfs_query *query) ++{ ++ struct apfs_node *node = query->node; ++ ++ query->index = 0; ++ query->key_len = apfs_node_locate_key(node, query->index, &query->key_off); ++ query->len = apfs_node_locate_data(node, query->index, &query->off); ++} ++ ++/** ++ * apfs_omap_map_from_query - Read the mapping found by a successful omap query ++ * @query: the query that found the record ++ * @map: Return parameter. The mapping found. ++ * ++ * Returns -EOPNOTSUPP if the object doesn't fit in one block, and -EFSCORRUPTED ++ * if the filesystem appears to be malicious. Otherwise, reads the mapping info ++ * in the omap record into @map and returns 0. ++ */ ++int apfs_omap_map_from_query(struct apfs_query *query, struct apfs_omap_map *map) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_omap_key *key = NULL; ++ struct apfs_omap_val *val = NULL; ++ char *raw = query->node->object.data; ++ ++ if (query->len != sizeof(*val) || query->key_len != sizeof(*key)) { ++ apfs_err(sb, "bad length of key (%d) or value (%d)", query->key_len, query->len); ++ return -EFSCORRUPTED; ++ } ++ key = (struct apfs_omap_key *)(raw + query->key_off); ++ val = (struct apfs_omap_val *)(raw + query->off); ++ ++ /* TODO: support objects with multiple blocks */ ++ if (le32_to_cpu(val->ov_size) != sb->s_blocksize) { ++ apfs_err(sb, "object size doesn't match block size"); ++ return -EOPNOTSUPP; ++ } ++ ++ map->xid = le64_to_cpu(key->ok_xid); ++ map->bno = le64_to_cpu(val->ov_paddr); ++ map->flags = le32_to_cpu(val->ov_flags); ++ return 0; ++} ++ ++/** ++ * apfs_btree_inc_height - Increase the height of a b-tree ++ * @query: query pointing to the root node ++ * ++ * On success returns 0, and @query is left pointing to the same record. ++ * Returns a negative error code in case of failure. 
++ */ ++static int apfs_btree_inc_height(struct apfs_query *query) ++{ ++ struct apfs_query *root_query; ++ struct apfs_node *root = query->node; ++ struct apfs_node *new_node; ++ struct super_block *sb = root->object.sb; ++ struct apfs_btree_node_phys *root_raw; ++ struct apfs_btree_node_phys *new_raw; ++ struct apfs_btree_info *info; ++ __le64 *raw_oid; ++ u32 storage = apfs_query_storage(query); ++ ++ root_raw = (void *)root->object.data; ++ apfs_assert_in_transaction(sb, &root_raw->btn_o); ++ ++ if (query->parent || query->depth) { ++ apfs_err(sb, "invalid root query"); ++ return -EFSCORRUPTED; ++ } ++ ++ /* Create a new child node */ ++ new_node = apfs_create_node(sb, storage); ++ if (IS_ERR(new_node)) { ++ apfs_err(sb, "node creation failed"); ++ return PTR_ERR(new_node); ++ } ++ new_node->flags = root->flags & ~APFS_BTNODE_ROOT; ++ new_node->tree_type = root->tree_type; ++ ++ /* Move all records into the child node; get rid of the info footer */ ++ new_node->records = root->records; ++ new_node->key = root->key; ++ new_node->free = root->free; ++ new_node->data = root->data + sizeof(*info); ++ new_node->key_free_list_len = root->key_free_list_len; ++ new_node->val_free_list_len = root->val_free_list_len; ++ new_raw = (void *)new_node->object.data; ++ /* Don't copy the object header, already set by apfs_create_node() */ ++ memcpy((void *)new_raw + sizeof(new_raw->btn_o), ++ (void *)root_raw + sizeof(root_raw->btn_o), ++ root->free - sizeof(new_raw->btn_o)); ++ memcpy((void *)new_raw + new_node->data, ++ (void *)root_raw + root->data, ++ sb->s_blocksize - new_node->data); ++ query->off += sizeof(*info); ++ apfs_update_node(new_node); ++ ++ /* Add a new level to the query chain */ ++ root_query = query->parent = apfs_alloc_query(root, NULL /* parent */); ++ if (!query->parent) { ++ apfs_node_free(new_node); ++ return -ENOMEM; ++ } ++ root_query->key = query->key; ++ root_query->flags = query->flags; ++ query->node = new_node; ++ query->depth = 1; ++ ++ /* Now 
assemble the new root with only the first key */ ++ root_query->key_len = apfs_node_locate_key(root, 0 /* index */, ++ &root_query->key_off); ++ if (!root_query->key_len) { ++ apfs_err(sb, "bad key for index %d", 0); ++ return -EFSCORRUPTED; ++ } ++ root->key = sizeof(*root_raw) + ++ apfs_node_min_table_size(sb, root->tree_type, root->flags & ~APFS_BTNODE_LEAF); ++ memmove((void *)root_raw + root->key, ++ (void *)root_raw + root_query->key_off, root_query->key_len); ++ root_query->key_off = root->key; ++ root->free = root->key + root_query->key_len; ++ ++ /* The new root is a nonleaf node; the record value is the child id */ ++ root->flags &= ~APFS_BTNODE_LEAF; ++ root->data = sb->s_blocksize - sizeof(*info) - sizeof(*raw_oid); ++ raw_oid = (void *)root_raw + root->data; ++ *raw_oid = cpu_to_le64(new_node->object.oid); ++ root_query->off = root->data; ++ root_query->len = sizeof(*raw_oid); ++ ++ /* With the key and value in place, set the table-of-contents */ ++ root->records = 0; ++ root_query->index = 0; ++ apfs_create_toc_entry(root_query); ++ ++ /* There is no internal fragmentation */ ++ root->key_free_list_len = 0; ++ root->val_free_list_len = 0; ++ ++ /* Finally, update the node count in the info footer */ ++ apfs_btree_change_node_count(root_query, 1 /* change */); ++ ++ le16_add_cpu(&root_raw->btn_level, 1); /* TODO: move to update_node() */ ++ apfs_update_node(root); ++ return 0; ++} ++ ++/** ++ * apfs_copy_record_range - Copy a range of records to an empty node ++ * @dest_node: destination node ++ * @src_node: source node ++ * @start: index of first record in range ++ * @end: index of first record after the range ++ * ++ * Doesn't modify the info footer of root nodes. Returns 0 on success or a ++ * negative error code in case of failure. 
++ */ ++static int apfs_copy_record_range(struct apfs_node *dest_node, ++ struct apfs_node *src_node, ++ int start, int end) ++{ ++ struct super_block *sb = dest_node->object.sb; ++ struct apfs_btree_node_phys *dest_raw; ++ struct apfs_btree_node_phys *src_raw; ++ struct apfs_query *query = NULL; ++ int toc_size, toc_entry_size; ++ int err; ++ int i; ++ ++ dest_raw = (void *)dest_node->object.data; ++ src_raw = (void *)src_node->object.data; ++ ++ ASSERT(!dest_node->records); ++ apfs_assert_in_transaction(sb, &dest_raw->btn_o); ++ ++ /* Resize the table of contents so that all the records fit */ ++ if (apfs_node_has_fixed_kv_size(src_node)) ++ toc_entry_size = sizeof(struct apfs_kvoff); ++ else ++ toc_entry_size = sizeof(struct apfs_kvloc); ++ toc_size = apfs_node_min_table_size(sb, src_node->tree_type, src_node->flags); ++ if (toc_size < toc_entry_size * (end - start)) ++ toc_size = toc_entry_size * round_up(end - start, APFS_BTREE_TOC_ENTRY_INCREMENT); ++ dest_node->key = sizeof(*dest_raw) + toc_size; ++ dest_node->free = dest_node->key; ++ dest_node->data = sb->s_blocksize; ++ if (apfs_node_is_root(dest_node)) ++ dest_node->data -= sizeof(struct apfs_btree_info); ++ ++ /* We'll use a temporary query structure to move the records around */ ++ query = apfs_alloc_query(dest_node, NULL /* parent */); ++ if (!query) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ err = -EFSCORRUPTED; ++ for (i = start; i < end; ++i) { ++ int len, off; ++ ++ len = apfs_node_locate_key(src_node, i, &off); ++ if (dest_node->free + len > sb->s_blocksize) { ++ apfs_err(sb, "key of length %d doesn't fit", len); ++ goto fail; ++ } ++ memcpy((char *)dest_raw + dest_node->free, ++ (char *)src_raw + off, len); ++ query->key_off = dest_node->free; ++ query->key_len = len; ++ dest_node->free += len; ++ ++ len = apfs_node_locate_data(src_node, i, &off); ++ dest_node->data -= len; ++ if (dest_node->data < 0) { ++ apfs_err(sb, "value of length %d doesn't fit", len); ++ goto fail; ++ } ++ memcpy((char 
*)dest_raw + dest_node->data, ++ (char *)src_raw + off, len); ++ query->off = dest_node->data; ++ query->len = len; ++ ++ query->index = i - start; ++ apfs_create_toc_entry(query); ++ } ++ err = 0; ++ ++fail: ++ apfs_free_query(query); ++ return err; ++} ++ ++/** ++ * apfs_attach_child - Attach a new node to its parent ++ * @query: query pointing to the previous record in the parent ++ * @child: the new child node to attach ++ * ++ * Returns 0 on success or a negative error code in case of failure (which may ++ * be -EAGAIN if a node split has happened and the caller must refresh and ++ * retry). ++ */ ++static int apfs_attach_child(struct apfs_query *query, struct apfs_node *child) ++{ ++ struct apfs_object *object = &child->object; ++ struct apfs_btree_node_phys *raw = (void *)object->data; ++ int key_len, key_off; ++ __le64 raw_oid = cpu_to_le64(object->oid); ++ ++ key_len = apfs_node_locate_key(child, 0, &key_off); ++ if (!key_len) { ++ /* This should never happen: @child was made by us */ ++ apfs_alert(object->sb, "bad key for index %d", 0); ++ return -EFSCORRUPTED; ++ } ++ ++ return __apfs_btree_insert(query, (void *)raw + key_off, key_len, &raw_oid, sizeof(raw_oid)); ++} ++ ++/** ++ * apfs_node_temp_dup - Make an in-memory duplicate of a node ++ * @original: node to duplicate ++ * @duplicate: on success, the duplicate node ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_node_temp_dup(const struct apfs_node *original, struct apfs_node **duplicate) ++{ ++ struct super_block *sb = original->object.sb; ++ struct apfs_node *dup = NULL; ++ char *buffer = NULL; ++ ++ dup = kmalloc(sizeof(*dup), GFP_KERNEL); ++ if (!dup) ++ return -ENOMEM; ++ *dup = *original; ++ dup->object.o_bh = NULL; ++ dup->object.data = NULL; ++ dup->object.ephemeral = false; ++ ++ buffer = kmalloc(sb->s_blocksize, GFP_KERNEL); ++ if (!buffer) { ++ kfree(dup); ++ return -ENOMEM; ++ } ++ memcpy(buffer, original->object.data, sb->s_blocksize); ++ dup->object.data = buffer; ++ ++ *duplicate = dup; ++ return 0; ++} ++ ++/** ++ * apfs_node_split - Split a b-tree node in two ++ * @query: query pointing to the node ++ * ++ * On success returns 0, and @query is left pointing to the same record on the ++ * tip; to simplify the implementation, @query->parent is set to NULL. Returns ++ * a negative error code in case of failure, which may be -EAGAIN if a node ++ * split has happened and the caller must refresh and retry. ++ */ ++int apfs_node_split(struct apfs_query *query) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_node *old_node = NULL, *new_node = NULL, *tmp_node = NULL; ++ struct apfs_btree_node_phys *new_raw = NULL, *old_raw = NULL; ++ u32 storage = apfs_query_storage(query); ++ int record_count, new_rec_count, old_rec_count; ++ int err; ++ ++ apfs_assert_query_is_valid(query); ++ ++ if (apfs_node_is_root(query->node)) { ++ err = apfs_btree_inc_height(query); ++ if (err) { ++ apfs_err(sb, "failed to increase tree height"); ++ return err; ++ } ++ } else if (!query->parent) { ++ apfs_err(sb, "nonroot node with no parent"); ++ return -EFSCORRUPTED; ++ } ++ old_node = query->node; ++ ++ old_raw = (void *)old_node->object.data; ++ apfs_assert_in_transaction(sb, &old_raw->btn_o); ++ ++ /* ++ * To defragment the original node, we put all records in a temporary ++ * in-memory node before dealing them out. 
++ */ ++ err = apfs_node_temp_dup(old_node, &tmp_node); ++ if (err) ++ return err; ++ ++ record_count = old_node->records; ++ if (record_count == 1) { ++ apfs_alert(sb, "splitting node with a single record"); ++ err = -EFSCORRUPTED; ++ goto out; ++ } ++ new_rec_count = record_count / 2; ++ old_rec_count = record_count - new_rec_count; ++ ++ /* ++ * The second half of the records go into a new node. This is done ++ * before the first half to avoid committing to any actual changes ++ * until we know for sure that no ancestor splits are expected. ++ */ ++ ++ new_node = apfs_create_node(sb, storage); ++ if (IS_ERR(new_node)) { ++ apfs_err(sb, "node creation failed"); ++ err = PTR_ERR(new_node); ++ new_node = NULL; ++ goto out; ++ } ++ new_node->tree_type = old_node->tree_type; ++ new_node->flags = old_node->flags; ++ new_node->records = 0; ++ new_node->key_free_list_len = 0; ++ new_node->val_free_list_len = 0; ++ err = apfs_copy_record_range(new_node, tmp_node, old_rec_count, record_count); ++ if (err) { ++ apfs_err(sb, "record copy failed"); ++ goto out; ++ } ++ new_raw = (void *)new_node->object.data; ++ apfs_assert_in_transaction(sb, &new_raw->btn_o); ++ new_raw->btn_level = old_raw->btn_level; ++ apfs_update_node(new_node); ++ ++ err = apfs_attach_child(query->parent, new_node); ++ if (err) { ++ if (err != -EAGAIN) { ++ apfs_err(sb, "child attachment failed"); ++ goto out; ++ } ++ err = apfs_delete_node(new_node, query->flags & APFS_QUERY_TREE_MASK); ++ if (err) { ++ apfs_err(sb, "node cleanup failed for query retry"); ++ goto out; ++ } ++ err = -EAGAIN; ++ goto out; ++ } ++ apfs_assert_query_is_valid(query->parent); ++ apfs_btree_change_node_count(query->parent, 1 /* change */); ++ ++ /* ++ * No more risk of ancestor splits, now actual changes can be made. The ++ * first half of the records go into the original node. 
++ */ ++ ++ old_node->records = 0; ++ old_node->key_free_list_len = 0; ++ old_node->val_free_list_len = 0; ++ err = apfs_copy_record_range(old_node, tmp_node, 0, old_rec_count); ++ if (err) { ++ apfs_err(sb, "record copy failed"); ++ goto out; ++ } ++ apfs_update_node(old_node); ++ ++ /* Point the query back to the original record */ ++ if (query->index >= old_rec_count) { ++ /* The record got moved to the new node */ ++ apfs_node_free(query->node); ++ query->node = new_node; ++ new_node = NULL; ++ query->index -= old_rec_count; ++ } ++ ++ /* ++ * This could be avoided in most cases, and queries could get refreshed ++ * only when really orphaned. But refreshing queries is probably not a ++ * bottleneck, and trying to be clever with this stuff has caused me a ++ * lot of trouble already. ++ */ ++ apfs_free_query(query->parent); ++ query->parent = NULL; /* The caller only gets the leaf */ ++ ++out: ++ apfs_node_free(new_node); ++ apfs_node_free(tmp_node); ++ return err; ++} ++ ++/* TODO: the following 4 functions could be reused elsewhere */ ++ ++/** ++ * apfs_off_to_val_off - Translate offset in node to offset in value area ++ * @node: the node ++ * @off: offset in the node ++ */ ++static u16 apfs_off_to_val_off(struct apfs_node *node, u16 off) ++{ ++ struct super_block *sb = node->object.sb; ++ u16 val_end; ++ ++ val_end = sb->s_blocksize; ++ if (apfs_node_is_root(node)) /* has footer */ ++ val_end -= sizeof(struct apfs_btree_info); ++ return val_end - off; ++} ++ ++/** ++ * apfs_val_off_to_off - Translate offset in value area to offset in node ++ * @node: the node ++ * @off: offset in the value area ++ */ ++static u16 apfs_val_off_to_off(struct apfs_node *node, u16 off) ++{ ++ return apfs_off_to_val_off(node, off); ++} ++ ++/** ++ * apfs_off_to_key_off - Translate offset in node to offset in key area ++ * @node: the node ++ * @off: offset in the node ++ */ ++static u16 apfs_off_to_key_off(struct apfs_node *node, u16 off) ++{ ++ return off - node->key; ++} ++ ++/** 
++ * apfs_key_off_to_off - Translate offset in key area to offset in node ++ * @node: the node ++ * @off: offset in the key area ++ */ ++static u16 apfs_key_off_to_off(struct apfs_node *node, u16 off) ++{ ++ return off + node->key; ++} ++ ++/* The type of the previous four functions, used for node offset calculations */ ++typedef u16 (*offcalc)(struct apfs_node *, u16); ++ ++/** ++ * apfs_node_free_list_add - Add a free node segment to the proper free list ++ * @node: node for the segment ++ * @off: offset of the segment to add ++ * @len: length of the segment to add ++ * ++ * The caller must ensure that the freed segment fits in the node. ++ */ ++static void apfs_node_free_list_add(struct apfs_node *node, u16 off, u16 len) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw = (void *)node->object.data; ++ struct apfs_nloc *head, *new; ++ offcalc off_to_rel; ++ ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ ++ if (off >= node->data) { /* Value area */ ++ off_to_rel = apfs_off_to_val_off; ++ head = &node_raw->btn_val_free_list; ++ node->val_free_list_len += len; ++ } else { /* Key area */ ++ off_to_rel = apfs_off_to_key_off; ++ head = &node_raw->btn_key_free_list; ++ node->key_free_list_len += len; ++ } ++ ++ /* Very small segments are leaked until defragmentation */ ++ if (len < sizeof(*new)) ++ return; ++ ++ /* The free list doesn't seem to be kept in any particular order */ ++ new = (void *)node_raw + off; ++ new->off = head->off; ++ new->len = cpu_to_le16(len); ++ head->off = cpu_to_le16(off_to_rel(node, off)); ++} ++ ++/** ++ * apfs_node_free_range - Free space from a node's key or value areas ++ * @node: the node ++ * @off: offset to free ++ * @len: length to free ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++void apfs_node_free_range(struct apfs_node *node, u16 off, u16 len) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *raw = (void *)node->object.data; ++ ++ apfs_assert_in_transaction(sb, &raw->btn_o); ++ ++ if (off == node->data) ++ node->data += len; ++ else if (off + len == node->free) ++ node->free -= len; ++ else ++ apfs_node_free_list_add(node, off, len); ++} ++ ++/** ++ * apfs_node_free_list_unlink - Unlink an entry from a node's free list ++ * @prev: previous entry ++ * @curr: entry to unlink ++ */ ++static void apfs_node_free_list_unlink(struct apfs_nloc *prev, struct apfs_nloc *curr) ++{ ++ prev->off = curr->off; ++} ++ ++/** ++ * apfs_node_free_list_alloc - Allocate a free segment from a free list ++ * @node: the node ++ * @len: length to allocate ++ * @value: true to allocate in the value area, false for the key area ++ * ++ * Returns the offset in the node on success, or a negative error code in case ++ * of failure, which may be -ENOSPC if the node seems full. 
++ */ ++static int apfs_node_free_list_alloc(struct apfs_node *node, u16 len, bool value) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw = (void *)node->object.data; ++ struct apfs_nloc *head, *curr, *prev; ++ offcalc rel_to_off; ++ int *list_len; ++ int bound = sb->s_blocksize; ++ ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ ++ if (value) { /* Value area */ ++ rel_to_off = apfs_val_off_to_off; ++ head = &node_raw->btn_val_free_list; ++ list_len = &node->val_free_list_len; ++ } else { /* Key area */ ++ rel_to_off = apfs_key_off_to_off; ++ head = &node_raw->btn_key_free_list; ++ list_len = &node->key_free_list_len; ++ } ++ ++ if (*list_len < len) ++ return -ENOSPC; ++ ++ prev = head; ++ while (bound--) { ++ u16 curr_off = le16_to_cpu(prev->off); ++ u16 abs_off = rel_to_off(node, curr_off); ++ u16 curr_len; ++ ++ if (curr_off == APFS_BTOFF_INVALID) ++ return -ENOSPC; ++ if (abs_off + sizeof(*curr) > sb->s_blocksize) { ++ apfs_err(sb, "nloc out of bounds (%d-%d)", abs_off, (int)sizeof(*curr)); ++ return -EFSCORRUPTED; ++ } ++ curr = (void *)node_raw + abs_off; ++ ++ curr_len = le16_to_cpu(curr->len); ++ if (curr_len >= len) { ++ if (abs_off + curr_len > sb->s_blocksize) { ++ apfs_err(sb, "entry out of bounds (%d-%d)", abs_off, curr_len); ++ return -EFSCORRUPTED; ++ } ++ *list_len -= curr_len; ++ apfs_node_free_list_unlink(prev, curr); ++ apfs_node_free_list_add(node, abs_off + len, curr_len - len); ++ return abs_off; ++ } ++ ++ prev = curr; ++ } ++ ++ /* Don't loop forever if the free list is corrupted and doesn't end */ ++ apfs_err(sb, "free list never ends"); ++ return -EFSCORRUPTED; ++} ++ ++/** ++ * apfs_node_alloc_key - Allocated free space for a new key ++ * @node: the node to search ++ * @len: wanted key length ++ * ++ * Returns the offset in the node on success, or a negative error code in case ++ * of failure, which may be -ENOSPC if the node seems full. 
++ */ ++static int apfs_node_alloc_key(struct apfs_node *node, u16 len) ++{ ++ int off; ++ ++ if (node->free + len <= node->data) { ++ off = node->free; ++ node->free += len; ++ return off; ++ } ++ return apfs_node_free_list_alloc(node, len, false /* value */); ++} ++ ++/** ++ * apfs_node_alloc_val - Allocated free space for a new value ++ * @node: the node to search ++ * @len: wanted value length ++ * ++ * Returns the offset in the node on success, or a negative error code in case ++ * of failure, which may be -ENOSPC if the node seems full. ++ */ ++static int apfs_node_alloc_val(struct apfs_node *node, u16 len) ++{ ++ int off; ++ ++ if (node->free + len <= node->data) { ++ off = node->data - len; ++ node->data -= len; ++ return off; ++ } ++ return apfs_node_free_list_alloc(node, len, true /* value */); ++} ++ ++/** ++ * apfs_node_total_room - Total free space in a node ++ * @node: the node ++ */ ++static int apfs_node_total_room(struct apfs_node *node) ++{ ++ return node->data - node->free + node->key_free_list_len + node->val_free_list_len; ++} ++ ++/** ++ * apfs_node_has_room - Check if a node has room for insertion or replacement ++ * @node: node to check ++ * @length: length of the needed space (may be negative on replace) ++ * @replace: are we replacing a record? 
++ */ ++bool apfs_node_has_room(struct apfs_node *node, int length, bool replace) ++{ ++ struct apfs_btree_node_phys *node_raw = (void *)node->object.data; ++ int toc_entry_size, needed_room; ++ ++ if (apfs_node_has_fixed_kv_size(node)) ++ toc_entry_size = sizeof(struct apfs_kvoff); ++ else ++ toc_entry_size = sizeof(struct apfs_kvloc); ++ ++ needed_room = length; ++ if (!replace) { ++ if (sizeof(*node_raw) + (node->records + 1) * toc_entry_size > node->key) ++ needed_room += APFS_BTREE_TOC_ENTRY_INCREMENT * toc_entry_size; ++ } ++ ++ return apfs_node_total_room(node) >= needed_room; ++} ++ ++/** ++ * apfs_defragment_node - Make all free space in a node contiguous ++ * @node: node to defragment ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_defragment_node(struct apfs_node *node) ++{ ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw = (void *)node->object.data; ++ struct apfs_node *tmp_node = NULL; ++ int record_count, err; ++ ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ ++ /* Put all records in a temporary in-memory node and deal them out */ ++ err = apfs_node_temp_dup(node, &tmp_node); ++ if (err) ++ return err; ++ record_count = node->records; ++ node->records = 0; ++ node->key_free_list_len = 0; ++ node->val_free_list_len = 0; ++ err = apfs_copy_record_range(node, tmp_node, 0, record_count); ++ if (err) { ++ apfs_err(sb, "record copy failed"); ++ goto fail; ++ } ++ apfs_update_node(node); ++fail: ++ apfs_node_free(tmp_node); ++ return err; ++} ++ ++/** ++ * apfs_node_update_toc_entry - Update a table of contents entry in place ++ * @query: query pointing to the toc entry ++ * ++ * The toc entry gets updated with the length and offset for the key/value ++ * provided by @query. Don't call this function for nodes with fixed length ++ * key/values, those never need to update their toc entries. 
++ */ ++static void apfs_node_update_toc_entry(struct apfs_query *query) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_node *node = NULL; ++ struct apfs_btree_node_phys *node_raw = NULL; ++ struct apfs_kvloc *kvloc = NULL; ++ int value_end; ++ ++ node = query->node; ++ ASSERT(!apfs_node_has_fixed_kv_size(node)); ++ sb = node->object.sb; ++ node_raw = (void *)node->object.data; ++ ++ value_end = sb->s_blocksize; ++ if (apfs_node_is_root(node)) ++ value_end -= sizeof(struct apfs_btree_info); ++ ++ kvloc = (struct apfs_kvloc *)node_raw->btn_data + query->index; ++ kvloc->v.off = cpu_to_le16(value_end - query->off); ++ kvloc->v.len = cpu_to_le16(query->len); ++ kvloc->k.off = cpu_to_le16(query->key_off - node->key); ++ kvloc->k.len = cpu_to_le16(query->key_len); ++} ++ ++/** ++ * apfs_node_replace - Replace a record in a node that has enough room ++ * @query: exact query that found the record ++ * @key: new on-disk record key (NULL if unchanged) ++ * @key_len: length of @key ++ * @val: new on-disk record value (NULL if unchanged) ++ * @val_len: length of @val ++ * ++ * Returns 0 on success, and @query is left pointing to the same record. Returns ++ * a negative error code in case of failure. ++ */ ++int apfs_node_replace(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw = (void *)node->object.data; ++ int key_off = 0, val_off = 0, err = 0; ++ bool defragged = false; ++ int qtree = query->flags & APFS_QUERY_TREE_MASK; ++ ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ ++ /* ++ * Free queues are weird because their tables of contents don't report ++ * record lengths, as if they were fixed, but some of the leaf values ++ * are actually "ghosts", that is, zero-length. Supporting replace of ++ * such records would require some changes, and so far I've had no need ++ * for it. 
++ */ ++ (void)qtree; ++ ASSERT(!(qtree == APFS_QUERY_FREE_QUEUE && apfs_node_is_leaf(node))); ++ ++retry: ++ if (key) { ++ if (key_len <= query->key_len) { ++ u16 end = query->key_off + key_len; ++ u16 diff = query->key_len - key_len; ++ ++ apfs_node_free_range(node, end, diff); ++ key_off = query->key_off; ++ } else { ++ apfs_node_free_range(node, query->key_off, query->key_len); ++ key_off = apfs_node_alloc_key(node, key_len); ++ if (key_off < 0) { ++ if (key_off == -ENOSPC) ++ goto defrag; ++ return key_off; ++ } ++ } ++ } ++ ++ if (val) { ++ if (val_len <= query->len) { ++ u16 end = query->off + val_len; ++ u16 diff = query->len - val_len; ++ ++ apfs_node_free_range(node, end, diff); ++ val_off = query->off; ++ } else { ++ apfs_node_free_range(node, query->off, query->len); ++ val_off = apfs_node_alloc_val(node, val_len); ++ if (val_off < 0) { ++ if (val_off == -ENOSPC) ++ goto defrag; ++ return val_off; ++ } ++ } ++ } ++ ++ if (key) { ++ query->key_off = key_off; ++ query->key_len = key_len; ++ memcpy((void *)node_raw + key_off, key, key_len); ++ } ++ if (val) { ++ query->off = val_off; ++ query->len = val_len; ++ memcpy((void *)node_raw + val_off, val, val_len); ++ } ++ ++ /* If the key or value were resized, update the table of contents */ ++ if (!apfs_node_has_fixed_kv_size(node)) ++ apfs_node_update_toc_entry(query); ++ ++ apfs_update_node(node); ++ return 0; ++ ++defrag: ++ if (defragged) { ++ apfs_alert(sb, "no room in defragged node"); ++ return -EFSCORRUPTED; ++ } ++ ++ /* Crush the replaced entry, so that defragmentation is complete */ ++ if (apfs_node_has_fixed_kv_size(node)) { ++ apfs_alert(sb, "failed to replace a fixed size record"); ++ return -EFSCORRUPTED; ++ } ++ if (key) ++ query->key_len = 0; ++ if (val) ++ query->len = 0; ++ apfs_node_update_toc_entry(query); ++ ++ err = apfs_defragment_node(node); ++ if (err) { ++ apfs_err(sb, "failed to defragment node"); ++ return err; ++ } ++ defragged = true; ++ ++ /* The record to replace probably 
moved around */ ++ query->len = apfs_node_locate_data(query->node, query->index, &query->off); ++ query->key_len = apfs_node_locate_key(query->node, query->index, &query->key_off); ++ goto retry; ++} ++ ++/** ++ * apfs_node_insert - Insert a new record in a node that has enough room ++ * @query: query run to search for the record ++ * @key: on-disk record key ++ * @key_len: length of @key ++ * @val: on-disk record value (NULL for ghost records) ++ * @val_len: length of @val (0 for ghost records) ++ * ++ * The new record is placed right after the one found by @query. On success, ++ * returns 0 and sets @query to the new record. In case of failure, returns a ++ * negative error code and leaves @query pointing to the same record. ++ */ ++int apfs_node_insert(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct apfs_node *node = query->node; ++ struct super_block *sb = node->object.sb; ++ struct apfs_btree_node_phys *node_raw = (void *)node->object.data; ++ int toc_entry_size; ++ int key_off, val_off, err; ++ bool defragged = false; ++ ++ apfs_assert_in_transaction(sb, &node_raw->btn_o); ++ ++retry: ++ if (apfs_node_has_fixed_kv_size(node)) ++ toc_entry_size = sizeof(struct apfs_kvoff); ++ else ++ toc_entry_size = sizeof(struct apfs_kvloc); ++ ++ /* Expand the table of contents if necessary */ ++ if (sizeof(*node_raw) + (node->records + 1) * toc_entry_size > node->key) { ++ int new_key_base = node->key; ++ int new_free_base = node->free; ++ int inc; ++ ++ inc = APFS_BTREE_TOC_ENTRY_INCREMENT * toc_entry_size; ++ ++ new_key_base += inc; ++ new_free_base += inc; ++ if (new_free_base > node->data) ++ goto defrag; ++ memmove((void *)node_raw + new_key_base, ++ (void *)node_raw + node->key, node->free - node->key); ++ ++ node->key = new_key_base; ++ node->free = new_free_base; ++ query->key_off += inc; ++ } ++ ++ key_off = apfs_node_alloc_key(node, key_len); ++ if (key_off < 0) { ++ if (key_off == -ENOSPC) ++ goto defrag; ++ return 
key_off; ++ } ++ ++ if (val) { ++ val_off = apfs_node_alloc_val(node, val_len); ++ if (val_off < 0) { ++ if (val_off == -ENOSPC) { ++ /* ++ * There is no need for an update of the on-disk ++ * node before the defrag, since only in-memory ++ * data should be used there... ++ */ ++ goto defrag; ++ } ++ return val_off; ++ } ++ } ++ ++ query->key_len = key_len; ++ query->key_off = key_off; ++ memcpy((void *)node_raw + key_off, key, key_len); ++ ++ query->len = val_len; ++ if (val) { ++ query->off = val_off; ++ memcpy((void *)node_raw + val_off, val, val_len); ++ } else { ++ query->off = 0; ++ } ++ ++ query->index++; /* The query returned the record right before @key */ ++ ++ /* Add the new entry to the table of contents */ ++ apfs_create_toc_entry(query); ++ ++ apfs_update_node(node); ++ return 0; ++ ++defrag: ++ if (defragged) { ++ apfs_err(sb, "node reports incorrect free space"); ++ return -EFSCORRUPTED; ++ } ++ err = apfs_defragment_node(node); ++ if (err) { ++ apfs_err(sb, "failed to defragment node"); ++ return err; ++ } ++ defragged = true; ++ goto retry; ++} ++ ++/** ++ * apfs_create_single_rec_node - Creates a new node with a single record ++ * @query: query run to search for the record ++ * @key: on-disk record key ++ * @key_len: length of @key ++ * @val: on-disk record value ++ * @val_len: length of @val ++ * ++ * The new node is placed right after the one found by @query, which must have ++ * a single record. On success, returns 0 and sets @query to the new record; ++ * returns a negative error code in case of failure, which may be -EAGAIN if a ++ * node split has happened and the caller must refresh and retry. 
++ */ ++int apfs_create_single_rec_node(struct apfs_query *query, void *key, int key_len, void *val, int val_len) ++{ ++ struct super_block *sb = NULL; ++ struct apfs_node *new_node = NULL, *prev_node = NULL; ++ struct apfs_btree_node_phys *prev_raw = NULL; ++ struct apfs_btree_node_phys *new_raw = NULL; ++ int err; ++ ++ prev_node = query->node; ++ sb = prev_node->object.sb; ++ ++ ASSERT(query->parent); ++ ASSERT(prev_node->records == 1); ++ ASSERT(val && val_len); ++ ++ /* This function should only be needed for huge catalog records */ ++ if (prev_node->tree_type != APFS_OBJECT_TYPE_FSTREE) { ++ apfs_err(sb, "huge node records in the wrong tree"); ++ return -EFSCORRUPTED; ++ } ++ ++ /* ++ * This will only be called for leaf nodes because it's the values that ++ * can get huge, not the keys. It will also never be called for root, ++ * because the catalog always has more than a single record. ++ */ ++ if (apfs_node_is_root(prev_node) || !apfs_node_is_leaf(prev_node)) { ++ apfs_err(sb, "huge record in index node"); ++ return -EFSCORRUPTED; ++ } ++ ++ new_node = apfs_create_node(sb, apfs_query_storage(query)); ++ if (IS_ERR(new_node)) { ++ apfs_err(sb, "node creation failed"); ++ return PTR_ERR(new_node); ++ } ++ new_node->tree_type = prev_node->tree_type; ++ new_node->flags = prev_node->flags; ++ new_node->records = 0; ++ new_node->key_free_list_len = 0; ++ new_node->val_free_list_len = 0; ++ new_node->key = new_node->free = sizeof(*new_raw); ++ new_node->data = sb->s_blocksize; /* Nonroot */ ++ ++ prev_raw = (void *)prev_node->object.data; ++ new_raw = (void *)new_node->object.data; ++ apfs_assert_in_transaction(sb, &new_raw->btn_o); ++ new_raw->btn_level = prev_raw->btn_level; ++ apfs_update_node(new_node); ++ ++ query->node = new_node; ++ new_node = NULL; ++ query->index = -1; ++ err = apfs_node_insert(query, key, key_len, val, val_len); ++ if (err) { ++ apfs_err(sb, "node record insertion failed"); ++ goto fail; ++ } ++ ++ err = apfs_attach_child(query->parent, 
query->node); ++ if (err) { ++ if (err != -EAGAIN) { ++ apfs_err(sb, "child attachment failed"); ++ goto fail; ++ } ++ err = apfs_delete_node(query->node, query->flags & APFS_QUERY_TREE_MASK); ++ if (err) { ++ apfs_err(sb, "node cleanup failed for query retry"); ++ goto fail; ++ } ++ ++ /* ++ * The query must remain pointing to the original node for the ++ * refresh to take place. The index will not matter though. ++ */ ++ new_node = query->node; ++ query->node = prev_node; ++ prev_node = NULL; ++ err = -EAGAIN; ++ goto fail; ++ } ++ apfs_btree_change_node_count(query->parent, 1 /* change */); ++ ++fail: ++ apfs_node_free(prev_node); ++ apfs_node_free(new_node); ++ return err; ++} +diff --git a/fs/apfs/object.c b/fs/apfs/object.c +new file mode 100644 +index 000000000..9636a9a29 +--- /dev/null ++++ b/fs/apfs/object.c +@@ -0,0 +1,315 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Checksum routines for an APFS object ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/* ++ * Note that this is not a generic implementation of fletcher64, as it assumes ++ * a message length that doesn't overflow sum1 and sum2. This constraint is ok ++ * for apfs, though, since the block size is limited to 2^16. For a more ++ * generic optimized implementation, see Nakassis (1988). 
++ */ ++static u64 apfs_fletcher64(void *addr, size_t len) ++{ ++ __le32 *buff = addr; ++ u64 sum1 = 0; ++ u64 sum2 = 0; ++ u64 c1, c2; ++ int i, count_32; ++ ++ count_32 = len >> 2; ++ for (i = 0; i < count_32; i++) { ++ sum1 += le32_to_cpu(buff[i]); ++ sum2 += sum1; ++ } ++ ++ c1 = sum1 + sum2; ++ c1 = 0xFFFFFFFF - do_div(c1, 0xFFFFFFFF); ++ c2 = sum1 + c1; ++ c2 = 0xFFFFFFFF - do_div(c2, 0xFFFFFFFF); ++ ++ return (c2 << 32) | c1; ++} ++ ++int apfs_obj_verify_csum(struct super_block *sb, struct buffer_head *bh) ++{ ++ /* The checksum may be stale until the transaction is committed */ ++ if (buffer_trans(bh)) ++ return 1; ++ return apfs_multiblock_verify_csum(bh->b_data, sb->s_blocksize); ++} ++ ++/** ++ * apfs_multiblock_verify_csum - Verify an object's checksum ++ * @object: the object to verify ++ * @size: size of the object in bytes (may be multiple blocks) ++ * ++ * Returns 1 on success, 0 on failure. ++ */ ++int apfs_multiblock_verify_csum(char *object, u32 size) ++{ ++ struct apfs_obj_phys *obj = (struct apfs_obj_phys *)object; ++ u64 actual_csum, header_csum; ++ ++ header_csum = le64_to_cpu(obj->o_cksum); ++ actual_csum = apfs_fletcher64(object + APFS_MAX_CKSUM_SIZE, size - APFS_MAX_CKSUM_SIZE); ++ return header_csum == actual_csum; ++} ++ ++/** ++ * apfs_obj_set_csum - Set the fletcher checksum in an object header ++ * @sb: superblock structure ++ * @obj: the object header ++ * ++ * The object must have a length of a single block. 
++ */ ++void apfs_obj_set_csum(struct super_block *sb, struct apfs_obj_phys *obj) ++{ ++ apfs_multiblock_set_csum((char *)obj, sb->s_blocksize); ++} ++ ++/** ++ * apfs_multiblock_set_csum - Set an object's checksum ++ * @object: the object to checksum ++ * @size: size of the object in bytes (may be multiple blocks) ++ */ ++void apfs_multiblock_set_csum(char *object, u32 size) ++{ ++ struct apfs_obj_phys *obj = (struct apfs_obj_phys *)object; ++ u64 cksum; ++ ++ cksum = apfs_fletcher64(object + APFS_MAX_CKSUM_SIZE, size - APFS_MAX_CKSUM_SIZE); ++ obj->o_cksum = cpu_to_le64(cksum); ++} ++ ++/** ++ * apfs_create_cpm_block - Create a new checkpoint-mapping block ++ * @sb: filesystem superblock ++ * @bno: block number to use ++ * @bh_p: on return, the buffer head for the block ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_create_cpm_block(struct super_block *sb, u64 bno, struct buffer_head **bh_p) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_checkpoint_map_phys *cpm = NULL; ++ struct buffer_head *bh = NULL; ++ int err; ++ ++ bh = apfs_getblk(sb, bno); ++ if (!bh) { ++ apfs_err(sb, "failed to map cpm block"); ++ return -EIO; ++ } ++ err = apfs_transaction_join(sb, bh); ++ if (err) { ++ brelse(bh); ++ return err; ++ } ++ set_buffer_csum(bh); ++ ++ cpm = (void *)bh->b_data; ++ memset(cpm, 0, sb->s_blocksize); ++ cpm->cpm_o.o_oid = cpu_to_le64(bno); ++ cpm->cpm_o.o_xid = cpu_to_le64(nxi->nx_xid); ++ cpm->cpm_o.o_type = cpu_to_le32(APFS_OBJ_PHYSICAL | APFS_OBJECT_TYPE_CHECKPOINT_MAP); ++ cpm->cpm_o.o_subtype = cpu_to_le32(APFS_OBJECT_TYPE_INVALID); ++ ++ /* For now: the caller will have to update these fields */ ++ cpm->cpm_flags = cpu_to_le32(APFS_CHECKPOINT_MAP_LAST); ++ cpm->cpm_count = 0; ++ ++ *bh_p = bh; ++ return 0; ++} ++ ++/** ++ * apfs_create_cpoint_map - Create a checkpoint mapping for an object ++ * @sb: filesystem superblock ++ * @cpm: checkpoint mapping block to use ++ * @obj: header for the 
ephemeral object ++ * @bno: block number for the ephemeral object ++ * @size: size of the ephemeral object in bytes ++ * ++ * Returns 0 on success or a negative error code in case of failure, which may ++ * be -ENOSPC if @cpm is full. ++ */ ++int apfs_create_cpoint_map(struct super_block *sb, struct apfs_checkpoint_map_phys *cpm, struct apfs_obj_phys *obj, u64 bno, u32 size) ++{ ++ struct apfs_checkpoint_mapping *map = NULL; ++ u32 cpm_count; ++ ++ apfs_assert_in_transaction(sb, &cpm->cpm_o); ++ ++ cpm_count = le32_to_cpu(cpm->cpm_count); ++ if (cpm_count >= apfs_max_maps_per_block(sb)) ++ return -ENOSPC; ++ map = &cpm->cpm_map[cpm_count]; ++ le32_add_cpu(&cpm->cpm_count, 1); ++ ++ map->cpm_type = obj->o_type; ++ map->cpm_subtype = obj->o_subtype; ++ map->cpm_size = cpu_to_le32(size); ++ map->cpm_pad = 0; ++ map->cpm_fs_oid = 0; ++ map->cpm_oid = obj->o_oid; ++ map->cpm_paddr = cpu_to_le64(bno); ++ return 0; ++} ++ ++/** ++ * apfs_index_in_data_area - Get position of block in current checkpoint's data ++ * @sb: superblock structure ++ * @bno: block number ++ */ ++u32 apfs_index_in_data_area(struct super_block *sb, u64 bno) ++{ ++ struct apfs_nx_superblock *raw_sb = APFS_NXI(sb)->nx_raw; ++ u64 data_base = le64_to_cpu(raw_sb->nx_xp_data_base); ++ u32 data_index = le32_to_cpu(raw_sb->nx_xp_data_index); ++ u32 data_blks = le32_to_cpu(raw_sb->nx_xp_data_blocks); ++ u64 tmp; ++ ++ tmp = bno - data_base + data_blks - data_index; ++ return do_div(tmp, data_blks); ++} ++ ++/** ++ * apfs_data_index_to_bno - Convert index in data area to block number ++ * @sb: superblock structure ++ * @index: index of the block in the current checkpoint's data area ++ */ ++u64 apfs_data_index_to_bno(struct super_block *sb, u32 index) ++{ ++ struct apfs_nx_superblock *raw_sb = APFS_NXI(sb)->nx_raw; ++ u64 data_base = le64_to_cpu(raw_sb->nx_xp_data_base); ++ u32 data_index = le32_to_cpu(raw_sb->nx_xp_data_index); ++ u32 data_blks = le32_to_cpu(raw_sb->nx_xp_data_blocks); ++ ++ return 
data_base + (index + data_index) % data_blks; ++} ++ ++/** ++ * apfs_ephemeral_object_lookup - Find an ephemeral object info in memory ++ * @sb: superblock structure ++ * @oid: ephemeral object id ++ * ++ * Returns a pointer to the object info on success, or an error pointer in case ++ * of failure. ++ */ ++struct apfs_ephemeral_object_info *apfs_ephemeral_object_lookup(struct super_block *sb, u64 oid) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_ephemeral_object_info *list = NULL; ++ int i; ++ ++ list = nxi->nx_eph_list; ++ for (i = 0; i < nxi->nx_eph_count; ++i) { ++ if (list[i].oid == oid) ++ return &list[i]; ++ } ++ apfs_err(sb, "no mapping for oid 0x%llx", oid); ++ return ERR_PTR(-EFSCORRUPTED); ++} ++ ++/** ++ * apfs_read_object_block - Map a non-ephemeral object block ++ * @sb: superblock structure ++ * @bno: block number for the object ++ * @write: request write access? ++ * @preserve: preserve the old block? ++ * ++ * On success returns the mapped buffer head for the object, which may now be ++ * in a new location if write access was requested. Returns an error pointer ++ * in case of failure. 
++ */ ++struct buffer_head *apfs_read_object_block(struct super_block *sb, u64 bno, bool write, bool preserve) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_superblock *vsb_raw = NULL; ++ struct buffer_head *bh, *new_bh; ++ struct apfs_obj_phys *obj; ++ u32 type; ++ u64 new_bno; ++ int err; ++ ++ ASSERT(write || !preserve); ++ ++ bh = apfs_sb_bread(sb, bno); ++ if (!bh) { ++ apfs_err(sb, "failed to read object block 0x%llx", bno); ++ return ERR_PTR(-EIO); ++ } ++ ++ obj = (struct apfs_obj_phys *)bh->b_data; ++ type = le32_to_cpu(obj->o_type); ++ ASSERT(!(type & APFS_OBJ_EPHEMERAL)); ++ if (nxi->nx_flags & APFS_CHECK_NODES && !apfs_obj_verify_csum(sb, bh)) { ++ apfs_err(sb, "bad checksum for object in block 0x%llx", bno); ++ err = -EFSBADCRC; ++ goto fail; ++ } ++ ++ if (!write) ++ return bh; ++ ASSERT(!(sb->s_flags & SB_RDONLY)); ++ ++ /* Is the object already part of the current transaction? */ ++ if (obj->o_xid == cpu_to_le64(nxi->nx_xid)) ++ return bh; ++ ++ err = apfs_spaceman_allocate_block(sb, &new_bno, true /* backwards */); ++ if (err) { ++ apfs_err(sb, "block allocation failed"); ++ goto fail; ++ } ++ new_bh = apfs_getblk(sb, new_bno); ++ if (!new_bh) { ++ apfs_err(sb, "failed to map block for CoW (0x%llx)", new_bno); ++ err = -EIO; ++ goto fail; ++ } ++ memcpy(new_bh->b_data, bh->b_data, sb->s_blocksize); ++ ++ /* ++ * Don't free the old copy of the object if it's part of a snapshot. ++ * Also increase the allocation count, except for the volume superblock ++ * which is never counted there. 
++ */ ++ if (!preserve) { ++ err = apfs_free_queue_insert(sb, bh->b_blocknr, 1); ++ if (err) ++ apfs_err(sb, "free queue insertion failed for 0x%llx", (unsigned long long)bh->b_blocknr); ++ } else if ((type & APFS_OBJECT_TYPE_MASK) != APFS_OBJECT_TYPE_FS) { ++ vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_fs_alloc_count, 1); ++ le64_add_cpu(&vsb_raw->apfs_total_blocks_alloced, 1); ++ } ++ ++ brelse(bh); ++ bh = new_bh; ++ new_bh = NULL; ++ if (err) ++ goto fail; ++ obj = (struct apfs_obj_phys *)bh->b_data; ++ ++ if (type & APFS_OBJ_PHYSICAL) ++ obj->o_oid = cpu_to_le64(new_bno); ++ obj->o_xid = cpu_to_le64(nxi->nx_xid); ++ err = apfs_transaction_join(sb, bh); ++ if (err) ++ goto fail; ++ ++ set_buffer_csum(bh); ++ return bh; ++ ++fail: ++ brelse(bh); ++ return ERR_PTR(err); ++} +diff --git a/fs/apfs/snapshot.c b/fs/apfs/snapshot.c +new file mode 100644 +index 000000000..77f83b455 +--- /dev/null ++++ b/fs/apfs/snapshot.c +@@ -0,0 +1,612 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2022 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_create_superblock_snapshot - Take a snapshot of the volume superblock ++ * @sb: superblock structure ++ * @bno: on return, the block number for the new superblock copy ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_create_superblock_snapshot(struct super_block *sb, u64 *bno) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct buffer_head *curr_bh = NULL; ++ struct buffer_head *snap_bh = NULL; ++ struct apfs_superblock *snap_raw = NULL; ++ int err; ++ ++ err = apfs_spaceman_allocate_block(sb, bno, true /* backwards */); ++ if (err) { ++ apfs_err(sb, "block allocation failed"); ++ goto fail; ++ } ++ ++ snap_bh = apfs_getblk(sb, *bno); ++ if (!snap_bh) { ++ apfs_err(sb, "failed to map block for volume snap (0x%llx)", *bno); ++ err = -EIO; ++ goto fail; ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ ++ curr_bh = sbi->s_vobject.o_bh; ++ memcpy(snap_bh->b_data, curr_bh->b_data, sb->s_blocksize); ++ curr_bh = NULL; ++ ++ err = apfs_transaction_join(sb, snap_bh); ++ if (err) ++ goto fail; ++ set_buffer_csum(snap_bh); ++ ++ snap_raw = (struct apfs_superblock *)snap_bh->b_data; ++ /* Volume superblocks in snapshots are physical objects */ ++ snap_raw->apfs_o.o_oid = cpu_to_le64p(bno); ++ snap_raw->apfs_o.o_type = cpu_to_le32(APFS_OBJ_PHYSICAL | APFS_OBJECT_TYPE_FS); ++ /* The omap is shared with the current volume */ ++ snap_raw->apfs_omap_oid = 0; ++ /* The extent reference tree is given by the snapshot metadata */ ++ snap_raw->apfs_extentref_tree_oid = 0; ++ /* No snapshots inside snapshots */ ++ snap_raw->apfs_snap_meta_tree_oid = 0; ++ ++ err = 0; ++fail: ++ snap_raw = NULL; ++ brelse(snap_bh); ++ snap_bh = NULL; ++ return err; ++} ++ ++static int apfs_create_snap_metadata_rec(struct inode *mntpoint, struct apfs_node *snap_root, const char *name, int name_len, u64 sblock_oid) ++{ ++ struct super_block *sb = mntpoint->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct apfs_query *query = NULL; ++ struct apfs_snap_metadata_key raw_key; ++ struct apfs_snap_metadata_val *raw_val = NULL; ++ int val_len; ++ struct timespec64 now; ++ 
u64 xid = APFS_NXI(sb)->nx_xid; ++ int err; ++ ++ query = apfs_alloc_query(snap_root, NULL /* parent */); ++ if (!query) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ apfs_init_snap_metadata_key(xid, &query->key); ++ query->flags |= APFS_QUERY_SNAP_META | APFS_QUERY_EXACT; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err == 0) { ++ apfs_err(sb, "record exists for xid 0x%llx", xid); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ if (err != -ENODATA) { ++ apfs_err(sb, "query failed for xid 0x%llx", xid); ++ goto fail; ++ } ++ ++ apfs_key_set_hdr(APFS_TYPE_SNAP_METADATA, xid, &raw_key); ++ ++ val_len = sizeof(*raw_val) + name_len + 1; ++ raw_val = kzalloc(val_len, GFP_KERNEL); ++ if (!raw_val) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ raw_val->extentref_tree_oid = vsb_raw->apfs_extentref_tree_oid; ++ raw_val->sblock_oid = cpu_to_le64(sblock_oid); ++ now = current_time(mntpoint); ++ raw_val->create_time = cpu_to_le64(timespec64_to_ns(&now)); ++ raw_val->change_time = raw_val->create_time; ++ raw_val->extentref_tree_type = vsb_raw->apfs_extentref_tree_type; ++ raw_val->flags = 0; ++ raw_val->name_len = cpu_to_le16(name_len + 1); /* Count the null byte */ ++ strscpy(raw_val->name, name, name_len + 1); ++ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ raw_val->inum = vsb_raw->apfs_next_obj_id; ++ le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1); ++ ++ err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), raw_val, val_len); ++ if (err) ++ apfs_err(sb, "insertion failed for xid 0x%llx", xid); ++fail: ++ kfree(raw_val); ++ raw_val = NULL; ++ apfs_free_query(query); ++ query = NULL; ++ return err; ++} ++ ++static int apfs_create_snap_name_rec(struct apfs_node *snap_root, const char *name, int name_len) ++{ ++ struct super_block *sb = snap_root->object.sb; ++ struct apfs_query *query = NULL; ++ struct apfs_snap_name_key *raw_key = NULL; ++ struct apfs_snap_name_val raw_val; ++ int key_len; ++ int err; ++ ++ query = apfs_alloc_query(snap_root, NULL /* parent */); ++ if 
(!query) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ apfs_init_snap_name_key(name, &query->key); ++ query->flags |= APFS_QUERY_SNAP_META | APFS_QUERY_EXACT; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err == 0) { ++ /* TODO: avoid transaction abort here */ ++ apfs_info(sb, "a snapshot with that name already exists"); ++ err = -EEXIST; ++ goto fail; ++ } ++ if (err != -ENODATA) { ++ apfs_err(sb, "query failed (%s)", name); ++ goto fail; ++ } ++ ++ key_len = sizeof(*raw_key) + name_len + 1; ++ raw_key = kzalloc(key_len, GFP_KERNEL); ++ if (!raw_key) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ apfs_key_set_hdr(APFS_TYPE_SNAP_NAME, APFS_SNAP_NAME_OBJ_ID, raw_key); ++ raw_key->name_len = cpu_to_le16(name_len + 1); /* Count the null byte */ ++ strscpy(raw_key->name, name, name_len + 1); ++ ++ raw_val.snap_xid = cpu_to_le64(APFS_NXI(sb)->nx_xid); ++ ++ err = apfs_btree_insert(query, raw_key, key_len, &raw_val, sizeof(raw_val)); ++ if (err) ++ apfs_err(sb, "insertion failed (%s)", name); ++fail: ++ kfree(raw_key); ++ raw_key = NULL; ++ apfs_free_query(query); ++ query = NULL; ++ return err; ++} ++ ++static int apfs_create_snap_meta_records(struct inode *mntpoint, const char *name, int name_len, u64 sblock_oid) ++{ ++ struct super_block *sb = mntpoint->i_sb; ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_node *snap_root = NULL; ++ int err; ++ ++ snap_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_snap_meta_tree_oid), APFS_OBJ_PHYSICAL, true /* write */); ++ if (IS_ERR(snap_root)) { ++ apfs_err(sb, "failed to read snap meta root 0x%llx", le64_to_cpu(vsb_raw->apfs_snap_meta_tree_oid)); ++ return PTR_ERR(snap_root); ++ } ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ vsb_raw->apfs_snap_meta_tree_oid = cpu_to_le64(snap_root->object.oid); ++ ++ err = apfs_create_snap_metadata_rec(mntpoint, snap_root, name, name_len, sblock_oid); ++ if (err) { ++ apfs_err(sb, "meta rec creation failed"); ++ goto fail; ++ } ++ err = 
apfs_create_snap_name_rec(snap_root, name, name_len); ++ if (err) ++ apfs_err(sb, "name rec creation failed"); ++ ++fail: ++ apfs_node_free(snap_root); ++ return err; ++} ++ ++static int apfs_create_new_extentref_tree(struct super_block *sb) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ u64 oid; ++ int err; ++ ++ err = apfs_make_empty_btree_root(sb, APFS_OBJECT_TYPE_BLOCKREFTREE, &oid); ++ if (err) { ++ apfs_err(sb, "failed to make empty root"); ++ return err; ++ } ++ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ vsb_raw->apfs_extentref_tree_oid = cpu_to_le64(oid); ++ return 0; ++} ++ ++/** ++ * apfs_update_omap_snap_tree - Add the current xid to the omap's snapshot tree ++ * @sb: filesystem superblock ++ * @oid_p: pointer to the on-disk block number for the root node ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_update_omap_snap_tree(struct super_block *sb, __le64 *oid_p) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_node *root = NULL; ++ u64 oid = le64_to_cpup(oid_p); ++ struct apfs_query *query = NULL; ++ __le64 raw_key; ++ struct apfs_omap_snapshot raw_val = {0}; ++ int err; ++ ++ /* An empty snapshots tree may not even have a root yet */ ++ if (!oid) { ++ err = apfs_make_empty_btree_root(sb, APFS_OBJECT_TYPE_OMAP_SNAPSHOT, &oid); ++ if (err) { ++ apfs_err(sb, "failed to make empty root"); ++ return err; ++ } ++ } ++ ++ root = apfs_read_node(sb, oid, APFS_OBJ_PHYSICAL, true /* write */); ++ if (IS_ERR(root)) { ++ apfs_err(sb, "failed to read omap snap root 0x%llx", oid); ++ return PTR_ERR(root); ++ } ++ oid = 0; ++ ++ query = apfs_alloc_query(root, NULL /* parent */); ++ if (!query) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ apfs_init_omap_snap_key(nxi->nx_xid, &query->key); ++ query->flags = APFS_QUERY_OMAP_SNAP | APFS_QUERY_EXACT; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err == 0) { ++ apfs_err(sb, "record exists for xid 0x%llx", nxi->nx_xid); ++ 
err = -EFSCORRUPTED; ++ goto fail; ++ } ++ if (err != -ENODATA) { ++ apfs_err(sb, "query failed for xid 0x%llx", nxi->nx_xid); ++ goto fail; ++ } ++ ++ raw_key = cpu_to_le64(nxi->nx_xid); ++ err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ if (err) ++ apfs_err(sb, "insertion failed for xid 0x%llx", nxi->nx_xid); ++ *oid_p = cpu_to_le64(root->object.block_nr); ++ ++fail: ++ apfs_free_query(query); ++ query = NULL; ++ apfs_node_free(root); ++ root = NULL; ++ return err; ++} ++ ++/** ++ * apfs_update_omap_snapshots - Add the current xid to the omap's snapshots ++ * @sb: filesystem superblock ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_update_omap_snapshots(struct super_block *sb) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct buffer_head *bh = NULL; ++ struct apfs_omap_phys *omap = NULL; ++ u64 omap_blk; ++ u64 xid; ++ int err; ++ ++ xid = APFS_NXI(sb)->nx_xid; ++ ++ omap_blk = le64_to_cpu(vsb_raw->apfs_omap_oid); ++ bh = apfs_read_object_block(sb, omap_blk, true /* write */, false /* preserve */); ++ if (IS_ERR(bh)) { ++ apfs_err(sb, "CoW failed for bno 0x%llx", omap_blk); ++ return PTR_ERR(bh); ++ } ++ omap = (struct apfs_omap_phys *)bh->b_data; ++ ++ apfs_assert_in_transaction(sb, &omap->om_o); ++ le32_add_cpu(&omap->om_snap_count, 1); ++ omap->om_most_recent_snap = cpu_to_le64(xid); ++ err = apfs_update_omap_snap_tree(sb, &omap->om_snapshot_tree_oid); ++ if (err) ++ apfs_err(sb, "omap snap tree update failed"); ++ ++ omap = NULL; ++ brelse(bh); ++ bh = NULL; ++ return err; ++} ++ ++/** ++ * apfs_do_ioc_takesnapshot - Actual work for apfs_ioc_take_snapshot() ++ * @mntpoint: inode of the mount point to snapshot ++ * @name: label for the snapshot ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_do_ioc_take_snapshot(struct inode *mntpoint, const char *name, int name_len) ++{ ++ struct super_block *sb = mntpoint->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = NULL; ++ struct apfs_omap *omap = sbi->s_omap; ++ /* TODO: remember to update the maxops in the future */ ++ struct apfs_max_ops maxops = {0}; ++ u64 sblock_oid; ++ int err; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ /* ++ * Flush the extent caches to the extenref tree before it gets moved to ++ * the snapshot. It seems safer in general to avoid big unpredictable ++ * changes to the layout after the snapshot is set up. ++ */ ++ err = apfs_transaction_flush_all_inodes(sb); ++ if (err) { ++ apfs_err(sb, "failed to flush all inodes"); ++ goto fail; ++ } ++ ++ err = apfs_create_superblock_snapshot(sb, &sblock_oid); ++ if (err) { ++ apfs_err(sb, "failed to snapshot superblock"); ++ goto fail; ++ } ++ ++ err = apfs_create_snap_meta_records(mntpoint, name, name_len, sblock_oid); ++ if (err) { ++ apfs_err(sb, "failed to create snap meta records"); ++ goto fail; ++ } ++ ++ err = apfs_create_new_extentref_tree(sb); ++ if (err) { ++ apfs_err(sb, "failed to create new extref tree"); ++ goto fail; ++ } ++ ++ err = apfs_update_omap_snapshots(sb); ++ if (err) { ++ apfs_err(sb, "failed to update omap snapshots"); ++ goto fail; ++ } ++ ++ /* ++ * The official reference allows old implementations to ignore extended ++ * snapshot metadata, so I don't see any reason why we can't do the ++ * same for now. 
++ */ ++ ++ vsb_raw = sbi->s_vsb_raw; ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ le64_add_cpu(&vsb_raw->apfs_num_snapshots, 1); ++ ++ omap->omap_latest_snap = APFS_NXI(sb)->nx_xid; ++ ++ sbi->s_nxi->nx_transaction.t_state |= APFS_NX_TRANS_FORCE_COMMIT; ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_ioc_take_snapshot - Ioctl handler for APFS_IOC_CREATE_SNAPSHOT ++ * @file: affected file ++ * @arg: ioctl argument ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++int apfs_ioc_take_snapshot(struct file *file, void __user *user_arg) ++{ ++ struct inode *inode = file_inode(file); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_ioctl_snap_name *arg = NULL; ++ size_t name_len; ++ int err; ++ ++ if (apfs_ino(inode) != APFS_ROOT_DIR_INO_NUM) { ++ apfs_info(sb, "snapshot must be requested on mountpoint"); ++ return -ENOTTY; ++ } ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++ if (!inode_owner_or_capable(inode)) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++ if (!inode_owner_or_capable(&init_user_ns, inode)) ++#else ++ if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) ++#endif ++ return -EPERM; ++ ++ err = mnt_want_write_file(file); ++ if (err) ++ return err; ++ ++ arg = kzalloc(sizeof(*arg), GFP_KERNEL); ++ if (!arg) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ if (copy_from_user(arg, user_arg, sizeof(*arg))) { ++ err = -EFAULT; ++ goto fail; ++ } ++ ++ name_len = strnlen(arg->name, sizeof(arg->name)); ++ if (name_len == sizeof(arg->name)) { ++ apfs_warn(sb, "snapshot name is too long (%d)", (int)name_len); ++ err = -EINVAL; ++ goto fail; ++ } ++ ++ err = apfs_do_ioc_take_snapshot(inode, arg->name, name_len); ++fail: ++ kfree(arg); ++ arg = NULL; ++ mnt_drop_write_file(file); ++ return err; ++} ++ ++static int apfs_snap_xid_from_query(struct apfs_query *query, u64 *xid) ++{ ++ char *raw = 
query->node->object.data; ++ __le64 *val = NULL; ++ ++ if (query->len != sizeof(*val)) { ++ apfs_err(query->node->object.sb, "bad value length (%d)", query->len); ++ return -EFSCORRUPTED; ++ } ++ val = (__le64 *)(raw + query->off); ++ ++ *xid = le64_to_cpup(val); ++ return 0; ++} ++ ++static int apfs_snapshot_name_to_xid(struct apfs_node *snap_root, const char *name, u64 *xid) ++{ ++ struct super_block *sb = snap_root->object.sb; ++ struct apfs_query *query = NULL; ++ int err; ++ ++ query = apfs_alloc_query(snap_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_snap_name_key(name, &query->key); ++ query->flags |= APFS_QUERY_SNAP_META | APFS_QUERY_EXACT; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err) { ++ if (err != -ENODATA) ++ apfs_err(sb, "query failed (%s)", name); ++ goto fail; ++ } ++ ++ err = apfs_snap_xid_from_query(query, xid); ++ if (err) ++ apfs_err(sb, "bad snap name record (%s)", name); ++fail: ++ apfs_free_query(query); ++ query = NULL; ++ return err; ++} ++ ++static int apfs_snap_sblock_from_query(struct apfs_query *query, u64 *sblock_oid) ++{ ++ char *raw = query->node->object.data; ++ struct apfs_snap_metadata_val *val = NULL; ++ ++ if (query->len < sizeof(*val)) { ++ apfs_err(query->node->object.sb, "bad value length (%d)", query->len); ++ return -EFSCORRUPTED; ++ } ++ val = (struct apfs_snap_metadata_val *)(raw + query->off); ++ ++ *sblock_oid = le64_to_cpu(val->sblock_oid); ++ return 0; ++} ++ ++static int apfs_snapshot_xid_to_sblock(struct apfs_node *snap_root, u64 xid, u64 *sblock_oid) ++{ ++ struct super_block *sb = snap_root->object.sb; ++ struct apfs_query *query = NULL; ++ int err; ++ ++ query = apfs_alloc_query(snap_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_snap_metadata_key(xid, &query->key); ++ query->flags |= APFS_QUERY_SNAP_META | APFS_QUERY_EXACT; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err) { ++ apfs_err(sb, "query failed for xid 0x%llx", xid); ++ goto fail; ++ } 
++ ++ err = apfs_snap_sblock_from_query(query, sblock_oid); ++ if (err) ++ apfs_err(sb, "bad snap meta record for xid 0x%llx", xid); ++fail: ++ apfs_free_query(query); ++ query = NULL; ++ return err; ++} ++ ++/** ++ * apfs_switch_to_snapshot - Start working with the snapshot volume superblock ++ * @sb: superblock structure ++ * ++ * Maps the volume superblock from the snapshot specified in the mount options. ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_switch_to_snapshot(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct apfs_node *snap_root = NULL; ++ const char *name = NULL; ++ u64 sblock_oid = 0; ++ u64 xid = 0; ++ int err; ++ ++ ASSERT(sb->s_flags & SB_RDONLY); ++ ++ name = sbi->s_snap_name; ++ if (strlen(name) > APFS_SNAP_MAX_NAMELEN) { ++ apfs_warn(sb, "snapshot name is too long"); ++ return -EINVAL; ++ } ++ ++ snap_root = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_snap_meta_tree_oid), APFS_OBJ_PHYSICAL, false /* write */); ++ if (IS_ERR(snap_root)) { ++ apfs_err(sb, "failed to read snap meta root 0x%llx", le64_to_cpu(vsb_raw->apfs_snap_meta_tree_oid)); ++ return PTR_ERR(snap_root); ++ } ++ vsb_raw = NULL; ++ ++ err = apfs_snapshot_name_to_xid(snap_root, name, &xid); ++ if (err) { ++ if (err == -ENODATA) ++ apfs_info(sb, "no snapshot under that name (%s)", name); ++ goto fail; ++ } ++ sbi->s_snap_xid = xid; ++ ++ err = apfs_snapshot_xid_to_sblock(snap_root, xid, &sblock_oid); ++ if (err) ++ goto fail; ++ ++ apfs_unmap_volume_super(sb); ++ err = apfs_map_volume_super_bno(sb, sblock_oid, nxi->nx_flags & APFS_CHECK_NODES); ++ if (err) ++ apfs_err(sb, "failed to map volume block 0x%llx", sblock_oid); ++ ++fail: ++ apfs_node_free(snap_root); ++ return err; ++} +diff --git a/fs/apfs/spaceman.c b/fs/apfs/spaceman.c +new file mode 100644 +index 000000000..1ac9f76e8 +--- /dev/null ++++ 
b/fs/apfs/spaceman.c +@@ -0,0 +1,1305 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2019 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_spaceman_read_cib_addr - Get the address of a cib from the spaceman ++ * @sb: superblock structure ++ * @index: index of the chunk-info block ++ * ++ * Returns the block number for the chunk-info block. ++ * ++ * This is not described in the official documentation; credit for figuring it ++ * out should go to Joachim Metz: . ++ */ ++static u64 apfs_spaceman_read_cib_addr(struct super_block *sb, int index) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ u32 offset; ++ __le64 *addr_p; ++ ++ offset = sm->sm_addr_offset + index * sizeof(*addr_p); ++ addr_p = (void *)sm_raw + offset; ++ return le64_to_cpup(addr_p); ++} ++ ++/** ++ * apfs_spaceman_write_cib_addr - Store the address of a cib in the spaceman ++ * @sb: superblock structure ++ * @index: index of the chunk-info block ++ * @addr: address of the chunk-info block ++ */ ++static void apfs_spaceman_write_cib_addr(struct super_block *sb, ++ int index, u64 addr) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ u32 offset; ++ __le64 *addr_p; ++ ++ apfs_assert_in_transaction(sb, &sm_raw->sm_o); ++ ++ offset = sm->sm_addr_offset + index * sizeof(*addr_p); ++ addr_p = (void *)sm_raw + offset; ++ *addr_p = cpu_to_le64(addr); ++} ++ ++/** ++ * apfs_max_chunks_per_cib - Find the maximum chunk count for a chunk-info block ++ * @sb: superblock structure ++ */ ++static inline int apfs_max_chunks_per_cib(struct super_block *sb) ++{ ++ return (sb->s_blocksize - sizeof(struct apfs_chunk_info_block)) / ++ sizeof(struct apfs_chunk_info); ++} ++ ++/** ++ * apfs_read_spaceman_dev - Read a space manager device structure ++ * @sb: superblock structure ++ * @dev: on-disk device structure ++ * ++ * Initializes the 
in-memory spaceman fields related to the main device; fusion ++ * drives are not yet supported. Returns 0 on success, or a negative error code ++ * in case of failure. ++ */ ++static int apfs_read_spaceman_dev(struct super_block *sb, ++ struct apfs_spaceman_device *dev) ++{ ++ struct apfs_spaceman *spaceman = APFS_SM(sb); ++ ++ if (dev->sm_cab_count) { ++ apfs_err(sb, "large devices are not supported"); ++ return -EINVAL; ++ } ++ ++ spaceman->sm_block_count = le64_to_cpu(dev->sm_block_count); ++ spaceman->sm_chunk_count = le64_to_cpu(dev->sm_chunk_count); ++ spaceman->sm_cib_count = le32_to_cpu(dev->sm_cib_count); ++ spaceman->sm_free_count = le64_to_cpu(dev->sm_free_count); ++ spaceman->sm_addr_offset = le32_to_cpu(dev->sm_addr_offset); ++ ++ /* Check that all the cib addresses fit in the spaceman object */ ++ if ((long long)spaceman->sm_addr_offset + ++ (long long)spaceman->sm_cib_count * sizeof(u64) > spaceman->sm_size) { ++ apfs_err(sb, "too many cibs (%u)", spaceman->sm_cib_count); ++ return -EFSCORRUPTED; ++ } ++ ++ return 0; ++} ++ ++/** ++ * apfs_spaceman_get_16 - Get a 16-bit value from an offset in the spaceman ++ * @sb: superblock structure ++ * @off: offset for the value ++ * ++ * Returns a pointer to the value, or NULL if it doesn't fit. ++ */ ++static __le16 *apfs_spaceman_get_16(struct super_block *sb, size_t off) ++{ ++ struct apfs_spaceman *spaceman = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw; ++ ++ if (off > spaceman->sm_size) ++ return NULL; ++ if (off + sizeof(__le16) > spaceman->sm_size) ++ return NULL; ++ return (void *)sm_raw + off; ++} ++ ++/** ++ * apfs_spaceman_get_64 - Get a 64-bit value from an offset in the spaceman ++ * @sb: superblock structure ++ * @off: offset for the value ++ * ++ * Returns a pointer to the value, or NULL if it doesn't fit. 
++ */
++static __le64 *apfs_spaceman_get_64(struct super_block *sb, size_t off)
++{
++	struct apfs_spaceman *spaceman = APFS_SM(sb);
++	struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
++
++	if (off > spaceman->sm_size)
++		return NULL;
++	if (off + sizeof(__le64) > spaceman->sm_size)
++		return NULL;
++	return (void *)sm_raw + off;
++}
++
++/**
++ * apfs_ip_bm_is_free - Check if a given ip bitmap is in the free range
++ * @sm: on-disk spaceman structure
++ * @index: offset in the ring buffer of the bitmap block to check
++ */
++static bool apfs_ip_bm_is_free(struct apfs_spaceman_phys *sm, u16 index)
++{
++	u16 free_head = le16_to_cpu(sm->sm_ip_bm_free_head);
++	u16 free_tail = le16_to_cpu(sm->sm_ip_bm_free_tail);
++	u16 free_len, index_in_free;
++	u16 bmap_count = le32_to_cpu(sm->sm_ip_bm_block_count);
++
++	free_len = (bmap_count + free_tail - free_head) % bmap_count;
++	index_in_free = (bmap_count + index - free_head) % bmap_count;
++
++	return index_in_free < free_len;
++}
++
++/**
++ * apfs_update_ip_bm_free_next - Update free_next for the internal pool
++ * @sb: superblock structure
++ *
++ * Uses the head and tail reported by the on-disk spaceman structure. Returns 0
++ * on success, or -EFSCORRUPTED if corruption is detected.
++ */
++static int apfs_update_ip_bm_free_next(struct super_block *sb)
++{
++	struct apfs_spaceman *spaceman = APFS_SM(sb);
++	struct apfs_spaceman_phys *raw = spaceman->sm_raw;
++	u32 free_next_off = le32_to_cpu(raw->sm_ip_bm_free_next_offset);
++	u32 bmap_count = le32_to_cpu(raw->sm_ip_bm_block_count);
++	__le16 *free_next;
++	u32 i;
++
++	if (free_next_off > spaceman->sm_size) {
++		apfs_err(sb, "offset out of bounds (%u)", free_next_off);
++		return -EFSCORRUPTED;
++	}
++	/*
++	 * Both values come from disk and are untrusted: do the bounds check in
++	 * 64-bit arithmetic so a crafted bmap_count can't overflow it.
++	 */
++	if ((u64)free_next_off + (u64)bmap_count * sizeof(*free_next) > spaceman->sm_size) {
++		apfs_err(sb, "free next out of bounds (%u-%u)", free_next_off, bmap_count * (u32)sizeof(*free_next));
++		return -EFSCORRUPTED;
++	}
++	free_next = (void *)raw + free_next_off;
++
++	for (i = 0; i < bmap_count; ++i) {
++		if (apfs_ip_bm_is_free(raw, i))
++			free_next[i] = cpu_to_le16((1 + i) % bmap_count);
++		else
++			free_next[i] = cpu_to_le16(0xFFFF);
++	}
++	return 0;
++}
++
++/**
++ * apfs_rotate_single_ip_bitmap - Reallocate an ip bmap in the circular buffer
++ * @sb: filesystem superblock
++ * @idx: index of the ip bitmap to reallocate
++ *
++ * Returns 0 on success or a negative error code in case of failure.
++ */
++static int apfs_rotate_single_ip_bitmap(struct super_block *sb, u32 idx)
++{
++	struct apfs_nxsb_info *nxi = APFS_NXI(sb);
++	struct apfs_spaceman *spaceman = APFS_SM(sb);
++	struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw;
++	struct buffer_head *old_bh = NULL, *new_bh = NULL;
++	u64 ring_base;
++	u32 ring_length;
++	u32 xid_off, ip_bitmap_off;
++	u64 ip_bitmap_bno;
++	u16 free_head;
++	__le64 *xid_p = NULL;
++	__le16 *ip_bitmap_p = NULL;
++	int err;
++
++	ring_base = le64_to_cpu(sm_raw->sm_ip_bm_base);
++	ring_length = le32_to_cpu(sm_raw->sm_ip_bm_block_count);
++	free_head = le16_to_cpu(sm_raw->sm_ip_bm_free_head);
++
++	xid_off = le32_to_cpu(sm_raw->sm_ip_bm_xid_offset) + idx * sizeof(*xid_p);
++	xid_p = apfs_spaceman_get_64(sb, xid_off);
++	if (!xid_p) {
++		apfs_err(sb, "xid out of bounds (%u)", xid_off);
++		return -EFSCORRUPTED;
++	}
++	*xid_p = cpu_to_le64(nxi->nx_xid);
++
++	ip_bitmap_off = le32_to_cpu(sm_raw->sm_ip_bitmap_offset) + idx * sizeof(*ip_bitmap_p);
++	ip_bitmap_p = apfs_spaceman_get_16(sb, ip_bitmap_off);
++	if (!ip_bitmap_p) {
++		apfs_err(sb, "bmap offset out of bounds (%u)",
ip_bitmap_off); ++ return -EFSCORRUPTED; ++ } ++ ++ ip_bitmap_bno = ring_base + le16_to_cpup(ip_bitmap_p); ++ old_bh = apfs_sb_bread(sb, ip_bitmap_bno); ++ if (!old_bh) { ++ apfs_err(sb, "failed to read current ip bitmap (0x%llx)", ip_bitmap_bno); ++ return -EIO; ++ } ++ ++ *ip_bitmap_p = cpu_to_le16(free_head); ++ free_head = (free_head + 1) % ring_length; ++ sm_raw->sm_ip_bm_free_head = cpu_to_le16(free_head); ++ ++ ip_bitmap_bno = ring_base + le16_to_cpup(ip_bitmap_p); ++ new_bh = apfs_getblk(sb, ip_bitmap_bno); ++ if (!new_bh) { ++ apfs_err(sb, "failed to map block for CoW (0x%llx)", ip_bitmap_bno); ++ err = -EIO; ++ goto out; ++ } ++ memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); ++ err = apfs_transaction_join(sb, new_bh); ++ if (err) ++ goto out; ++ spaceman->sm_ip_bmaps[idx] = new_bh; ++ ++out: ++ brelse(old_bh); ++ if (err) ++ brelse(new_bh); ++ return err; ++} ++ ++/** ++ * apfs_rotate_ip_bitmaps - Allocate new ip bitmaps from the circular buffer ++ * @sb: superblock structure ++ * ++ * Allocates bitmaps for the whole internal pool at once, meaning that each ++ * transaction is forced to allocate one bitmap for every ~1.32 TiB of container ++ * size, even if they won't be needed. This seems very reasonable to me, but the ++ * official implementation avoids it and they may have a good reason. ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_rotate_ip_bitmaps(struct super_block *sb) ++{ ++ struct apfs_spaceman *spaceman = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = spaceman->sm_raw; ++ u32 ring_length = le32_to_cpu(sm_raw->sm_ip_bm_block_count); ++ u32 bmaps_count = spaceman->sm_ip_bmaps_count; ++ u16 free_head, free_tail, free_len; ++ int err; ++ u32 i; ++ ++ apfs_assert_in_transaction(sb, &sm_raw->sm_o); ++ ++ free_head = le16_to_cpu(sm_raw->sm_ip_bm_free_head); ++ free_tail = le16_to_cpu(sm_raw->sm_ip_bm_free_tail); ++ ++ /* ++ * Check that we have enough room before doing anything. 
If we run out ++ * I may need to compact the ring using the blocks marked as 0xFFFF in ++ * ip_bm_free_next (TODO). ++ */ ++ free_len = (ring_length + free_tail - free_head) % ring_length; ++ if (free_len < bmaps_count) { ++ apfs_alert(sb, "full ip bitmap ring (%u < %u)", free_len, bmaps_count); ++ return -ENOSPC; ++ } ++ ++ for (i = 0; i < bmaps_count; ++i) { ++ err = apfs_rotate_single_ip_bitmap(sb, i); ++ if (err) { ++ apfs_err(sb, "failed to rotate ip bitmap %u", i); ++ return err; ++ } ++ } ++ ++ /* All bitmaps have been reallocated, so just free the same number */ ++ free_tail = (free_tail + bmaps_count) % ring_length; ++ sm_raw->sm_ip_bm_free_tail = cpu_to_le16(free_tail); ++ ++ err = apfs_update_ip_bm_free_next(sb); ++ if (err) { ++ apfs_err(sb, "failed to update bitmap ring"); ++ return err; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Free queue record data ++ */ ++struct apfs_fq_rec { ++ u64 xid; ++ u64 bno; ++ u64 len; ++}; ++ ++/** ++ * apfs_fq_rec_from_query - Read the free queue record found by a query ++ * @query: the query that found the record ++ * @fqrec: on return, the free queue record ++ * ++ * Reads the free queue record into @fqrec and performs some basic sanity ++ * checks as a protection against crafted filesystems. Returns 0 on success ++ * or -EFSCORRUPTED otherwise. 
++ */ ++static int apfs_fq_rec_from_query(struct apfs_query *query, struct apfs_fq_rec *fqrec) ++{ ++ char *raw = query->node->object.data; ++ struct apfs_spaceman_free_queue_key *key; ++ ++ if (query->key_len != sizeof(*key)) { ++ apfs_err(query->node->object.sb, "bad key length (%d)", query->key_len); ++ return -EFSCORRUPTED; ++ } ++ key = (struct apfs_spaceman_free_queue_key *)(raw + query->key_off); ++ ++ fqrec->xid = le64_to_cpu(key->sfqk_xid); ++ fqrec->bno = le64_to_cpu(key->sfqk_paddr); ++ ++ if (query->len == 0) { ++ fqrec->len = 1; /* Ghost record */ ++ return 0; ++ } else if (query->len == sizeof(__le64)) { ++ fqrec->len = le64_to_cpup((__le64 *)(raw + query->off)); ++ return 0; ++ } ++ apfs_err(query->node->object.sb, "bad value length (%d)", query->len); ++ return -EFSCORRUPTED; ++} ++ ++/** ++ * apfs_block_in_ip - Does this block belong to the internal pool? ++ * @sm: in-memory spaceman structure ++ * @bno: block number to check ++ */ ++static inline bool apfs_block_in_ip(struct apfs_spaceman *sm, u64 bno) ++{ ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ u64 start = le64_to_cpu(sm_raw->sm_ip_base); ++ u64 end = start + le64_to_cpu(sm_raw->sm_ip_block_count); ++ ++ return bno >= start && bno < end; ++} ++ ++/** ++ * apfs_ip_mark_free - Mark a block in the internal pool as free ++ * @sb: superblock structure ++ * @bno: block number (must belong to the ip) ++ */ ++static int apfs_ip_mark_free(struct super_block *sb, u64 bno) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct buffer_head *bmap_bh = NULL; ++ ++ bno -= le64_to_cpu(sm_raw->sm_ip_base); ++ bmap_bh = sm->sm_ip_bmaps[bno >> sm->sm_ip_bmaps_shift]; ++ __clear_bit_le(bno & sm->sm_ip_bmaps_mask, bmap_bh->b_data); ++ ++ return 0; ++} ++ ++/* ++ * apfs_main_free - Mark a regular block as free ++ */ ++static int apfs_main_free(struct super_block *sb, u64 bno); ++ ++/** ++ * apfs_flush_fq_rec - Delete a single fq record and mark its 
blocks as free ++ * @root: free queue root node ++ * @xid: transaction to target ++ * @len: on return, the number of freed blocks ++ * ++ * Returns 0 on success, or a negative error code in case of failure. -ENODATA ++ * in particular means that there are no matching records left. ++ */ ++static int apfs_flush_fq_rec(struct apfs_node *root, u64 xid, u64 *len) ++{ ++ struct super_block *sb = root->object.sb; ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_query *query = NULL; ++ struct apfs_fq_rec fqrec = {0}; ++ u64 bno; ++ int err; ++ ++ query = apfs_alloc_query(root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_free_queue_key(xid, 0 /* paddr */, &query->key); ++ query->flags |= APFS_QUERY_FREE_QUEUE | APFS_QUERY_ANY_NUMBER | APFS_QUERY_EXACT; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err) { ++ if (err != -ENODATA) ++ apfs_err(sb, "query failed for xid 0x%llx, paddr 0x%llx", xid, 0ULL); ++ goto fail; ++ } ++ err = apfs_fq_rec_from_query(query, &fqrec); ++ if (err) { ++ apfs_err(sb, "bad free queue rec for xid 0x%llx", xid); ++ goto fail; ++ } ++ ++ for (bno = fqrec.bno; bno < fqrec.bno + fqrec.len; ++bno) { ++ if (apfs_block_in_ip(sm, bno)) ++ err = apfs_ip_mark_free(sb, bno); ++ else ++ err = apfs_main_free(sb, bno); ++ if (err) { ++ apfs_err(sb, "freeing block 0x%llx failed (%d)", (unsigned long long)bno, err); ++ goto fail; ++ } ++ } ++ err = apfs_btree_remove(query); ++ if (err) { ++ apfs_err(sb, "removal failed for xid 0x%llx", xid); ++ goto fail; ++ } ++ *len = fqrec.len; ++ ++fail: ++ apfs_free_query(query); ++ return err; ++} ++ ++/** ++ * apfs_free_queue_oldest_xid - Find the oldest xid among the free queue records ++ * @root: free queue root node ++ */ ++static u64 apfs_free_queue_oldest_xid(struct apfs_node *root) ++{ ++ struct apfs_spaceman_free_queue_key *key; ++ char *raw = root->object.data; ++ int len, off; ++ ++ if (root->records == 0) ++ return 0; ++ len = apfs_node_locate_key(root, 0, &off); ++ if (len != 
sizeof(*key)) { ++ /* TODO: abort transaction */ ++ apfs_err(root->object.sb, "bad key length (%d)", len); ++ return 0; ++ } ++ key = (struct apfs_spaceman_free_queue_key *)(raw + off); ++ return le64_to_cpu(key->sfqk_xid); ++} ++ ++/** ++ * apfs_flush_free_queue - Free ip blocks queued by old transactions ++ * @sb: superblock structure ++ * @qid: queue to be freed ++ * @force: flush as much as possible ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_flush_free_queue(struct super_block *sb, unsigned int qid, bool force) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct apfs_spaceman_free_queue *fq = &sm_raw->sm_fq[qid]; ++ struct apfs_node *fq_root; ++ u64 oldest = le64_to_cpu(fq->sfq_oldest_xid); ++ int err; ++ ++ fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid), ++ APFS_OBJ_EPHEMERAL, true /* write */); ++ if (IS_ERR(fq_root)) { ++ apfs_err(sb, "failed to read fq root 0x%llx", le64_to_cpu(fq->sfq_tree_oid)); ++ return PTR_ERR(fq_root); ++ } ++ ++ while (oldest) { ++ u64 sfq_count; ++ ++ /* ++ * Try to preserve one transaction here. I don't really know ++ * what free queues are for so this is probably silly. ++ */ ++ if (force) { ++ if (oldest == nxi->nx_xid) ++ break; ++ } else { ++ if (oldest + 1 >= nxi->nx_xid) ++ break; ++ } ++ ++ while (true) { ++ u64 count = 0; ++ ++ /* Probably not very efficient... 
*/ ++ err = apfs_flush_fq_rec(fq_root, oldest, &count); ++ if (err == -ENODATA) { ++ err = 0; ++ break; ++ } else if (err) { ++ apfs_err(sb, "failed to flush fq"); ++ goto fail; ++ } else { ++ le64_add_cpu(&fq->sfq_count, -count); ++ } ++ } ++ oldest = apfs_free_queue_oldest_xid(fq_root); ++ fq->sfq_oldest_xid = cpu_to_le64(oldest); ++ ++ if (force) ++ continue; ++ ++ /* ++ * Flushing a single transaction may not be enough to avoid ++ * running out of space in the ip, but it's probably best not ++ * to flush all the old transactions at once either. We use a ++ * harsher version of the apfs_transaction_need_commit() check, ++ * to make sure we won't be forced to commit again right away. ++ */ ++ sfq_count = le64_to_cpu(fq->sfq_count); ++ if (qid == APFS_SFQ_IP && sfq_count * 6 <= le64_to_cpu(sm_raw->sm_ip_block_count)) ++ break; ++ if (qid == APFS_SFQ_MAIN && sfq_count <= TRANSACTION_MAIN_QUEUE_MAX - 200) ++ break; ++ } ++ ++fail: ++ apfs_node_free(fq_root); ++ return err; ++} ++ ++/** ++ * apfs_allocate_spaceman - Allocate an in-memory spaceman struct, if needed ++ * @sb: superblock structure ++ * @bmap_cnt: internal pool bitmap count ++ * ++ * Returns the spaceman and sets it in the superblock info. Also sets the fixed ++ * information about the ip bitmap count. On failure, returns an error pointer. 
++ */ ++static struct apfs_spaceman *apfs_allocate_spaceman(struct super_block *sb, u32 bmap_cnt) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_spaceman *spaceman = NULL; ++ int blk_bitcnt = sb->s_blocksize * 8; ++ size_t sm_size; ++ ++ if (nxi->nx_spaceman) ++ return nxi->nx_spaceman; ++ ++ /* We don't expect filesystems this big, it would be like 260 TiB */ ++ if (bmap_cnt > 200) { ++ apfs_err(sb, "too many ip bitmap blocks (%u)", bmap_cnt); ++ return ERR_PTR(-EFSCORRUPTED); ++ } ++ sm_size = sizeof(*spaceman) + bmap_cnt * sizeof(spaceman->sm_ip_bmaps[0]); ++ ++ spaceman = nxi->nx_spaceman = kzalloc(sm_size, GFP_KERNEL); ++ if (!spaceman) ++ return ERR_PTR(-ENOMEM); ++ spaceman->sm_nxi = nxi; ++ ++ spaceman->sm_ip_bmaps_count = bmap_cnt; ++ spaceman->sm_ip_bmaps_mask = blk_bitcnt - 1; ++ spaceman->sm_ip_bmaps_shift = order_base_2(blk_bitcnt); ++ return spaceman; ++} ++ ++/** ++ * apfs_read_spaceman - Find and read the space manager ++ * @sb: superblock structure ++ * ++ * Reads the space manager structure from disk and initializes its in-memory ++ * counterpart; returns 0 on success, or a negative error code in case of ++ * failure. 
++ */ ++int apfs_read_spaceman(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_superblock *raw_sb = nxi->nx_raw; ++ struct apfs_spaceman *spaceman = NULL; ++ struct apfs_ephemeral_object_info *sm_eph_info = NULL; ++ struct apfs_spaceman_phys *sm_raw; ++ u32 sm_flags; ++ u64 oid = le64_to_cpu(raw_sb->nx_spaceman_oid); ++ int err; ++ ++ if (sb->s_flags & SB_RDONLY) /* The space manager won't be needed */ ++ return 0; ++ ++ sm_eph_info = apfs_ephemeral_object_lookup(sb, oid); ++ if (IS_ERR(sm_eph_info)) { ++ apfs_err(sb, "no spaceman object for oid 0x%llx", oid); ++ return PTR_ERR(sm_eph_info); ++ } ++ sm_raw = (struct apfs_spaceman_phys *)sm_eph_info->object; ++ sm_raw->sm_o.o_xid = cpu_to_le64(nxi->nx_xid); ++ ++ spaceman = apfs_allocate_spaceman(sb, le32_to_cpu(sm_raw->sm_ip_bm_size_in_blocks)); ++ if (IS_ERR(spaceman)) { ++ apfs_err(sb, "failed to allocate spaceman"); ++ err = PTR_ERR(spaceman); ++ goto fail; ++ } ++ spaceman->sm_raw = sm_raw; ++ spaceman->sm_size = sm_eph_info->size; ++ ++ spaceman->sm_free_cache_base = spaceman->sm_free_cache_blkcnt = 0; ++ ++ sm_flags = le32_to_cpu(sm_raw->sm_flags); ++ /* Undocumented feature, but it's too common to refuse to mount */ ++ if (sm_flags & APFS_SM_FLAG_VERSIONED) ++ pr_warn_once("APFS: space manager is versioned\n"); ++ ++ /* Only read the main device; fusion drives are not yet supported */ ++ err = apfs_read_spaceman_dev(sb, &sm_raw->sm_dev[APFS_SD_MAIN]); ++ if (err) { ++ apfs_err(sb, "failed to read main device"); ++ goto fail; ++ } ++ ++ spaceman->sm_blocks_per_chunk = ++ le32_to_cpu(sm_raw->sm_blocks_per_chunk); ++ spaceman->sm_chunks_per_cib = le32_to_cpu(sm_raw->sm_chunks_per_cib); ++ if (spaceman->sm_chunks_per_cib > apfs_max_chunks_per_cib(sb)) { ++ apfs_err(sb, "too many chunks per cib (%u)", spaceman->sm_chunks_per_cib); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ err = apfs_rotate_ip_bitmaps(sb); ++ if (err) { ++ apfs_err(sb, "failed to rotate ip 
bitmaps"); ++ goto fail; ++ } ++ err = apfs_flush_free_queue(sb, APFS_SFQ_IP, false /* force */); ++ if (err) { ++ apfs_err(sb, "failed to flush ip fq"); ++ goto fail; ++ } ++ err = apfs_flush_free_queue(sb, APFS_SFQ_MAIN, false /* force */); ++ if (err) { ++ apfs_err(sb, "failed to flush main fq"); ++ goto fail; ++ } ++ return 0; ++ ++fail: ++ spaceman->sm_raw = NULL; ++ return err; ++} ++ ++/** ++ * apfs_write_spaceman - Write the in-memory spaceman fields to the disk buffer ++ * @sm: in-memory spaceman structure ++ * ++ * Copies the updated in-memory fields of the space manager into the on-disk ++ * structure; the buffer is not dirtied. ++ */ ++static void apfs_write_spaceman(struct apfs_spaceman *sm) ++{ ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct apfs_spaceman_device *dev_raw = &sm_raw->sm_dev[APFS_SD_MAIN]; ++ struct apfs_nxsb_info *nxi; ++ ++ nxi = sm->sm_nxi; ++ ASSERT(le64_to_cpu(sm_raw->sm_o.o_xid) == nxi->nx_xid); ++ ++ dev_raw->sm_free_count = cpu_to_le64(sm->sm_free_count); ++} ++ ++/** ++ * apfs_ip_find_free - Find a free block inside the internal pool ++ * @sb: superblock structure ++ * ++ * Returns the block number for a free block, or 0 in case of corruption. 
++ */ ++static u64 apfs_ip_find_free(struct super_block *sb) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ int blk_bitcnt = sb->s_blocksize * 8; ++ u64 full_bitcnt = le64_to_cpu(sm_raw->sm_ip_block_count); ++ u32 i; ++ ++ for (i = 0; i < sm->sm_ip_bmaps_count; ++i) { ++ char *bitmap = sm->sm_ip_bmaps[i]->b_data; ++ u64 off_in_bmap_blk, off_in_ip; ++ ++ off_in_bmap_blk = find_next_zero_bit_le(bitmap, blk_bitcnt, 0 /* offset */); ++ if (off_in_bmap_blk >= blk_bitcnt) /* No space in this chunk */ ++ continue; ++ ++ /* We found something, confirm that it's not outside the ip */ ++ off_in_ip = (i << sm->sm_ip_bmaps_shift) + off_in_bmap_blk; ++ if (off_in_ip >= full_bitcnt) ++ break; ++ return le64_to_cpu(sm_raw->sm_ip_base) + off_in_ip; ++ } ++ apfs_err(sb, "internal pool seems full"); ++ return 0; ++} ++ ++/** ++ * apfs_chunk_find_free - Find a free block inside a chunk ++ * @sb: superblock structure ++ * @bitmap: allocation bitmap for the chunk, which should have free blocks ++ * @addr: number of the first block in the chunk ++ * ++ * Returns the block number for a free block, or 0 in case of corruption. 
++ */ ++static u64 apfs_chunk_find_free(struct super_block *sb, char *bitmap, u64 addr) ++{ ++ int bitcount = sb->s_blocksize * 8; ++ u64 bno; ++ ++ bno = find_next_zero_bit_le(bitmap, bitcount, 0 /* offset */); ++ if (bno >= bitcount) ++ return 0; ++ return addr + bno; ++} ++ ++/** ++ * apfs_ip_mark_used - Mark a block in the internal pool as used ++ * @sb: superblock strucuture ++ * @bno: block number (must belong to the ip) ++ */ ++static void apfs_ip_mark_used(struct super_block *sb, u64 bno) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct buffer_head *bmap_bh = NULL; ++ ++ bno -= le64_to_cpu(sm_raw->sm_ip_base); ++ bmap_bh = sm->sm_ip_bmaps[bno >> sm->sm_ip_bmaps_shift]; ++ __set_bit_le(bno & sm->sm_ip_bmaps_mask, bmap_bh->b_data); ++} ++ ++/** ++ * apfs_chunk_mark_used - Mark a block inside a chunk as used ++ * @sb: superblock structure ++ * @bitmap: allocation bitmap for the chunk ++ * @bno: block number (must belong to the chunk) ++ */ ++static inline void apfs_chunk_mark_used(struct super_block *sb, char *bitmap, ++ u64 bno) ++{ ++ int bitcount = sb->s_blocksize * 8; ++ ++ __set_bit_le(bno & (bitcount - 1), bitmap); ++} ++ ++/** ++ * apfs_chunk_mark_free - Mark a block inside a chunk as free ++ * @sb: superblock structure ++ * @bitmap: allocation bitmap for the chunk ++ * @bno: block number (must belong to the chunk) ++ */ ++static inline int apfs_chunk_mark_free(struct super_block *sb, char *bitmap, ++ u64 bno) ++{ ++ int bitcount = sb->s_blocksize * 8; ++ ++ return __test_and_clear_bit_le(bno & (bitcount - 1), bitmap); ++} ++ ++/** ++ * apfs_free_queue_try_insert - Try to add a block range to its free queue ++ * @sb: superblock structure ++ * @bno: first block number to free ++ * @count: number of consecutive blocks to free ++ * ++ * Same as apfs_free_queue_insert_nocache(), except that this one can also fail ++ * with -EAGAIN if there is no room for the new record, so that the caller can ++ * 
flush the queue and retry. ++ */ ++static int apfs_free_queue_try_insert(struct super_block *sb, u64 bno, u64 count) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct apfs_spaceman_free_queue *fq; ++ struct apfs_node *fq_root = NULL; ++ struct apfs_btree_info *fq_info = NULL; ++ struct apfs_query *query = NULL; ++ struct apfs_spaceman_free_queue_key raw_key; ++ bool ghost = count == 1; ++ int needed_room; ++ __le64 raw_val; ++ u64 node_count; ++ u16 node_limit; ++ int err; ++ ++ if (apfs_block_in_ip(sm, bno)) ++ fq = &sm_raw->sm_fq[APFS_SFQ_IP]; ++ else ++ fq = &sm_raw->sm_fq[APFS_SFQ_MAIN]; ++ ++ fq_root = apfs_read_node(sb, le64_to_cpu(fq->sfq_tree_oid), ++ APFS_OBJ_EPHEMERAL, true /* write */); ++ if (IS_ERR(fq_root)) { ++ apfs_err(sb, "failed to read fq root 0x%llx", le64_to_cpu(fq->sfq_tree_oid)); ++ return PTR_ERR(fq_root); ++ } ++ ++ query = apfs_alloc_query(fq_root, NULL /* parent */); ++ if (!query) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ apfs_init_free_queue_key(nxi->nx_xid, bno, &query->key); ++ query->flags |= APFS_QUERY_FREE_QUEUE; ++ ++ err = apfs_btree_query(sb, &query); ++ if (err && err != -ENODATA) { ++ apfs_err(sb, "query failed for xid 0x%llx, paddr 0x%llx", nxi->nx_xid, bno); ++ goto fail; ++ } ++ ++ fq_info = (void *)fq_root->object.data + sb->s_blocksize - sizeof(*fq_info); ++ node_count = le64_to_cpu(fq_info->bt_node_count); ++ node_limit = le16_to_cpu(fq->sfq_tree_node_limit); ++ if (node_count == node_limit) { ++ needed_room = sizeof(raw_key) + (ghost ? 
0 : sizeof(raw_val)); ++ if (!apfs_node_has_room(query->node, needed_room, false /* replace */)) { ++ err = -EAGAIN; ++ goto fail; ++ } ++ } ++ ++ raw_key.sfqk_xid = cpu_to_le64(nxi->nx_xid); ++ raw_key.sfqk_paddr = cpu_to_le64(bno); ++ if (ghost) { ++ /* A lack of value (ghost record) means single-block extent */ ++ err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), NULL /* val */, 0 /* val_len */); ++ } else { ++ raw_val = cpu_to_le64(count); ++ err = apfs_btree_insert(query, &raw_key, sizeof(raw_key), &raw_val, sizeof(raw_val)); ++ } ++ if (err) { ++ apfs_err(sb, "insertion failed for xid 0x%llx, paddr 0x%llx", nxi->nx_xid, bno); ++ goto fail; ++ } ++ ++ if (!fq->sfq_oldest_xid) ++ fq->sfq_oldest_xid = cpu_to_le64(nxi->nx_xid); ++ le64_add_cpu(&fq->sfq_count, count); ++ ++fail: ++ apfs_free_query(query); ++ apfs_node_free(fq_root); ++ return err; ++} ++ ++/** ++ * apfs_free_queue_insert_nocache - Add a block range to its free queue ++ * @sb: superblock structure ++ * @bno: first block number to free ++ * @count: number of consecutive blocks to free ++ * ++ * Same as apfs_free_queue_insert(), but writes to the free queue directly, ++ * bypassing the cache of the latest freed block range. ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_free_queue_insert_nocache(struct super_block *sb, u64 bno, u64 count) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ unsigned int qid; ++ int err; ++ ++ err = apfs_free_queue_try_insert(sb, bno, count); ++ if (err == -EAGAIN) { ++ qid = apfs_block_in_ip(sm, bno) ? 
APFS_SFQ_IP : APFS_SFQ_MAIN; ++ err = apfs_flush_free_queue(sb, qid, true /* force */); ++ if (err) { ++ apfs_err(sb, "failed to flush fq to make room"); ++ return err; ++ } ++ err = apfs_free_queue_try_insert(sb, bno, count); ++ } ++ if (err) { ++ if (err == -EAGAIN) { ++ apfs_alert(sb, "failed to make room in fq - bug!"); ++ err = -EFSCORRUPTED; ++ } ++ apfs_err(sb, "fq insert failed (0x%llx-0x%llx)", bno, count); ++ return err; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_free_queue_insert - Add a block range to its free queue ++ * @sb: superblock structure ++ * @bno: first block number to free ++ * @count: number of consecutive blocks to free ++ * ++ * Uses a cache to delay the actual tree operations as much as possible. ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_free_queue_insert(struct super_block *sb, u64 bno, u64 count) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ int err; ++ ++ if (sm->sm_free_cache_base == 0) { ++ /* Nothing yet cached */ ++ sm->sm_free_cache_base = bno; ++ sm->sm_free_cache_blkcnt = count; ++ return 0; ++ } ++ ++ /* ++ * First attempt to extend the cache of freed blocks, but never cache ++ * a range that doesn't belong to a single free queue. 
++ */ ++ if (apfs_block_in_ip(sm, bno) == apfs_block_in_ip(sm, sm->sm_free_cache_base)) { ++ if (bno == sm->sm_free_cache_base + sm->sm_free_cache_blkcnt) { ++ sm->sm_free_cache_blkcnt += count; ++ return 0; ++ } ++ if (bno + count == sm->sm_free_cache_base) { ++ sm->sm_free_cache_base -= count; ++ sm->sm_free_cache_blkcnt += count; ++ return 0; ++ } ++ } ++ ++ /* Failed to extend the cache, so flush it and replace it */ ++ err = apfs_free_queue_insert_nocache(sb, sm->sm_free_cache_base, sm->sm_free_cache_blkcnt); ++ if (err) { ++ apfs_err(sb, "fq cache flush failed (0x%llx-0x%llx)", sm->sm_free_cache_base, sm->sm_free_cache_blkcnt); ++ return err; ++ } ++ sm->sm_free_cache_base = bno; ++ sm->sm_free_cache_blkcnt = count; ++ return 0; ++} ++ ++/** ++ * apfs_chunk_alloc_free - Allocate or free block in given CIB and chunk ++ * @sb: superblock structure ++ * @cib_bh: buffer head for the chunk-info block ++ * @index: index of this chunk's info structure inside @cib ++ * @bno: block number ++ * @is_alloc: true to allocate, false to free ++ */ ++static int apfs_chunk_alloc_free(struct super_block *sb, ++ struct buffer_head **cib_bh, ++ int index, u64 *bno, bool is_alloc) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_chunk_info_block *cib; ++ struct apfs_chunk_info *ci; ++ struct buffer_head *bmap_bh = NULL; ++ char *bmap = NULL; ++ bool old_cib = false; ++ bool old_bmap = false; ++ int err = 0; ++ ++ cib = (struct apfs_chunk_info_block *)(*cib_bh)->b_data; ++ ci = &cib->cib_chunk_info[index]; ++ ++ /* Cibs and bitmaps from old transactions can't be modified in place */ ++ if (le64_to_cpu(cib->cib_o.o_xid) < nxi->nx_xid) ++ old_cib = true; ++ if (le64_to_cpu(ci->ci_xid) < nxi->nx_xid) ++ old_bmap = true; ++ if (is_alloc && le32_to_cpu(ci->ci_free_count) < 1) ++ return -ENOSPC; ++ ++ /* Read the current bitmap, or allocate it if necessary */ ++ if (!ci->ci_bitmap_addr) { ++ u64 bmap_bno; ++ ++ if (!is_alloc) { 
++ apfs_err(sb, "attempt to free block in all-free chunk"); ++ return -EFSCORRUPTED; ++ } ++ ++ /* All blocks in this chunk are free */ ++ bmap_bno = apfs_ip_find_free(sb); ++ if (!bmap_bno) { ++ apfs_err(sb, "no free blocks in ip"); ++ return -EFSCORRUPTED; ++ } ++ bmap_bh = apfs_sb_bread(sb, bmap_bno); ++ } else { ++ bmap_bh = apfs_sb_bread(sb, le64_to_cpu(ci->ci_bitmap_addr)); ++ } ++ if (!bmap_bh) { ++ apfs_err(sb, "failed to read bitmap block"); ++ return -EIO; ++ } ++ bmap = bmap_bh->b_data; ++ if (!ci->ci_bitmap_addr) { ++ memset(bmap, 0, sb->s_blocksize); ++ old_bmap = false; ++ } ++ ++ /* Write the bitmap to its location for the next transaction */ ++ if (old_bmap) { ++ struct buffer_head *new_bmap_bh; ++ u64 new_bmap_bno; ++ ++ new_bmap_bno = apfs_ip_find_free(sb); ++ if (!new_bmap_bno) { ++ apfs_err(sb, "no free blocks in ip"); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ new_bmap_bh = apfs_getblk(sb, new_bmap_bno); ++ if (!new_bmap_bh) { ++ apfs_err(sb, "failed to map new bmap block (0x%llx)", new_bmap_bno); ++ err = -EIO; ++ goto fail; ++ } ++ memcpy(new_bmap_bh->b_data, bmap, sb->s_blocksize); ++ err = apfs_free_queue_insert(sb, bmap_bh->b_blocknr, 1); ++ brelse(bmap_bh); ++ bmap_bh = new_bmap_bh; ++ if (err) { ++ apfs_err(sb, "free queue insertion failed"); ++ goto fail; ++ } ++ bmap = bmap_bh->b_data; ++ } ++ apfs_ip_mark_used(sb, bmap_bh->b_blocknr); ++ ++ /* Write the cib to its location for the next transaction */ ++ if (old_cib) { ++ struct buffer_head *new_cib_bh; ++ u64 new_cib_bno; ++ ++ new_cib_bno = apfs_ip_find_free(sb); ++ if (!new_cib_bno) { ++ apfs_err(sb, "no free blocks in ip"); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ new_cib_bh = apfs_getblk(sb, new_cib_bno); ++ if (!new_cib_bh) { ++ apfs_err(sb, "failed to map new cib block (0x%llx)", new_cib_bno); ++ err = -EIO; ++ goto fail; ++ } ++ memcpy(new_cib_bh->b_data, (*cib_bh)->b_data, sb->s_blocksize); ++ err = apfs_free_queue_insert(sb, (*cib_bh)->b_blocknr, 1); ++ 
brelse(*cib_bh); ++ *cib_bh = new_cib_bh; ++ if (err) { ++ apfs_err(sb, "free queue insertion failed"); ++ goto fail; ++ } ++ ++ err = apfs_transaction_join(sb, *cib_bh); ++ if (err) ++ goto fail; ++ ++ cib = (struct apfs_chunk_info_block *)(*cib_bh)->b_data; ++ ci = &cib->cib_chunk_info[index]; ++ cib->cib_o.o_oid = cpu_to_le64(new_cib_bno); ++ cib->cib_o.o_xid = cpu_to_le64(nxi->nx_xid); ++ ++ apfs_ip_mark_used(sb, new_cib_bno); ++ } ++ ++ /* The chunk info can be updated now */ ++ apfs_assert_in_transaction(sb, &cib->cib_o); ++ ci->ci_xid = cpu_to_le64(nxi->nx_xid); ++ le32_add_cpu(&ci->ci_free_count, is_alloc ? -1 : 1); ++ ci->ci_bitmap_addr = cpu_to_le64(bmap_bh->b_blocknr); ++ ASSERT(buffer_trans(*cib_bh)); ++ set_buffer_csum(*cib_bh); ++ ++ /* Finally, allocate / free the actual block that was requested */ ++ if (is_alloc) { ++ *bno = apfs_chunk_find_free(sb, bmap, le64_to_cpu(ci->ci_addr)); ++ if (!*bno) { ++ apfs_err(sb, "no free blocks in chunk"); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ apfs_chunk_mark_used(sb, bmap, *bno); ++ sm->sm_free_count -= 1; ++ } else { ++ if (!apfs_chunk_mark_free(sb, bmap, *bno)) { ++ apfs_err(sb, "block already marked as free (0x%llx)", *bno); ++ le32_add_cpu(&ci->ci_free_count, -1); ++ set_buffer_csum(*cib_bh); ++ err = -EFSCORRUPTED; ++ } else ++ sm->sm_free_count += 1; ++ } ++ mark_buffer_dirty(bmap_bh); ++ ++fail: ++ brelse(bmap_bh); ++ return err; ++} ++ ++/** ++ * apfs_chunk_allocate_block - Allocate a single block from a chunk ++ * @sb: superblock structure ++ * @cib_bh: buffer head for the chunk-info block ++ * @index: index of this chunk's info structure inside @cib ++ * @bno: on return, the allocated block number ++ * ++ * Finds a free block in the chunk and marks it as used; the buffer at @cib_bh ++ * may be replaced if needed for copy-on-write. Returns 0 on success, or a ++ * negative error code in case of failure. 
++ */ ++static int apfs_chunk_allocate_block(struct super_block *sb, ++ struct buffer_head **cib_bh, ++ int index, u64 *bno) ++{ ++ return apfs_chunk_alloc_free(sb, cib_bh, index, bno, true); ++} ++ ++/** ++ * apfs_cib_allocate_block - Allocate a single block from a cib ++ * @sb: superblock structure ++ * @cib_bh: buffer head for the chunk-info block ++ * @bno: on return, the allocated block number ++ * @backwards: start the search on the last chunk ++ * ++ * Finds a free block among all the chunks in the cib and marks it as used; the ++ * buffer at @cib_bh may be replaced if needed for copy-on-write. Returns 0 on ++ * success, or a negative error code in case of failure. ++ */ ++static int apfs_cib_allocate_block(struct super_block *sb, ++ struct buffer_head **cib_bh, u64 *bno, bool backwards) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_chunk_info_block *cib; ++ u32 chunk_count; ++ int i; ++ ++ cib = (struct apfs_chunk_info_block *)(*cib_bh)->b_data; ++ if (nxi->nx_flags & APFS_CHECK_NODES && !apfs_obj_verify_csum(sb, *cib_bh)) { ++ apfs_err(sb, "bad checksum for chunk-info block"); ++ return -EFSBADCRC; ++ } ++ ++ /* Avoid out-of-bounds operations on corrupted cibs */ ++ chunk_count = le32_to_cpu(cib->cib_chunk_info_count); ++ if (chunk_count > sm->sm_chunks_per_cib) { ++ apfs_err(sb, "too many chunks in cib (%u)", chunk_count); ++ return -EFSCORRUPTED; ++ } ++ ++ for (i = 0; i < chunk_count; ++i) { ++ int index; ++ int err; ++ ++ index = backwards ? 
chunk_count - 1 - i : i; ++ ++ err = apfs_chunk_allocate_block(sb, cib_bh, index, bno); ++ if (err == -ENOSPC) /* This chunk is full */ ++ continue; ++ if (err) ++ apfs_err(sb, "error during allocation"); ++ return err; ++ } ++ return -ENOSPC; ++} ++ ++/** ++ * apfs_spaceman_allocate_block - Allocate a single on-disk block ++ * @sb: superblock structure ++ * @bno: on return, the allocated block number ++ * @backwards: start the search on the last chunk ++ * ++ * Finds a free block among the spaceman bitmaps and marks it as used. Returns ++ * 0 on success, or a negative error code in case of failure. ++ */ ++int apfs_spaceman_allocate_block(struct super_block *sb, u64 *bno, bool backwards) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ int i; ++ ++ for (i = 0; i < sm->sm_cib_count; ++i) { ++ struct buffer_head *cib_bh; ++ u64 cib_bno; ++ int index; ++ int err; ++ ++ /* Keep extents and metadata separate to limit fragmentation */ ++ index = backwards ? sm->sm_cib_count - 1 - i : i; ++ ++ cib_bno = apfs_spaceman_read_cib_addr(sb, index); ++ cib_bh = apfs_sb_bread(sb, cib_bno); ++ if (!cib_bh) { ++ apfs_err(sb, "failed to read cib"); ++ return -EIO; ++ } ++ ++ err = apfs_cib_allocate_block(sb, &cib_bh, bno, backwards); ++ if (!err) { ++ /* The cib may have been moved */ ++ apfs_spaceman_write_cib_addr(sb, index, cib_bh->b_blocknr); ++ /* The free block count has changed */ ++ apfs_write_spaceman(sm); ++ } ++ brelse(cib_bh); ++ if (err == -ENOSPC) /* This cib is full */ ++ continue; ++ if (err) ++ apfs_err(sb, "error during allocation"); ++ return err; ++ } ++ return -ENOSPC; ++} ++ ++/** ++ * apfs_chunk_free - Mark a regular block as free given CIB and chunk ++ * @sb: superblock structure ++ * @cib_bh: buffer head for the chunk-info block ++ * @index: index of this chunk's info structure inside @cib ++ * @bno: block number (must not belong to the ip) ++ */ ++static int apfs_chunk_free(struct super_block *sb, ++ struct buffer_head **cib_bh, ++ int index, u64 bno) ++{ 
++ return apfs_chunk_alloc_free(sb, cib_bh, index, &bno, false); ++} ++ ++/** ++ * apfs_main_free - Mark a regular block as free ++ * @sb: superblock structure ++ * @bno: block number (must not belong to the ip) ++ */ ++static int apfs_main_free(struct super_block *sb, u64 bno) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ u64 cib_idx, chunk_idx; ++ struct buffer_head *cib_bh; ++ u64 cib_bno; ++ int err; ++ ++ if (!sm_raw->sm_blocks_per_chunk || !sm_raw->sm_chunks_per_cib) { ++ apfs_err(sb, "block or chunk count not set"); ++ return -EINVAL; ++ } ++ /* TODO: use bitshifts instead of do_div() */ ++ chunk_idx = bno; ++ do_div(chunk_idx, sm->sm_blocks_per_chunk); ++ cib_idx = chunk_idx; ++ chunk_idx = do_div(cib_idx, sm->sm_chunks_per_cib); ++ ++ cib_bno = apfs_spaceman_read_cib_addr(sb, cib_idx); ++ cib_bh = apfs_sb_bread(sb, cib_bno); ++ if (!cib_bh) { ++ apfs_err(sb, "failed to read cib"); ++ return -EIO; ++ } ++ ++ err = apfs_chunk_free(sb, &cib_bh, chunk_idx, bno); ++ if (!err) { ++ /* The cib may have been moved */ ++ apfs_spaceman_write_cib_addr(sb, cib_idx, cib_bh->b_blocknr); ++ /* The free block count has changed */ ++ apfs_write_spaceman(sm); ++ } ++ brelse(cib_bh); ++ if (err) ++ apfs_err(sb, "error during free"); ++ ++ return err; ++} +diff --git a/fs/apfs/super.c b/fs/apfs/super.c +new file mode 100644 +index 000000000..393a97239 +--- /dev/null ++++ b/fs/apfs/super.c +@@ -0,0 +1,1788 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. 
Fernández ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "apfs.h" ++#include "version.h" ++ ++#define APFS_MODULE_ID_STRING "linux-apfs by eafer (" GIT_COMMIT ")" ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) /* iversion came in 4.16 */ ++#include ++#endif ++ ++/* Keep a list of mounted containers, so that their volumes can share them */ ++DEFINE_MUTEX(nxs_mutex); ++static LIST_HEAD(nxs); ++ ++/** ++ * apfs_nx_find_by_dev - Search for a device in the list of mounted containers ++ * @dev: device number of block device for the wanted container ++ * ++ * Returns a pointer to the container structure in the list, or NULL if the ++ * container isn't currently mounted. ++ */ ++static struct apfs_nxsb_info *apfs_nx_find_by_dev(dev_t dev) ++{ ++ struct apfs_nxsb_info *curr; ++ ++ lockdep_assert_held(&nxs_mutex); ++ list_for_each_entry(curr, &nxs, nx_list) { ++ struct block_device *curr_bdev = curr->nx_bdev; ++ ++ if (curr_bdev->bd_dev == dev) ++ return curr; ++ } ++ return NULL; ++} ++ ++/** ++ * apfs_sb_set_blocksize - Set the block size for the container's device ++ * @sb: superblock structure ++ * @size: size to set ++ * ++ * This is like sb_set_blocksize(), but it uses the container's device instead ++ * of the nonexistent volume device. ++ */ ++static int apfs_sb_set_blocksize(struct super_block *sb, int size) ++{ ++ if (set_blocksize(APFS_NXI(sb)->nx_bdev, size)) ++ return 0; ++ sb->s_blocksize = size; ++ sb->s_blocksize_bits = blksize_bits(size); ++ return sb->s_blocksize; ++} ++ ++/** ++ * apfs_read_super_copy - Read the copy of the container superblock in block 0 ++ * @sb: superblock structure ++ * ++ * Returns a pointer to the buffer head, or an error pointer in case of failure. 
++ */ ++static struct buffer_head *apfs_read_super_copy(struct super_block *sb) ++{ ++ struct buffer_head *bh; ++ struct apfs_nx_superblock *msb_raw; ++ int blocksize; ++ int err = -EINVAL; ++ ++ /* ++ * For now assume a small blocksize, we only need it so that we can ++ * read the actual blocksize from disk. ++ */ ++ if (!apfs_sb_set_blocksize(sb, APFS_NX_DEFAULT_BLOCK_SIZE)) { ++ apfs_err(sb, "unable to set blocksize"); ++ return ERR_PTR(err); ++ } ++ bh = apfs_sb_bread(sb, APFS_NX_BLOCK_NUM); ++ if (!bh) { ++ apfs_err(sb, "unable to read superblock"); ++ return ERR_PTR(err); ++ } ++ msb_raw = (struct apfs_nx_superblock *)bh->b_data; ++ blocksize = le32_to_cpu(msb_raw->nx_block_size); ++ ++ if (sb->s_blocksize != blocksize) { ++ brelse(bh); ++ ++ if (!apfs_sb_set_blocksize(sb, blocksize)) { ++ apfs_err(sb, "bad blocksize %d", blocksize); ++ return ERR_PTR(err); ++ } ++ bh = apfs_sb_bread(sb, APFS_NX_BLOCK_NUM); ++ if (!bh) { ++ apfs_err(sb, "unable to read superblock 2nd time"); ++ return ERR_PTR(err); ++ } ++ msb_raw = (struct apfs_nx_superblock *)bh->b_data; ++ } ++ ++ sb->s_magic = le32_to_cpu(msb_raw->nx_magic); ++ if (sb->s_magic != APFS_NX_MAGIC) { ++ apfs_err(sb, "not an apfs filesystem"); ++ goto fail; ++ } ++ if (!apfs_obj_verify_csum(sb, bh)) ++ apfs_notice(sb, "backup superblock seems corrupted"); ++ return bh; ++ ++fail: ++ brelse(bh); ++ return ERR_PTR(err); ++} ++ ++/** ++ * apfs_make_super_copy - Write a copy of the checkpoint superblock to block 0 ++ * @sb: superblock structure ++ */ ++static void apfs_make_super_copy(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = sbi->s_nxi; ++ struct buffer_head *bh; ++ ++ if (!(nxi->nx_flags & APFS_READWRITE)) ++ return; ++ ++ /* Only update the backup once all volumes are unmounted */ ++ mutex_lock(&nxs_mutex); ++ if (nxi->nx_refcnt > 1) ++ goto out_unlock; ++ ++ bh = apfs_sb_bread(sb, APFS_NX_BLOCK_NUM); ++ if (!bh) { ++ apfs_err(sb, "failed to write 
block zero"); ++ goto out_unlock; ++ } ++ memcpy(bh->b_data, nxi->nx_raw, sb->s_blocksize); ++ mark_buffer_dirty(bh); ++ brelse(bh); ++out_unlock: ++ mutex_unlock(&nxs_mutex); ++} ++ ++static int apfs_check_nx_features(struct super_block *sb); ++ ++/** ++ * apfs_read_main_super - Find the container superblock and read it into memory ++ * @sb: superblock structure ++ * ++ * Returns a negative error code in case of failure. On success, returns 0 ++ * and sets the nx_raw and nx_xid fields of APFS_NXI(@sb). ++ */ ++static int apfs_read_main_super(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct buffer_head *bh; ++ struct buffer_head *desc_bh = NULL; ++ struct apfs_nx_superblock *msb_raw; ++ u64 xid, bno = APFS_NX_BLOCK_NUM; ++ u64 desc_base; ++ u32 desc_blocks; ++ int err = -EINVAL; ++ int i; ++ ++ lockdep_assert_held(&nxs_mutex); ++ if (nxi->nx_refcnt > 1) { ++ /* It's already mapped */ ++ sb->s_blocksize = nxi->nx_blocksize; ++ sb->s_blocksize_bits = nxi->nx_blocksize_bits; ++ sb->s_magic = le32_to_cpu(nxi->nx_raw->nx_magic); ++ return 0; ++ } ++ ++ /* Read the superblock from the last clean unmount */ ++ bh = apfs_read_super_copy(sb); ++ if (IS_ERR(bh)) ++ return PTR_ERR(bh); ++ msb_raw = (struct apfs_nx_superblock *)bh->b_data; ++ ++ /* We want to mount the latest valid checkpoint among the descriptors */ ++ desc_base = le64_to_cpu(msb_raw->nx_xp_desc_base); ++ if (desc_base >> 63 != 0) { ++ /* The highest bit is set when checkpoints are not contiguous */ ++ apfs_err(sb, "checkpoint descriptor tree not yet supported"); ++ goto fail; ++ } ++ desc_blocks = le32_to_cpu(msb_raw->nx_xp_desc_blocks); ++ if (desc_blocks > 10000) { /* Arbitrary loop limit, is it enough? 
*/ ++ apfs_err(sb, "too many checkpoint descriptors?"); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ /* Now we go through the checkpoints one by one */ ++ xid = le64_to_cpu(msb_raw->nx_o.o_xid); ++ for (i = 0; i < desc_blocks; ++i) { ++ struct apfs_nx_superblock *desc_raw; ++ ++ brelse(desc_bh); ++ desc_bh = apfs_sb_bread(sb, desc_base + i); ++ if (!desc_bh) { ++ apfs_err(sb, "unable to read checkpoint descriptor"); ++ goto fail; ++ } ++ desc_raw = (struct apfs_nx_superblock *)desc_bh->b_data; ++ ++ if (le32_to_cpu(desc_raw->nx_magic) != APFS_NX_MAGIC) ++ continue; /* Not a superblock */ ++ if (le64_to_cpu(desc_raw->nx_o.o_xid) <= xid) ++ continue; /* Old */ ++ if (!apfs_obj_verify_csum(sb, desc_bh)) ++ continue; /* Corrupted */ ++ ++ xid = le64_to_cpu(desc_raw->nx_o.o_xid); ++ msb_raw = desc_raw; ++ bno = desc_base + i; ++ brelse(bh); ++ bh = desc_bh; ++ desc_bh = NULL; ++ } ++ ++ nxi->nx_raw = kmalloc(sb->s_blocksize, GFP_KERNEL); ++ if (!nxi->nx_raw) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ memcpy(nxi->nx_raw, bh->b_data, sb->s_blocksize); ++ nxi->nx_bno = bno; ++ nxi->nx_xid = xid; ++ ++ /* For now we only support blocksize < PAGE_SIZE */ ++ nxi->nx_blocksize = sb->s_blocksize; ++ nxi->nx_blocksize_bits = sb->s_blocksize_bits; ++ ++ err = apfs_check_nx_features(sb); ++fail: ++ brelse(bh); ++ return err; ++} ++ ++/** ++ * apfs_update_software_info - Write the module info to a modified volume ++ * @sb: superblock structure ++ * ++ * Writes this module's information to index zero of the apfs_modified_by ++ * array, shifting the rest of the entries to the right. 
++ */ ++static void apfs_update_software_info(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *raw = sbi->s_vsb_raw; ++ struct apfs_modified_by *mod_by; ++ ++ ASSERT(sbi->s_vsb_raw); ++ apfs_assert_in_transaction(sb, &raw->apfs_o); ++ ASSERT(strlen(APFS_MODULE_ID_STRING) < APFS_MODIFIED_NAMELEN); ++ mod_by = raw->apfs_modified_by; ++ ++ memmove(mod_by + 1, mod_by, (APFS_MAX_HIST - 1) * sizeof(*mod_by)); ++ memset(mod_by->id, 0, sizeof(mod_by->id)); ++ strscpy(mod_by->id, APFS_MODULE_ID_STRING, sizeof(mod_by->id)); ++ mod_by->timestamp = cpu_to_le64(ktime_get_real_ns()); ++ mod_by->last_xid = cpu_to_le64(APFS_NXI(sb)->nx_xid); ++} ++ ++static struct file_system_type apfs_fs_type; ++ ++/** ++ * apfs_free_main_super - Clean up apfs_read_main_super() ++ * @sbi: in-memory superblock info ++ * ++ * It also cleans up after apfs_attach_nxi(), so the name is no longer accurate. ++ */ ++static inline void apfs_free_main_super(struct apfs_sb_info *sbi) ++{ ++ struct apfs_nxsb_info *nxi = sbi->s_nxi; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) ++ fmode_t mode = FMODE_READ | FMODE_EXCL; ++#endif ++ struct apfs_ephemeral_object_info *eph_list = NULL; ++ int i; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) ++ if (nxi->nx_flags & APFS_READWRITE) ++ mode |= FMODE_WRITE; ++#endif ++ ++ lockdep_assert_held(&nxs_mutex); ++ ++ list_del(&sbi->list); ++ if (--nxi->nx_refcnt) ++ goto out; ++ ++ /* Clean up all the ephemeral objects in memory */ ++ eph_list = nxi->nx_eph_list; ++ if (eph_list) { ++ for (i = 0; i < nxi->nx_eph_count; ++i) { ++ kfree(eph_list[i].object); ++ eph_list[i].object = NULL; ++ } ++ kfree(eph_list); ++ eph_list = nxi->nx_eph_list = NULL; ++ nxi->nx_eph_count = 0; ++ } ++ ++ kfree(nxi->nx_raw); ++ nxi->nx_raw = NULL; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 9, 0) ++ fput(nxi->nx_bdev_file); ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++ bdev_release(nxi->nx_bdev_handle); ++#elif 
LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) ++ blkdev_put(nxi->nx_bdev, &apfs_fs_type); ++#else ++ blkdev_put(nxi->nx_bdev, mode); ++#endif ++ ++ list_del(&nxi->nx_list); ++ kfree(nxi->nx_spaceman); ++ nxi->nx_spaceman = NULL; ++ kfree(nxi); ++out: ++ sbi->s_nxi = NULL; ++} ++ ++/** ++ * apfs_map_volume_super_bno - Map a block containing a volume superblock ++ * @sb: superblock structure ++ * @bno: block to map ++ * @check: verify the checksum? ++ */ ++int apfs_map_volume_super_bno(struct super_block *sb, u64 bno, bool check) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = NULL; ++ struct buffer_head *bh = NULL; ++ int err; ++ ++ bh = apfs_sb_bread(sb, bno); ++ if (!bh) { ++ apfs_err(sb, "unable to read volume superblock"); ++ return -EINVAL; ++ } ++ ++ vsb_raw = (struct apfs_superblock *)bh->b_data; ++ if (le32_to_cpu(vsb_raw->apfs_magic) != APFS_MAGIC) { ++ apfs_err(sb, "wrong magic in volume superblock"); ++ err = -EINVAL; ++ goto fail; ++ } ++ ++ /* ++ * XXX: apfs_omap_lookup_block() only runs this check when write ++ * is true, but it should always do it. ++ */ ++ if (check && !apfs_obj_verify_csum(sb, bh)) { ++ apfs_err(sb, "inconsistent volume superblock"); ++ err = -EFSBADCRC; ++ goto fail; ++ } ++ ++ sbi->s_vsb_raw = vsb_raw; ++ sbi->s_vobject.sb = sb; ++ sbi->s_vobject.block_nr = bno; ++ sbi->s_vobject.oid = le64_to_cpu(vsb_raw->apfs_o.o_oid); ++ brelse(sbi->s_vobject.o_bh); ++ sbi->s_vobject.o_bh = bh; ++ sbi->s_vobject.data = bh->b_data; ++ return 0; ++ ++fail: ++ brelse(bh); ++ return err; ++} ++ ++/** ++ * apfs_map_volume_super - Find the volume superblock and map it into memory ++ * @sb: superblock structure ++ * @write: request write access? ++ * ++ * Returns a negative error code in case of failure. On success, returns 0 ++ * and sets APFS_SB(@sb)->s_vsb_raw and APFS_SB(@sb)->s_vobject. 
++ */ ++int apfs_map_volume_super(struct super_block *sb, bool write) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_superblock *msb_raw = nxi->nx_raw; ++ struct apfs_omap_phys *msb_omap_raw; ++ struct apfs_omap *omap = NULL; ++ struct apfs_node *vnode; ++ struct buffer_head *bh; ++ struct apfs_vol_transaction *trans = &sbi->s_transaction; ++ u64 vol_id; ++ u64 vsb; ++ int err; ++ ++ ASSERT(msb_raw); ++ ASSERT(trans->t_old_vsb == sbi->s_vobject.o_bh); ++ (void)trans; ++ ++ /* Get the id for the requested volume number */ ++ if (sbi->s_vol_nr >= APFS_NX_MAX_FILE_SYSTEMS) { ++ apfs_err(sb, "volume number out of range"); ++ return -EINVAL; ++ } ++ vol_id = le64_to_cpu(msb_raw->nx_fs_oid[sbi->s_vol_nr]); ++ if (vol_id == 0) { ++ apfs_err(sb, "requested volume does not exist"); ++ return -EINVAL; ++ } ++ ++ /* Get the container's object map */ ++ bh = apfs_read_object_block(sb, le64_to_cpu(msb_raw->nx_omap_oid), ++ write, false /* preserve */); ++ if (IS_ERR(bh)) { ++ apfs_err(sb, "unable to read container object map"); ++ return PTR_ERR(bh); ++ } ++ if (write) { ++ ASSERT(buffer_trans(bh)); ++ msb_raw->nx_omap_oid = cpu_to_le64(bh->b_blocknr); ++ } ++ msb_omap_raw = (struct apfs_omap_phys *)bh->b_data; ++ ++ /* Get the root node for the container's omap */ ++ vnode = apfs_read_node(sb, le64_to_cpu(msb_omap_raw->om_tree_oid), ++ APFS_OBJ_PHYSICAL, write); ++ if (IS_ERR(vnode)) { ++ apfs_err(sb, "unable to read volume block"); ++ err = PTR_ERR(vnode); ++ goto fail; ++ } ++ if (write) { ++ ASSERT(buffer_trans(bh)); ++ msb_omap_raw->om_tree_oid = cpu_to_le64(vnode->object.block_nr); ++ } ++ msb_omap_raw = NULL; ++ brelse(bh); ++ bh = NULL; ++ ++ omap = kzalloc(sizeof(*omap), GFP_KERNEL); ++ if (!omap) { ++ apfs_node_free(vnode); ++ return -ENOMEM; ++ } ++ omap->omap_root = vnode; ++ ++ err = apfs_omap_lookup_block(sb, omap, vol_id, &vsb, write); ++ apfs_node_free(vnode); ++ vnode = NULL; ++ kfree(omap); ++ 
omap = NULL; ++ if (err) { ++ apfs_err(sb, "volume not found, likely corruption"); ++ return err; ++ } ++ ++ /* ++ * Snapshots could get mounted during a transaction, so the fletcher ++ * checksum doesn't have to be valid. ++ */ ++ return apfs_map_volume_super_bno(sb, vsb, !write && !sbi->s_snap_name); ++ ++fail: ++ brelse(bh); ++ return err; ++} ++ ++/** ++ * apfs_unmap_volume_super - Clean up apfs_map_volume_super() ++ * @sb: filesystem superblock ++ */ ++void apfs_unmap_volume_super(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_object *obj = &sbi->s_vobject; ++ ++ obj->data = NULL; ++ brelse(obj->o_bh); ++ obj->o_bh = NULL; ++} ++ ++/** ++ * apfs_get_omap - Get a reference to the omap, if it's already read ++ * @sb: filesystem superblock ++ * ++ * Returns the omap struct, or NULL on failure. ++ */ ++static struct apfs_omap *apfs_get_omap(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_sb_info *curr = NULL; ++ struct apfs_omap *omap = NULL; ++ struct apfs_omap_cache *cache = NULL; ++ ++ lockdep_assert_held(&nxs_mutex); ++ ++ list_for_each_entry(curr, &nxi->vol_list, list) { ++ if (curr == sbi) ++ continue; ++ if (curr->s_vol_nr == sbi->s_vol_nr) { ++ omap = curr->s_omap; ++ cache = &omap->omap_cache; ++ ++omap->omap_refcnt; ++ /* Right now the cache can't be shared like this */ ++ cache->disabled = true; ++ return omap; ++ } ++ } ++ return NULL; ++} ++ ++/** ++ * apfs_read_omap - Find and read the omap root node ++ * @sb: superblock structure ++ * @write: request write access? ++ * ++ * On success, returns 0 and sets the fields of APFS_SB(@sb)->s_omap; on failure ++ * returns a negative error code. 
++ */ ++int apfs_read_omap(struct super_block *sb, bool write) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct apfs_omap_phys *omap_raw; ++ struct apfs_node *omap_root; ++ struct apfs_omap *omap = NULL; ++ struct buffer_head *bh; ++ u64 omap_blk; ++ int err; ++ ++ ASSERT(sbi->s_vsb_raw); ++ ++ ASSERT(sbi->s_omap); ++ omap = sbi->s_omap; ++ ++ /* Get the block holding the volume omap information */ ++ omap_blk = le64_to_cpu(vsb_raw->apfs_omap_oid); ++ bh = apfs_read_object_block(sb, omap_blk, write, false /* preserve */); ++ if (IS_ERR(bh)) { ++ apfs_err(sb, "unable to read the volume object map"); ++ return PTR_ERR(bh); ++ } ++ if (write) { ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ vsb_raw->apfs_omap_oid = cpu_to_le64(bh->b_blocknr); ++ } ++ omap_raw = (struct apfs_omap_phys *)bh->b_data; ++ ++ /* Get the volume's object map */ ++ omap_root = apfs_read_node(sb, le64_to_cpu(omap_raw->om_tree_oid), ++ APFS_OBJ_PHYSICAL, write); ++ if (IS_ERR(omap_root)) { ++ apfs_err(sb, "unable to read the omap root node"); ++ err = PTR_ERR(omap_root); ++ goto fail; ++ } ++ if (write) { ++ apfs_assert_in_transaction(sb, &omap_raw->om_o); ++ ASSERT(buffer_trans(bh)); ++ omap_raw->om_tree_oid = cpu_to_le64(omap_root->object.block_nr); ++ } ++ omap->omap_latest_snap = le64_to_cpu(omap_raw->om_most_recent_snap); ++ omap_raw = NULL; ++ brelse(bh); ++ ++ if (omap->omap_root) ++ apfs_node_free(omap->omap_root); ++ omap->omap_root = omap_root; ++ return 0; ++ ++fail: ++ brelse(bh); ++ return err; ++} ++ ++/** ++ * apfs_first_read_omap - Find and read the omap root node during mount ++ * @sb: superblock structure ++ * ++ * On success, returns 0 and sets APFS_SB(@sb)->s_omap; on failure returns a ++ * negative error code. 
++ */ ++static int apfs_first_read_omap(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_omap *omap = NULL; ++ int err; ++ ++ lockdep_assert_held(&nxs_mutex); ++ ++ /* The current transaction and all snapshots share a single omap */ ++ omap = apfs_get_omap(sb); ++ if (omap) { ++ sbi->s_omap = omap; ++ return 0; ++ } ++ ++ omap = kzalloc(sizeof(*omap), GFP_KERNEL); ++ if (!omap) ++ return -ENOMEM; ++ ++ sbi->s_omap = omap; ++ err = apfs_read_omap(sb, false /* write */); ++ if (err) { ++ kfree(omap); ++ sbi->s_omap = NULL; ++ return err; ++ } ++ ++ ++omap->omap_refcnt; ++ return 0; ++} ++ ++/** ++ * apfs_put_omap - Release a reference to an object map ++ * @omap: the object map ++ */ ++static void apfs_put_omap(struct apfs_omap *omap) ++{ ++ lockdep_assert_held(&nxs_mutex); ++ ++ if (!omap) ++ return; ++ ++ if (--omap->omap_refcnt != 0) ++ return; ++ ++ apfs_node_free(omap->omap_root); ++ kfree(omap); ++} ++ ++/** ++ * apfs_read_catalog - Find and read the catalog root node ++ * @sb: superblock structure ++ * @write: request write access? ++ * ++ * On success, returns 0 and sets APFS_SB(@sb)->s_cat_root; on failure returns ++ * a negative error code. 
++ */ ++int apfs_read_catalog(struct super_block *sb, bool write) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_superblock *vsb_raw = sbi->s_vsb_raw; ++ struct apfs_node *root_node; ++ ++ ASSERT(sbi->s_omap && sbi->s_omap->omap_root); ++ ++ root_node = apfs_read_node(sb, le64_to_cpu(vsb_raw->apfs_root_tree_oid), ++ APFS_OBJ_VIRTUAL, write); ++ if (IS_ERR(root_node)) { ++ apfs_err(sb, "unable to read catalog root node"); ++ return PTR_ERR(root_node); ++ } ++ ++ if (sbi->s_cat_root) ++ apfs_node_free(sbi->s_cat_root); ++ sbi->s_cat_root = root_node; ++ return 0; ++} ++ ++static void apfs_put_super(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ ++ /* Cleanups won't reschedule themselves during unmount */ ++ flush_work(&sbi->s_orphan_cleanup_work); ++ ++ /* Stop flushing orphans and update the volume as needed */ ++ if (!(sb->s_flags & SB_RDONLY)) { ++ struct apfs_superblock *vsb_raw; ++ struct buffer_head *vsb_bh; ++ struct apfs_max_ops maxops = {0}; ++ ++ if (apfs_transaction_start(sb, maxops)) ++ goto fail; ++ vsb_raw = sbi->s_vsb_raw; ++ vsb_bh = sbi->s_vobject.o_bh; ++ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ ASSERT(buffer_trans(vsb_bh)); ++ ++ apfs_update_software_info(sb); ++ vsb_raw->apfs_unmount_time = cpu_to_le64(ktime_get_real_ns()); ++ set_buffer_csum(vsb_bh); ++ ++ /* Guarantee commit */ ++ sbi->s_nxi->nx_transaction.t_state |= APFS_NX_TRANS_FORCE_COMMIT; ++ if (apfs_transaction_commit(sb)) { ++ apfs_transaction_abort(sb); ++ goto fail; ++ } ++ } ++ ++ /* ++ * Even if this particular volume/snapshot was read-only, the container ++ * may have changed and need an update here. 
++ */ ++ apfs_make_super_copy(sb); ++ ++fail: ++ iput(sbi->s_private_dir); ++ sbi->s_private_dir = NULL; ++ ++ apfs_node_free(sbi->s_cat_root); ++ apfs_unmap_volume_super(sb); ++ ++ mutex_lock(&nxs_mutex); ++ apfs_put_omap(sbi->s_omap); ++ sbi->s_omap = NULL; ++ apfs_free_main_super(sbi); ++ mutex_unlock(&nxs_mutex); ++ ++ sb->s_fs_info = NULL; ++ ++ kfree(sbi->s_snap_name); ++ sbi->s_snap_name = NULL; ++ if (sbi->s_dflt_pfk) ++ kfree(sbi->s_dflt_pfk); ++ kfree(sbi); ++} ++ ++static struct kmem_cache *apfs_inode_cachep; ++ ++static struct inode *apfs_alloc_inode(struct super_block *sb) ++{ ++ struct apfs_inode_info *ai; ++ struct apfs_dstream_info *dstream; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0) ++ ai = alloc_inode_sb(sb, apfs_inode_cachep, GFP_KERNEL); ++#else ++ ai = kmem_cache_alloc(apfs_inode_cachep, GFP_KERNEL); ++#endif ++ if (!ai) ++ return NULL; ++ dstream = &ai->i_dstream; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0) /* iversion came in 4.16 */ ++ inode_set_iversion(&ai->vfs_inode, 1); ++#else ++ ai->vfs_inode.i_version = 1; ++#endif ++ dstream->ds_sb = sb; ++ dstream->ds_inode = &ai->vfs_inode; ++ dstream->ds_cached_ext.len = 0; ++ dstream->ds_ext_dirty = false; ++ ai->i_nchildren = 0; ++ INIT_LIST_HEAD(&ai->i_list); ++ ai->i_cleaned = false; ++ return &ai->vfs_inode; ++} ++ ++static void apfs_i_callback(struct rcu_head *head) ++{ ++ struct inode *inode = container_of(head, struct inode, i_rcu); ++ ++ kmem_cache_free(apfs_inode_cachep, APFS_I(inode)); ++} ++ ++static void apfs_destroy_inode(struct inode *inode) ++{ ++ call_rcu(&inode->i_rcu, apfs_i_callback); ++} ++ ++static void init_once(void *p) ++{ ++ struct apfs_inode_info *ai = (struct apfs_inode_info *)p; ++ struct apfs_dstream_info *dstream = &ai->i_dstream; ++ ++ spin_lock_init(&dstream->ds_ext_lock); ++ inode_init_once(&ai->vfs_inode); ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 9, 0) ++#define SLAB_MEM_SPREAD 0 ++#endif ++ ++static int __init 
init_inodecache(void) ++{ ++ apfs_inode_cachep = kmem_cache_create("apfs_inode_cache", ++ sizeof(struct apfs_inode_info), ++ 0, (SLAB_RECLAIM_ACCOUNT| ++ SLAB_MEM_SPREAD|SLAB_ACCOUNT), ++ init_once); ++ if (apfs_inode_cachep == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++static int apfs_write_inode(struct inode *inode, struct writeback_control *wbc) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_nxsb_info *nxi = APFS_SB(sb)->s_nxi; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ maxops.cat = APFS_UPDATE_INODE_MAXOPS(); ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ err = apfs_update_inode(inode, NULL /* new_name */); ++ if (err) ++ goto fail; ++ /* Don't commit yet, or the inode will get flushed again and lock up */ ++ nxi->nx_transaction.t_state |= APFS_NX_TRANS_DEFER_COMMIT; ++ err = apfs_transaction_commit(sb); ++ if (err) ++ goto fail; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++static void destroy_inodecache(void) ++{ ++ /* ++ * Make sure all delayed rcu free inodes are flushed before we ++ * destroy cache. ++ */ ++ rcu_barrier(); ++ kmem_cache_destroy(apfs_inode_cachep); ++} ++ ++/** ++ * apfs_count_used_blocks - Count the blocks in use across all volumes ++ * @sb: filesystem superblock ++ * @count: on return it will store the block count ++ * ++ * This function probably belongs in a separate file, but for now it is ++ * only called by statfs. 
++ */ ++static int apfs_count_used_blocks(struct super_block *sb, u64 *count) ++{ ++ struct apfs_nx_superblock *msb_raw = APFS_NXI(sb)->nx_raw; ++ struct apfs_node *vnode; ++ struct apfs_omap_phys *msb_omap_raw; ++ struct buffer_head *bh; ++ struct apfs_omap *omap = NULL; ++ u64 msb_omap, vb; ++ int i; ++ int err = 0; ++ ++ /* Get the container's object map */ ++ msb_omap = le64_to_cpu(msb_raw->nx_omap_oid); ++ bh = apfs_sb_bread(sb, msb_omap); ++ if (!bh) { ++ apfs_err(sb, "unable to read container object map"); ++ return -EIO; ++ } ++ msb_omap_raw = (struct apfs_omap_phys *)bh->b_data; ++ ++ /* Get the Volume Block */ ++ vb = le64_to_cpu(msb_omap_raw->om_tree_oid); ++ msb_omap_raw = NULL; ++ brelse(bh); ++ bh = NULL; ++ vnode = apfs_read_node(sb, vb, APFS_OBJ_PHYSICAL, false /* write */); ++ if (IS_ERR(vnode)) { ++ apfs_err(sb, "unable to read volume block"); ++ return PTR_ERR(vnode); ++ } ++ ++ omap = kzalloc(sizeof(*omap), GFP_KERNEL); ++ if (!omap) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ omap->omap_root = vnode; ++ ++ /* Iterate through the checkpoint superblocks and add the used blocks */ ++ *count = 0; ++ for (i = 0; i < APFS_NX_MAX_FILE_SYSTEMS; i++) { ++ struct apfs_superblock *vsb_raw; ++ u64 vol_id; ++ u64 vol_bno; ++ ++ vol_id = le64_to_cpu(msb_raw->nx_fs_oid[i]); ++ if (vol_id == 0) /* All volumes have been checked */ ++ break; ++ err = apfs_omap_lookup_newest_block(sb, omap, vol_id, &vol_bno, false /* write */); ++ if (err) { ++ apfs_err(sb, "omap lookup failed for vol id 0x%llx", vol_id); ++ break; ++ } ++ ++ bh = apfs_sb_bread(sb, vol_bno); ++ if (!bh) { ++ err = -EIO; ++ apfs_err(sb, "unable to read volume superblock"); ++ break; ++ } ++ vsb_raw = (struct apfs_superblock *)bh->b_data; ++ *count += le64_to_cpu(vsb_raw->apfs_fs_alloc_count); ++ brelse(bh); ++ } ++ ++fail: ++ kfree(omap); ++ apfs_node_free(vnode); ++ return err; ++} ++ ++static int apfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ struct super_block *sb = dentry->d_sb; 
++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_superblock *msb_raw; ++ struct apfs_superblock *vol; ++ u64 fsid, used_blocks = 0; ++ int err; ++ ++ down_read(&nxi->nx_big_sem); ++ msb_raw = nxi->nx_raw; ++ vol = sbi->s_vsb_raw; ++ ++ buf->f_type = APFS_NX_MAGIC; ++ /* Nodes are assumed to fit in a page, for now */ ++ buf->f_bsize = sb->s_blocksize; ++ ++ /* Volumes share the whole disk space */ ++ buf->f_blocks = le64_to_cpu(msb_raw->nx_block_count); ++ err = apfs_count_used_blocks(sb, &used_blocks); ++ if (err) ++ goto fail; ++ buf->f_bfree = buf->f_blocks - used_blocks; ++ buf->f_bavail = buf->f_bfree; /* I don't know any better */ ++ ++ /* The file count is only for the mounted volume */ ++ buf->f_files = le64_to_cpu(vol->apfs_num_files) + ++ le64_to_cpu(vol->apfs_num_directories) + ++ le64_to_cpu(vol->apfs_num_symlinks) + ++ le64_to_cpu(vol->apfs_num_other_fsobjects); ++ ++ /* ++ * buf->f_ffree is left undefined for now. Maybe it should report the ++ * number of available cnids, like hfsplus attempts to do. 
++ */ ++ ++ buf->f_namelen = APFS_NAME_LEN; ++ ++ /* There are no clear rules for the fsid, so we follow ext2 here */ ++ fsid = le64_to_cpup((void *)vol->apfs_vol_uuid) ^ ++ le64_to_cpup((void *)vol->apfs_vol_uuid + sizeof(u64)); ++ buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; ++ buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; ++ ++fail: ++ up_read(&nxi->nx_big_sem); ++ return err; ++} ++ ++static int apfs_show_options(struct seq_file *seq, struct dentry *root) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(root->d_sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(root->d_sb); ++ ++ if (sbi->s_vol_nr != 0) ++ seq_printf(seq, ",vol=%u", sbi->s_vol_nr); ++ if (sbi->s_snap_name) ++ seq_printf(seq, ",snap=%s", sbi->s_snap_name); ++ if (uid_valid(sbi->s_uid)) ++ seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, ++ sbi->s_uid)); ++ if (gid_valid(sbi->s_gid)) ++ seq_printf(seq, ",gid=%u", from_kgid(&init_user_ns, ++ sbi->s_gid)); ++ if (nxi->nx_flags & APFS_CHECK_NODES) ++ seq_puts(seq, ",cknodes"); ++ ++ return 0; ++} ++ ++int apfs_sync_fs(struct super_block *sb, int wait) ++{ ++ struct apfs_max_ops maxops = {0}; ++ int err; ++ ++ /* TODO: actually start the commit and return without waiting? */ ++ if (wait == 0) ++ return 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ APFS_SB(sb)->s_nxi->nx_transaction.t_state |= APFS_NX_TRANS_FORCE_COMMIT; ++ err = apfs_transaction_commit(sb); ++ if (err) ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/* Only supports read-only remounts, everything else is silently ignored */ ++static int apfs_remount(struct super_block *sb, int *flags, char *data) ++{ ++ int err = 0; ++ ++ err = sync_filesystem(sb); ++ if (err) ++ return err; ++ ++ /* TODO: race? Could a new transaction have started already? */ ++ if (*flags & SB_RDONLY) ++ sb->s_flags |= SB_RDONLY; ++ ++ /* ++ * TODO: readwrite remounts seem simple enough, but I worry about ++ * remounting aborted transactions. 
I would probably also need a ++ * dry-run version of parse_options(). ++ */ ++ apfs_notice(sb, "all remounts can do is turn a volume read-only"); ++ return 0; ++} ++ ++static const struct super_operations apfs_sops = { ++ .alloc_inode = apfs_alloc_inode, ++ .destroy_inode = apfs_destroy_inode, ++ .write_inode = apfs_write_inode, ++ .evict_inode = apfs_evict_inode, ++ .put_super = apfs_put_super, ++ .sync_fs = apfs_sync_fs, ++ .statfs = apfs_statfs, ++ .remount_fs = apfs_remount, ++ .show_options = apfs_show_options, ++}; ++ ++enum { ++ Opt_readwrite, Opt_cknodes, Opt_uid, Opt_gid, Opt_vol, Opt_snap, Opt_err, ++}; ++ ++static const match_table_t tokens = { ++ {Opt_readwrite, "readwrite"}, ++ {Opt_cknodes, "cknodes"}, ++ {Opt_uid, "uid=%u"}, ++ {Opt_gid, "gid=%u"}, ++ {Opt_vol, "vol=%u"}, ++ {Opt_snap, "snap=%s"}, ++ {Opt_err, NULL} ++}; ++ ++/** ++ * apfs_set_nx_flags - Set the mount flags for the container, if allowed ++ * @sb: superblock structure ++ * @flags: flags to set ++ */ ++static void apfs_set_nx_flags(struct super_block *sb, unsigned int flags) ++{ ++ struct apfs_nxsb_info *nxi = APFS_SB(sb)->s_nxi; ++ ++ lockdep_assert_held(&nxs_mutex); ++ ++ /* The mount flags can only be set when the container is first mounted */ ++ if (nxi->nx_refcnt == 1) ++ nxi->nx_flags = flags; ++ else if (flags != nxi->nx_flags) ++ apfs_warn(sb, "ignoring mount flags - container already mounted"); ++} ++ ++/** ++ * apfs_get_vol_number - Retrieve the volume number from the mount options ++ * @options: string of mount options ++ * ++ * On error, it will just return the default volume 0. ++ */ ++static unsigned int apfs_get_vol_number(char *options) ++{ ++ char needle[] = "vol="; ++ char *volstr; ++ long vol; ++ ++ if (!options) ++ return 0; ++ ++ /* TODO: just parse all the options once... */ ++ volstr = strstr(options, needle); ++ if (!volstr) ++ return 0; ++ volstr += sizeof(needle) - 1; ++ ++ /* TODO: other bases? 
*/ ++ if (kstrtol(volstr, 10, &vol) < 0) ++ return 0; ++ return vol; ++} ++ ++/** ++ * apfs_get_snap_name - Duplicate the snapshot label from the mount options ++ * @options: string of mount options ++ * ++ * On error, it will just return the default NULL snapshot name. TODO: this is ++ * actually a bit dangerous because a memory allocation failure might get the ++ * same snapshot mounted twice, without a shared superblock. ++ */ ++static char *apfs_get_snap_name(char *options) ++{ ++ char needle[] = "snap="; ++ char *name = NULL, *end = NULL; ++ ++ if (!options) ++ return NULL; ++ ++ name = strstr(options, needle); ++ if (!name) ++ return NULL; ++ ++ name += sizeof(needle) - 1; ++ end = strchrnul(name, ','); ++ ++ return kmemdup_nul(name, end - name, GFP_KERNEL); ++} ++ ++/* ++ * Many of the parse_options() functions in other file systems return 0 ++ * on error. This one returns an error code, and 0 on success. ++ */ ++static int parse_options(struct super_block *sb, char *options) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = sbi->s_nxi; ++ char *p; ++ substring_t args[MAX_OPT_ARGS]; ++ int option; ++ int err = 0; ++ unsigned int nx_flags; ++ ++ lockdep_assert_held(&nxs_mutex); ++ ++ /* Set default values before parsing */ ++ sbi->s_vol_nr = 0; ++ nx_flags = 0; ++ ++ if (!options) ++ goto out; ++ ++ while ((p = strsep(&options, ",")) != NULL) { ++ int token; ++ ++ if (!*p) ++ continue; ++ token = match_token(p, tokens, args); ++ switch (token) { ++ case Opt_readwrite: ++ /* ++ * Write support is not safe yet, so keep it disabled ++ * unless the user requests it explicitly. ++ */ ++ nx_flags |= APFS_READWRITE; ++ break; ++ case Opt_cknodes: ++ /* ++ * Right now, node checksums are too costly to enable ++ * by default. TODO: try to improve this. 
++ */ ++ nx_flags |= APFS_CHECK_NODES; ++ break; ++ case Opt_uid: ++ err = match_int(&args[0], &option); ++ if (err) ++ return err; ++ sbi->s_uid = make_kuid(current_user_ns(), option); ++ if (!uid_valid(sbi->s_uid)) { ++ apfs_err(sb, "invalid uid"); ++ return -EINVAL; ++ } ++ break; ++ case Opt_gid: ++ err = match_int(&args[0], &option); ++ if (err) ++ return err; ++ sbi->s_gid = make_kgid(current_user_ns(), option); ++ if (!gid_valid(sbi->s_gid)) { ++ apfs_err(sb, "invalid gid"); ++ return -EINVAL; ++ } ++ break; ++ case Opt_vol: ++ err = match_int(&args[0], &sbi->s_vol_nr); ++ if (err) ++ return err; ++ break; ++ case Opt_snap: ++ kfree(sbi->s_snap_name); ++ sbi->s_snap_name = match_strdup(&args[0]); ++ if (!sbi->s_snap_name) ++ return -ENOMEM; ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++ ++out: ++ apfs_set_nx_flags(sb, nx_flags); ++ if (!(sb->s_flags & SB_RDONLY)) { ++ if (nxi->nx_flags & APFS_READWRITE) { ++ apfs_notice(sb, "experimental write support is enabled"); ++ } else { ++ apfs_warn(sb, "experimental writes disabled to avoid data loss"); ++ apfs_warn(sb, "if you really want them, check the README"); ++ sb->s_flags |= SB_RDONLY; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * apfs_check_nx_features - Check for unsupported features in the container ++ * @sb: superblock structure ++ * ++ * Returns -EINVAL if unsupported incompatible features are found, otherwise ++ * returns 0. 
++ */ ++static int apfs_check_nx_features(struct super_block *sb) ++{ ++ struct apfs_nx_superblock *msb_raw = NULL; ++ u64 features; ++ ++ msb_raw = APFS_NXI(sb)->nx_raw; ++ if (!msb_raw) { ++ apfs_alert(sb, "feature checks are misplaced"); ++ return -EINVAL; ++ } ++ ++ features = le64_to_cpu(msb_raw->nx_incompatible_features); ++ if (features & ~APFS_NX_SUPPORTED_INCOMPAT_MASK) { ++ apfs_warn(sb, "unknown incompatible container features (0x%llx)", features); ++ return -EINVAL; ++ } ++ if (features & APFS_NX_INCOMPAT_FUSION) { ++ apfs_warn(sb, "fusion drives are not supported"); ++ return -EINVAL; ++ } ++ ++ features = le64_to_cpu(msb_raw->nx_readonly_compatible_features); ++ if (features & ~APFS_NX_SUPPORTED_ROCOMPAT_MASK) { ++ apfs_warn(sb, "unknown read-only compatible container features (0x%llx)", features); ++ if (!sb_rdonly(sb)) { ++ apfs_warn(sb, "container can't be mounted read-write"); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * apfs_check_vol_features - Check for unsupported features in the volume ++ * @sb: superblock structure ++ * ++ * Returns -EINVAL if unsupported incompatible features are found, otherwise ++ * returns 0. 
++ */ ++static int apfs_check_vol_features(struct super_block *sb) ++{ ++ struct apfs_superblock *vsb_raw = NULL; ++ u64 features; ++ ++ vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ if (!vsb_raw) { ++ apfs_alert(sb, "feature checks are misplaced"); ++ return -EINVAL; ++ } ++ ++ features = le64_to_cpu(vsb_raw->apfs_incompatible_features); ++ if (features & ~APFS_SUPPORTED_INCOMPAT_MASK) { ++ apfs_warn(sb, "unknown incompatible volume features (0x%llx)", features); ++ return -EINVAL; ++ } ++ if (features & APFS_INCOMPAT_DATALESS_SNAPS) { ++ apfs_warn(sb, "snapshots with no data are not supported"); ++ return -EINVAL; ++ } ++ if (features & APFS_INCOMPAT_ENC_ROLLED) { ++ apfs_warn(sb, "encrypted volumes are not supported"); ++ return -EINVAL; ++ } ++ if (features & APFS_INCOMPAT_INCOMPLETE_RESTORE) { ++ apfs_warn(sb, "incomplete restore is not supported"); ++ return -EINVAL; ++ } ++ if (features & APFS_INCOMPAT_PFK) { ++ apfs_warn(sb, "PFK is not supported"); ++ return -EINVAL; ++ } ++ if (features & APFS_INCOMPAT_SECONDARY_FSROOT) { ++ apfs_warn(sb, "secondary fsroot is not supported"); ++ return -EINVAL; ++ } ++ if (features & APFS_INCOMPAT_SEALED_VOLUME) { ++ if (!sb_rdonly(sb)) { ++ apfs_warn(sb, "writes to sealed volumes are not yet supported"); ++ return -EINVAL; ++ } ++ apfs_info(sb, "volume is sealed"); ++ } ++ /* ++ * As far as I can see, all this feature seems to do is define a new ++ * flag (which I call APFS_FILE_EXTENT_PREALLOCATED) for extents that ++ * are fully after the end of their file. I don't get why this change ++ * is incompatible instead of read-only compatible, so I fear I might ++ * be missing something. I will never be certain though, so for now ++ * allow the mount and hope for the best. 
++ */ ++ if (features & APFS_INCOMPAT_EXTENT_PREALLOC_FLAG) ++ apfs_warn(sb, "extent prealloc flag is set"); ++ ++ features = le64_to_cpu(vsb_raw->apfs_fs_flags); ++ /* Some encrypted volumes are readable anyway */ ++ if (!(features & APFS_FS_UNENCRYPTED)) ++ apfs_warn(sb, "volume is encrypted, may not be read correctly"); ++ ++ features = le64_to_cpu(vsb_raw->apfs_readonly_compatible_features); ++ if (features & ~APFS_SUPPORTED_ROCOMPAT_MASK) { ++ apfs_warn(sb, "unknown read-only compatible volume features (0x%llx)", features); ++ if (!sb_rdonly(sb)) { ++ apfs_warn(sb, "volume can't be mounted read-write"); ++ return -EINVAL; ++ } ++ } ++ return 0; ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) ++ ++/** ++ * apfs_setup_bdi - Set up the bdi for the superblock ++ * @sb: superblock structure ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++static int apfs_setup_bdi(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct backing_dev_info *bdi_dev = NULL, *bdi_sb = NULL; ++ int err; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) || (defined(RHEL_RELEASE) && LINUX_VERSION_CODE == KERNEL_VERSION(5, 14, 0)) ++ bdi_dev = nxi->nx_bdev->bd_disk->bdi; ++#else ++ bdi_dev = nxi->nx_bdev->bd_bdi; ++#endif ++ ++ err = super_setup_bdi(sb); ++ if (err) ++ return err; ++ bdi_sb = sb->s_bdi; ++ ++ bdi_sb->ra_pages = bdi_dev->ra_pages; ++ bdi_sb->io_pages = bdi_dev->io_pages; ++ ++ bdi_sb->capabilities = bdi_dev->capabilities; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) ++ bdi_sb->capabilities &= ~BDI_CAP_WRITEBACK; ++#else ++ bdi_sb->capabilities |= BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY; ++#endif ++ ++ return 0; ++} ++ ++#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) */ ++ ++/* This is needed for readahead, so old kernels will be slower */ ++static int apfs_setup_bdi(struct super_block *sb) ++{ ++ return 0; ++} ++ ++#endif ++ ++static void apfs_set_trans_buffer_limit(struct 
super_block *sb) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ unsigned long memsize_in_blocks; ++ struct sysinfo info = {0}; ++ ++ si_meminfo(&info); ++ memsize_in_blocks = info.totalram << (PAGE_SHIFT - sb->s_blocksize_bits); ++ ++ /* ++ * Buffer heads are not reclaimed while they are part of the current ++ * transaction, so systems with little memory will crash if we don't ++ * commit often enough. This hack should make that happen in general, ++ * but I still need to get the reclaim to work eventually (TODO). ++ */ ++ if (memsize_in_blocks >= 16 * TRANSACTION_BUFFERS_MAX) ++ sbi->s_trans_buffers_max = TRANSACTION_BUFFERS_MAX; ++ else ++ sbi->s_trans_buffers_max = memsize_in_blocks / 16; ++} ++ ++static int apfs_fill_super(struct super_block *sb, void *data, int silent) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct inode *root = NULL, *priv = NULL; ++ int err; ++ ++ ASSERT(sbi); ++ lockdep_assert_held(&nxs_mutex); ++ ++ err = apfs_setup_bdi(sb); ++ if (err) ++ return err; ++ ++ apfs_set_trans_buffer_limit(sb); ++ ++ sbi->s_uid = INVALID_UID; ++ sbi->s_gid = INVALID_GID; ++ err = parse_options(sb, data); ++ if (err) ++ return err; ++ ++ err = apfs_map_volume_super(sb, false /* write */); ++ if (err) ++ return err; ++ ++ err = apfs_check_vol_features(sb); ++ if (err) ++ goto failed_omap; ++ ++ /* ++ * The omap needs to be set before the call to apfs_read_catalog(). ++ * It's also shared with all the snapshots, so it needs to be read ++ * before we switch to the old superblock. 
++ */ ++ err = apfs_first_read_omap(sb); ++ if (err) ++ goto failed_omap; ++ ++ if (sbi->s_snap_name) { ++ err = apfs_switch_to_snapshot(sb); ++ if (err) ++ goto failed_cat; ++ } ++ ++ err = apfs_read_catalog(sb, false /* write */); ++ if (err) ++ goto failed_cat; ++ ++ sb->s_op = &apfs_sops; ++ sb->s_d_op = &apfs_dentry_operations; ++ sb->s_xattr = apfs_xattr_handlers; ++ sb->s_maxbytes = MAX_LFS_FILESIZE; ++ sb->s_time_gran = 1; /* Nanosecond granularity */ ++ ++ sbi->s_private_dir = apfs_iget(sb, APFS_PRIV_DIR_INO_NUM); ++ if (IS_ERR(sbi->s_private_dir)) { ++ apfs_err(sb, "unable to get private-dir inode"); ++ err = PTR_ERR(sbi->s_private_dir); ++ goto failed_private_dir; ++ } ++ ++ root = apfs_iget(sb, APFS_ROOT_DIR_INO_NUM); ++ if (IS_ERR(root)) { ++ apfs_err(sb, "unable to get root inode"); ++ err = PTR_ERR(root); ++ goto failed_mount; ++ } ++ ++ sb->s_root = d_make_root(root); ++ if (!sb->s_root) { ++ apfs_err(sb, "unable to get root dentry"); ++ err = -ENOMEM; ++ goto failed_mount; ++ } ++ ++ INIT_WORK(&sbi->s_orphan_cleanup_work, apfs_orphan_cleanup_work); ++ if (!(sb->s_flags & SB_RDONLY)) { ++ priv = sbi->s_private_dir; ++ if (APFS_I(priv)->i_nchildren) ++ schedule_work(&sbi->s_orphan_cleanup_work); ++ } ++ return 0; ++ ++failed_mount: ++ iput(sbi->s_private_dir); ++failed_private_dir: ++ sbi->s_private_dir = NULL; ++ apfs_node_free(sbi->s_cat_root); ++failed_cat: ++ apfs_put_omap(sbi->s_omap); ++ sbi->s_omap = NULL; ++failed_omap: ++ apfs_unmap_volume_super(sb); ++ return err; ++} ++ ++/** ++ * apfs_strings_are_equal - Compare two possible NULL strings ++ * @str1: the first string ++ * @str2: the second string ++ */ ++static bool apfs_strings_are_equal(const char *str1, const char *str2) ++{ ++ if (str1 == str2) /* Both are NULL */ ++ return true; ++ if (!str1 || !str2) /* One is NULL */ ++ return false; ++ return strcmp(str1, str2) == 0; ++} ++ ++/** ++ * apfs_test_super - Check if two volume superblocks are for the same volume ++ * @sb: superblock 
structure for a currently mounted volume ++ * @data: superblock info for the volume being mounted ++ */ ++static int apfs_test_super(struct super_block *sb, void *data) ++{ ++ struct apfs_sb_info *sbi_1 = data; ++ struct apfs_sb_info *sbi_2 = APFS_SB(sb); ++ ++ if (sbi_1->s_nxi != sbi_2->s_nxi) ++ return false; ++ if (sbi_1->s_vol_nr != sbi_2->s_vol_nr) ++ return false; ++ return apfs_strings_are_equal(sbi_1->s_snap_name, sbi_2->s_snap_name); ++} ++ ++/** ++ * apfs_set_super - Assign the device and an info struct to a superblock ++ * @sb: superblock structure to set ++ * @data: superblock info for the volume being mounted ++ */ ++static int apfs_set_super(struct super_block *sb, void *data) ++{ ++ struct apfs_sb_info *sbi = data; ++ struct apfs_nxsb_info *nxi = sbi->s_nxi; ++ int err; ++ ++ /* ++ * This fake device number will be unique to this volume-snapshot ++ * combination. It gets reported by stat(), so that userland tools can ++ * use it to tell different mountpoints apart. ++ */ ++ err = get_anon_bdev(&sbi->s_anon_dev); ++ if (err) ++ return err; ++ ++ /* ++ * This is the actual device number, shared by all volumes and ++ * snapshots. It gets reported by the mountinfo file, and it seems that ++ * udisks uses it to decide if a device is mounted, so it must be set. ++ */ ++ sb->s_dev = nxi->nx_bdev->bd_dev; ++ ++ sb->s_fs_info = sbi; ++ return 0; ++} ++ ++/* ++ * Wrapper for lookup_bdev() that supports older kernels. 
++ */ ++static int apfs_lookup_bdev(const char *pathname, dev_t *dev) ++{ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 11, 0) ++ struct block_device *bdev; ++ ++ bdev = lookup_bdev(pathname); ++ if (IS_ERR(bdev)) ++ return PTR_ERR(bdev); ++ ++ *dev = bdev->bd_dev; ++ bdput(bdev); ++ return 0; ++#else ++ return lookup_bdev(pathname, dev); ++#endif ++} ++ ++/** ++ * apfs_attach_nxi - Attach container sb info to a volume's sb info ++ * @sbi: new superblock info structure for the volume to be mounted ++ * @dev_name: path name for the container's block device ++ * @mode: FMODE_* mask ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) ++static int apfs_attach_nxi(struct apfs_sb_info *sbi, const char *dev_name, blk_mode_t mode) ++#else ++static int apfs_attach_nxi(struct apfs_sb_info *sbi, const char *dev_name, fmode_t mode) ++#endif ++{ ++ struct apfs_nxsb_info *nxi; ++ dev_t dev = 0; ++ int ret; ++ ++ lockdep_assert_held(&nxs_mutex); ++ ++ ret = apfs_lookup_bdev(dev_name, &dev); ++ if (ret) ++ return ret; ++ ++ nxi = apfs_nx_find_by_dev(dev); ++ if (!nxi) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 9, 0) ++ struct file *file = NULL; ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++ struct bdev_handle *handle = NULL; ++#endif ++ struct block_device *bdev = NULL; ++ ++ nxi = kzalloc(sizeof(*nxi), GFP_KERNEL); ++ if (!nxi) ++ return -ENOMEM; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 9, 0) ++ file = bdev_file_open_by_path(dev_name, mode, &apfs_fs_type, NULL); ++ if (IS_ERR(file)) { ++ kfree(nxi); ++ return PTR_ERR(file); ++ } ++ nxi->nx_bdev_file = file; ++ bdev = file_bdev(file); ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 8, 0) ++ handle = bdev_open_by_path(dev_name, mode, &apfs_fs_type, NULL); ++ if (IS_ERR(handle)) { ++ kfree(nxi); ++ return PTR_ERR(handle); ++ } ++ nxi->nx_bdev_handle = handle; ++ bdev = handle->bdev; ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 
5, 0) ++ bdev = blkdev_get_by_path(dev_name, mode, &apfs_fs_type, NULL); ++#else ++ bdev = blkdev_get_by_path(dev_name, mode, &apfs_fs_type); ++#endif ++ if (IS_ERR(bdev)) { ++ kfree(nxi); ++ return PTR_ERR(bdev); ++ } ++ ++ nxi->nx_bdev = bdev; ++ init_rwsem(&nxi->nx_big_sem); ++ list_add(&nxi->nx_list, &nxs); ++ INIT_LIST_HEAD(&nxi->vol_list); ++ } ++ ++ list_add(&sbi->list, &nxi->vol_list); ++ sbi->s_nxi = nxi; ++ ++nxi->nx_refcnt; ++ return 0; ++} ++ ++/* ++ * This function is a copy of mount_bdev() that allows multiple mounts. ++ */ ++static struct dentry *apfs_mount(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *data) ++{ ++ struct apfs_nxsb_info *nxi; ++ struct super_block *sb; ++ struct apfs_sb_info *sbi; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 5, 0) ++ blk_mode_t mode = sb_open_mode(flags); ++#else ++ fmode_t mode = FMODE_READ | FMODE_EXCL; ++#endif ++ int error = 0; ++ ++ mutex_lock(&nxs_mutex); ++ ++ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); ++ if (!sbi) { ++ error = -ENOMEM; ++ goto out_unlock; ++ } ++ sbi->s_vol_nr = apfs_get_vol_number(data); ++ sbi->s_snap_name = apfs_get_snap_name(data); ++ ++ /* Make sure that snapshots are mounted read-only */ ++ if (sbi->s_snap_name) ++ flags |= SB_RDONLY; ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) ++ if (!(flags & SB_RDONLY)) ++ mode |= FMODE_WRITE; ++#endif ++ ++ error = apfs_attach_nxi(sbi, dev_name, mode); ++ if (error) ++ goto out_free_sbi; ++ nxi = sbi->s_nxi; ++ ++ /* TODO: lockfs stuff? 
Btrfs doesn't seem to care */ ++ sb = sget(fs_type, apfs_test_super, apfs_set_super, flags | SB_NOSEC, sbi); ++ if (IS_ERR(sb)) ++ goto out_unmap_super; ++ ++ if (sb->s_root) { ++ if ((flags ^ sb->s_flags) & SB_RDONLY) { ++ error = -EBUSY; ++ goto out_deactivate_super; ++ } ++ /* Only one superblock per volume */ ++ apfs_free_main_super(sbi); ++ kfree(sbi->s_snap_name); ++ sbi->s_snap_name = NULL; ++ kfree(sbi); ++ sbi = NULL; ++ } else { ++ error = apfs_read_main_super(sb); ++ if (error) ++ goto out_deactivate_super; ++#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 5, 0) ++ sb->s_mode = mode; ++#endif ++ snprintf(sb->s_id, sizeof(sb->s_id), "%xg", sb->s_dev); ++ error = apfs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); ++ if (error) ++ goto out_deactivate_super; ++ sb->s_flags |= SB_ACTIVE; ++ } ++ ++ mutex_unlock(&nxs_mutex); ++ return dget(sb->s_root); ++ ++out_deactivate_super: ++ deactivate_locked_super(sb); ++out_unmap_super: ++ apfs_free_main_super(sbi); ++out_free_sbi: ++ kfree(sbi->s_snap_name); ++ kfree(sbi); ++out_unlock: ++ mutex_unlock(&nxs_mutex); ++ return ERR_PTR(error); ++} ++ ++static void apfs_kill_sb(struct super_block *sb) ++{ ++ dev_t anon_dev = APFS_SB(sb)->s_anon_dev; ++ ++ generic_shutdown_super(sb); ++ free_anon_bdev(anon_dev); ++} ++ ++static struct file_system_type apfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "apfs", ++ .mount = apfs_mount, ++ .kill_sb = apfs_kill_sb, ++ .fs_flags = FS_REQUIRES_DEV, ++}; ++MODULE_ALIAS_FS("apfs"); ++ ++static int __init init_apfs_fs(void) ++{ ++ int err = 0; ++ ++ err = init_inodecache(); ++ if (err) ++ return err; ++ err = register_filesystem(&apfs_fs_type); ++ if (err) ++ destroy_inodecache(); ++ return err; ++} ++ ++static void __exit exit_apfs_fs(void) ++{ ++ unregister_filesystem(&apfs_fs_type); ++ destroy_inodecache(); ++} ++ ++MODULE_AUTHOR("Ernesto A. 
Fernández"); ++MODULE_DESCRIPTION("Apple File System"); ++MODULE_VERSION(GIT_COMMIT); ++MODULE_LICENSE("GPL"); ++module_init(init_apfs_fs) ++module_exit(exit_apfs_fs) +diff --git a/fs/apfs/symlink.c b/fs/apfs/symlink.c +new file mode 100644 +index 000000000..be4f9df8f +--- /dev/null ++++ b/fs/apfs/symlink.c +@@ -0,0 +1,80 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_get_link - Follow a symbolic link ++ * @dentry: dentry for the link ++ * @inode: inode for the link ++ * @done: delayed call to free the returned buffer after use ++ * ++ * Returns a pointer to a buffer containing the target path, or an appropriate ++ * error pointer in case of failure. ++ */ ++static const char *apfs_get_link(struct dentry *dentry, struct inode *inode, ++ struct delayed_call *done) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ char *target = NULL; ++ int err; ++ int size; ++ ++ down_read(&nxi->nx_big_sem); ++ ++ if (!dentry) { ++ err = -ECHILD; ++ goto fail; ++ } ++ ++ size = __apfs_xattr_get(inode, APFS_XATTR_NAME_SYMLINK, ++ NULL /* buffer */, 0 /* size */); ++ if (size < 0) { /* TODO: return a better error code */ ++ apfs_err(sb, "symlink size read failed"); ++ err = size; ++ goto fail; ++ } ++ ++ target = kmalloc(size, GFP_KERNEL); ++ if (!target) { ++ err = -ENOMEM; ++ goto fail; ++ } ++ ++ size = __apfs_xattr_get(inode, APFS_XATTR_NAME_SYMLINK, target, size); ++ if (size < 0) { ++ apfs_err(sb, "symlink read failed"); ++ err = size; ++ goto fail; ++ } ++ if (size == 0 || *(target + size - 1) != 0) { ++ /* Target path must be NULL-terminated */ ++ apfs_err(sb, "bad link target in inode 0x%llx", apfs_ino(inode)); ++ err = -EFSCORRUPTED; ++ goto fail; ++ } ++ ++ up_read(&nxi->nx_big_sem); ++ set_delayed_call(done, kfree_link, target); ++ return target; ++ ++fail: ++ kfree(target); ++ up_read(&nxi->nx_big_sem); ++ 
return ERR_PTR(err); ++} ++ ++const struct inode_operations apfs_symlink_inode_operations = { ++ .get_link = apfs_get_link, ++ .getattr = apfs_getattr, ++ .listxattr = apfs_listxattr, ++ .update_time = apfs_update_time, ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) /* Now this is the default */ ++ .readlink = generic_readlink, ++#endif ++}; +diff --git a/fs/apfs/transaction.c b/fs/apfs/transaction.c +new file mode 100644 +index 000000000..a79a6a564 +--- /dev/null ++++ b/fs/apfs/transaction.c +@@ -0,0 +1,978 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2019 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_checkpoint_end - End the new checkpoint ++ * @sb: filesystem superblock ++ * ++ * Flushes all changes to disk, and commits the new checkpoint by setting the ++ * fletcher checksum on its superblock. Returns 0 on success, or a negative ++ * error code in case of failure. ++ */ ++static int apfs_checkpoint_end(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_obj_phys *obj = &nxi->nx_raw->nx_o; ++ struct buffer_head *bh = NULL; ++ struct inode *bdev_inode = nxi->nx_bdev->bd_inode; ++ struct address_space *bdev_map = bdev_inode->i_mapping; ++ int err; ++ ++ ASSERT(!(sb->s_flags & SB_RDONLY)); ++ ++ bh = apfs_getblk(sb, nxi->nx_bno); ++ if (!bh) { ++ apfs_err(sb, "failed to map new checkpoint superblock"); ++ return -EIO; ++ } ++ obj->o_xid = cpu_to_le64(nxi->nx_xid); ++ apfs_obj_set_csum(sb, obj); ++ memcpy(bh->b_data, obj, sb->s_blocksize); ++ ++ err = filemap_write_and_wait(bdev_map); ++ if (err) ++ goto out; ++ ++ mark_buffer_dirty(bh); ++ err = sync_dirty_buffer(bh); ++ if (err) ++ goto out; ++ ++ err = filemap_write_and_wait(bdev_map); ++out: ++ brelse(bh); ++ bh = NULL; ++ return err; ++} ++ ++/** ++ * apfs_transaction_has_room - Is there enough free space for this transaction? 
++ * @sb: superblock structure ++ * @maxops: maximum operations expected ++ */ ++static bool apfs_transaction_has_room(struct super_block *sb, struct apfs_max_ops maxops) ++{ ++ u64 max_cat_blks, max_omap_blks, max_extref_blks, max_blks; ++ /* I don't know the actual maximum heights, just guessing */ ++ const u64 max_cat_height = 8, max_omap_height = 3, max_extref_height = 3; ++ ++ /* ++ * On the worst possible case (a tree of max_height), each new insertion ++ * to the catalog may both cow and split every node up to the root. The ++ * root though, is only cowed once. ++ */ ++ max_cat_blks = 1 + 2 * maxops.cat * max_cat_height; ++ ++ /* ++ * Any new catalog node could require a new entry in the object map, ++ * because the original might belong to a snapshot. ++ */ ++ max_omap_blks = 1 + 2 * max_cat_blks * max_omap_height; ++ ++ /* The extent reference tree needs a maximum of one record per block */ ++ max_extref_blks = 1 + 2 * maxops.blks * max_extref_height; ++ ++ /* ++ * Ephemeral allocations shouldn't fail, and neither should those in the ++ * internal pool. So just add the actual file blocks and we are done. ++ */ ++ max_blks = max_cat_blks + max_omap_blks + max_extref_blks + maxops.blks; ++ ++ return max_blks < APFS_SM(sb)->sm_free_count; ++} ++ ++/** ++ * apfs_read_single_ephemeral_object - Read a single ephemeral object to memory ++ * @sb: filesystem superblock ++ * @map: checkpoint mapping for the object ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_read_single_ephemeral_object(struct super_block *sb, struct apfs_checkpoint_mapping *map) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_ephemeral_object_info *list = NULL; ++ struct buffer_head *bh = NULL; ++ char *object = NULL; ++ int count; ++ u32 size; ++ u64 bno, oid; ++ int err, i; ++ ++ list = nxi->nx_eph_list; ++ count = nxi->nx_eph_count; ++ if (count >= APFS_EPHEMERAL_LIST_LIMIT) { ++ apfs_err(sb, "too many ephemeral objects?"); ++ return -EOPNOTSUPP; ++ } ++ ++ bno = le64_to_cpu(map->cpm_paddr); ++ oid = le64_to_cpu(map->cpm_oid); ++ size = le32_to_cpu(map->cpm_size); ++ if (size > sb->s_blocksize << 1) { ++ /* ++ * No reason not to support bigger objects, but there has to be ++ * a limit somewhere and this is all I've seen so far. ++ */ ++ apfs_warn(sb, "ephemeral object has more than 2 blocks"); ++ return -EOPNOTSUPP; ++ } ++ if (!size || (size & (sb->s_blocksize - 1))) { ++ apfs_err(sb, "invalid object size (0x%x)", size); ++ return -EFSCORRUPTED; ++ } ++ object = kmalloc(size, GFP_KERNEL); ++ if (!object) ++ return -ENOMEM; ++ ++ for (i = 0; i < size >> sb->s_blocksize_bits; ++i) { ++ bh = apfs_sb_bread(sb, bno + i); ++ if (!bh) { ++ apfs_err(sb, "failed to read ephemeral block"); ++ err = -EIO; ++ goto fail; ++ } ++ memcpy(object + (i << sb->s_blocksize_bits), bh->b_data, sb->s_blocksize); ++ brelse(bh); ++ bh = NULL; ++ } ++ ++ /* ++ * The official reference requires that we always verify ephemeral ++ * checksums on mount, so do it even if the user didn't ask. We should ++ * actually try to mount an older checkpoint when this fails (TODO), ++ * which I guess means that the official driver writes all checkpoint ++ * blocks at once, instead of leaving the superblock for last like we ++ * do. 
++ */ ++ if (!apfs_multiblock_verify_csum(object, size)) { ++ apfs_err(sb, "bad checksum for ephemeral object 0x%llx", oid); ++ err = -EFSBADCRC; ++ goto fail; ++ } ++ ++ list[count].oid = oid; ++ list[count].size = size; ++ list[count].object = object; ++ object = NULL; ++ nxi->nx_eph_count = count + 1; ++ return 0; ++ ++fail: ++ kfree(object); ++ object = NULL; ++ return err; ++} ++ ++/** ++ * apfs_read_single_cpm_block - Read all ephemeral objects in a cpm block ++ * @sb: filesystem superblock ++ * @cpm_bno: block number for the cpm block ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_read_single_cpm_block(struct super_block *sb, u64 cpm_bno) ++{ ++ struct buffer_head *bh = NULL; ++ struct apfs_checkpoint_map_phys *cpm = NULL; ++ u32 map_count; ++ int err, i; ++ ++ bh = apfs_sb_bread(sb, cpm_bno); ++ if (!bh) { ++ apfs_err(sb, "failed to read cpm block"); ++ return -EIO; ++ } ++ if (!apfs_obj_verify_csum(sb, bh)) { ++ /* ++ * The reference seems to imply that we need to check these on ++ * mount, and retry an older checkpoint on failure (TODO). ++ */ ++ apfs_err(sb, "bad checksum for cpm block at 0x%llx", cpm_bno); ++ err = -EFSBADCRC; ++ goto out; ++ } ++ cpm = (struct apfs_checkpoint_map_phys *)bh->b_data; ++ ++ map_count = le32_to_cpu(cpm->cpm_count); ++ if (map_count > apfs_max_maps_per_block(sb)) { ++ apfs_err(sb, "block has too many maps (%d)", map_count); ++ err = -EFSCORRUPTED; ++ goto out; ++ } ++ ++ for (i = 0; i < map_count; ++i) { ++ err = apfs_read_single_ephemeral_object(sb, &cpm->cpm_map[i]); ++ if (err) { ++ apfs_err(sb, "failed to read ephemeral object %u", i); ++ goto out; ++ } ++ } ++ ++out: ++ brelse(bh); ++ cpm = NULL; ++ return err; ++} ++ ++/** ++ * apfs_read_ephemeral_objects - Read all ephemeral objects to memory ++ * @sb: superblock structure ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++static int apfs_read_ephemeral_objects(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_superblock *raw_sb = nxi->nx_raw; ++ u64 desc_base; ++ u32 desc_index, desc_blks, desc_len, i; ++ int err; ++ ++ if (nxi->nx_eph_list) { ++ apfs_alert(sb, "attempt to reread ephemeral object list"); ++ return -EFSCORRUPTED; ++ } ++ nxi->nx_eph_list = kzalloc(APFS_EPHEMERAL_LIST_SIZE, GFP_KERNEL); ++ if (!nxi->nx_eph_list) ++ return -ENOMEM; ++ nxi->nx_eph_count = 0; ++ ++ desc_base = le64_to_cpu(raw_sb->nx_xp_desc_base); ++ desc_index = le32_to_cpu(raw_sb->nx_xp_desc_index); ++ desc_blks = le32_to_cpu(raw_sb->nx_xp_desc_blocks); ++ desc_len = le32_to_cpu(raw_sb->nx_xp_desc_len); ++ ++ /* Last block in the area is superblock; the rest are mapping blocks */ ++ for (i = 0; i < desc_len - 1; ++i) { ++ u64 cpm_bno = desc_base + (desc_index + i) % desc_blks; ++ ++ err = apfs_read_single_cpm_block(sb, cpm_bno); ++ if (err) { ++ apfs_err(sb, "failed to read cpm block %u", i); ++ return err; ++ } ++ } ++ return 0; ++} ++ ++/** ++ * apfs_transaction_start - Begin a new transaction ++ * @sb: superblock structure ++ * @maxops: maximum operations expected ++ * ++ * Also locks the filesystem for writing; returns 0 on success or a negative ++ * error code in case of failure. 
++ */ ++int apfs_transaction_start(struct super_block *sb, struct apfs_max_ops maxops) ++{ ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_vol_transaction *vol_trans = &sbi->s_transaction; ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ int err; ++ ++ down_write(&nxi->nx_big_sem); ++ mutex_lock(&nxs_mutex); /* Don't mount during a transaction */ ++ ++ if (sb->s_flags & SB_RDONLY) { ++ /* A previous transaction has failed; this should be rare */ ++ mutex_unlock(&nxs_mutex); ++ up_write(&nxi->nx_big_sem); ++ return -EROFS; ++ } ++ ++ /* ++ * Ephemeral objects are read only once, kept in memory, and committed ++ * to disk along with each transaction. ++ */ ++ if (!nxi->nx_eph_list) { ++ err = apfs_read_ephemeral_objects(sb); ++ if (err) { ++ mutex_unlock(&nxs_mutex); ++ up_write(&nxi->nx_big_sem); ++ apfs_err(sb, "failed to read the ephemeral objects"); ++ return err; ++ } ++ } ++ ++ if (nx_trans->t_starts_count == 0) { ++ ++nxi->nx_xid; ++ nxi->nx_raw->nx_next_xid = cpu_to_le64(nxi->nx_xid + 1); ++ ++ INIT_LIST_HEAD(&nx_trans->t_inodes); ++ INIT_LIST_HEAD(&nx_trans->t_buffers); ++ ++ err = apfs_read_spaceman(sb); ++ if (err) { ++ apfs_err(sb, "failed to read the spaceman"); ++ goto fail; ++ } ++ } ++ ++ /* Don't start transactions unless we are sure they fit in disk */ ++ if (!apfs_transaction_has_room(sb, maxops)) { ++ /* Commit what we have so far to flush the queues */ ++ nx_trans->t_state |= APFS_NX_TRANS_FORCE_COMMIT; ++ err = apfs_transaction_commit(sb); ++ if (err) { ++ apfs_err(sb, "commit failed"); ++ goto fail; ++ } ++ return -ENOSPC; ++ } ++ ++ if (!vol_trans->t_old_vsb) { ++ vol_trans->t_old_vsb = sbi->s_vobject.o_bh; ++ get_bh(vol_trans->t_old_vsb); ++ ++ /* Backup the old tree roots; the node struct issues make this ugly */ ++ vol_trans->t_old_cat_root = *sbi->s_cat_root; ++ get_bh(vol_trans->t_old_cat_root.object.o_bh); ++ vol_trans->t_old_omap_root = *sbi->s_omap->omap_root; ++ 
get_bh(vol_trans->t_old_omap_root.object.o_bh); ++ ++ err = apfs_map_volume_super(sb, true /* write */); ++ if (err) { ++ apfs_err(sb, "CoW failed for volume super"); ++ goto fail; ++ } ++ ++ /* TODO: don't copy these nodes for transactions that don't use them */ ++ err = apfs_read_omap(sb, true /* write */); ++ if (err) { ++ apfs_err(sb, "CoW failed for omap"); ++ goto fail; ++ } ++ err = apfs_read_catalog(sb, true /* write */); ++ if (err) { ++ apfs_err(sb, "Cow failed for catalog"); ++ goto fail; ++ } ++ } ++ ++ nx_trans->t_starts_count++; ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++/** ++ * apfs_transaction_flush_all_inodes - Flush inode metadata to the buffer heads ++ * @sb: superblock structure ++ * ++ * This messes a lot with the disk layout, so it must be called ahead of time ++ * if we need it to be stable for the rest or the transaction (for example, if ++ * we are setting up a snapshot). ++ */ ++int apfs_transaction_flush_all_inodes(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ int err = 0, curr_err; ++ ++ ASSERT(!(sb->s_flags & SB_RDONLY)); ++ ++ while (!list_empty(&nx_trans->t_inodes)) { ++ struct apfs_inode_info *ai = NULL; ++ struct inode *inode = NULL; ++ ++ ai = list_first_entry(&nx_trans->t_inodes, struct apfs_inode_info, i_list); ++ inode = &ai->vfs_inode; ++ ++ /* This is a bit wasteful if the inode will get deleted */ ++ curr_err = apfs_update_inode(inode, NULL /* new_name */); ++ if (curr_err) ++ err = curr_err; ++ inode->i_state &= ~I_DIRTY_ALL; ++ ++ /* ++ * The same inode may get dirtied again as soon as we release ++ * the lock, and we don't want to miss that. 
++ */ ++ list_del_init(&ai->i_list); ++ ++ nx_trans->t_state |= APFS_NX_TRANS_COMMITTING; ++ mutex_unlock(&nxs_mutex); ++ up_write(&nxi->nx_big_sem); ++ ++ /* Unlocked, so it may call evict() and wait for writeback */ ++ iput(inode); ++ ++ down_write(&nxi->nx_big_sem); ++ mutex_lock(&nxs_mutex); ++ nx_trans->t_state = 0; ++ ++ /* Transaction aborted during writeback, error code is lost */ ++ if (sb->s_flags & SB_RDONLY) { ++ apfs_err(sb, "abort during inode writeback"); ++ return -EROFS; ++ } ++ } ++ ++ return err; ++} ++ ++/** ++ * apfs_write_single_ephemeral_object - Write a single ephemeral object to bh's ++ * @sb: filesystem superblock ++ * @obj_raw: contents of the object ++ * @map: checkpoint mapping for the object, already updated ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_write_single_ephemeral_object(struct super_block *sb, struct apfs_obj_phys *obj_raw, const struct apfs_checkpoint_mapping *map) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct buffer_head *bh = NULL; ++ u64 bno; ++ u32 size; ++ int err, i; ++ ++ bno = le64_to_cpu(map->cpm_paddr); ++ size = le32_to_cpu(map->cpm_size); ++ obj_raw->o_xid = cpu_to_le64(nxi->nx_xid); ++ apfs_multiblock_set_csum((char *)obj_raw, size); ++ ++ for (i = 0; i < size >> sb->s_blocksize_bits; ++i) { ++ bh = apfs_getblk(sb, bno + i); ++ if (!bh) { ++ apfs_err(sb, "failed to map ephemeral block"); ++ return -EIO; ++ } ++ err = apfs_transaction_join(sb, bh); ++ if (err) { ++ brelse(bh); ++ bh = NULL; ++ return err; ++ } ++ memcpy(bh->b_data, (char *)obj_raw + (i << sb->s_blocksize_bits), sb->s_blocksize); ++ brelse(bh); ++ bh = NULL; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_write_ephemeral_objects - Write all ephemeral objects to bh's ++ * @sb: filesystem superblock ++ * ++ * Returns 0 on sucess, or a negative error code in case of failure. 
++ */ ++static int apfs_write_ephemeral_objects(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_superblock *raw_sb = nxi->nx_raw; ++ struct apfs_checkpoint_map_phys *cpm = NULL; ++ struct buffer_head *cpm_bh = NULL; ++ struct apfs_ephemeral_object_info *eph_info = NULL; ++ u64 cpm_bno; ++ u64 desc_base, data_base; ++ u32 desc_index, desc_blks, desc_len, desc_next; ++ u32 data_index, data_blks, data_len, data_next; ++ u32 desc_limit, data_limit; ++ u32 obj_blkcnt; ++ int err, i, cpm_start; ++ ++ if (!nxi->nx_eph_list) { ++ apfs_alert(sb, "missing ephemeral object list"); ++ return -EFSCORRUPTED; ++ } ++ ++ desc_next = le32_to_cpu(raw_sb->nx_xp_desc_next); ++ desc_base = le64_to_cpu(raw_sb->nx_xp_desc_base); ++ desc_index = desc_next; /* New checkpoint */ ++ desc_blks = le32_to_cpu(raw_sb->nx_xp_desc_blocks); ++ desc_len = 0; /* For now */ ++ ++ data_next = le32_to_cpu(raw_sb->nx_xp_data_next); ++ data_base = le64_to_cpu(raw_sb->nx_xp_data_base); ++ data_index = data_next; /* New checkpoint */ ++ data_blks = le32_to_cpu(raw_sb->nx_xp_data_blocks); ++ data_len = 0; /* For now */ ++ ++ /* ++ * The reference doesn't mention anything about this, but I need to ++ * put some sort of a limit or else the rings could wrap around and ++ * corrupt themselves. 
++ */ ++ desc_limit = desc_blks >> 2; ++ data_limit = data_blks >> 2; ++ ++ for (i = 0; i < nxi->nx_eph_count; ++i) { ++ if (data_len == data_limit) { ++ apfs_err(sb, "too many checkpoint data blocks"); ++ return -EFSCORRUPTED; ++ } ++ ++ if (!cpm) { ++ cpm_start = i; ++ if (desc_len == desc_limit) { ++ apfs_err(sb, "too many checkpoint descriptor blocks"); ++ return -EFSCORRUPTED; ++ } ++ cpm_bno = desc_base + (desc_index + desc_len) % desc_blks; ++ err = apfs_create_cpm_block(sb, cpm_bno, &cpm_bh); ++ if (err) { ++ apfs_err(sb, "failed to create cpm block"); ++ return err; ++ } ++ cpm = (void *)cpm_bh->b_data; ++ desc_len += 1; ++ } ++ ++ eph_info = &nxi->nx_eph_list[i]; ++ data_next = (data_index + data_len) % data_blks; ++ obj_blkcnt = eph_info->size >> sb->s_blocksize_bits; ++ if (obj_blkcnt > data_blks - data_next) { ++ /* ++ * This multiblock object does not fit in what's left ++ * of the ring buffer, so move it to the beginning and ++ * leave some empty blocks. ++ */ ++ data_len += data_blks - data_next; ++ data_next = 0; ++ } ++ ++ err = apfs_create_cpoint_map(sb, cpm, eph_info->object, data_base + data_next, eph_info->size); ++ if (err) { ++ if (err == -ENOSPC) ++ cpm->cpm_flags = 0; /* No longer the last */ ++ brelse(cpm_bh); ++ cpm = NULL; ++ cpm_bh = NULL; ++ if (err == -ENOSPC) { ++ --i; ++ continue; ++ } ++ apfs_err(sb, "failed to create cpm map %d", i); ++ return err; ++ } ++ err = apfs_write_single_ephemeral_object(sb, eph_info->object, &cpm->cpm_map[i - cpm_start]); ++ if (err) { ++ brelse(cpm_bh); ++ cpm = NULL; ++ cpm_bh = NULL; ++ apfs_err(sb, "failed to write ephemeral object %d", i); ++ return err; ++ } ++ data_len += obj_blkcnt; ++ } ++ ++ /* ++ * The checkpoint superblock can't be set until the very end of the ++ * transaction commit, but allocate its block here already. 
++ */ ++ nxi->nx_bno = desc_base + (desc_index + desc_len) % desc_blks; ++ desc_len += 1; ++ ++ desc_next = (desc_index + desc_len) % desc_blks; ++ data_next = (data_index + data_len) % data_blks; ++ ++ raw_sb->nx_xp_desc_next = cpu_to_le32(desc_next); ++ raw_sb->nx_xp_desc_index = cpu_to_le32(desc_index); ++ raw_sb->nx_xp_desc_len = cpu_to_le32(desc_len); ++ ++ raw_sb->nx_xp_data_next = cpu_to_le32(data_next); ++ raw_sb->nx_xp_data_index = cpu_to_le32(data_index); ++ raw_sb->nx_xp_data_len = cpu_to_le32(data_len); ++ ++ return 0; ++} ++ ++/** ++ * apfs_transaction_commit_nx - Definitely commit the current transaction ++ * @sb: superblock structure ++ */ ++static int apfs_transaction_commit_nx(struct super_block *sb) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_sb_info *sbi; ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ struct apfs_bh_info *bhi, *tmp; ++ int err = 0; ++ u32 bmap_idx; ++ ++ ASSERT(!(sb->s_flags & SB_RDONLY)); ++ ++ /* Before committing the bhs, write all inode metadata to them */ ++ err = apfs_transaction_flush_all_inodes(sb); ++ if (err) { ++ apfs_err(sb, "failed to flush all inodes"); ++ return err; ++ } ++ ++ /* ++ * Now that nothing else will be freed, flush the last update to the ++ * free queues so that it can be committed to disk along with all the ++ * ephemeral objects. 
++ */ ++ if (sm->sm_free_cache_base) { ++ err = apfs_free_queue_insert_nocache(sb, sm->sm_free_cache_base, sm->sm_free_cache_blkcnt); ++ if (err) { ++ apfs_err(sb, "fq cache flush failed (0x%llx-0x%llx)", sm->sm_free_cache_base, sm->sm_free_cache_blkcnt); ++ return err; ++ } ++ sm->sm_free_cache_base = sm->sm_free_cache_blkcnt = 0; ++ } ++ err = apfs_write_ephemeral_objects(sb); ++ if (err) ++ return err; ++ ++ list_for_each_entry(bhi, &nx_trans->t_buffers, list) { ++ struct buffer_head *bh = bhi->bh; ++ ++ ASSERT(buffer_trans(bh)); ++ ++ if (buffer_csum(bh)) ++ apfs_obj_set_csum(sb, (void *)bh->b_data); ++ ++ clear_buffer_dirty(bh); ++ get_bh(bh); ++ lock_buffer(bh); ++ bh->b_end_io = end_buffer_write_sync; ++ apfs_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); ++ } ++ list_for_each_entry_safe(bhi, tmp, &nx_trans->t_buffers, list) { ++ struct buffer_head *bh = bhi->bh; ++ struct page *page = NULL; ++ bool is_metadata; ++ ++ ASSERT(buffer_trans(bh)); ++ ++ wait_on_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ apfs_err(sb, "failed to write some blocks"); ++ return -EIO; ++ } ++ ++ list_del(&bhi->list); ++ clear_buffer_trans(bh); ++ nx_trans->t_buffers_count--; ++ ++ bh->b_private = NULL; ++ bhi->bh = NULL; ++ kfree(bhi); ++ bhi = NULL; ++ ++ page = bh->b_page; ++ get_page(page); ++ ++ is_metadata = buffer_csum(bh); ++ clear_buffer_csum(bh); ++ put_bh(bh); ++ bh = NULL; ++ ++ /* Future writes to mmapped areas should fault for CoW */ ++ lock_page(page); ++ page_mkclean(page); ++ /* XXX: otherwise, the page cache fills up and crashes the machine */ ++ if (!is_metadata) { ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 19, 0) ++ try_to_free_buffers(page_folio(page)); ++#else ++ try_to_free_buffers(page); ++#endif ++ } ++ unlock_page(page); ++ put_page(page); ++ } ++ err = apfs_checkpoint_end(sb); ++ if (err) { ++ apfs_err(sb, "failed to end the checkpoint"); ++ return err; ++ } ++ ++ list_for_each_entry(sbi, &nxi->vol_list, list) { ++ struct apfs_vol_transaction *vol_trans = 
&sbi->s_transaction; ++ ++ if (!vol_trans->t_old_vsb) ++ continue; ++ ++ brelse(vol_trans->t_old_vsb); ++ vol_trans->t_old_vsb = NULL; ++ ++ /* XXX: forget the buffers for the b-tree roots */ ++ vol_trans->t_old_omap_root.object.data = NULL; ++ brelse(vol_trans->t_old_omap_root.object.o_bh); ++ vol_trans->t_old_omap_root.object.o_bh = NULL; ++ vol_trans->t_old_cat_root.object.data = NULL; ++ brelse(vol_trans->t_old_cat_root.object.o_bh); ++ vol_trans->t_old_cat_root.object.o_bh = NULL; ++ } ++ ++ for (bmap_idx = 0; bmap_idx < APFS_SM(sb)->sm_ip_bmaps_count; ++bmap_idx) { ++ brelse(APFS_SM(sb)->sm_ip_bmaps[bmap_idx]); ++ APFS_SM(sb)->sm_ip_bmaps[bmap_idx] = NULL; ++ } ++ APFS_SM(sb)->sm_raw = NULL; ++ ++ nx_trans->t_starts_count = 0; ++ nx_trans->t_buffers_count = 0; ++ return 0; ++} ++ ++/** ++ * apfs_transaction_need_commit - Evaluate if a commit is required ++ * @sb: superblock structure ++ */ ++static bool apfs_transaction_need_commit(struct super_block *sb) ++{ ++ struct apfs_spaceman *sm = APFS_SM(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ ++ if (nx_trans->t_state & APFS_NX_TRANS_DEFER_COMMIT) { ++ nx_trans->t_state &= ~APFS_NX_TRANS_DEFER_COMMIT; ++ return false; ++ } ++ ++ /* Avoid nested commits on inode writeback */ ++ if (nx_trans->t_state & APFS_NX_TRANS_COMMITTING) ++ return false; ++ ++ if (nx_trans->t_state & APFS_NX_TRANS_FORCE_COMMIT) { ++ nx_trans->t_state = 0; ++ return true; ++ } ++ ++ if (sm) { ++ struct apfs_spaceman_phys *sm_raw = sm->sm_raw; ++ struct apfs_spaceman_free_queue *fq_ip = &sm_raw->sm_fq[APFS_SFQ_IP]; ++ struct apfs_spaceman_free_queue *fq_main = &sm_raw->sm_fq[APFS_SFQ_MAIN]; ++ int buffers_max = APFS_SB(sb)->s_trans_buffers_max; ++ int starts_max = TRANSACTION_STARTS_MAX; ++ int mq_max = TRANSACTION_MAIN_QUEUE_MAX; ++ ++ /* ++ * Try to avoid committing halfway through a data block write, ++ * otherwise the block will be put through copy-on-write again, ++ 
* causing unnecessary fragmentation. ++ */ ++ if (nx_trans->t_state & APFS_NX_TRANS_INCOMPLETE_BLOCK) { ++ buffers_max += 50; ++ starts_max += 50; ++ mq_max += 20; ++ } ++ ++ if (nx_trans->t_buffers_count > buffers_max) ++ return true; ++ if (nx_trans->t_starts_count > starts_max) ++ return true; ++ ++ /* ++ * The internal pool has enough blocks to map the container ++ * exactly 3 times. Don't allow large transactions if we can't ++ * be sure the bitmap changes will all fit. ++ */ ++ if (le64_to_cpu(fq_ip->sfq_count) * 3 > le64_to_cpu(sm_raw->sm_ip_block_count)) ++ return true; ++ ++ /* Don't let the main queue get too full either */ ++ if (le64_to_cpu(fq_main->sfq_count) > mq_max) ++ return true; ++ } ++ ++ return false; ++} ++ ++/** ++ * apfs_transaction_commit - Possibly commit the current transaction ++ * @sb: superblock structure ++ * ++ * On success returns 0 and releases the big filesystem lock. On failure, ++ * returns a negative error code, and the caller is responsibly for aborting ++ * the transaction. 
++ */ ++int apfs_transaction_commit(struct super_block *sb) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ int err = 0; ++ ++ if (apfs_transaction_need_commit(sb)) { ++ err = apfs_transaction_commit_nx(sb); ++ if (err) { ++ apfs_err(sb, "transaction commit failed"); ++ return err; ++ } ++ } ++ ++ mutex_unlock(&nxs_mutex); ++ up_write(&nxi->nx_big_sem); ++ return 0; ++} ++ ++/** ++ * apfs_inode_join_transaction - Add an inode to the current transaction ++ * @sb: superblock structure ++ * @inode: vfs inode to add ++ */ ++void apfs_inode_join_transaction(struct super_block *sb, struct inode *inode) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ struct apfs_inode_info *ai = APFS_I(inode); ++ ++ ASSERT(!(sb->s_flags & SB_RDONLY)); ++ lockdep_assert_held_write(&nxi->nx_big_sem); ++ ++ if (!list_empty(&ai->i_list)) /* Already in the transaction */ ++ return; ++ ++ ihold(inode); ++ list_add(&ai->i_list, &nx_trans->t_inodes); ++} ++ ++/** ++ * apfs_transaction_join - Add a buffer head to the current transaction ++ * @sb: superblock structure ++ * @bh: the buffer head ++ */ ++int apfs_transaction_join(struct super_block *sb, struct buffer_head *bh) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ struct apfs_bh_info *bhi; ++ ++ ASSERT(!(sb->s_flags & SB_RDONLY)); ++ lockdep_assert_held_write(&nxi->nx_big_sem); ++ ++ if (buffer_trans(bh)) /* Already part of the only transaction */ ++ return 0; ++ ++ /* TODO: use a slab cache */ ++ bhi = kzalloc(sizeof(*bhi), GFP_NOFS); ++ if (!bhi) ++ return -ENOMEM; ++ get_bh(bh); ++ bhi->bh = bh; ++ list_add(&bhi->list, &nx_trans->t_buffers); ++ nx_trans->t_buffers_count++; ++ ++ set_buffer_trans(bh); ++ bh->b_private = bhi; ++ return 0; ++} ++ ++/** ++ * apfs_force_readonly - Set the whole container as read-only ++ * @nxi: container superblock info ++ */ ++static void apfs_force_readonly(struct 
apfs_nxsb_info *nxi) ++{ ++ struct apfs_sb_info *sbi = NULL; ++ struct super_block *sb = NULL; ++ ++ list_for_each_entry(sbi, &nxi->vol_list, list) { ++ sb = sbi->s_vobject.sb; ++ sb->s_flags |= SB_RDONLY; ++ } ++ nxi->nx_flags &= ~APFS_READWRITE; ++} ++ ++/** ++ * apfs_transaction_abort - Abort the current transaction ++ * @sb: superblock structure ++ * ++ * Releases the big filesystem lock and clears the in-memory transaction data; ++ * the on-disk changes are irrelevant because the superblock checksum hasn't ++ * been written yet. Leaves the filesystem in read-only state. ++ */ ++void apfs_transaction_abort(struct super_block *sb) ++{ ++ struct apfs_sb_info *sbi; ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_nx_transaction *nx_trans = &nxi->nx_transaction; ++ struct apfs_bh_info *bhi, *tmp; ++ struct apfs_inode_info *ai, *ai_tmp; ++ struct apfs_spaceman *sm = NULL; ++ u32 bmap_idx; ++ ++ if (sb->s_flags & SB_RDONLY) { ++ /* Transaction already aborted, do nothing */ ++ ASSERT(list_empty(&nx_trans->t_inodes)); ++ ASSERT(list_empty(&nx_trans->t_buffers)); ++ mutex_unlock(&nxs_mutex); ++ up_write(&nxi->nx_big_sem); ++ return; ++ } ++ ++ nx_trans->t_state = 0; ++ apfs_err(sb, "aborting transaction"); ++ ++ --nxi->nx_xid; ++ list_for_each_entry_safe(bhi, tmp, &nx_trans->t_buffers, list) { ++ struct buffer_head *bh = bhi->bh; ++ ++ bh->b_private = NULL; ++ clear_buffer_dirty(bh); ++ clear_buffer_trans(bh); ++ clear_buffer_csum(bh); ++ brelse(bh); ++ bhi->bh = NULL; ++ ++ list_del(&bhi->list); ++ kfree(bhi); ++ } ++ ++ /* ++ * TODO: get rid of all this stuff, it makes little sense. Maybe do an ++ * actual read-only remount? 
++ */ ++ list_for_each_entry(sbi, &nxi->vol_list, list) { ++ struct apfs_vol_transaction *vol_trans = &sbi->s_transaction; ++ ++ if (!vol_trans->t_old_vsb) ++ continue; ++ ++ /* Restore volume state for all volumes */ ++ brelse(sbi->s_vobject.o_bh); ++ sbi->s_vobject.o_bh = vol_trans->t_old_vsb; ++ sbi->s_vobject.data = sbi->s_vobject.o_bh->b_data; ++ sbi->s_vobject.block_nr = vol_trans->t_old_vsb->b_blocknr; ++ sbi->s_vsb_raw = (void *)vol_trans->t_old_vsb->b_data; ++ vol_trans->t_old_vsb = NULL; ++ ++ /* XXX: restore the old b-tree root nodes */ ++ brelse(sbi->s_omap->omap_root->object.o_bh); ++ *(sbi->s_omap->omap_root) = vol_trans->t_old_omap_root; ++ vol_trans->t_old_omap_root.object.o_bh = NULL; ++ vol_trans->t_old_omap_root.object.data = NULL; ++ brelse(sbi->s_cat_root->object.o_bh); ++ *(sbi->s_cat_root) = vol_trans->t_old_cat_root; ++ vol_trans->t_old_cat_root.object.o_bh = NULL; ++ vol_trans->t_old_cat_root.object.data = NULL; ++ } ++ ++ sm = APFS_SM(sb); ++ if (sm) { ++ for (bmap_idx = 0; bmap_idx < sm->sm_ip_bmaps_count; ++bmap_idx) { ++ brelse(sm->sm_ip_bmaps[bmap_idx]); ++ sm->sm_ip_bmaps[bmap_idx] = NULL; ++ } ++ APFS_SM(sb)->sm_raw = NULL; ++ } ++ ++ /* ++ * It's not possible to undo in-memory changes from old operations in ++ * the aborted transaction. To avoid corruption, never write again. ++ */ ++ apfs_force_readonly(nxi); ++ ++ mutex_unlock(&nxs_mutex); ++ up_write(&nxi->nx_big_sem); ++ ++ list_for_each_entry_safe(ai, ai_tmp, &nx_trans->t_inodes, i_list) { ++ list_del_init(&ai->i_list); ++ iput(&ai->vfs_inode); ++ } ++} +diff --git a/fs/apfs/unicode.c b/fs/apfs/unicode.c +new file mode 100644 +index 000000000..d17064e1f +--- /dev/null ++++ b/fs/apfs/unicode.c +@@ -0,0 +1,3157 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ * ++ * Routines and data for the normalization of unicode strings. 
++ * Somewhat based on linux/fs/hfsplus/unicode.c ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "unicode.h" ++ ++#define MIN(X, Y) ((X) <= (Y) ? (X) : (Y)) ++ ++/* The arrays of unicode data are defined at the bottom of the file */ ++/* TODO: would a single trie with all the data be more efficient? */ ++static u16 apfs_nfd_trie[]; ++static unicode_t apfs_nfd[]; ++static u16 apfs_cf_trie[]; ++static unicode_t apfs_cf[]; ++static u8 apfs_ccc_trie[]; ++ ++#define TRIE_HEIGHT 5 ++ ++/* A trie node has one child for each possible nibble in the key */ ++#define TRIE_CHILD_SHIFT 4 ++#define TRIE_CHILD_MASK ((1 << TRIE_CHILD_SHIFT) - 1) ++ ++/* A trie value length is stored in the last three bits of its position */ ++#define TRIE_POS_SHIFT 3 ++#define TRIE_SIZE_MASK ((1 << TRIE_POS_SHIFT) - 1) ++ ++/** ++ * apfs_trie_find - Look up a trie value ++ * @trie: trie to search ++ * @key: search key (a unicode character) ++ * @result: on return, this either holds the value (on a ccc lookup) or its ++ * position in the value array (on a cf or nfd lookup). ++ * @is_ccc: true if this a ccc (canonical combining class) lookup ++ * ++ * Returns the length of the value (0 if it doesn't exist). 
++ */ ++static int apfs_trie_find(void *trie, unicode_t key, void *result, bool is_ccc) ++{ ++ int node = 0; ++ int h; ++ ++ for (h = TRIE_HEIGHT - 1; h >= 0; --h) { ++ int child = (key >> (TRIE_CHILD_SHIFT * h)) & TRIE_CHILD_MASK; ++ int child_index = (node << TRIE_CHILD_SHIFT) + child; ++ ++ if (is_ccc) ++ node = ((u8 *)trie)[child_index]; ++ else ++ node = ((u16 *)trie)[child_index]; ++ ++ if (node == 0) { ++ *(u8 *)result = 0; ++ return 0; ++ } ++ } ++ ++ if (is_ccc) { ++ /* ccc values fit in one byte, so no need for a value array */ ++ *(u8 *)result = node; ++ return 1; ++ } ++ ++ *(u16 *)result = node >> TRIE_POS_SHIFT; ++ return node & TRIE_SIZE_MASK; ++} ++ ++/** ++ * apfs_init_unicursor - Initialize an apfs_unicursor structure ++ * @cursor: cursor to initialize ++ * @utf8str: string to normalize ++ * @total_len: length of the string ++ */ ++void apfs_init_unicursor(struct apfs_unicursor *cursor, const char *utf8str, unsigned int total_len) ++{ ++ cursor->utf8curr = utf8str; ++ cursor->total_len = total_len; ++ cursor->length = -1; ++ cursor->last_pos = -1; ++ cursor->last_ccc = 0; ++} ++ ++#define HANGUL_S_BASE 0xac00 ++#define HANGUL_L_BASE 0x1100 ++#define HANGUL_V_BASE 0x1161 ++#define HANGUL_T_BASE 0x11a7 ++#define HANGUL_L_COUNT 19 ++#define HANGUL_V_COUNT 21 ++#define HANGUL_T_COUNT 28 ++#define HANGUL_N_COUNT (HANGUL_V_COUNT * HANGUL_T_COUNT) ++#define HANGUL_S_COUNT (HANGUL_L_COUNT * HANGUL_N_COUNT) ++ ++/** ++ * apfs_is_precomposed_hangul - Check if a character is a Hangul syllable ++ * @utf32char: character to check ++ * ++ * This function was adapted from sample code in section 3.12 of the ++ * Unicode Standard, version 9.0. ++ * ++ * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed ++ * under the Terms of Use in http://www.unicode.org/copyright.html. 
++ */ ++static bool apfs_is_precomposed_hangul(unicode_t utf32char) ++{ ++ int index; ++ ++ index = utf32char - HANGUL_S_BASE; ++ return (index >= 0 && index < HANGUL_S_COUNT); ++} ++ ++/* Signals the end of the normalization for a single character */ ++#define NORM_END (unicode_t)(-1) ++ ++/** ++ * apfs_decompose_hangul - Decompose a Hangul syllable ++ * @utf32char: Hangul syllable to decompose ++ * @off: offset of the wanted character from the decomposition ++ * ++ * Returns the single character at offset @off in the decomposition of ++ * @utf32char, or NORM_END if this offset is past the end. ++ * ++ * This function was adapted from sample code in section 3.12 of the ++ * Unicode Standard, version 9.0. ++ * ++ * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed ++ * under the Terms of Use in http://www.unicode.org/copyright.html. ++ */ ++static unicode_t apfs_decompose_hangul(unicode_t utf32char, int off) ++{ ++ int index; ++ int l, v, t; ++ ++ index = utf32char - HANGUL_S_BASE; ++ ++ l = HANGUL_L_BASE + index / HANGUL_N_COUNT; ++ if (off == 0) ++ return l; ++ ++ v = HANGUL_V_BASE + (index % HANGUL_N_COUNT) / HANGUL_T_COUNT; ++ if (off == 1) ++ return v; ++ ++ t = HANGUL_T_BASE + index % HANGUL_T_COUNT; ++ if (off == 2 && t != HANGUL_T_BASE) ++ return t; ++ ++ return NORM_END; ++} ++ ++/** ++ * apfs_normalize_char - Normalize a unicode character ++ * @utf32char: character to normalize ++ * @off: offset of the wanted character from the normalization ++ * @case_fold: case fold the char? ++ * ++ * Returns the single character at offset @off in the normalization of ++ * @utf32char, or NORM_END if this offset is past the end. 
++ */ ++static unicode_t apfs_normalize_char(unicode_t utf32char, int off, ++ bool case_fold) ++{ ++ int nfd_len; ++ unicode_t *nfd, *cf; ++ u16 pos; ++ int ret; ++ ++ if (apfs_is_precomposed_hangul(utf32char)) /* Hangul has no case */ ++ return apfs_decompose_hangul(utf32char, off); ++ ++ ret = apfs_trie_find(apfs_nfd_trie, utf32char, ++ &pos, false /* is_ccc */); ++ if (!ret) { ++ /* The decomposition is just the same character */ ++ nfd_len = 1; ++ nfd = &utf32char; ++ } else { ++ nfd_len = ret; ++ nfd = &apfs_nfd[pos]; ++ } ++ ++ if (!case_fold) { ++ if (off < nfd_len) ++ return nfd[off]; ++ return NORM_END; ++ } ++ ++ for (; nfd_len > 0; nfd++, nfd_len--) { ++ int cf_len; ++ ++ ret = apfs_trie_find(apfs_cf_trie, *nfd, ++ &pos, false /* is_ccc */); ++ if (!ret) { ++ /* The case folding is just the same character */ ++ cf_len = 1; ++ cf = nfd; ++ } else { ++ cf_len = ret; ++ cf = &apfs_cf[pos]; ++ } ++ ++ if (off < cf_len) ++ return cf[off]; ++ off -= cf_len; ++ } ++ ++ return NORM_END; ++} ++ ++/** ++ * apfs_get_normalization_length - Count the characters until the next starter ++ * @utf8str: string to normalize, may begin with several starters ++ * @total_len: length of the string to normalize ++ * @case_fold: true if the count should consider case folding ++ * ++ * Returns the number of unicode characters in the normalization of the ++ * substring that begins at @utf8str and ends at the first nonconsecutive ++ * starter. Or 0 if the substring has invalid UTF-8. 
++ */ ++static int apfs_get_normalization_length(const char *utf8str, unsigned int total_len, bool case_fold) ++{ ++ int utf8len, pos, norm_len = 0; ++ bool starters_over = false; ++ unicode_t utf32char; ++ ++ while (1) { ++ if (!total_len || !*utf8str) ++ return norm_len; ++ utf8len = utf8_to_utf32(utf8str, MIN(total_len, 4), &utf32char); ++ if (utf8len < 0) /* Invalid unicode; don't normalize anything */ ++ return 0; ++ ++ for (pos = 0;; pos++, norm_len++) { ++ unicode_t utf32norm; ++ u8 ccc; ++ ++ utf32norm = apfs_normalize_char(utf32char, pos, ++ case_fold); ++ if (utf32norm == NORM_END) ++ break; ++ ++ apfs_trie_find(apfs_ccc_trie, utf32norm, &ccc, ++ true /* is_ccc */); ++ ++ if (ccc != 0) ++ starters_over = true; ++ else if (starters_over) /* Reached the next starter */ ++ return norm_len; ++ } ++ utf8str += utf8len; ++ total_len -= utf8len; ++ } ++} ++ ++/** ++ * apfs_normalize_next - Return the next normalized character from a string ++ * @cursor: unicode cursor for the string ++ * @case_fold: case fold the string? ++ * ++ * Sets @cursor->length to the length of the normalized substring between ++ * @cursor->utf8curr and the first nonconsecutive starter. Returns a single ++ * normalized character, setting @cursor->last_ccc and @cursor->last_pos to ++ * its CCC and position in the substring. When the end of the substring is ++ * reached, updates @cursor->utf8curr to point to the beginning of the next ++ * one. ++ * ++ * Returns 0 if the substring has invalid UTF-8. 
++ */ ++unicode_t apfs_normalize_next(struct apfs_unicursor *cursor, bool case_fold) ++{ ++ const char *utf8str = cursor->utf8curr; ++ unsigned int total_len = cursor->total_len; ++ int str_pos, min_pos = -1; ++ unicode_t utf32min = 0; ++ u8 min_ccc; ++ ++new_starter: ++ if (likely(isascii(*utf8str))) { ++ if (!total_len) ++ return 0; ++ cursor->utf8curr = utf8str + 1; ++ cursor->total_len = total_len - 1; ++ if (case_fold) ++ return tolower(*utf8str); ++ return *utf8str; ++ } ++ ++ if (cursor->length < 0) { ++ cursor->length = apfs_get_normalization_length(utf8str, total_len, case_fold); ++ if (cursor->length == 0) ++ return 0; ++ } ++ ++ str_pos = 0; ++ min_ccc = 0xFF; /* Above all possible ccc's */ ++ ++ while (1) { ++ unicode_t utf32char; ++ int utf8len, pos; ++ ++ utf8len = utf8_to_utf32(utf8str, MIN(total_len, 4), &utf32char); ++ for (pos = 0;; pos++, str_pos++) { ++ unicode_t utf32norm; ++ u8 ccc; ++ ++ utf32norm = apfs_normalize_char(utf32char, pos, ++ case_fold); ++ if (utf32norm == NORM_END) ++ break; ++ ++ apfs_trie_find(apfs_ccc_trie, utf32norm, &ccc, ++ true /* is_ccc */); ++ ++ if (ccc >= min_ccc || ccc < cursor->last_ccc) ++ continue; ++ if (ccc > cursor->last_ccc || ++ str_pos > cursor->last_pos) { ++ utf32min = utf32norm; ++ min_ccc = ccc; ++ min_pos = str_pos; ++ } ++ } ++ ++ utf8str += utf8len; ++ total_len -= utf8len; ++ if (str_pos == cursor->length) { ++ /* Reached the following starter */ ++ if (min_ccc != 0xFF) { ++ /* Not done with this substring yet */ ++ cursor->last_ccc = min_ccc; ++ cursor->last_pos = min_pos; ++ return utf32min; ++ } ++ /* Continue from the next starter */ ++ apfs_init_unicursor(cursor, utf8str, total_len); ++ goto new_starter; ++ } ++ } ++} ++ ++/* ++ * The following arrays were built with data provided by the Unicode Standard, ++ * version 9.0. ++ * ++ * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed ++ * under the Terms of Use in http://www.unicode.org/copyright.html. 
++ */ ++ ++static u16 apfs_nfd_trie[] = { ++ /* Node for range 0x_____ */ ++ 0x0001, 0x0002, 0x0003, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0____ */ ++ 0x0004, 0x0005, 0x0006, 0x0007, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0008, ++ /* Node for range 0x1____ */ ++ 0x0000, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000a, 0x0000, 0x0000, ++ /* Node for range 0x2____ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000b, ++ /* Node for range 0x00___ */ ++ 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0000, 0x0011, 0x0000, ++ 0x0000, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0000, 0x0017, ++ /* Node for range 0x01___ */ ++ 0x0018, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0019, 0x0000, 0x0000, 0x001a, 0x001b, ++ /* Node for range 0x02___ */ ++ 0x001c, 0x001d, 0x001e, 0x001f, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0020, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x03___ */ ++ 0x0021, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0f___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0022, 0x0023, 0x0024, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x11___ */ ++ 0x0025, 0x0026, 0x0000, 0x0027, 0x0028, 0x0029, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1d___ */ ++ 0x0000, 0x002a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x2f___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x002b, 0x002c, 0x002d, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x000__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x002e, 0x002f, 0x0030, 0x0031, ++ /* Node for range 0x001__ */ ++ 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, ++ 0x0000, 0x0000, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, ++ /* Node for range 0x002__ */ ++ 0x0040, 0x0041, 0x0042, 0x0043, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x003__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0044, 0x0000, 0x0000, 0x0045, ++ 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x0000, 0x0000, ++ /* Node for range 0x004__ */ ++ 0x004c, 0x004d, 0x0000, 0x004e, 0x0000, 0x004f, 0x0000, 0x0050, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0051, 0x0052, 0x0053, 0x0054, ++ /* Node for range 0x006__ */ ++ 0x0000, 0x0000, 0x0055, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0056, 0x0057, 0x0000, 0x0000, ++ /* Node for range 0x009__ */ ++ 0x0000, 0x0000, 0x0058, 0x0059, 0x0000, 0x005a, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x005b, 0x005c, 0x0000, 0x0000, ++ /* Node for range 0x00a__ */ ++ 0x0000, 0x0000, 0x0000, 0x005d, 0x0000, 0x005e, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00b__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x005f, 0x0060, 0x0000, 0x0000, ++ 0x0000, 0x0061, 0x0000, 0x0000, 0x0062, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00c__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0063, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0064, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00d__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0065, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0066, 0x0000, 0x0000, ++ /* Node for range 0x00f__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0067, 0x0068, 0x0069, 0x006a, ++ 0x006b, 0x006c, 0x006d, 0x006e, 
0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x010__ */ ++ 0x0000, 0x0000, 0x006f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01b__ */ ++ 0x0070, 0x0071, 0x0000, 0x0072, 0x0073, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01e__ */ ++ 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, ++ 0x007c, 0x007d, 0x007e, 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, ++ /* Node for range 0x01f__ */ ++ 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, ++ 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 0x0093, ++ /* Node for range 0x020__ */ ++ 0x0094, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x021__ */ ++ 0x0000, 0x0000, 0x0095, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0096, 0x0097, 0x0000, 0x0098, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x022__ */ ++ 0x0099, 0x0000, 0x009a, 0x0000, 0x009b, 0x0000, 0x009c, 0x009d, ++ 0x009e, 0x0000, 0x009f, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, ++ /* Node for range 0x023__ */ ++ 0x0000, 0x0000, 0x00a1, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x02a__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00a2, 0x0000, 0x0000, ++ /* Node for range 0x030__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x00a3, 0x00a4, 0x00a5, 0x00a6, ++ 0x0000, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x0000, 0x00ac, ++ /* Node for range 0x0f9__ */ ++ 0x00ad, 0x00ae, 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, ++ 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, ++ /* Node for range 0x0fa__ */ ++ 0x00bd, 0x00be, 0x00bf, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, ++ 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 
0x00ca, 0x0000, 0x0000, ++ /* Node for range 0x0fb__ */ ++ 0x0000, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x110__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x00cf, 0x00d0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x111__ */ ++ 0x0000, 0x0000, 0x00d1, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x113__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x00d2, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x114__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x00d3, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x115__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x00d4, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1d1__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d5, 0x00d6, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x00d7, 0x00d8, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x2f8__ */ ++ 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 0x00e0, ++ 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, ++ /* Node for range 0x2f9__ */ ++ 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, ++ 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, ++ /* Node for range 0x2fa__ */ ++ 0x00f9, 0x00fa, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x000c_ */ ++ 0x0002, 0x0012, 0x0022, 0x0032, 0x0042, 0x0052, 0x0000, 0x0062, ++ 0x0072, 0x0082, 0x0092, 0x00a2, 0x00b2, 0x00c2, 0x00d2, 0x00e2, ++ /* Node for range 0x000d_ */ ++ 0x0000, 0x00f2, 0x0102, 0x0112, 0x0122, 0x0132, 0x0142, 0x0000, ++ 0x0000, 0x0152, 0x0162, 0x0172, 0x0182, 0x0192, 
0x0000, 0x0000, ++ /* Node for range 0x000e_ */ ++ 0x01a2, 0x01b2, 0x01c2, 0x01d2, 0x01e2, 0x01f2, 0x0000, 0x0202, ++ 0x0212, 0x0222, 0x0232, 0x0242, 0x0252, 0x0262, 0x0272, 0x0282, ++ /* Node for range 0x000f_ */ ++ 0x0000, 0x0292, 0x02a2, 0x02b2, 0x02c2, 0x02d2, 0x02e2, 0x0000, ++ 0x0000, 0x02f2, 0x0302, 0x0312, 0x0322, 0x0332, 0x0000, 0x0342, ++ /* Node for range 0x0010_ */ ++ 0x0352, 0x0362, 0x0372, 0x0382, 0x0392, 0x03a2, 0x03b2, 0x03c2, ++ 0x03d2, 0x03e2, 0x03f2, 0x0402, 0x0412, 0x0422, 0x0432, 0x0442, ++ /* Node for range 0x0011_ */ ++ 0x0000, 0x0000, 0x0452, 0x0462, 0x0472, 0x0482, 0x0492, 0x04a2, ++ 0x04b2, 0x04c2, 0x04d2, 0x04e2, 0x04f2, 0x0502, 0x0512, 0x0522, ++ /* Node for range 0x0012_ */ ++ 0x0532, 0x0542, 0x0552, 0x0562, 0x0572, 0x0582, 0x0000, 0x0000, ++ 0x0592, 0x05a2, 0x05b2, 0x05c2, 0x05d2, 0x05e2, 0x05f2, 0x0602, ++ /* Node for range 0x0013_ */ ++ 0x0612, 0x0000, 0x0000, 0x0000, 0x0622, 0x0632, 0x0642, 0x0652, ++ 0x0000, 0x0662, 0x0672, 0x0682, 0x0692, 0x06a2, 0x06b2, 0x0000, ++ /* Node for range 0x0014_ */ ++ 0x0000, 0x0000, 0x0000, 0x06c2, 0x06d2, 0x06e2, 0x06f2, 0x0702, ++ 0x0712, 0x0000, 0x0000, 0x0000, 0x0722, 0x0732, 0x0742, 0x0752, ++ /* Node for range 0x0015_ */ ++ 0x0762, 0x0772, 0x0000, 0x0000, 0x0782, 0x0792, 0x07a2, 0x07b2, ++ 0x07c2, 0x07d2, 0x07e2, 0x07f2, 0x0802, 0x0812, 0x0822, 0x0832, ++ /* Node for range 0x0016_ */ ++ 0x0842, 0x0852, 0x0862, 0x0872, 0x0882, 0x0892, 0x0000, 0x0000, ++ 0x08a2, 0x08b2, 0x08c2, 0x08d2, 0x08e2, 0x08f2, 0x0902, 0x0912, ++ /* Node for range 0x0017_ */ ++ 0x0922, 0x0932, 0x0942, 0x0952, 0x0962, 0x0972, 0x0982, 0x0992, ++ 0x09a2, 0x09b2, 0x09c2, 0x09d2, 0x09e2, 0x09f2, 0x0a02, 0x0000, ++ /* Node for range 0x001a_ */ ++ 0x0a12, 0x0a22, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0a32, ++ /* Node for range 0x001b_ */ ++ 0x0a42, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, ++ /* Node for range 0x001c_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0a52, 0x0a62, 0x0a72, ++ /* Node for range 0x001d_ */ ++ 0x0a82, 0x0a92, 0x0aa2, 0x0ab2, 0x0ac2, 0x0ad3, 0x0aeb, 0x0b03, ++ 0x0b1b, 0x0b33, 0x0b4b, 0x0b63, 0x0b7b, 0x0000, 0x0b93, 0x0bab, ++ /* Node for range 0x001e_ */ ++ 0x0bc3, 0x0bdb, 0x0bf2, 0x0c02, 0x0000, 0x0000, 0x0c12, 0x0c22, ++ 0x0c32, 0x0c42, 0x0c52, 0x0c62, 0x0c73, 0x0c8b, 0x0ca2, 0x0cb2, ++ /* Node for range 0x001f_ */ ++ 0x0cc2, 0x0000, 0x0000, 0x0000, 0x0cd2, 0x0ce2, 0x0000, 0x0000, ++ 0x0cf2, 0x0d02, 0x0d13, 0x0d2b, 0x0d42, 0x0d52, 0x0d62, 0x0d72, ++ /* Node for range 0x0020_ */ ++ 0x0d82, 0x0d92, 0x0da2, 0x0db2, 0x0dc2, 0x0dd2, 0x0de2, 0x0df2, ++ 0x0e02, 0x0e12, 0x0e22, 0x0e32, 0x0e42, 0x0e52, 0x0e62, 0x0e72, ++ /* Node for range 0x0021_ */ ++ 0x0e82, 0x0e92, 0x0ea2, 0x0eb2, 0x0ec2, 0x0ed2, 0x0ee2, 0x0ef2, ++ 0x0f02, 0x0f12, 0x0f22, 0x0f32, 0x0000, 0x0000, 0x0f42, 0x0f52, ++ /* Node for range 0x0022_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0f62, 0x0f72, ++ 0x0f82, 0x0f92, 0x0fa3, 0x0fbb, 0x0fd3, 0x0feb, 0x1002, 0x1012, ++ /* Node for range 0x0023_ */ ++ 0x1023, 0x103b, 0x1052, 0x1062, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0034_ */ ++ 0x1071, 0x1079, 0x0000, 0x1081, 0x108a, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0037_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x1099, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a1, 0x0000, ++ /* Node for range 0x0038_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x10aa, 0x10ba, 0x10c9, ++ 0x10d2, 0x10e2, 0x10f2, 0x0000, 0x1102, 0x0000, 0x1112, 0x1122, ++ /* Node for range 0x0039_ */ ++ 0x1133, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
++ /* Node for range 0x003a_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x114a, 0x115a, 0x116a, 0x117a, 0x118a, 0x119a, ++ /* Node for range 0x003b_ */ ++ 0x11ab, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x003c_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x11c2, 0x11d2, 0x11e2, 0x11f2, 0x1202, 0x0000, ++ /* Node for range 0x003d_ */ ++ 0x0000, 0x0000, 0x0000, 0x1212, 0x1222, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0040_ */ ++ 0x1232, 0x1242, 0x0000, 0x1252, 0x0000, 0x0000, 0x0000, 0x1262, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x1272, 0x1282, 0x1292, 0x0000, ++ /* Node for range 0x0041_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x12a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0043_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x12b2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0045_ */ ++ 0x12c2, 0x12d2, 0x0000, 0x12e2, 0x0000, 0x0000, 0x0000, 0x12f2, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x1302, 0x1312, 0x1322, 0x0000, ++ /* Node for range 0x0047_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1332, 0x1342, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x004c_ */ ++ 0x0000, 0x1352, 0x1362, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x004d_ */ ++ 0x1372, 0x1382, 0x1392, 0x13a2, 0x0000, 0x0000, 0x13b2, 0x13c2, ++ 0x0000, 0x0000, 0x13d2, 0x13e2, 0x13f2, 0x1402, 0x1412, 0x1422, ++ /* Node for range 0x004e_ */ ++ 0x0000, 0x0000, 0x1432, 0x1442, 0x1452, 0x1462, 0x1472, 0x1482, ++ 0x0000, 0x0000, 0x1492, 0x14a2, 0x14b2, 0x14c2, 0x14d2, 0x14e2, ++ /* 
Node for range 0x004f_ */ ++ 0x14f2, 0x1502, 0x1512, 0x1522, 0x1532, 0x1542, 0x0000, 0x0000, ++ 0x1552, 0x1562, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0062_ */ ++ 0x0000, 0x0000, 0x1572, 0x1582, 0x1592, 0x15a2, 0x15b2, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x006c_ */ ++ 0x15c2, 0x0000, 0x15d2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x006d_ */ ++ 0x0000, 0x0000, 0x0000, 0x15e2, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0092_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x15f2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0093_ */ ++ 0x0000, 0x1602, 0x0000, 0x0000, 0x1612, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0095_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x1622, 0x1632, 0x1642, 0x1652, 0x1662, 0x1672, 0x1682, 0x1692, ++ /* Node for range 0x009c_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x16a2, 0x16b2, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x009d_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x16c2, 0x16d2, 0x0000, 0x16e2, ++ /* Node for range 0x00a3_ */ ++ 0x0000, 0x0000, 0x0000, 0x16f2, 0x0000, 0x0000, 0x1702, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00a5_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x1712, 0x1722, 0x1732, 0x0000, 0x0000, 0x1742, 0x0000, ++ /* Node for range 0x00b4_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x1752, 0x0000, 0x0000, 0x1762, 0x1772, 0x0000, 0x0000, 0x0000, ++ /* Node 
for range 0x00b5_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x1782, 0x1792, 0x0000, 0x0000, ++ /* Node for range 0x00b9_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x17a2, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00bc_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x17b2, 0x17c2, 0x17d2, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00c4_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x17e2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00cc_ */ ++ 0x17f2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1802, ++ 0x1812, 0x0000, 0x1822, 0x1833, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00d4_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x184a, 0x185a, 0x186a, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00dd_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x187a, 0x0000, 0x188a, 0x189b, 0x18b2, 0x0000, ++ /* Node for range 0x00f4_ */ ++ 0x0000, 0x0000, 0x0000, 0x18c2, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x18d2, 0x0000, 0x0000, ++ /* Node for range 0x00f5_ */ ++ 0x0000, 0x0000, 0x18e2, 0x0000, 0x0000, 0x0000, 0x0000, 0x18f2, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x1902, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00f6_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x1912, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00f7_ */ ++ 0x0000, 0x0000, 0x0000, 0x1922, 0x0000, 0x1932, 0x1942, 0x0000, ++ 0x1952, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00f8_ */ ++ 0x0000, 0x1962, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for 
range 0x00f9_ */ ++ 0x0000, 0x0000, 0x0000, 0x1972, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1982, 0x0000, 0x0000, ++ /* Node for range 0x00fa_ */ ++ 0x0000, 0x0000, 0x1992, 0x0000, 0x0000, 0x0000, 0x0000, 0x19a2, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x19b2, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x00fb_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x19c2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0102_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x19d2, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01b0_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x19e2, 0x0000, ++ 0x19f2, 0x0000, 0x1a02, 0x0000, 0x1a12, 0x0000, 0x1a22, 0x0000, ++ /* Node for range 0x01b1_ */ ++ 0x0000, 0x0000, 0x1a32, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01b3_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x1a42, 0x0000, 0x1a52, 0x0000, 0x0000, ++ /* Node for range 0x01b4_ */ ++ 0x1a62, 0x1a72, 0x0000, 0x1a82, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01e0_ */ ++ 0x1a92, 0x1aa2, 0x1ab2, 0x1ac2, 0x1ad2, 0x1ae2, 0x1af2, 0x1b02, ++ 0x1b13, 0x1b2b, 0x1b42, 0x1b52, 0x1b62, 0x1b72, 0x1b82, 0x1b92, ++ /* Node for range 0x01e1_ */ ++ 0x1ba2, 0x1bb2, 0x1bc2, 0x1bd2, 0x1be3, 0x1bfb, 0x1c13, 0x1c2b, ++ 0x1c42, 0x1c52, 0x1c62, 0x1c72, 0x1c83, 0x1c9b, 0x1cb2, 0x1cc2, ++ /* Node for range 0x01e2_ */ ++ 0x1cd2, 0x1ce2, 0x1cf2, 0x1d02, 0x1d12, 0x1d22, 0x1d32, 0x1d42, ++ 0x1d52, 0x1d62, 0x1d72, 0x1d82, 0x1d92, 0x1da2, 0x1db3, 0x1dcb, ++ /* Node for range 0x01e3_ */ ++ 0x1de2, 0x1df2, 0x1e02, 0x1e12, 0x1e22, 0x1e32, 0x1e42, 0x1e52, ++ 0x1e63, 0x1e7b, 0x1e92, 0x1ea2, 0x1eb2, 0x1ec2, 0x1ed2, 0x1ee2, ++ /* Node for range 
0x01e4_ */ ++ 0x1ef2, 0x1f02, 0x1f12, 0x1f22, 0x1f32, 0x1f42, 0x1f52, 0x1f62, ++ 0x1f72, 0x1f82, 0x1f92, 0x1fa2, 0x1fb3, 0x1fcb, 0x1fe3, 0x1ffb, ++ /* Node for range 0x01e5_ */ ++ 0x2013, 0x202b, 0x2043, 0x205b, 0x2072, 0x2082, 0x2092, 0x20a2, ++ 0x20b2, 0x20c2, 0x20d2, 0x20e2, 0x20f3, 0x210b, 0x2122, 0x2132, ++ /* Node for range 0x01e6_ */ ++ 0x2142, 0x2152, 0x2162, 0x2172, 0x2183, 0x219b, 0x21b3, 0x21cb, ++ 0x21e3, 0x21fb, 0x2212, 0x2222, 0x2232, 0x2242, 0x2252, 0x2262, ++ /* Node for range 0x01e7_ */ ++ 0x2272, 0x2282, 0x2292, 0x22a2, 0x22b2, 0x22c2, 0x22d2, 0x22e2, ++ 0x22f3, 0x230b, 0x2323, 0x233b, 0x2352, 0x2362, 0x2372, 0x2382, ++ /* Node for range 0x01e8_ */ ++ 0x2392, 0x23a2, 0x23b2, 0x23c2, 0x23d2, 0x23e2, 0x23f2, 0x2402, ++ 0x2412, 0x2422, 0x2432, 0x2442, 0x2452, 0x2462, 0x2472, 0x2482, ++ /* Node for range 0x01e9_ */ ++ 0x2492, 0x24a2, 0x24b2, 0x24c2, 0x24d2, 0x24e2, 0x24f2, 0x2502, ++ 0x2512, 0x2522, 0x0000, 0x2532, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01ea_ */ ++ 0x2542, 0x2552, 0x2562, 0x2572, 0x2583, 0x259b, 0x25b3, 0x25cb, ++ 0x25e3, 0x25fb, 0x2613, 0x262b, 0x2643, 0x265b, 0x2673, 0x268b, ++ /* Node for range 0x01eb_ */ ++ 0x26a3, 0x26bb, 0x26d3, 0x26eb, 0x2703, 0x271b, 0x2733, 0x274b, ++ 0x2762, 0x2772, 0x2782, 0x2792, 0x27a2, 0x27b2, 0x27c3, 0x27db, ++ /* Node for range 0x01ec_ */ ++ 0x27f3, 0x280b, 0x2823, 0x283b, 0x2853, 0x286b, 0x2883, 0x289b, ++ 0x28b2, 0x28c2, 0x28d2, 0x28e2, 0x28f2, 0x2902, 0x2912, 0x2922, ++ /* Node for range 0x01ed_ */ ++ 0x2933, 0x294b, 0x2963, 0x297b, 0x2993, 0x29ab, 0x29c3, 0x29db, ++ 0x29f3, 0x2a0b, 0x2a23, 0x2a3b, 0x2a53, 0x2a6b, 0x2a83, 0x2a9b, ++ /* Node for range 0x01ee_ */ ++ 0x2ab3, 0x2acb, 0x2ae3, 0x2afb, 0x2b12, 0x2b22, 0x2b32, 0x2b42, ++ 0x2b53, 0x2b6b, 0x2b83, 0x2b9b, 0x2bb3, 0x2bcb, 0x2be3, 0x2bfb, ++ /* Node for range 0x01ef_ */ ++ 0x2c13, 0x2c2b, 0x2c42, 0x2c52, 0x2c62, 0x2c72, 0x2c82, 0x2c92, ++ 0x2ca2, 0x2cb2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01f0_ 
*/ ++ 0x2cc2, 0x2cd2, 0x2ce3, 0x2cfb, 0x2d13, 0x2d2b, 0x2d43, 0x2d5b, ++ 0x2d72, 0x2d82, 0x2d93, 0x2dab, 0x2dc3, 0x2ddb, 0x2df3, 0x2e0b, ++ /* Node for range 0x01f1_ */ ++ 0x2e22, 0x2e32, 0x2e43, 0x2e5b, 0x2e73, 0x2e8b, 0x0000, 0x0000, ++ 0x2ea2, 0x2eb2, 0x2ec3, 0x2edb, 0x2ef3, 0x2f0b, 0x0000, 0x0000, ++ /* Node for range 0x01f2_ */ ++ 0x2f22, 0x2f32, 0x2f43, 0x2f5b, 0x2f73, 0x2f8b, 0x2fa3, 0x2fbb, ++ 0x2fd2, 0x2fe2, 0x2ff3, 0x300b, 0x3023, 0x303b, 0x3053, 0x306b, ++ /* Node for range 0x01f3_ */ ++ 0x3082, 0x3092, 0x30a3, 0x30bb, 0x30d3, 0x30eb, 0x3103, 0x311b, ++ 0x3132, 0x3142, 0x3153, 0x316b, 0x3183, 0x319b, 0x31b3, 0x31cb, ++ /* Node for range 0x01f4_ */ ++ 0x31e2, 0x31f2, 0x3203, 0x321b, 0x3233, 0x324b, 0x0000, 0x0000, ++ 0x3262, 0x3272, 0x3283, 0x329b, 0x32b3, 0x32cb, 0x0000, 0x0000, ++ /* Node for range 0x01f5_ */ ++ 0x32e2, 0x32f2, 0x3303, 0x331b, 0x3333, 0x334b, 0x3363, 0x337b, ++ 0x0000, 0x3392, 0x0000, 0x33a3, 0x0000, 0x33bb, 0x0000, 0x33d3, ++ /* Node for range 0x01f6_ */ ++ 0x33ea, 0x33fa, 0x340b, 0x3423, 0x343b, 0x3453, 0x346b, 0x3483, ++ 0x349a, 0x34aa, 0x34bb, 0x34d3, 0x34eb, 0x3503, 0x351b, 0x3533, ++ /* Node for range 0x01f7_ */ ++ 0x354a, 0x355a, 0x356a, 0x357a, 0x358a, 0x359a, 0x35aa, 0x35ba, ++ 0x35ca, 0x35da, 0x35ea, 0x35fa, 0x360a, 0x361a, 0x0000, 0x0000, ++ /* Node for range 0x01f8_ */ ++ 0x362b, 0x3643, 0x365c, 0x367c, 0x369c, 0x36bc, 0x36dc, 0x36fc, ++ 0x371b, 0x3733, 0x374c, 0x376c, 0x378c, 0x37ac, 0x37cc, 0x37ec, ++ /* Node for range 0x01f9_ */ ++ 0x380b, 0x3823, 0x383c, 0x385c, 0x387c, 0x389c, 0x38bc, 0x38dc, ++ 0x38fb, 0x3913, 0x392c, 0x394c, 0x396c, 0x398c, 0x39ac, 0x39cc, ++ /* Node for range 0x01fa_ */ ++ 0x39eb, 0x3a03, 0x3a1c, 0x3a3c, 0x3a5c, 0x3a7c, 0x3a9c, 0x3abc, ++ 0x3adb, 0x3af3, 0x3b0c, 0x3b2c, 0x3b4c, 0x3b6c, 0x3b8c, 0x3bac, ++ /* Node for range 0x01fb_ */ ++ 0x3bca, 0x3bda, 0x3beb, 0x3c02, 0x3c13, 0x0000, 0x3c2a, 0x3c3b, ++ 0x3c52, 0x3c62, 0x3c72, 0x3c82, 0x3c92, 0x0000, 0x3ca1, 0x0000, ++ /* Node for range 0x01fc_ */ ++ 
0x0000, 0x3caa, 0x3cbb, 0x3cd2, 0x3ce3, 0x0000, 0x3cfa, 0x3d0b, ++ 0x3d22, 0x3d32, 0x3d42, 0x3d52, 0x3d62, 0x3d72, 0x3d82, 0x3d92, ++ /* Node for range 0x01fd_ */ ++ 0x3da2, 0x3db2, 0x3dc3, 0x3ddb, 0x0000, 0x0000, 0x3df2, 0x3e03, ++ 0x3e1a, 0x3e2a, 0x3e3a, 0x3e4a, 0x0000, 0x3e5a, 0x3e6a, 0x3e7a, ++ /* Node for range 0x01fe_ */ ++ 0x3e8a, 0x3e9a, 0x3eab, 0x3ec3, 0x3eda, 0x3eea, 0x3efa, 0x3f0b, ++ 0x3f22, 0x3f32, 0x3f42, 0x3f52, 0x3f62, 0x3f72, 0x3f82, 0x3f91, ++ /* Node for range 0x01ff_ */ ++ 0x0000, 0x0000, 0x3f9b, 0x3fb2, 0x3fc3, 0x0000, 0x3fda, 0x3feb, ++ 0x4002, 0x4012, 0x4022, 0x4032, 0x4042, 0x4051, 0x0000, 0x0000, ++ /* Node for range 0x0200_ */ ++ 0x4059, 0x4061, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0212_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x4069, 0x0000, ++ 0x0000, 0x0000, 0x4071, 0x407a, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0219_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x408a, 0x409a, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x021a_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x40aa, 0x0000, ++ /* Node for range 0x021c_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x40ba, 0x40ca, 0x40da, ++ /* Node for range 0x0220_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x40ea, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x40fa, 0x0000, 0x0000, 0x410a, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0222_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x411a, 0x0000, 0x412a, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0224_ */ ++ 0x0000, 0x413a, 0x0000, 0x0000, 0x414a, 0x0000, 0x0000, 0x415a, ++ 0x0000, 0x416a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0226_ */ ++ 0x417a, 
0x0000, 0x418a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x419a, 0x41aa, 0x41ba, ++ /* Node for range 0x0227_ */ ++ 0x41ca, 0x41da, 0x0000, 0x0000, 0x41ea, 0x41fa, 0x0000, 0x0000, ++ 0x420a, 0x421a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0228_ */ ++ 0x422a, 0x423a, 0x0000, 0x0000, 0x424a, 0x425a, 0x0000, 0x0000, ++ 0x426a, 0x427a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x022a_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x428a, 0x429a, 0x42aa, 0x42ba, ++ /* Node for range 0x022e_ */ ++ 0x42ca, 0x42da, 0x42ea, 0x42fa, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x430a, 0x431a, 0x432a, 0x433a, 0x0000, 0x0000, ++ /* Node for range 0x0232_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x4349, 0x4351, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x02ad_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x435a, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0304_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x436a, 0x0000, 0x437a, 0x0000, ++ /* Node for range 0x0305_ */ ++ 0x438a, 0x0000, 0x439a, 0x0000, 0x43aa, 0x0000, 0x43ba, 0x0000, ++ 0x43ca, 0x0000, 0x43da, 0x0000, 0x43ea, 0x0000, 0x43fa, 0x0000, ++ /* Node for range 0x0306_ */ ++ 0x440a, 0x0000, 0x441a, 0x0000, 0x0000, 0x442a, 0x0000, 0x443a, ++ 0x0000, 0x444a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0307_ */ ++ 0x445a, 0x446a, 0x0000, 0x447a, 0x448a, 0x0000, 0x449a, 0x44aa, ++ 0x0000, 0x44ba, 0x44ca, 0x0000, 0x44da, 0x44ea, 0x0000, 0x0000, ++ /* Node for range 0x0309_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x44fa, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x450a, 0x0000, ++ /* Node for range 0x030a_ */ ++ 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x451a, 0x0000, 0x452a, 0x0000, ++ /* Node for range 0x030b_ */ ++ 0x453a, 0x0000, 0x454a, 0x0000, 0x455a, 0x0000, 0x456a, 0x0000, ++ 0x457a, 0x0000, 0x458a, 0x0000, 0x459a, 0x0000, 0x45aa, 0x0000, ++ /* Node for range 0x030c_ */ ++ 0x45ba, 0x0000, 0x45ca, 0x0000, 0x0000, 0x45da, 0x0000, 0x45ea, ++ 0x0000, 0x45fa, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x030d_ */ ++ 0x460a, 0x461a, 0x0000, 0x462a, 0x463a, 0x0000, 0x464a, 0x465a, ++ 0x0000, 0x466a, 0x467a, 0x0000, 0x468a, 0x469a, 0x0000, 0x0000, ++ /* Node for range 0x030f_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x46aa, 0x0000, 0x0000, 0x46ba, ++ 0x46ca, 0x46da, 0x46ea, 0x0000, 0x0000, 0x0000, 0x46fa, 0x0000, ++ /* Node for range 0x0f90_ */ ++ 0x4709, 0x4711, 0x4719, 0x4721, 0x4729, 0x4731, 0x4739, 0x4741, ++ 0x4749, 0x4751, 0x4759, 0x4761, 0x4769, 0x4771, 0x4779, 0x4781, ++ /* Node for range 0x0f91_ */ ++ 0x4789, 0x4791, 0x4799, 0x47a1, 0x47a9, 0x47b1, 0x47b9, 0x47c1, ++ 0x47c9, 0x47d1, 0x47d9, 0x47e1, 0x47e9, 0x47f1, 0x47f9, 0x4801, ++ /* Node for range 0x0f92_ */ ++ 0x4809, 0x4811, 0x4819, 0x4821, 0x4829, 0x4831, 0x4839, 0x4841, ++ 0x4849, 0x4851, 0x4859, 0x4861, 0x4869, 0x4871, 0x4879, 0x4881, ++ /* Node for range 0x0f93_ */ ++ 0x4889, 0x4891, 0x4899, 0x48a1, 0x48a9, 0x48b1, 0x48b9, 0x48c1, ++ 0x48c9, 0x48d1, 0x48d9, 0x48e1, 0x48e9, 0x48f1, 0x48f9, 0x4901, ++ /* Node for range 0x0f94_ */ ++ 0x4909, 0x4911, 0x4919, 0x4921, 0x4929, 0x4931, 0x4939, 0x4941, ++ 0x4949, 0x4951, 0x4959, 0x4961, 0x4969, 0x4971, 0x4979, 0x4981, ++ /* Node for range 0x0f95_ */ ++ 0x4989, 0x4991, 0x4999, 0x49a1, 0x49a9, 0x49b1, 0x49b9, 0x49c1, ++ 0x49c9, 0x49d1, 0x49d9, 0x49e1, 0x49e9, 0x49f1, 0x49f9, 0x4a01, ++ /* Node for range 0x0f96_ */ ++ 0x4a09, 0x4a11, 0x4a19, 0x4a21, 0x4a29, 0x4a31, 0x4a39, 0x4a41, ++ 0x4a49, 0x4a51, 0x4a59, 0x4a61, 0x4a69, 0x4a71, 0x4a79, 0x4a81, ++ /* Node for range 0x0f97_ */ ++ 0x4a89, 0x4a91, 0x4a99, 
0x4aa1, 0x4aa9, 0x4ab1, 0x4ab9, 0x4ac1, ++ 0x4ac9, 0x4ad1, 0x4ad9, 0x4ae1, 0x4ae9, 0x4af1, 0x4af9, 0x4b01, ++ /* Node for range 0x0f98_ */ ++ 0x4b09, 0x4b11, 0x4b19, 0x4b21, 0x4b29, 0x4b31, 0x4b39, 0x4b41, ++ 0x4b49, 0x4b51, 0x4b59, 0x4b61, 0x4b69, 0x4b71, 0x4b79, 0x4b81, ++ /* Node for range 0x0f99_ */ ++ 0x4b89, 0x4b91, 0x4b99, 0x4ba1, 0x4ba9, 0x4bb1, 0x4bb9, 0x4bc1, ++ 0x4bc9, 0x4bd1, 0x4bd9, 0x4be1, 0x4be9, 0x4bf1, 0x4bf9, 0x4c01, ++ /* Node for range 0x0f9a_ */ ++ 0x4c09, 0x4c11, 0x4c19, 0x4c21, 0x4c29, 0x4c31, 0x4c39, 0x4c41, ++ 0x4c49, 0x4c51, 0x4c59, 0x4c61, 0x4c69, 0x4c71, 0x4c79, 0x4c81, ++ /* Node for range 0x0f9b_ */ ++ 0x4c89, 0x4c91, 0x4c99, 0x4ca1, 0x4ca9, 0x4cb1, 0x4cb9, 0x4cc1, ++ 0x4cc9, 0x4cd1, 0x4cd9, 0x4ce1, 0x4ce9, 0x4cf1, 0x4cf9, 0x4d01, ++ /* Node for range 0x0f9c_ */ ++ 0x4d09, 0x4d11, 0x4d19, 0x4d21, 0x4d29, 0x4d31, 0x4d39, 0x4d41, ++ 0x4d49, 0x4d51, 0x4d59, 0x4d61, 0x4d69, 0x4d71, 0x4d79, 0x4d81, ++ /* Node for range 0x0f9d_ */ ++ 0x4d89, 0x4d91, 0x4d99, 0x4da1, 0x4da9, 0x4db1, 0x4db9, 0x4dc1, ++ 0x4dc9, 0x4dd1, 0x4dd9, 0x4de1, 0x4de9, 0x4df1, 0x4df9, 0x4e01, ++ /* Node for range 0x0f9e_ */ ++ 0x4e09, 0x4e11, 0x4e19, 0x4e21, 0x4e29, 0x4e31, 0x4e39, 0x4e41, ++ 0x4e49, 0x4e51, 0x4e59, 0x4e61, 0x4e69, 0x4e71, 0x4e79, 0x4e81, ++ /* Node for range 0x0f9f_ */ ++ 0x4e89, 0x4e91, 0x4e99, 0x4ea1, 0x4ea9, 0x4eb1, 0x4eb9, 0x4ec1, ++ 0x4ec9, 0x4ed1, 0x4ed9, 0x4ee1, 0x4ee9, 0x4ef1, 0x4ef9, 0x4f01, ++ /* Node for range 0x0fa0_ */ ++ 0x4f09, 0x4f11, 0x4f19, 0x4f21, 0x4f29, 0x4f31, 0x4f39, 0x4f41, ++ 0x4f49, 0x4f51, 0x4f59, 0x4f61, 0x4f69, 0x4f71, 0x0000, 0x0000, ++ /* Node for range 0x0fa1_ */ ++ 0x4f79, 0x0000, 0x4f81, 0x0000, 0x0000, 0x4f89, 0x4f91, 0x4f99, ++ 0x4fa1, 0x4fa9, 0x4fb1, 0x4fb9, 0x4fc1, 0x4fc9, 0x4fd1, 0x0000, ++ /* Node for range 0x0fa2_ */ ++ 0x4fd9, 0x0000, 0x4fe1, 0x0000, 0x0000, 0x4fe9, 0x4ff1, 0x0000, ++ 0x0000, 0x0000, 0x4ff9, 0x5001, 0x5009, 0x5011, 0x5019, 0x5021, ++ /* Node for range 0x0fa3_ */ ++ 0x5029, 0x5031, 0x5039, 0x5041, 
0x5049, 0x5051, 0x5059, 0x5061, ++ 0x5069, 0x5071, 0x5079, 0x5081, 0x5089, 0x5091, 0x5099, 0x50a1, ++ /* Node for range 0x0fa4_ */ ++ 0x50a9, 0x50b1, 0x50b9, 0x50c1, 0x50c9, 0x50d1, 0x50d9, 0x50e1, ++ 0x50e9, 0x50f1, 0x50f9, 0x5101, 0x5109, 0x5111, 0x5119, 0x5121, ++ /* Node for range 0x0fa5_ */ ++ 0x5129, 0x5131, 0x5139, 0x5141, 0x5149, 0x5151, 0x5159, 0x5161, ++ 0x5169, 0x5171, 0x5179, 0x5181, 0x5189, 0x5191, 0x5199, 0x51a1, ++ /* Node for range 0x0fa6_ */ ++ 0x51a9, 0x51b1, 0x51b9, 0x51c1, 0x51c9, 0x51d1, 0x51d9, 0x51e1, ++ 0x51e9, 0x51f1, 0x51f9, 0x5201, 0x5209, 0x5211, 0x0000, 0x0000, ++ /* Node for range 0x0fa7_ */ ++ 0x5219, 0x5221, 0x5229, 0x5231, 0x5239, 0x5241, 0x5249, 0x5251, ++ 0x5259, 0x5261, 0x5269, 0x5271, 0x5279, 0x5281, 0x5289, 0x5291, ++ /* Node for range 0x0fa8_ */ ++ 0x5299, 0x52a1, 0x52a9, 0x52b1, 0x52b9, 0x52c1, 0x52c9, 0x52d1, ++ 0x52d9, 0x52e1, 0x52e9, 0x52f1, 0x52f9, 0x5301, 0x5309, 0x5311, ++ /* Node for range 0x0fa9_ */ ++ 0x5319, 0x5321, 0x5329, 0x5331, 0x5339, 0x5341, 0x5349, 0x5351, ++ 0x5359, 0x5361, 0x5369, 0x5371, 0x5379, 0x5381, 0x5389, 0x5391, ++ /* Node for range 0x0faa_ */ ++ 0x5399, 0x53a1, 0x53a9, 0x53b1, 0x53b9, 0x53c1, 0x53c9, 0x53d1, ++ 0x53d9, 0x53e1, 0x53e9, 0x53f1, 0x53f9, 0x5401, 0x5409, 0x5411, ++ /* Node for range 0x0fab_ */ ++ 0x5419, 0x5421, 0x5429, 0x5431, 0x5439, 0x5441, 0x5449, 0x5451, ++ 0x5459, 0x5461, 0x5469, 0x5471, 0x5479, 0x5481, 0x5489, 0x5491, ++ /* Node for range 0x0fac_ */ ++ 0x5499, 0x54a1, 0x54a9, 0x54b1, 0x54b9, 0x54c1, 0x54c9, 0x54d1, ++ 0x54d9, 0x54e1, 0x54e9, 0x54f1, 0x54f9, 0x5501, 0x5509, 0x5511, ++ /* Node for range 0x0fad_ */ ++ 0x5519, 0x5521, 0x5529, 0x5531, 0x5539, 0x5541, 0x5549, 0x5551, ++ 0x5559, 0x5561, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0fb1_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x556a, 0x0000, 0x557a, ++ /* Node for range 0x0fb2_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x558a, 0x559a, 0x55ab, 0x55c3, 0x55da, 0x55ea, ++ /* Node for range 0x0fb3_ */ ++ 0x55fa, 0x560a, 0x561a, 0x562a, 0x563a, 0x564a, 0x565a, 0x0000, ++ 0x566a, 0x567a, 0x568a, 0x569a, 0x56aa, 0x0000, 0x56ba, 0x0000, ++ /* Node for range 0x0fb4_ */ ++ 0x56ca, 0x56da, 0x0000, 0x56ea, 0x56fa, 0x0000, 0x570a, 0x571a, ++ 0x572a, 0x573a, 0x574a, 0x575a, 0x576a, 0x577a, 0x578a, 0x0000, ++ /* Node for range 0x1109_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x579a, 0x0000, 0x57aa, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x110a_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x57ba, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1112_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x57ca, 0x57da, ++ /* Node for range 0x1134_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x57ea, 0x57fa, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x114b_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x580a, 0x581a, 0x0000, 0x582a, 0x0000, ++ /* Node for range 0x115b_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x583a, 0x584a, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1d15_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x585a, 0x586a, ++ /* Node for range 0x1d16_ */ ++ 0x587b, 0x5893, 0x58ab, 0x58c3, 0x58db, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1d1b_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x58f2, 0x5902, 0x5913, 0x592b, 0x5943, ++ /* Node for range 0x1d1c_ */ ++ 0x595b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x2f80_ */ ++ 0x5971, 0x5979, 0x5981, 0x5989, 0x5991, 0x5999, 0x59a1, 0x59a9, ++ 0x59b1, 0x59b9, 0x59c1, 0x59c9, 0x59d1, 0x59d9, 0x59e1, 0x59e9, ++ /* Node for range 0x2f81_ */ ++ 0x59f1, 0x59f9, 0x5a01, 0x5a09, 0x5a11, 0x5a19, 0x5a21, 0x5a29, ++ 0x5a31, 0x5a39, 0x5a41, 0x5a49, 0x5a51, 0x5a59, 0x5a61, 0x5a69, ++ /* Node for range 0x2f82_ */ ++ 0x5a71, 0x5a79, 0x5a81, 0x5a89, 0x5a91, 0x5a99, 0x5aa1, 0x5aa9, ++ 0x5ab1, 0x5ab9, 0x5ac1, 0x5ac9, 0x5ad1, 0x5ad9, 0x5ae1, 0x5ae9, ++ /* Node for range 0x2f83_ */ ++ 0x5af1, 0x5af9, 0x5b01, 0x5b09, 0x5b11, 0x5b19, 0x5b21, 0x5b29, ++ 0x5b31, 0x5b39, 0x5b41, 0x5b49, 0x5b51, 0x5b59, 0x5b61, 0x5b69, ++ /* Node for range 0x2f84_ */ ++ 0x5b71, 0x5b79, 0x5b81, 0x5b89, 0x5b91, 0x5b99, 0x5ba1, 0x5ba9, ++ 0x5bb1, 0x5bb9, 0x5bc1, 0x5bc9, 0x5bd1, 0x5bd9, 0x5be1, 0x5be9, ++ /* Node for range 0x2f85_ */ ++ 0x5bf1, 0x5bf9, 0x5c01, 0x5c09, 0x5c11, 0x5c19, 0x5c21, 0x5c29, ++ 0x5c31, 0x5c39, 0x5c41, 0x5c49, 0x5c51, 0x5c59, 0x5c61, 0x5c69, ++ /* Node for range 0x2f86_ */ ++ 0x5c71, 0x5c79, 0x5c81, 0x5c89, 0x5c91, 0x5c99, 0x5ca1, 0x5ca9, ++ 0x5cb1, 0x5cb9, 0x5cc1, 0x5cc9, 0x5cd1, 0x5cd9, 0x5ce1, 0x5ce9, ++ /* Node for range 0x2f87_ */ ++ 0x5cf1, 0x5cf9, 0x5d01, 0x5d09, 0x5d11, 0x5d19, 0x5d21, 0x5d29, ++ 0x5d31, 0x5d39, 0x5d41, 0x5d49, 0x5d51, 0x5d59, 0x5d61, 0x5d69, ++ /* Node for range 0x2f88_ */ ++ 0x5d71, 0x5d79, 0x5d81, 0x5d89, 0x5d91, 0x5d99, 0x5da1, 0x5da9, ++ 0x5db1, 0x5db9, 0x5dc1, 0x5dc9, 0x5dd1, 0x5dd9, 0x5de1, 0x5de9, ++ /* Node for range 0x2f89_ */ ++ 0x5df1, 0x5df9, 0x5e01, 0x5e09, 0x5e11, 0x5e19, 0x5e21, 0x5e29, ++ 0x5e31, 0x5e39, 0x5e41, 0x5e49, 0x5e51, 0x5e59, 0x5e61, 0x5e69, ++ /* Node for range 0x2f8a_ */ ++ 0x5e71, 0x5e79, 0x5e81, 0x5e89, 0x5e91, 0x5e99, 0x5ea1, 0x5ea9, ++ 0x5eb1, 0x5eb9, 0x5ec1, 0x5ec9, 0x5ed1, 0x5ed9, 0x5ee1, 0x5ee9, ++ /* Node for range 0x2f8b_ */ ++ 0x5ef1, 0x5ef9, 0x5f01, 0x5f09, 0x5f11, 0x5f19, 0x5f21, 
0x5f29, ++ 0x5f31, 0x5f39, 0x5f41, 0x5f49, 0x5f51, 0x5f59, 0x5f61, 0x5f69, ++ /* Node for range 0x2f8c_ */ ++ 0x5f71, 0x5f79, 0x5f81, 0x5f89, 0x5f91, 0x5f99, 0x5fa1, 0x5fa9, ++ 0x5fb1, 0x5fb9, 0x5fc1, 0x5fc9, 0x5fd1, 0x5fd9, 0x5fe1, 0x5fe9, ++ /* Node for range 0x2f8d_ */ ++ 0x5ff1, 0x5ff9, 0x6001, 0x6009, 0x6011, 0x6019, 0x6021, 0x6029, ++ 0x6031, 0x6039, 0x6041, 0x6049, 0x6051, 0x6059, 0x6061, 0x6069, ++ /* Node for range 0x2f8e_ */ ++ 0x6071, 0x6079, 0x6081, 0x6089, 0x6091, 0x6099, 0x60a1, 0x60a9, ++ 0x60b1, 0x60b9, 0x60c1, 0x60c9, 0x60d1, 0x60d9, 0x60e1, 0x60e9, ++ /* Node for range 0x2f8f_ */ ++ 0x60f1, 0x60f9, 0x6101, 0x6109, 0x6111, 0x6119, 0x6121, 0x6129, ++ 0x6131, 0x6139, 0x6141, 0x6149, 0x6151, 0x6159, 0x6161, 0x6169, ++ /* Node for range 0x2f90_ */ ++ 0x6171, 0x6179, 0x6181, 0x6189, 0x6191, 0x6199, 0x61a1, 0x61a9, ++ 0x61b1, 0x61b9, 0x61c1, 0x61c9, 0x61d1, 0x61d9, 0x61e1, 0x61e9, ++ /* Node for range 0x2f91_ */ ++ 0x61f1, 0x61f9, 0x6201, 0x6209, 0x6211, 0x6219, 0x6221, 0x6229, ++ 0x6231, 0x6239, 0x6241, 0x6249, 0x6251, 0x6259, 0x6261, 0x6269, ++ /* Node for range 0x2f92_ */ ++ 0x6271, 0x6279, 0x6281, 0x6289, 0x6291, 0x6299, 0x62a1, 0x62a9, ++ 0x62b1, 0x62b9, 0x62c1, 0x62c9, 0x62d1, 0x62d9, 0x62e1, 0x62e9, ++ /* Node for range 0x2f93_ */ ++ 0x62f1, 0x62f9, 0x6301, 0x6309, 0x6311, 0x6319, 0x6321, 0x6329, ++ 0x6331, 0x6339, 0x6341, 0x6349, 0x6351, 0x6359, 0x6361, 0x6369, ++ /* Node for range 0x2f94_ */ ++ 0x6371, 0x6379, 0x6381, 0x6389, 0x6391, 0x6399, 0x63a1, 0x63a9, ++ 0x63b1, 0x63b9, 0x63c1, 0x63c9, 0x63d1, 0x63d9, 0x63e1, 0x63e9, ++ /* Node for range 0x2f95_ */ ++ 0x63f1, 0x63f9, 0x6401, 0x6409, 0x6411, 0x6419, 0x6421, 0x6429, ++ 0x6431, 0x6439, 0x6441, 0x6449, 0x6451, 0x6459, 0x6461, 0x6469, ++ /* Node for range 0x2f96_ */ ++ 0x6471, 0x6479, 0x6481, 0x6489, 0x6491, 0x6499, 0x64a1, 0x64a9, ++ 0x64b1, 0x64b9, 0x64c1, 0x64c9, 0x64d1, 0x64d9, 0x64e1, 0x64e9, ++ /* Node for range 0x2f97_ */ ++ 0x64f1, 0x64f9, 0x6501, 0x6509, 0x6511, 0x6519, 0x6521, 0x6529, 
++ 0x6531, 0x6539, 0x6541, 0x6549, 0x6551, 0x6559, 0x6561, 0x6569, ++ /* Node for range 0x2f98_ */ ++ 0x6571, 0x6579, 0x6581, 0x6589, 0x6591, 0x6599, 0x65a1, 0x65a9, ++ 0x65b1, 0x65b9, 0x65c1, 0x65c9, 0x65d1, 0x65d9, 0x65e1, 0x65e9, ++ /* Node for range 0x2f99_ */ ++ 0x65f1, 0x65f9, 0x6601, 0x6609, 0x6611, 0x6619, 0x6621, 0x6629, ++ 0x6631, 0x6639, 0x6641, 0x6649, 0x6651, 0x6659, 0x6661, 0x6669, ++ /* Node for range 0x2f9a_ */ ++ 0x6671, 0x6679, 0x6681, 0x6689, 0x6691, 0x6699, 0x66a1, 0x66a9, ++ 0x66b1, 0x66b9, 0x66c1, 0x66c9, 0x66d1, 0x66d9, 0x66e1, 0x66e9, ++ /* Node for range 0x2f9b_ */ ++ 0x66f1, 0x66f9, 0x6701, 0x6709, 0x6711, 0x6719, 0x6721, 0x6729, ++ 0x6731, 0x6739, 0x6741, 0x6749, 0x6751, 0x6759, 0x6761, 0x6769, ++ /* Node for range 0x2f9c_ */ ++ 0x6771, 0x6779, 0x6781, 0x6789, 0x6791, 0x6799, 0x67a1, 0x67a9, ++ 0x67b1, 0x67b9, 0x67c1, 0x67c9, 0x67d1, 0x67d9, 0x67e1, 0x67e9, ++ /* Node for range 0x2f9d_ */ ++ 0x67f1, 0x67f9, 0x6801, 0x6809, 0x6811, 0x6819, 0x6821, 0x6829, ++ 0x6831, 0x6839, 0x6841, 0x6849, 0x6851, 0x6859, 0x6861, 0x6869, ++ /* Node for range 0x2f9e_ */ ++ 0x6871, 0x6879, 0x6881, 0x6889, 0x6891, 0x6899, 0x68a1, 0x68a9, ++ 0x68b1, 0x68b9, 0x68c1, 0x68c9, 0x68d1, 0x68d9, 0x68e1, 0x68e9, ++ /* Node for range 0x2f9f_ */ ++ 0x68f1, 0x68f9, 0x6901, 0x6909, 0x6911, 0x6919, 0x6921, 0x6929, ++ 0x6931, 0x6939, 0x6941, 0x6949, 0x6951, 0x6959, 0x6961, 0x6969, ++ /* Node for range 0x2fa0_ */ ++ 0x6971, 0x6979, 0x6981, 0x6989, 0x6991, 0x6999, 0x69a1, 0x69a9, ++ 0x69b1, 0x69b9, 0x69c1, 0x69c9, 0x69d1, 0x69d9, 0x69e1, 0x69e9, ++ /* Node for range 0x2fa1_ */ ++ 0x69f1, 0x69f9, 0x6a01, 0x6a09, 0x6a11, 0x6a19, 0x6a21, 0x6a29, ++ 0x6a31, 0x6a39, 0x6a41, 0x6a49, 0x6a51, 0x6a59, 0x0000, 0x0000, ++}; ++ ++static unicode_t apfs_nfd[] = { ++ 0x000041, 0x000300, 0x000041, 0x000301, 0x000041, 0x000302, ++ 0x000041, 0x000303, 0x000041, 0x000308, 0x000041, 0x00030a, ++ 0x000043, 0x000327, 0x000045, 0x000300, 0x000045, 0x000301, ++ 0x000045, 0x000302, 0x000045, 
0x000308, 0x000049, 0x000300, ++ 0x000049, 0x000301, 0x000049, 0x000302, 0x000049, 0x000308, ++ 0x00004e, 0x000303, 0x00004f, 0x000300, 0x00004f, 0x000301, ++ 0x00004f, 0x000302, 0x00004f, 0x000303, 0x00004f, 0x000308, ++ 0x000055, 0x000300, 0x000055, 0x000301, 0x000055, 0x000302, ++ 0x000055, 0x000308, 0x000059, 0x000301, 0x000061, 0x000300, ++ 0x000061, 0x000301, 0x000061, 0x000302, 0x000061, 0x000303, ++ 0x000061, 0x000308, 0x000061, 0x00030a, 0x000063, 0x000327, ++ 0x000065, 0x000300, 0x000065, 0x000301, 0x000065, 0x000302, ++ 0x000065, 0x000308, 0x000069, 0x000300, 0x000069, 0x000301, ++ 0x000069, 0x000302, 0x000069, 0x000308, 0x00006e, 0x000303, ++ 0x00006f, 0x000300, 0x00006f, 0x000301, 0x00006f, 0x000302, ++ 0x00006f, 0x000303, 0x00006f, 0x000308, 0x000075, 0x000300, ++ 0x000075, 0x000301, 0x000075, 0x000302, 0x000075, 0x000308, ++ 0x000079, 0x000301, 0x000079, 0x000308, 0x000041, 0x000304, ++ 0x000061, 0x000304, 0x000041, 0x000306, 0x000061, 0x000306, ++ 0x000041, 0x000328, 0x000061, 0x000328, 0x000043, 0x000301, ++ 0x000063, 0x000301, 0x000043, 0x000302, 0x000063, 0x000302, ++ 0x000043, 0x000307, 0x000063, 0x000307, 0x000043, 0x00030c, ++ 0x000063, 0x00030c, 0x000044, 0x00030c, 0x000064, 0x00030c, ++ 0x000045, 0x000304, 0x000065, 0x000304, 0x000045, 0x000306, ++ 0x000065, 0x000306, 0x000045, 0x000307, 0x000065, 0x000307, ++ 0x000045, 0x000328, 0x000065, 0x000328, 0x000045, 0x00030c, ++ 0x000065, 0x00030c, 0x000047, 0x000302, 0x000067, 0x000302, ++ 0x000047, 0x000306, 0x000067, 0x000306, 0x000047, 0x000307, ++ 0x000067, 0x000307, 0x000047, 0x000327, 0x000067, 0x000327, ++ 0x000048, 0x000302, 0x000068, 0x000302, 0x000049, 0x000303, ++ 0x000069, 0x000303, 0x000049, 0x000304, 0x000069, 0x000304, ++ 0x000049, 0x000306, 0x000069, 0x000306, 0x000049, 0x000328, ++ 0x000069, 0x000328, 0x000049, 0x000307, 0x00004a, 0x000302, ++ 0x00006a, 0x000302, 0x00004b, 0x000327, 0x00006b, 0x000327, ++ 0x00004c, 0x000301, 0x00006c, 0x000301, 0x00004c, 0x000327, ++ 0x00006c, 
0x000327, 0x00004c, 0x00030c, 0x00006c, 0x00030c, ++ 0x00004e, 0x000301, 0x00006e, 0x000301, 0x00004e, 0x000327, ++ 0x00006e, 0x000327, 0x00004e, 0x00030c, 0x00006e, 0x00030c, ++ 0x00004f, 0x000304, 0x00006f, 0x000304, 0x00004f, 0x000306, ++ 0x00006f, 0x000306, 0x00004f, 0x00030b, 0x00006f, 0x00030b, ++ 0x000052, 0x000301, 0x000072, 0x000301, 0x000052, 0x000327, ++ 0x000072, 0x000327, 0x000052, 0x00030c, 0x000072, 0x00030c, ++ 0x000053, 0x000301, 0x000073, 0x000301, 0x000053, 0x000302, ++ 0x000073, 0x000302, 0x000053, 0x000327, 0x000073, 0x000327, ++ 0x000053, 0x00030c, 0x000073, 0x00030c, 0x000054, 0x000327, ++ 0x000074, 0x000327, 0x000054, 0x00030c, 0x000074, 0x00030c, ++ 0x000055, 0x000303, 0x000075, 0x000303, 0x000055, 0x000304, ++ 0x000075, 0x000304, 0x000055, 0x000306, 0x000075, 0x000306, ++ 0x000055, 0x00030a, 0x000075, 0x00030a, 0x000055, 0x00030b, ++ 0x000075, 0x00030b, 0x000055, 0x000328, 0x000075, 0x000328, ++ 0x000057, 0x000302, 0x000077, 0x000302, 0x000059, 0x000302, ++ 0x000079, 0x000302, 0x000059, 0x000308, 0x00005a, 0x000301, ++ 0x00007a, 0x000301, 0x00005a, 0x000307, 0x00007a, 0x000307, ++ 0x00005a, 0x00030c, 0x00007a, 0x00030c, 0x00004f, 0x00031b, ++ 0x00006f, 0x00031b, 0x000055, 0x00031b, 0x000075, 0x00031b, ++ 0x000041, 0x00030c, 0x000061, 0x00030c, 0x000049, 0x00030c, ++ 0x000069, 0x00030c, 0x00004f, 0x00030c, 0x00006f, 0x00030c, ++ 0x000055, 0x00030c, 0x000075, 0x00030c, 0x000055, 0x000308, ++ 0x000304, 0x000075, 0x000308, 0x000304, 0x000055, 0x000308, ++ 0x000301, 0x000075, 0x000308, 0x000301, 0x000055, 0x000308, ++ 0x00030c, 0x000075, 0x000308, 0x00030c, 0x000055, 0x000308, ++ 0x000300, 0x000075, 0x000308, 0x000300, 0x000041, 0x000308, ++ 0x000304, 0x000061, 0x000308, 0x000304, 0x000041, 0x000307, ++ 0x000304, 0x000061, 0x000307, 0x000304, 0x0000c6, 0x000304, ++ 0x0000e6, 0x000304, 0x000047, 0x00030c, 0x000067, 0x00030c, ++ 0x00004b, 0x00030c, 0x00006b, 0x00030c, 0x00004f, 0x000328, ++ 0x00006f, 0x000328, 0x00004f, 0x000328, 0x000304, 
0x00006f, ++ 0x000328, 0x000304, 0x0001b7, 0x00030c, 0x000292, 0x00030c, ++ 0x00006a, 0x00030c, 0x000047, 0x000301, 0x000067, 0x000301, ++ 0x00004e, 0x000300, 0x00006e, 0x000300, 0x000041, 0x00030a, ++ 0x000301, 0x000061, 0x00030a, 0x000301, 0x0000c6, 0x000301, ++ 0x0000e6, 0x000301, 0x0000d8, 0x000301, 0x0000f8, 0x000301, ++ 0x000041, 0x00030f, 0x000061, 0x00030f, 0x000041, 0x000311, ++ 0x000061, 0x000311, 0x000045, 0x00030f, 0x000065, 0x00030f, ++ 0x000045, 0x000311, 0x000065, 0x000311, 0x000049, 0x00030f, ++ 0x000069, 0x00030f, 0x000049, 0x000311, 0x000069, 0x000311, ++ 0x00004f, 0x00030f, 0x00006f, 0x00030f, 0x00004f, 0x000311, ++ 0x00006f, 0x000311, 0x000052, 0x00030f, 0x000072, 0x00030f, ++ 0x000052, 0x000311, 0x000072, 0x000311, 0x000055, 0x00030f, ++ 0x000075, 0x00030f, 0x000055, 0x000311, 0x000075, 0x000311, ++ 0x000053, 0x000326, 0x000073, 0x000326, 0x000054, 0x000326, ++ 0x000074, 0x000326, 0x000048, 0x00030c, 0x000068, 0x00030c, ++ 0x000041, 0x000307, 0x000061, 0x000307, 0x000045, 0x000327, ++ 0x000065, 0x000327, 0x00004f, 0x000308, 0x000304, 0x00006f, ++ 0x000308, 0x000304, 0x00004f, 0x000303, 0x000304, 0x00006f, ++ 0x000303, 0x000304, 0x00004f, 0x000307, 0x00006f, 0x000307, ++ 0x00004f, 0x000307, 0x000304, 0x00006f, 0x000307, 0x000304, ++ 0x000059, 0x000304, 0x000079, 0x000304, 0x000300, 0x000301, ++ 0x000313, 0x000308, 0x000301, 0x0002b9, 0x00003b, 0x0000a8, ++ 0x000301, 0x000391, 0x000301, 0x0000b7, 0x000395, 0x000301, ++ 0x000397, 0x000301, 0x000399, 0x000301, 0x00039f, 0x000301, ++ 0x0003a5, 0x000301, 0x0003a9, 0x000301, 0x0003b9, 0x000308, ++ 0x000301, 0x000399, 0x000308, 0x0003a5, 0x000308, 0x0003b1, ++ 0x000301, 0x0003b5, 0x000301, 0x0003b7, 0x000301, 0x0003b9, ++ 0x000301, 0x0003c5, 0x000308, 0x000301, 0x0003b9, 0x000308, ++ 0x0003c5, 0x000308, 0x0003bf, 0x000301, 0x0003c5, 0x000301, ++ 0x0003c9, 0x000301, 0x0003d2, 0x000301, 0x0003d2, 0x000308, ++ 0x000415, 0x000300, 0x000415, 0x000308, 0x000413, 0x000301, ++ 0x000406, 0x000308, 0x00041a, 
0x000301, 0x000418, 0x000300, ++ 0x000423, 0x000306, 0x000418, 0x000306, 0x000438, 0x000306, ++ 0x000435, 0x000300, 0x000435, 0x000308, 0x000433, 0x000301, ++ 0x000456, 0x000308, 0x00043a, 0x000301, 0x000438, 0x000300, ++ 0x000443, 0x000306, 0x000474, 0x00030f, 0x000475, 0x00030f, ++ 0x000416, 0x000306, 0x000436, 0x000306, 0x000410, 0x000306, ++ 0x000430, 0x000306, 0x000410, 0x000308, 0x000430, 0x000308, ++ 0x000415, 0x000306, 0x000435, 0x000306, 0x0004d8, 0x000308, ++ 0x0004d9, 0x000308, 0x000416, 0x000308, 0x000436, 0x000308, ++ 0x000417, 0x000308, 0x000437, 0x000308, 0x000418, 0x000304, ++ 0x000438, 0x000304, 0x000418, 0x000308, 0x000438, 0x000308, ++ 0x00041e, 0x000308, 0x00043e, 0x000308, 0x0004e8, 0x000308, ++ 0x0004e9, 0x000308, 0x00042d, 0x000308, 0x00044d, 0x000308, ++ 0x000423, 0x000304, 0x000443, 0x000304, 0x000423, 0x000308, ++ 0x000443, 0x000308, 0x000423, 0x00030b, 0x000443, 0x00030b, ++ 0x000427, 0x000308, 0x000447, 0x000308, 0x00042b, 0x000308, ++ 0x00044b, 0x000308, 0x000627, 0x000653, 0x000627, 0x000654, ++ 0x000648, 0x000654, 0x000627, 0x000655, 0x00064a, 0x000654, ++ 0x0006d5, 0x000654, 0x0006c1, 0x000654, 0x0006d2, 0x000654, ++ 0x000928, 0x00093c, 0x000930, 0x00093c, 0x000933, 0x00093c, ++ 0x000915, 0x00093c, 0x000916, 0x00093c, 0x000917, 0x00093c, ++ 0x00091c, 0x00093c, 0x000921, 0x00093c, 0x000922, 0x00093c, ++ 0x00092b, 0x00093c, 0x00092f, 0x00093c, 0x0009c7, 0x0009be, ++ 0x0009c7, 0x0009d7, 0x0009a1, 0x0009bc, 0x0009a2, 0x0009bc, ++ 0x0009af, 0x0009bc, 0x000a32, 0x000a3c, 0x000a38, 0x000a3c, ++ 0x000a16, 0x000a3c, 0x000a17, 0x000a3c, 0x000a1c, 0x000a3c, ++ 0x000a2b, 0x000a3c, 0x000b47, 0x000b56, 0x000b47, 0x000b3e, ++ 0x000b47, 0x000b57, 0x000b21, 0x000b3c, 0x000b22, 0x000b3c, ++ 0x000b92, 0x000bd7, 0x000bc6, 0x000bbe, 0x000bc7, 0x000bbe, ++ 0x000bc6, 0x000bd7, 0x000c46, 0x000c56, 0x000cbf, 0x000cd5, ++ 0x000cc6, 0x000cd5, 0x000cc6, 0x000cd6, 0x000cc6, 0x000cc2, ++ 0x000cc6, 0x000cc2, 0x000cd5, 0x000d46, 0x000d3e, 0x000d47, ++ 0x000d3e, 
0x000d46, 0x000d57, 0x000dd9, 0x000dca, 0x000dd9, ++ 0x000dcf, 0x000dd9, 0x000dcf, 0x000dca, 0x000dd9, 0x000ddf, ++ 0x000f42, 0x000fb7, 0x000f4c, 0x000fb7, 0x000f51, 0x000fb7, ++ 0x000f56, 0x000fb7, 0x000f5b, 0x000fb7, 0x000f40, 0x000fb5, ++ 0x000f71, 0x000f72, 0x000f71, 0x000f74, 0x000fb2, 0x000f80, ++ 0x000fb3, 0x000f80, 0x000f71, 0x000f80, 0x000f92, 0x000fb7, ++ 0x000f9c, 0x000fb7, 0x000fa1, 0x000fb7, 0x000fa6, 0x000fb7, ++ 0x000fab, 0x000fb7, 0x000f90, 0x000fb5, 0x001025, 0x00102e, ++ 0x001b05, 0x001b35, 0x001b07, 0x001b35, 0x001b09, 0x001b35, ++ 0x001b0b, 0x001b35, 0x001b0d, 0x001b35, 0x001b11, 0x001b35, ++ 0x001b3a, 0x001b35, 0x001b3c, 0x001b35, 0x001b3e, 0x001b35, ++ 0x001b3f, 0x001b35, 0x001b42, 0x001b35, 0x000041, 0x000325, ++ 0x000061, 0x000325, 0x000042, 0x000307, 0x000062, 0x000307, ++ 0x000042, 0x000323, 0x000062, 0x000323, 0x000042, 0x000331, ++ 0x000062, 0x000331, 0x000043, 0x000327, 0x000301, 0x000063, ++ 0x000327, 0x000301, 0x000044, 0x000307, 0x000064, 0x000307, ++ 0x000044, 0x000323, 0x000064, 0x000323, 0x000044, 0x000331, ++ 0x000064, 0x000331, 0x000044, 0x000327, 0x000064, 0x000327, ++ 0x000044, 0x00032d, 0x000064, 0x00032d, 0x000045, 0x000304, ++ 0x000300, 0x000065, 0x000304, 0x000300, 0x000045, 0x000304, ++ 0x000301, 0x000065, 0x000304, 0x000301, 0x000045, 0x00032d, ++ 0x000065, 0x00032d, 0x000045, 0x000330, 0x000065, 0x000330, ++ 0x000045, 0x000327, 0x000306, 0x000065, 0x000327, 0x000306, ++ 0x000046, 0x000307, 0x000066, 0x000307, 0x000047, 0x000304, ++ 0x000067, 0x000304, 0x000048, 0x000307, 0x000068, 0x000307, ++ 0x000048, 0x000323, 0x000068, 0x000323, 0x000048, 0x000308, ++ 0x000068, 0x000308, 0x000048, 0x000327, 0x000068, 0x000327, ++ 0x000048, 0x00032e, 0x000068, 0x00032e, 0x000049, 0x000330, ++ 0x000069, 0x000330, 0x000049, 0x000308, 0x000301, 0x000069, ++ 0x000308, 0x000301, 0x00004b, 0x000301, 0x00006b, 0x000301, ++ 0x00004b, 0x000323, 0x00006b, 0x000323, 0x00004b, 0x000331, ++ 0x00006b, 0x000331, 0x00004c, 0x000323, 0x00006c, 
0x000323, ++ 0x00004c, 0x000323, 0x000304, 0x00006c, 0x000323, 0x000304, ++ 0x00004c, 0x000331, 0x00006c, 0x000331, 0x00004c, 0x00032d, ++ 0x00006c, 0x00032d, 0x00004d, 0x000301, 0x00006d, 0x000301, ++ 0x00004d, 0x000307, 0x00006d, 0x000307, 0x00004d, 0x000323, ++ 0x00006d, 0x000323, 0x00004e, 0x000307, 0x00006e, 0x000307, ++ 0x00004e, 0x000323, 0x00006e, 0x000323, 0x00004e, 0x000331, ++ 0x00006e, 0x000331, 0x00004e, 0x00032d, 0x00006e, 0x00032d, ++ 0x00004f, 0x000303, 0x000301, 0x00006f, 0x000303, 0x000301, ++ 0x00004f, 0x000303, 0x000308, 0x00006f, 0x000303, 0x000308, ++ 0x00004f, 0x000304, 0x000300, 0x00006f, 0x000304, 0x000300, ++ 0x00004f, 0x000304, 0x000301, 0x00006f, 0x000304, 0x000301, ++ 0x000050, 0x000301, 0x000070, 0x000301, 0x000050, 0x000307, ++ 0x000070, 0x000307, 0x000052, 0x000307, 0x000072, 0x000307, ++ 0x000052, 0x000323, 0x000072, 0x000323, 0x000052, 0x000323, ++ 0x000304, 0x000072, 0x000323, 0x000304, 0x000052, 0x000331, ++ 0x000072, 0x000331, 0x000053, 0x000307, 0x000073, 0x000307, ++ 0x000053, 0x000323, 0x000073, 0x000323, 0x000053, 0x000301, ++ 0x000307, 0x000073, 0x000301, 0x000307, 0x000053, 0x00030c, ++ 0x000307, 0x000073, 0x00030c, 0x000307, 0x000053, 0x000323, ++ 0x000307, 0x000073, 0x000323, 0x000307, 0x000054, 0x000307, ++ 0x000074, 0x000307, 0x000054, 0x000323, 0x000074, 0x000323, ++ 0x000054, 0x000331, 0x000074, 0x000331, 0x000054, 0x00032d, ++ 0x000074, 0x00032d, 0x000055, 0x000324, 0x000075, 0x000324, ++ 0x000055, 0x000330, 0x000075, 0x000330, 0x000055, 0x00032d, ++ 0x000075, 0x00032d, 0x000055, 0x000303, 0x000301, 0x000075, ++ 0x000303, 0x000301, 0x000055, 0x000304, 0x000308, 0x000075, ++ 0x000304, 0x000308, 0x000056, 0x000303, 0x000076, 0x000303, ++ 0x000056, 0x000323, 0x000076, 0x000323, 0x000057, 0x000300, ++ 0x000077, 0x000300, 0x000057, 0x000301, 0x000077, 0x000301, ++ 0x000057, 0x000308, 0x000077, 0x000308, 0x000057, 0x000307, ++ 0x000077, 0x000307, 0x000057, 0x000323, 0x000077, 0x000323, ++ 0x000058, 0x000307, 0x000078, 
0x000307, 0x000058, 0x000308, ++ 0x000078, 0x000308, 0x000059, 0x000307, 0x000079, 0x000307, ++ 0x00005a, 0x000302, 0x00007a, 0x000302, 0x00005a, 0x000323, ++ 0x00007a, 0x000323, 0x00005a, 0x000331, 0x00007a, 0x000331, ++ 0x000068, 0x000331, 0x000074, 0x000308, 0x000077, 0x00030a, ++ 0x000079, 0x00030a, 0x00017f, 0x000307, 0x000041, 0x000323, ++ 0x000061, 0x000323, 0x000041, 0x000309, 0x000061, 0x000309, ++ 0x000041, 0x000302, 0x000301, 0x000061, 0x000302, 0x000301, ++ 0x000041, 0x000302, 0x000300, 0x000061, 0x000302, 0x000300, ++ 0x000041, 0x000302, 0x000309, 0x000061, 0x000302, 0x000309, ++ 0x000041, 0x000302, 0x000303, 0x000061, 0x000302, 0x000303, ++ 0x000041, 0x000323, 0x000302, 0x000061, 0x000323, 0x000302, ++ 0x000041, 0x000306, 0x000301, 0x000061, 0x000306, 0x000301, ++ 0x000041, 0x000306, 0x000300, 0x000061, 0x000306, 0x000300, ++ 0x000041, 0x000306, 0x000309, 0x000061, 0x000306, 0x000309, ++ 0x000041, 0x000306, 0x000303, 0x000061, 0x000306, 0x000303, ++ 0x000041, 0x000323, 0x000306, 0x000061, 0x000323, 0x000306, ++ 0x000045, 0x000323, 0x000065, 0x000323, 0x000045, 0x000309, ++ 0x000065, 0x000309, 0x000045, 0x000303, 0x000065, 0x000303, ++ 0x000045, 0x000302, 0x000301, 0x000065, 0x000302, 0x000301, ++ 0x000045, 0x000302, 0x000300, 0x000065, 0x000302, 0x000300, ++ 0x000045, 0x000302, 0x000309, 0x000065, 0x000302, 0x000309, ++ 0x000045, 0x000302, 0x000303, 0x000065, 0x000302, 0x000303, ++ 0x000045, 0x000323, 0x000302, 0x000065, 0x000323, 0x000302, ++ 0x000049, 0x000309, 0x000069, 0x000309, 0x000049, 0x000323, ++ 0x000069, 0x000323, 0x00004f, 0x000323, 0x00006f, 0x000323, ++ 0x00004f, 0x000309, 0x00006f, 0x000309, 0x00004f, 0x000302, ++ 0x000301, 0x00006f, 0x000302, 0x000301, 0x00004f, 0x000302, ++ 0x000300, 0x00006f, 0x000302, 0x000300, 0x00004f, 0x000302, ++ 0x000309, 0x00006f, 0x000302, 0x000309, 0x00004f, 0x000302, ++ 0x000303, 0x00006f, 0x000302, 0x000303, 0x00004f, 0x000323, ++ 0x000302, 0x00006f, 0x000323, 0x000302, 0x00004f, 0x00031b, ++ 0x000301, 
0x00006f, 0x00031b, 0x000301, 0x00004f, 0x00031b, ++ 0x000300, 0x00006f, 0x00031b, 0x000300, 0x00004f, 0x00031b, ++ 0x000309, 0x00006f, 0x00031b, 0x000309, 0x00004f, 0x00031b, ++ 0x000303, 0x00006f, 0x00031b, 0x000303, 0x00004f, 0x00031b, ++ 0x000323, 0x00006f, 0x00031b, 0x000323, 0x000055, 0x000323, ++ 0x000075, 0x000323, 0x000055, 0x000309, 0x000075, 0x000309, ++ 0x000055, 0x00031b, 0x000301, 0x000075, 0x00031b, 0x000301, ++ 0x000055, 0x00031b, 0x000300, 0x000075, 0x00031b, 0x000300, ++ 0x000055, 0x00031b, 0x000309, 0x000075, 0x00031b, 0x000309, ++ 0x000055, 0x00031b, 0x000303, 0x000075, 0x00031b, 0x000303, ++ 0x000055, 0x00031b, 0x000323, 0x000075, 0x00031b, 0x000323, ++ 0x000059, 0x000300, 0x000079, 0x000300, 0x000059, 0x000323, ++ 0x000079, 0x000323, 0x000059, 0x000309, 0x000079, 0x000309, ++ 0x000059, 0x000303, 0x000079, 0x000303, 0x0003b1, 0x000313, ++ 0x0003b1, 0x000314, 0x0003b1, 0x000313, 0x000300, 0x0003b1, ++ 0x000314, 0x000300, 0x0003b1, 0x000313, 0x000301, 0x0003b1, ++ 0x000314, 0x000301, 0x0003b1, 0x000313, 0x000342, 0x0003b1, ++ 0x000314, 0x000342, 0x000391, 0x000313, 0x000391, 0x000314, ++ 0x000391, 0x000313, 0x000300, 0x000391, 0x000314, 0x000300, ++ 0x000391, 0x000313, 0x000301, 0x000391, 0x000314, 0x000301, ++ 0x000391, 0x000313, 0x000342, 0x000391, 0x000314, 0x000342, ++ 0x0003b5, 0x000313, 0x0003b5, 0x000314, 0x0003b5, 0x000313, ++ 0x000300, 0x0003b5, 0x000314, 0x000300, 0x0003b5, 0x000313, ++ 0x000301, 0x0003b5, 0x000314, 0x000301, 0x000395, 0x000313, ++ 0x000395, 0x000314, 0x000395, 0x000313, 0x000300, 0x000395, ++ 0x000314, 0x000300, 0x000395, 0x000313, 0x000301, 0x000395, ++ 0x000314, 0x000301, 0x0003b7, 0x000313, 0x0003b7, 0x000314, ++ 0x0003b7, 0x000313, 0x000300, 0x0003b7, 0x000314, 0x000300, ++ 0x0003b7, 0x000313, 0x000301, 0x0003b7, 0x000314, 0x000301, ++ 0x0003b7, 0x000313, 0x000342, 0x0003b7, 0x000314, 0x000342, ++ 0x000397, 0x000313, 0x000397, 0x000314, 0x000397, 0x000313, ++ 0x000300, 0x000397, 0x000314, 0x000300, 0x000397, 
0x000313, ++ 0x000301, 0x000397, 0x000314, 0x000301, 0x000397, 0x000313, ++ 0x000342, 0x000397, 0x000314, 0x000342, 0x0003b9, 0x000313, ++ 0x0003b9, 0x000314, 0x0003b9, 0x000313, 0x000300, 0x0003b9, ++ 0x000314, 0x000300, 0x0003b9, 0x000313, 0x000301, 0x0003b9, ++ 0x000314, 0x000301, 0x0003b9, 0x000313, 0x000342, 0x0003b9, ++ 0x000314, 0x000342, 0x000399, 0x000313, 0x000399, 0x000314, ++ 0x000399, 0x000313, 0x000300, 0x000399, 0x000314, 0x000300, ++ 0x000399, 0x000313, 0x000301, 0x000399, 0x000314, 0x000301, ++ 0x000399, 0x000313, 0x000342, 0x000399, 0x000314, 0x000342, ++ 0x0003bf, 0x000313, 0x0003bf, 0x000314, 0x0003bf, 0x000313, ++ 0x000300, 0x0003bf, 0x000314, 0x000300, 0x0003bf, 0x000313, ++ 0x000301, 0x0003bf, 0x000314, 0x000301, 0x00039f, 0x000313, ++ 0x00039f, 0x000314, 0x00039f, 0x000313, 0x000300, 0x00039f, ++ 0x000314, 0x000300, 0x00039f, 0x000313, 0x000301, 0x00039f, ++ 0x000314, 0x000301, 0x0003c5, 0x000313, 0x0003c5, 0x000314, ++ 0x0003c5, 0x000313, 0x000300, 0x0003c5, 0x000314, 0x000300, ++ 0x0003c5, 0x000313, 0x000301, 0x0003c5, 0x000314, 0x000301, ++ 0x0003c5, 0x000313, 0x000342, 0x0003c5, 0x000314, 0x000342, ++ 0x0003a5, 0x000314, 0x0003a5, 0x000314, 0x000300, 0x0003a5, ++ 0x000314, 0x000301, 0x0003a5, 0x000314, 0x000342, 0x0003c9, ++ 0x000313, 0x0003c9, 0x000314, 0x0003c9, 0x000313, 0x000300, ++ 0x0003c9, 0x000314, 0x000300, 0x0003c9, 0x000313, 0x000301, ++ 0x0003c9, 0x000314, 0x000301, 0x0003c9, 0x000313, 0x000342, ++ 0x0003c9, 0x000314, 0x000342, 0x0003a9, 0x000313, 0x0003a9, ++ 0x000314, 0x0003a9, 0x000313, 0x000300, 0x0003a9, 0x000314, ++ 0x000300, 0x0003a9, 0x000313, 0x000301, 0x0003a9, 0x000314, ++ 0x000301, 0x0003a9, 0x000313, 0x000342, 0x0003a9, 0x000314, ++ 0x000342, 0x0003b1, 0x000300, 0x0003b1, 0x000301, 0x0003b5, ++ 0x000300, 0x0003b5, 0x000301, 0x0003b7, 0x000300, 0x0003b7, ++ 0x000301, 0x0003b9, 0x000300, 0x0003b9, 0x000301, 0x0003bf, ++ 0x000300, 0x0003bf, 0x000301, 0x0003c5, 0x000300, 0x0003c5, ++ 0x000301, 0x0003c9, 0x000300, 
0x0003c9, 0x000301, 0x0003b1, ++ 0x000313, 0x000345, 0x0003b1, 0x000314, 0x000345, 0x0003b1, ++ 0x000313, 0x000300, 0x000345, 0x0003b1, 0x000314, 0x000300, ++ 0x000345, 0x0003b1, 0x000313, 0x000301, 0x000345, 0x0003b1, ++ 0x000314, 0x000301, 0x000345, 0x0003b1, 0x000313, 0x000342, ++ 0x000345, 0x0003b1, 0x000314, 0x000342, 0x000345, 0x000391, ++ 0x000313, 0x000345, 0x000391, 0x000314, 0x000345, 0x000391, ++ 0x000313, 0x000300, 0x000345, 0x000391, 0x000314, 0x000300, ++ 0x000345, 0x000391, 0x000313, 0x000301, 0x000345, 0x000391, ++ 0x000314, 0x000301, 0x000345, 0x000391, 0x000313, 0x000342, ++ 0x000345, 0x000391, 0x000314, 0x000342, 0x000345, 0x0003b7, ++ 0x000313, 0x000345, 0x0003b7, 0x000314, 0x000345, 0x0003b7, ++ 0x000313, 0x000300, 0x000345, 0x0003b7, 0x000314, 0x000300, ++ 0x000345, 0x0003b7, 0x000313, 0x000301, 0x000345, 0x0003b7, ++ 0x000314, 0x000301, 0x000345, 0x0003b7, 0x000313, 0x000342, ++ 0x000345, 0x0003b7, 0x000314, 0x000342, 0x000345, 0x000397, ++ 0x000313, 0x000345, 0x000397, 0x000314, 0x000345, 0x000397, ++ 0x000313, 0x000300, 0x000345, 0x000397, 0x000314, 0x000300, ++ 0x000345, 0x000397, 0x000313, 0x000301, 0x000345, 0x000397, ++ 0x000314, 0x000301, 0x000345, 0x000397, 0x000313, 0x000342, ++ 0x000345, 0x000397, 0x000314, 0x000342, 0x000345, 0x0003c9, ++ 0x000313, 0x000345, 0x0003c9, 0x000314, 0x000345, 0x0003c9, ++ 0x000313, 0x000300, 0x000345, 0x0003c9, 0x000314, 0x000300, ++ 0x000345, 0x0003c9, 0x000313, 0x000301, 0x000345, 0x0003c9, ++ 0x000314, 0x000301, 0x000345, 0x0003c9, 0x000313, 0x000342, ++ 0x000345, 0x0003c9, 0x000314, 0x000342, 0x000345, 0x0003a9, ++ 0x000313, 0x000345, 0x0003a9, 0x000314, 0x000345, 0x0003a9, ++ 0x000313, 0x000300, 0x000345, 0x0003a9, 0x000314, 0x000300, ++ 0x000345, 0x0003a9, 0x000313, 0x000301, 0x000345, 0x0003a9, ++ 0x000314, 0x000301, 0x000345, 0x0003a9, 0x000313, 0x000342, ++ 0x000345, 0x0003a9, 0x000314, 0x000342, 0x000345, 0x0003b1, ++ 0x000306, 0x0003b1, 0x000304, 0x0003b1, 0x000300, 0x000345, ++ 0x0003b1, 
0x000345, 0x0003b1, 0x000301, 0x000345, 0x0003b1, ++ 0x000342, 0x0003b1, 0x000342, 0x000345, 0x000391, 0x000306, ++ 0x000391, 0x000304, 0x000391, 0x000300, 0x000391, 0x000301, ++ 0x000391, 0x000345, 0x0003b9, 0x0000a8, 0x000342, 0x0003b7, ++ 0x000300, 0x000345, 0x0003b7, 0x000345, 0x0003b7, 0x000301, ++ 0x000345, 0x0003b7, 0x000342, 0x0003b7, 0x000342, 0x000345, ++ 0x000395, 0x000300, 0x000395, 0x000301, 0x000397, 0x000300, ++ 0x000397, 0x000301, 0x000397, 0x000345, 0x001fbf, 0x000300, ++ 0x001fbf, 0x000301, 0x001fbf, 0x000342, 0x0003b9, 0x000306, ++ 0x0003b9, 0x000304, 0x0003b9, 0x000308, 0x000300, 0x0003b9, ++ 0x000308, 0x000301, 0x0003b9, 0x000342, 0x0003b9, 0x000308, ++ 0x000342, 0x000399, 0x000306, 0x000399, 0x000304, 0x000399, ++ 0x000300, 0x000399, 0x000301, 0x001ffe, 0x000300, 0x001ffe, ++ 0x000301, 0x001ffe, 0x000342, 0x0003c5, 0x000306, 0x0003c5, ++ 0x000304, 0x0003c5, 0x000308, 0x000300, 0x0003c5, 0x000308, ++ 0x000301, 0x0003c1, 0x000313, 0x0003c1, 0x000314, 0x0003c5, ++ 0x000342, 0x0003c5, 0x000308, 0x000342, 0x0003a5, 0x000306, ++ 0x0003a5, 0x000304, 0x0003a5, 0x000300, 0x0003a5, 0x000301, ++ 0x0003a1, 0x000314, 0x0000a8, 0x000300, 0x0000a8, 0x000301, ++ 0x000060, 0x0003c9, 0x000300, 0x000345, 0x0003c9, 0x000345, ++ 0x0003c9, 0x000301, 0x000345, 0x0003c9, 0x000342, 0x0003c9, ++ 0x000342, 0x000345, 0x00039f, 0x000300, 0x00039f, 0x000301, ++ 0x0003a9, 0x000300, 0x0003a9, 0x000301, 0x0003a9, 0x000345, ++ 0x0000b4, 0x002002, 0x002003, 0x0003a9, 0x00004b, 0x000041, ++ 0x00030a, 0x002190, 0x000338, 0x002192, 0x000338, 0x002194, ++ 0x000338, 0x0021d0, 0x000338, 0x0021d4, 0x000338, 0x0021d2, ++ 0x000338, 0x002203, 0x000338, 0x002208, 0x000338, 0x00220b, ++ 0x000338, 0x002223, 0x000338, 0x002225, 0x000338, 0x00223c, ++ 0x000338, 0x002243, 0x000338, 0x002245, 0x000338, 0x002248, ++ 0x000338, 0x00003d, 0x000338, 0x002261, 0x000338, 0x00224d, ++ 0x000338, 0x00003c, 0x000338, 0x00003e, 0x000338, 0x002264, ++ 0x000338, 0x002265, 0x000338, 0x002272, 0x000338, 
0x002273, ++ 0x000338, 0x002276, 0x000338, 0x002277, 0x000338, 0x00227a, ++ 0x000338, 0x00227b, 0x000338, 0x002282, 0x000338, 0x002283, ++ 0x000338, 0x002286, 0x000338, 0x002287, 0x000338, 0x0022a2, ++ 0x000338, 0x0022a8, 0x000338, 0x0022a9, 0x000338, 0x0022ab, ++ 0x000338, 0x00227c, 0x000338, 0x00227d, 0x000338, 0x002291, ++ 0x000338, 0x002292, 0x000338, 0x0022b2, 0x000338, 0x0022b3, ++ 0x000338, 0x0022b4, 0x000338, 0x0022b5, 0x000338, 0x003008, ++ 0x003009, 0x002add, 0x000338, 0x00304b, 0x003099, 0x00304d, ++ 0x003099, 0x00304f, 0x003099, 0x003051, 0x003099, 0x003053, ++ 0x003099, 0x003055, 0x003099, 0x003057, 0x003099, 0x003059, ++ 0x003099, 0x00305b, 0x003099, 0x00305d, 0x003099, 0x00305f, ++ 0x003099, 0x003061, 0x003099, 0x003064, 0x003099, 0x003066, ++ 0x003099, 0x003068, 0x003099, 0x00306f, 0x003099, 0x00306f, ++ 0x00309a, 0x003072, 0x003099, 0x003072, 0x00309a, 0x003075, ++ 0x003099, 0x003075, 0x00309a, 0x003078, 0x003099, 0x003078, ++ 0x00309a, 0x00307b, 0x003099, 0x00307b, 0x00309a, 0x003046, ++ 0x003099, 0x00309d, 0x003099, 0x0030ab, 0x003099, 0x0030ad, ++ 0x003099, 0x0030af, 0x003099, 0x0030b1, 0x003099, 0x0030b3, ++ 0x003099, 0x0030b5, 0x003099, 0x0030b7, 0x003099, 0x0030b9, ++ 0x003099, 0x0030bb, 0x003099, 0x0030bd, 0x003099, 0x0030bf, ++ 0x003099, 0x0030c1, 0x003099, 0x0030c4, 0x003099, 0x0030c6, ++ 0x003099, 0x0030c8, 0x003099, 0x0030cf, 0x003099, 0x0030cf, ++ 0x00309a, 0x0030d2, 0x003099, 0x0030d2, 0x00309a, 0x0030d5, ++ 0x003099, 0x0030d5, 0x00309a, 0x0030d8, 0x003099, 0x0030d8, ++ 0x00309a, 0x0030db, 0x003099, 0x0030db, 0x00309a, 0x0030a6, ++ 0x003099, 0x0030ef, 0x003099, 0x0030f0, 0x003099, 0x0030f1, ++ 0x003099, 0x0030f2, 0x003099, 0x0030fd, 0x003099, 0x008c48, ++ 0x0066f4, 0x008eca, 0x008cc8, 0x006ed1, 0x004e32, 0x0053e5, ++ 0x009f9c, 0x009f9c, 0x005951, 0x0091d1, 0x005587, 0x005948, ++ 0x0061f6, 0x007669, 0x007f85, 0x00863f, 0x0087ba, 0x0088f8, ++ 0x00908f, 0x006a02, 0x006d1b, 0x0070d9, 0x0073de, 0x00843d, ++ 0x00916a, 0x0099f1, 0x004e82, 
0x005375, 0x006b04, 0x00721b, ++ 0x00862d, 0x009e1e, 0x005d50, 0x006feb, 0x0085cd, 0x008964, ++ 0x0062c9, 0x0081d8, 0x00881f, 0x005eca, 0x006717, 0x006d6a, ++ 0x0072fc, 0x0090ce, 0x004f86, 0x0051b7, 0x0052de, 0x0064c4, ++ 0x006ad3, 0x007210, 0x0076e7, 0x008001, 0x008606, 0x00865c, ++ 0x008def, 0x009732, 0x009b6f, 0x009dfa, 0x00788c, 0x00797f, ++ 0x007da0, 0x0083c9, 0x009304, 0x009e7f, 0x008ad6, 0x0058df, ++ 0x005f04, 0x007c60, 0x00807e, 0x007262, 0x0078ca, 0x008cc2, ++ 0x0096f7, 0x0058d8, 0x005c62, 0x006a13, 0x006dda, 0x006f0f, ++ 0x007d2f, 0x007e37, 0x00964b, 0x0052d2, 0x00808b, 0x0051dc, ++ 0x0051cc, 0x007a1c, 0x007dbe, 0x0083f1, 0x009675, 0x008b80, ++ 0x0062cf, 0x006a02, 0x008afe, 0x004e39, 0x005be7, 0x006012, ++ 0x007387, 0x007570, 0x005317, 0x0078fb, 0x004fbf, 0x005fa9, ++ 0x004e0d, 0x006ccc, 0x006578, 0x007d22, 0x0053c3, 0x00585e, ++ 0x007701, 0x008449, 0x008aaa, 0x006bba, 0x008fb0, 0x006c88, ++ 0x0062fe, 0x0082e5, 0x0063a0, 0x007565, 0x004eae, 0x005169, ++ 0x0051c9, 0x006881, 0x007ce7, 0x00826f, 0x008ad2, 0x0091cf, ++ 0x0052f5, 0x005442, 0x005973, 0x005eec, 0x0065c5, 0x006ffe, ++ 0x00792a, 0x0095ad, 0x009a6a, 0x009e97, 0x009ece, 0x00529b, ++ 0x0066c6, 0x006b77, 0x008f62, 0x005e74, 0x006190, 0x006200, ++ 0x00649a, 0x006f23, 0x007149, 0x007489, 0x0079ca, 0x007df4, ++ 0x00806f, 0x008f26, 0x0084ee, 0x009023, 0x00934a, 0x005217, ++ 0x0052a3, 0x0054bd, 0x0070c8, 0x0088c2, 0x008aaa, 0x005ec9, ++ 0x005ff5, 0x00637b, 0x006bae, 0x007c3e, 0x007375, 0x004ee4, ++ 0x0056f9, 0x005be7, 0x005dba, 0x00601c, 0x0073b2, 0x007469, ++ 0x007f9a, 0x008046, 0x009234, 0x0096f6, 0x009748, 0x009818, ++ 0x004f8b, 0x0079ae, 0x0091b4, 0x0096b8, 0x0060e1, 0x004e86, ++ 0x0050da, 0x005bee, 0x005c3f, 0x006599, 0x006a02, 0x0071ce, ++ 0x007642, 0x0084fc, 0x00907c, 0x009f8d, 0x006688, 0x00962e, ++ 0x005289, 0x00677b, 0x0067f3, 0x006d41, 0x006e9c, 0x007409, ++ 0x007559, 0x00786b, 0x007d10, 0x00985e, 0x00516d, 0x00622e, ++ 0x009678, 0x00502b, 0x005d19, 0x006dea, 0x008f2a, 0x005f8b, ++ 0x006144, 
0x006817, 0x007387, 0x009686, 0x005229, 0x00540f, ++ 0x005c65, 0x006613, 0x00674e, 0x0068a8, 0x006ce5, 0x007406, ++ 0x0075e2, 0x007f79, 0x0088cf, 0x0088e1, 0x0091cc, 0x0096e2, ++ 0x00533f, 0x006eba, 0x00541d, 0x0071d0, 0x007498, 0x0085fa, ++ 0x0096a3, 0x009c57, 0x009e9f, 0x006797, 0x006dcb, 0x0081e8, ++ 0x007acb, 0x007b20, 0x007c92, 0x0072c0, 0x007099, 0x008b58, ++ 0x004ec0, 0x008336, 0x00523a, 0x005207, 0x005ea6, 0x0062d3, ++ 0x007cd6, 0x005b85, 0x006d1e, 0x0066b4, 0x008f3b, 0x00884c, ++ 0x00964d, 0x00898b, 0x005ed3, 0x005140, 0x0055c0, 0x00585a, ++ 0x006674, 0x0051de, 0x00732a, 0x0076ca, 0x00793c, 0x00795e, ++ 0x007965, 0x00798f, 0x009756, 0x007cbe, 0x007fbd, 0x008612, ++ 0x008af8, 0x009038, 0x0090fd, 0x0098ef, 0x0098fc, 0x009928, ++ 0x009db4, 0x0090de, 0x0096b7, 0x004fae, 0x0050e7, 0x00514d, ++ 0x0052c9, 0x0052e4, 0x005351, 0x00559d, 0x005606, 0x005668, ++ 0x005840, 0x0058a8, 0x005c64, 0x005c6e, 0x006094, 0x006168, ++ 0x00618e, 0x0061f2, 0x00654f, 0x0065e2, 0x006691, 0x006885, ++ 0x006d77, 0x006e1a, 0x006f22, 0x00716e, 0x00722b, 0x007422, ++ 0x007891, 0x00793e, 0x007949, 0x007948, 0x007950, 0x007956, ++ 0x00795d, 0x00798d, 0x00798e, 0x007a40, 0x007a81, 0x007bc0, ++ 0x007df4, 0x007e09, 0x007e41, 0x007f72, 0x008005, 0x0081ed, ++ 0x008279, 0x008279, 0x008457, 0x008910, 0x008996, 0x008b01, ++ 0x008b39, 0x008cd3, 0x008d08, 0x008fb6, 0x009038, 0x0096e3, ++ 0x0097ff, 0x00983b, 0x006075, 0x0242ee, 0x008218, 0x004e26, ++ 0x0051b5, 0x005168, 0x004f80, 0x005145, 0x005180, 0x0052c7, ++ 0x0052fa, 0x00559d, 0x005555, 0x005599, 0x0055e2, 0x00585a, ++ 0x0058b3, 0x005944, 0x005954, 0x005a62, 0x005b28, 0x005ed2, ++ 0x005ed9, 0x005f69, 0x005fad, 0x0060d8, 0x00614e, 0x006108, ++ 0x00618e, 0x006160, 0x0061f2, 0x006234, 0x0063c4, 0x00641c, ++ 0x006452, 0x006556, 0x006674, 0x006717, 0x00671b, 0x006756, ++ 0x006b79, 0x006bba, 0x006d41, 0x006edb, 0x006ecb, 0x006f22, ++ 0x00701e, 0x00716e, 0x0077a7, 0x007235, 0x0072af, 0x00732a, ++ 0x007471, 0x007506, 0x00753b, 0x00761d, 0x00761f, 
0x0076ca, ++ 0x0076db, 0x0076f4, 0x00774a, 0x007740, 0x0078cc, 0x007ab1, ++ 0x007bc0, 0x007c7b, 0x007d5b, 0x007df4, 0x007f3e, 0x008005, ++ 0x008352, 0x0083ef, 0x008779, 0x008941, 0x008986, 0x008996, ++ 0x008abf, 0x008af8, 0x008acb, 0x008b01, 0x008afe, 0x008aed, ++ 0x008b39, 0x008b8a, 0x008d08, 0x008f38, 0x009072, 0x009199, ++ 0x009276, 0x00967c, 0x0096e3, 0x009756, 0x0097db, 0x0097ff, ++ 0x00980b, 0x00983b, 0x009b12, 0x009f9c, 0x02284a, 0x022844, ++ 0x0233d5, 0x003b9d, 0x004018, 0x004039, 0x025249, 0x025cd0, ++ 0x027ed3, 0x009f43, 0x009f8e, 0x0005d9, 0x0005b4, 0x0005f2, ++ 0x0005b7, 0x0005e9, 0x0005c1, 0x0005e9, 0x0005c2, 0x0005e9, ++ 0x0005bc, 0x0005c1, 0x0005e9, 0x0005bc, 0x0005c2, 0x0005d0, ++ 0x0005b7, 0x0005d0, 0x0005b8, 0x0005d0, 0x0005bc, 0x0005d1, ++ 0x0005bc, 0x0005d2, 0x0005bc, 0x0005d3, 0x0005bc, 0x0005d4, ++ 0x0005bc, 0x0005d5, 0x0005bc, 0x0005d6, 0x0005bc, 0x0005d8, ++ 0x0005bc, 0x0005d9, 0x0005bc, 0x0005da, 0x0005bc, 0x0005db, ++ 0x0005bc, 0x0005dc, 0x0005bc, 0x0005de, 0x0005bc, 0x0005e0, ++ 0x0005bc, 0x0005e1, 0x0005bc, 0x0005e3, 0x0005bc, 0x0005e4, ++ 0x0005bc, 0x0005e6, 0x0005bc, 0x0005e7, 0x0005bc, 0x0005e8, ++ 0x0005bc, 0x0005e9, 0x0005bc, 0x0005ea, 0x0005bc, 0x0005d5, ++ 0x0005b9, 0x0005d1, 0x0005bf, 0x0005db, 0x0005bf, 0x0005e4, ++ 0x0005bf, 0x011099, 0x0110ba, 0x01109b, 0x0110ba, 0x0110a5, ++ 0x0110ba, 0x011131, 0x011127, 0x011132, 0x011127, 0x011347, ++ 0x01133e, 0x011347, 0x011357, 0x0114b9, 0x0114ba, 0x0114b9, ++ 0x0114b0, 0x0114b9, 0x0114bd, 0x0115b8, 0x0115af, 0x0115b9, ++ 0x0115af, 0x01d157, 0x01d165, 0x01d158, 0x01d165, 0x01d158, ++ 0x01d165, 0x01d16e, 0x01d158, 0x01d165, 0x01d16f, 0x01d158, ++ 0x01d165, 0x01d170, 0x01d158, 0x01d165, 0x01d171, 0x01d158, ++ 0x01d165, 0x01d172, 0x01d1b9, 0x01d165, 0x01d1ba, 0x01d165, ++ 0x01d1b9, 0x01d165, 0x01d16e, 0x01d1ba, 0x01d165, 0x01d16e, ++ 0x01d1b9, 0x01d165, 0x01d16f, 0x01d1ba, 0x01d165, 0x01d16f, ++ 0x004e3d, 0x004e38, 0x004e41, 0x020122, 0x004f60, 0x004fae, ++ 0x004fbb, 0x005002, 0x00507a, 
0x005099, 0x0050e7, 0x0050cf, ++ 0x00349e, 0x02063a, 0x00514d, 0x005154, 0x005164, 0x005177, ++ 0x02051c, 0x0034b9, 0x005167, 0x00518d, 0x02054b, 0x005197, ++ 0x0051a4, 0x004ecc, 0x0051ac, 0x0051b5, 0x0291df, 0x0051f5, ++ 0x005203, 0x0034df, 0x00523b, 0x005246, 0x005272, 0x005277, ++ 0x003515, 0x0052c7, 0x0052c9, 0x0052e4, 0x0052fa, 0x005305, ++ 0x005306, 0x005317, 0x005349, 0x005351, 0x00535a, 0x005373, ++ 0x00537d, 0x00537f, 0x00537f, 0x00537f, 0x020a2c, 0x007070, ++ 0x0053ca, 0x0053df, 0x020b63, 0x0053eb, 0x0053f1, 0x005406, ++ 0x00549e, 0x005438, 0x005448, 0x005468, 0x0054a2, 0x0054f6, ++ 0x005510, 0x005553, 0x005563, 0x005584, 0x005584, 0x005599, ++ 0x0055ab, 0x0055b3, 0x0055c2, 0x005716, 0x005606, 0x005717, ++ 0x005651, 0x005674, 0x005207, 0x0058ee, 0x0057ce, 0x0057f4, ++ 0x00580d, 0x00578b, 0x005832, 0x005831, 0x0058ac, 0x0214e4, ++ 0x0058f2, 0x0058f7, 0x005906, 0x00591a, 0x005922, 0x005962, ++ 0x0216a8, 0x0216ea, 0x0059ec, 0x005a1b, 0x005a27, 0x0059d8, ++ 0x005a66, 0x0036ee, 0x0036fc, 0x005b08, 0x005b3e, 0x005b3e, ++ 0x0219c8, 0x005bc3, 0x005bd8, 0x005be7, 0x005bf3, 0x021b18, ++ 0x005bff, 0x005c06, 0x005f53, 0x005c22, 0x003781, 0x005c60, ++ 0x005c6e, 0x005cc0, 0x005c8d, 0x021de4, 0x005d43, 0x021de6, ++ 0x005d6e, 0x005d6b, 0x005d7c, 0x005de1, 0x005de2, 0x00382f, ++ 0x005dfd, 0x005e28, 0x005e3d, 0x005e69, 0x003862, 0x022183, ++ 0x00387c, 0x005eb0, 0x005eb3, 0x005eb6, 0x005eca, 0x02a392, ++ 0x005efe, 0x022331, 0x022331, 0x008201, 0x005f22, 0x005f22, ++ 0x0038c7, 0x0232b8, 0x0261da, 0x005f62, 0x005f6b, 0x0038e3, ++ 0x005f9a, 0x005fcd, 0x005fd7, 0x005ff9, 0x006081, 0x00393a, ++ 0x00391c, 0x006094, 0x0226d4, 0x0060c7, 0x006148, 0x00614c, ++ 0x00614e, 0x00614c, 0x00617a, 0x00618e, 0x0061b2, 0x0061a4, ++ 0x0061af, 0x0061de, 0x0061f2, 0x0061f6, 0x006210, 0x00621b, ++ 0x00625d, 0x0062b1, 0x0062d4, 0x006350, 0x022b0c, 0x00633d, ++ 0x0062fc, 0x006368, 0x006383, 0x0063e4, 0x022bf1, 0x006422, ++ 0x0063c5, 0x0063a9, 0x003a2e, 0x006469, 0x00647e, 0x00649d, ++ 0x006477, 
0x003a6c, 0x00654f, 0x00656c, 0x02300a, 0x0065e3, ++ 0x0066f8, 0x006649, 0x003b19, 0x006691, 0x003b08, 0x003ae4, ++ 0x005192, 0x005195, 0x006700, 0x00669c, 0x0080ad, 0x0043d9, ++ 0x006717, 0x00671b, 0x006721, 0x00675e, 0x006753, 0x0233c3, ++ 0x003b49, 0x0067fa, 0x006785, 0x006852, 0x006885, 0x02346d, ++ 0x00688e, 0x00681f, 0x006914, 0x003b9d, 0x006942, 0x0069a3, ++ 0x0069ea, 0x006aa8, 0x0236a3, 0x006adb, 0x003c18, 0x006b21, ++ 0x0238a7, 0x006b54, 0x003c4e, 0x006b72, 0x006b9f, 0x006bba, ++ 0x006bbb, 0x023a8d, 0x021d0b, 0x023afa, 0x006c4e, 0x023cbc, ++ 0x006cbf, 0x006ccd, 0x006c67, 0x006d16, 0x006d3e, 0x006d77, ++ 0x006d41, 0x006d69, 0x006d78, 0x006d85, 0x023d1e, 0x006d34, ++ 0x006e2f, 0x006e6e, 0x003d33, 0x006ecb, 0x006ec7, 0x023ed1, ++ 0x006df9, 0x006f6e, 0x023f5e, 0x023f8e, 0x006fc6, 0x007039, ++ 0x00701e, 0x00701b, 0x003d96, 0x00704a, 0x00707d, 0x007077, ++ 0x0070ad, 0x020525, 0x007145, 0x024263, 0x00719c, 0x0243ab, ++ 0x007228, 0x007235, 0x007250, 0x024608, 0x007280, 0x007295, ++ 0x024735, 0x024814, 0x00737a, 0x00738b, 0x003eac, 0x0073a5, ++ 0x003eb8, 0x003eb8, 0x007447, 0x00745c, 0x007471, 0x007485, ++ 0x0074ca, 0x003f1b, 0x007524, 0x024c36, 0x00753e, 0x024c92, ++ 0x007570, 0x02219f, 0x007610, 0x024fa1, 0x024fb8, 0x025044, ++ 0x003ffc, 0x004008, 0x0076f4, 0x0250f3, 0x0250f2, 0x025119, ++ 0x025133, 0x00771e, 0x00771f, 0x00771f, 0x00774a, 0x004039, ++ 0x00778b, 0x004046, 0x004096, 0x02541d, 0x00784e, 0x00788c, ++ 0x0078cc, 0x0040e3, 0x025626, 0x007956, 0x02569a, 0x0256c5, ++ 0x00798f, 0x0079eb, 0x00412f, 0x007a40, 0x007a4a, 0x007a4f, ++ 0x02597c, 0x025aa7, 0x025aa7, 0x007aee, 0x004202, 0x025bab, ++ 0x007bc6, 0x007bc9, 0x004227, 0x025c80, 0x007cd2, 0x0042a0, ++ 0x007ce8, 0x007ce3, 0x007d00, 0x025f86, 0x007d63, 0x004301, ++ 0x007dc7, 0x007e02, 0x007e45, 0x004334, 0x026228, 0x026247, ++ 0x004359, 0x0262d9, 0x007f7a, 0x02633e, 0x007f95, 0x007ffa, ++ 0x008005, 0x0264da, 0x026523, 0x008060, 0x0265a8, 0x008070, ++ 0x02335f, 0x0043d5, 0x0080b2, 0x008103, 0x00440b, 
0x00813e, ++ 0x005ab5, 0x0267a7, 0x0267b5, 0x023393, 0x02339c, 0x008201, ++ 0x008204, 0x008f9e, 0x00446b, 0x008291, 0x00828b, 0x00829d, ++ 0x0052b3, 0x0082b1, 0x0082b3, 0x0082bd, 0x0082e6, 0x026b3c, ++ 0x0082e5, 0x00831d, 0x008363, 0x0083ad, 0x008323, 0x0083bd, ++ 0x0083e7, 0x008457, 0x008353, 0x0083ca, 0x0083cc, 0x0083dc, ++ 0x026c36, 0x026d6b, 0x026cd5, 0x00452b, 0x0084f1, 0x0084f3, ++ 0x008516, 0x0273ca, 0x008564, 0x026f2c, 0x00455d, 0x004561, ++ 0x026fb1, 0x0270d2, 0x00456b, 0x008650, 0x00865c, 0x008667, ++ 0x008669, 0x0086a9, 0x008688, 0x00870e, 0x0086e2, 0x008779, ++ 0x008728, 0x00876b, 0x008786, 0x0045d7, 0x0087e1, 0x008801, ++ 0x0045f9, 0x008860, 0x008863, 0x027667, 0x0088d7, 0x0088de, ++ 0x004635, 0x0088fa, 0x0034bb, 0x0278ae, 0x027966, 0x0046be, ++ 0x0046c7, 0x008aa0, 0x008aed, 0x008b8a, 0x008c55, 0x027ca8, ++ 0x008cab, 0x008cc1, 0x008d1b, 0x008d77, 0x027f2f, 0x020804, ++ 0x008dcb, 0x008dbc, 0x008df0, 0x0208de, 0x008ed4, 0x008f38, ++ 0x0285d2, 0x0285ed, 0x009094, 0x0090f1, 0x009111, 0x02872e, ++ 0x00911b, 0x009238, 0x0092d7, 0x0092d8, 0x00927c, 0x0093f9, ++ 0x009415, 0x028bfa, 0x00958b, 0x004995, 0x0095b7, 0x028d77, ++ 0x0049e6, 0x0096c3, 0x005db2, 0x009723, 0x029145, 0x02921a, ++ 0x004a6e, 0x004a76, 0x0097e0, 0x02940a, 0x004ab2, 0x029496, ++ 0x00980b, 0x00980b, 0x009829, 0x0295b6, 0x0098e2, 0x004b33, ++ 0x009929, 0x0099a7, 0x0099c2, 0x0099fe, 0x004bce, 0x029b30, ++ 0x009b12, 0x009c40, 0x009cfd, 0x004cce, 0x004ced, 0x009d67, ++ 0x02a0ce, 0x004cf8, 0x02a105, 0x02a20e, 0x02a291, 0x009ebb, ++ 0x004d56, 0x009ef9, 0x009efe, 0x009f05, 0x009f0f, 0x009f16, ++ 0x009f3b, 0x02a600, ++}; ++ ++static u16 apfs_cf_trie[] = { ++ /* Node for range 0x_____ */ ++ 0x0001, 0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0____ */ ++ 0x0003, 0x0004, 0x0005, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0006, 0x0000, 0x0000, 0x0000, 0x0000, 0x0007, ++ /* Node for 
range 0x1____ */ ++ 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x000a, 0x0000, ++ /* Node for range 0x00___ */ ++ 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01___ */ ++ 0x0011, 0x0000, 0x0000, 0x0012, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0013, 0x0000, 0x0014, 0x0015, ++ /* Node for range 0x02___ */ ++ 0x0000, 0x0016, 0x0000, 0x0000, 0x0017, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0018, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0a___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0019, 0x001a, ++ 0x0000, 0x0000, 0x0000, 0x001b, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0f___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x001c, 0x0000, 0x0000, 0x0000, 0x001d, ++ /* Node for range 0x10___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x001e, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x001f, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x11___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0020, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1e___ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0021, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x000__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0022, 0x0023, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0024, 0x0025, 0x0026, 0x0000, 0x0000, ++ /* Node for range 0x001__ */ ++ 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, ++ 0x002f, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, ++ /* Node for range 0x002__ */ ++ 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 
0x003__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x003c, 0x0000, 0x0000, 0x003d, ++ 0x003e, 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, ++ /* Node for range 0x004__ */ ++ 0x0046, 0x0047, 0x0048, 0x0000, 0x0000, 0x0000, 0x0049, 0x004a, ++ 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, ++ /* Node for range 0x005__ */ ++ 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0000, 0x0000, ++ 0x0059, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x010__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x005a, 0x005b, 0x005c, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x013__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x005d, ++ /* Node for range 0x01c__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x005e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01e__ */ ++ 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, ++ 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, ++ /* Node for range 0x01f__ */ ++ 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0000, ++ 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, ++ /* Node for range 0x021__ */ ++ 0x0000, 0x0000, 0x007e, 0x007f, 0x0000, 0x0000, 0x0080, 0x0000, ++ 0x0081, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x024__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0082, 0x0083, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x02c__ */ ++ 0x0084, 0x0085, 0x0086, 0x0000, 0x0000, 0x0000, 0x0087, 0x0088, ++ 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, ++ /* Node for range 0x0a6__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0091, 0x0092, 0x0093, 0x0000, ++ 0x0094, 0x0095, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0a7__ 
*/ ++ 0x0000, 0x0000, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, ++ 0x009c, 0x009d, 0x009e, 0x009f, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0ab__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00a0, ++ 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0fb__ */ ++ 0x00a5, 0x00a6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0ff__ */ ++ 0x0000, 0x0000, 0x00a7, 0x00a8, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x104__ */ ++ 0x00a9, 0x00aa, 0x00ab, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x00ac, 0x00ad, 0x00ae, 0x0000, 0x0000, ++ /* Node for range 0x10c__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x00af, 0x00b0, 0x00b1, 0x00b2, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x118__ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x00b3, 0x00b4, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1e9__ */ ++ 0x00b5, 0x00b6, 0x00b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0004_ */ ++ 0x0000, 0x0001, 0x0009, 0x0011, 0x0019, 0x0021, 0x0029, 0x0031, ++ 0x0039, 0x0041, 0x0049, 0x0051, 0x0059, 0x0061, 0x0069, 0x0071, ++ /* Node for range 0x0005_ */ ++ 0x0079, 0x0081, 0x0089, 0x0091, 0x0099, 0x00a1, 0x00a9, 0x00b1, ++ 0x00b9, 0x00c1, 0x00c9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x000b_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00d1, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x000c_ */ ++ 0x00d9, 0x00e1, 0x00e9, 0x00f1, 0x00f9, 0x0101, 0x0109, 0x0111, ++ 0x0119, 0x0121, 0x0129, 0x0131, 0x0139, 0x0141, 0x0149, 0x0151, ++ /* Node for range 0x000d_ */ ++ 
0x0159, 0x0161, 0x0169, 0x0171, 0x0179, 0x0181, 0x0189, 0x0000, ++ 0x0191, 0x0199, 0x01a1, 0x01a9, 0x01b1, 0x01b9, 0x01c1, 0x01ca, ++ /* Node for range 0x0010_ */ ++ 0x01d9, 0x0000, 0x01e1, 0x0000, 0x01e9, 0x0000, 0x01f1, 0x0000, ++ 0x01f9, 0x0000, 0x0201, 0x0000, 0x0209, 0x0000, 0x0211, 0x0000, ++ /* Node for range 0x0011_ */ ++ 0x0219, 0x0000, 0x0221, 0x0000, 0x0229, 0x0000, 0x0231, 0x0000, ++ 0x0239, 0x0000, 0x0241, 0x0000, 0x0249, 0x0000, 0x0251, 0x0000, ++ /* Node for range 0x0012_ */ ++ 0x0259, 0x0000, 0x0261, 0x0000, 0x0269, 0x0000, 0x0271, 0x0000, ++ 0x0279, 0x0000, 0x0281, 0x0000, 0x0289, 0x0000, 0x0291, 0x0000, ++ /* Node for range 0x0013_ */ ++ 0x029a, 0x0000, 0x02a9, 0x0000, 0x02b1, 0x0000, 0x02b9, 0x0000, ++ 0x0000, 0x02c1, 0x0000, 0x02c9, 0x0000, 0x02d1, 0x0000, 0x02d9, ++ /* Node for range 0x0014_ */ ++ 0x0000, 0x02e1, 0x0000, 0x02e9, 0x0000, 0x02f1, 0x0000, 0x02f9, ++ 0x0000, 0x0302, 0x0311, 0x0000, 0x0319, 0x0000, 0x0321, 0x0000, ++ /* Node for range 0x0015_ */ ++ 0x0329, 0x0000, 0x0331, 0x0000, 0x0339, 0x0000, 0x0341, 0x0000, ++ 0x0349, 0x0000, 0x0351, 0x0000, 0x0359, 0x0000, 0x0361, 0x0000, ++ /* Node for range 0x0016_ */ ++ 0x0369, 0x0000, 0x0371, 0x0000, 0x0379, 0x0000, 0x0381, 0x0000, ++ 0x0389, 0x0000, 0x0391, 0x0000, 0x0399, 0x0000, 0x03a1, 0x0000, ++ /* Node for range 0x0017_ */ ++ 0x03a9, 0x0000, 0x03b1, 0x0000, 0x03b9, 0x0000, 0x03c1, 0x0000, ++ 0x03c9, 0x03d1, 0x0000, 0x03d9, 0x0000, 0x03e1, 0x0000, 0x03e9, ++ /* Node for range 0x0018_ */ ++ 0x0000, 0x03f1, 0x03f9, 0x0000, 0x0401, 0x0000, 0x0409, 0x0411, ++ 0x0000, 0x0419, 0x0421, 0x0429, 0x0000, 0x0000, 0x0431, 0x0439, ++ /* Node for range 0x0019_ */ ++ 0x0441, 0x0449, 0x0000, 0x0451, 0x0459, 0x0000, 0x0461, 0x0469, ++ 0x0471, 0x0000, 0x0000, 0x0000, 0x0479, 0x0481, 0x0000, 0x0489, ++ /* Node for range 0x001a_ */ ++ 0x0491, 0x0000, 0x0499, 0x0000, 0x04a1, 0x0000, 0x04a9, 0x04b1, ++ 0x0000, 0x04b9, 0x0000, 0x0000, 0x04c1, 0x0000, 0x04c9, 0x04d1, ++ /* Node for range 0x001b_ */ ++ 0x0000, 
0x04d9, 0x04e1, 0x04e9, 0x0000, 0x04f1, 0x0000, 0x04f9, ++ 0x0501, 0x0000, 0x0000, 0x0000, 0x0509, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x001c_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0511, 0x0519, 0x0000, 0x0521, ++ 0x0529, 0x0000, 0x0531, 0x0539, 0x0000, 0x0541, 0x0000, 0x0549, ++ /* Node for range 0x001d_ */ ++ 0x0000, 0x0551, 0x0000, 0x0559, 0x0000, 0x0561, 0x0000, 0x0569, ++ 0x0000, 0x0571, 0x0000, 0x0579, 0x0000, 0x0000, 0x0581, 0x0000, ++ /* Node for range 0x001e_ */ ++ 0x0589, 0x0000, 0x0591, 0x0000, 0x0599, 0x0000, 0x05a1, 0x0000, ++ 0x05a9, 0x0000, 0x05b1, 0x0000, 0x05b9, 0x0000, 0x05c1, 0x0000, ++ /* Node for range 0x001f_ */ ++ 0x05ca, 0x05d9, 0x05e1, 0x0000, 0x05e9, 0x0000, 0x05f1, 0x05f9, ++ 0x0601, 0x0000, 0x0609, 0x0000, 0x0611, 0x0000, 0x0619, 0x0000, ++ /* Node for range 0x0020_ */ ++ 0x0621, 0x0000, 0x0629, 0x0000, 0x0631, 0x0000, 0x0639, 0x0000, ++ 0x0641, 0x0000, 0x0649, 0x0000, 0x0651, 0x0000, 0x0659, 0x0000, ++ /* Node for range 0x0021_ */ ++ 0x0661, 0x0000, 0x0669, 0x0000, 0x0671, 0x0000, 0x0679, 0x0000, ++ 0x0681, 0x0000, 0x0689, 0x0000, 0x0691, 0x0000, 0x0699, 0x0000, ++ /* Node for range 0x0022_ */ ++ 0x06a1, 0x0000, 0x06a9, 0x0000, 0x06b1, 0x0000, 0x06b9, 0x0000, ++ 0x06c1, 0x0000, 0x06c9, 0x0000, 0x06d1, 0x0000, 0x06d9, 0x0000, ++ /* Node for range 0x0023_ */ ++ 0x06e1, 0x0000, 0x06e9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x06f1, 0x06f9, 0x0000, 0x0701, 0x0709, 0x0000, ++ /* Node for range 0x0024_ */ ++ 0x0000, 0x0711, 0x0000, 0x0719, 0x0721, 0x0729, 0x0731, 0x0000, ++ 0x0739, 0x0000, 0x0741, 0x0000, 0x0749, 0x0000, 0x0751, 0x0000, ++ /* Node for range 0x0034_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0759, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0037_ */ ++ 0x0761, 0x0000, 0x0769, 0x0000, 0x0000, 0x0000, 0x0771, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0779, ++ /* Node for range 0x0038_ */ ++ 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0781, 0x0000, ++ 0x0789, 0x0791, 0x0799, 0x0000, 0x07a1, 0x0000, 0x07a9, 0x07b1, ++ /* Node for range 0x0039_ */ ++ 0x07bb, 0x07d1, 0x07d9, 0x07e1, 0x07e9, 0x07f1, 0x07f9, 0x0801, ++ 0x0809, 0x0811, 0x0819, 0x0821, 0x0829, 0x0831, 0x0839, 0x0841, ++ /* Node for range 0x003a_ */ ++ 0x0849, 0x0851, 0x0000, 0x0859, 0x0861, 0x0869, 0x0871, 0x0879, ++ 0x0881, 0x0889, 0x0891, 0x0899, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x003b_ */ ++ 0x08a3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x003c_ */ ++ 0x0000, 0x0000, 0x08b9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x08c1, ++ /* Node for range 0x003d_ */ ++ 0x08c9, 0x08d1, 0x0000, 0x0000, 0x0000, 0x08d9, 0x08e1, 0x0000, ++ 0x08e9, 0x0000, 0x08f1, 0x0000, 0x08f9, 0x0000, 0x0901, 0x0000, ++ /* Node for range 0x003e_ */ ++ 0x0909, 0x0000, 0x0911, 0x0000, 0x0919, 0x0000, 0x0921, 0x0000, ++ 0x0929, 0x0000, 0x0931, 0x0000, 0x0939, 0x0000, 0x0941, 0x0000, ++ /* Node for range 0x003f_ */ ++ 0x0949, 0x0951, 0x0000, 0x0000, 0x0959, 0x0961, 0x0000, 0x0969, ++ 0x0000, 0x0971, 0x0979, 0x0000, 0x0000, 0x0981, 0x0989, 0x0991, ++ /* Node for range 0x0040_ */ ++ 0x0999, 0x09a1, 0x09a9, 0x09b1, 0x09b9, 0x09c1, 0x09c9, 0x09d1, ++ 0x09d9, 0x09e1, 0x09e9, 0x09f1, 0x09f9, 0x0a01, 0x0a09, 0x0a11, ++ /* Node for range 0x0041_ */ ++ 0x0a19, 0x0a21, 0x0a29, 0x0a31, 0x0a39, 0x0a41, 0x0a49, 0x0a51, ++ 0x0a59, 0x0a61, 0x0a69, 0x0a71, 0x0a79, 0x0a81, 0x0a89, 0x0a91, ++ /* Node for range 0x0042_ */ ++ 0x0a99, 0x0aa1, 0x0aa9, 0x0ab1, 0x0ab9, 0x0ac1, 0x0ac9, 0x0ad1, ++ 0x0ad9, 0x0ae1, 0x0ae9, 0x0af1, 0x0af9, 0x0b01, 0x0b09, 0x0b11, ++ /* Node for range 0x0046_ */ ++ 0x0b19, 0x0000, 0x0b21, 0x0000, 0x0b29, 0x0000, 0x0b31, 0x0000, ++ 0x0b39, 0x0000, 0x0b41, 0x0000, 0x0b49, 0x0000, 0x0b51, 0x0000, ++ /* Node for range 0x0047_ */ ++ 0x0b59, 0x0000, 0x0b61, 
0x0000, 0x0b69, 0x0000, 0x0b71, 0x0000, ++ 0x0b79, 0x0000, 0x0b81, 0x0000, 0x0b89, 0x0000, 0x0b91, 0x0000, ++ /* Node for range 0x0048_ */ ++ 0x0b99, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0ba1, 0x0000, 0x0ba9, 0x0000, 0x0bb1, 0x0000, ++ /* Node for range 0x0049_ */ ++ 0x0bb9, 0x0000, 0x0bc1, 0x0000, 0x0bc9, 0x0000, 0x0bd1, 0x0000, ++ 0x0bd9, 0x0000, 0x0be1, 0x0000, 0x0be9, 0x0000, 0x0bf1, 0x0000, ++ /* Node for range 0x004a_ */ ++ 0x0bf9, 0x0000, 0x0c01, 0x0000, 0x0c09, 0x0000, 0x0c11, 0x0000, ++ 0x0c19, 0x0000, 0x0c21, 0x0000, 0x0c29, 0x0000, 0x0c31, 0x0000, ++ /* Node for range 0x004b_ */ ++ 0x0c39, 0x0000, 0x0c41, 0x0000, 0x0c49, 0x0000, 0x0c51, 0x0000, ++ 0x0c59, 0x0000, 0x0c61, 0x0000, 0x0c69, 0x0000, 0x0c71, 0x0000, ++ /* Node for range 0x004c_ */ ++ 0x0c79, 0x0c81, 0x0000, 0x0c89, 0x0000, 0x0c91, 0x0000, 0x0c99, ++ 0x0000, 0x0ca1, 0x0000, 0x0ca9, 0x0000, 0x0cb1, 0x0000, 0x0000, ++ /* Node for range 0x004d_ */ ++ 0x0cb9, 0x0000, 0x0cc1, 0x0000, 0x0cc9, 0x0000, 0x0cd1, 0x0000, ++ 0x0cd9, 0x0000, 0x0ce1, 0x0000, 0x0ce9, 0x0000, 0x0cf1, 0x0000, ++ /* Node for range 0x004e_ */ ++ 0x0cf9, 0x0000, 0x0d01, 0x0000, 0x0d09, 0x0000, 0x0d11, 0x0000, ++ 0x0d19, 0x0000, 0x0d21, 0x0000, 0x0d29, 0x0000, 0x0d31, 0x0000, ++ /* Node for range 0x004f_ */ ++ 0x0d39, 0x0000, 0x0d41, 0x0000, 0x0d49, 0x0000, 0x0d51, 0x0000, ++ 0x0d59, 0x0000, 0x0d61, 0x0000, 0x0d69, 0x0000, 0x0d71, 0x0000, ++ /* Node for range 0x0050_ */ ++ 0x0d79, 0x0000, 0x0d81, 0x0000, 0x0d89, 0x0000, 0x0d91, 0x0000, ++ 0x0d99, 0x0000, 0x0da1, 0x0000, 0x0da9, 0x0000, 0x0db1, 0x0000, ++ /* Node for range 0x0051_ */ ++ 0x0db9, 0x0000, 0x0dc1, 0x0000, 0x0dc9, 0x0000, 0x0dd1, 0x0000, ++ 0x0dd9, 0x0000, 0x0de1, 0x0000, 0x0de9, 0x0000, 0x0df1, 0x0000, ++ /* Node for range 0x0052_ */ ++ 0x0df9, 0x0000, 0x0e01, 0x0000, 0x0e09, 0x0000, 0x0e11, 0x0000, ++ 0x0e19, 0x0000, 0x0e21, 0x0000, 0x0e29, 0x0000, 0x0e31, 0x0000, ++ /* Node for range 0x0053_ */ ++ 0x0000, 0x0e39, 0x0e41, 0x0e49, 
0x0e51, 0x0e59, 0x0e61, 0x0e69, ++ 0x0e71, 0x0e79, 0x0e81, 0x0e89, 0x0e91, 0x0e99, 0x0ea1, 0x0ea9, ++ /* Node for range 0x0054_ */ ++ 0x0eb1, 0x0eb9, 0x0ec1, 0x0ec9, 0x0ed1, 0x0ed9, 0x0ee1, 0x0ee9, ++ 0x0ef1, 0x0ef9, 0x0f01, 0x0f09, 0x0f11, 0x0f19, 0x0f21, 0x0f29, ++ /* Node for range 0x0055_ */ ++ 0x0f31, 0x0f39, 0x0f41, 0x0f49, 0x0f51, 0x0f59, 0x0f61, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0058_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0f6a, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x010a_ */ ++ 0x0f79, 0x0f81, 0x0f89, 0x0f91, 0x0f99, 0x0fa1, 0x0fa9, 0x0fb1, ++ 0x0fb9, 0x0fc1, 0x0fc9, 0x0fd1, 0x0fd9, 0x0fe1, 0x0fe9, 0x0ff1, ++ /* Node for range 0x010b_ */ ++ 0x0ff9, 0x1001, 0x1009, 0x1011, 0x1019, 0x1021, 0x1029, 0x1031, ++ 0x1039, 0x1041, 0x1049, 0x1051, 0x1059, 0x1061, 0x1069, 0x1071, ++ /* Node for range 0x010c_ */ ++ 0x1079, 0x1081, 0x1089, 0x1091, 0x1099, 0x10a1, 0x0000, 0x10a9, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x10b1, 0x0000, 0x0000, ++ /* Node for range 0x013f_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x10b9, 0x10c1, 0x10c9, 0x10d1, 0x10d9, 0x10e1, 0x0000, 0x0000, ++ /* Node for range 0x01c8_ */ ++ 0x10e9, 0x10f1, 0x10f9, 0x1101, 0x1109, 0x1111, 0x1119, 0x1121, ++ 0x1129, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01e0_ */ ++ 0x1131, 0x0000, 0x1139, 0x0000, 0x1141, 0x0000, 0x1149, 0x0000, ++ 0x1151, 0x0000, 0x1159, 0x0000, 0x1161, 0x0000, 0x1169, 0x0000, ++ /* Node for range 0x01e1_ */ ++ 0x1171, 0x0000, 0x1179, 0x0000, 0x1181, 0x0000, 0x1189, 0x0000, ++ 0x1191, 0x0000, 0x1199, 0x0000, 0x11a1, 0x0000, 0x11a9, 0x0000, ++ /* Node for range 0x01e2_ */ ++ 0x11b1, 0x0000, 0x11b9, 0x0000, 0x11c1, 0x0000, 0x11c9, 0x0000, ++ 0x11d1, 0x0000, 0x11d9, 0x0000, 0x11e1, 0x0000, 0x11e9, 0x0000, ++ /* Node for range 0x01e3_ */ ++ 0x11f1, 0x0000, 0x11f9, 0x0000, 0x1201, 
0x0000, 0x1209, 0x0000, ++ 0x1211, 0x0000, 0x1219, 0x0000, 0x1221, 0x0000, 0x1229, 0x0000, ++ /* Node for range 0x01e4_ */ ++ 0x1231, 0x0000, 0x1239, 0x0000, 0x1241, 0x0000, 0x1249, 0x0000, ++ 0x1251, 0x0000, 0x1259, 0x0000, 0x1261, 0x0000, 0x1269, 0x0000, ++ /* Node for range 0x01e5_ */ ++ 0x1271, 0x0000, 0x1279, 0x0000, 0x1281, 0x0000, 0x1289, 0x0000, ++ 0x1291, 0x0000, 0x1299, 0x0000, 0x12a1, 0x0000, 0x12a9, 0x0000, ++ /* Node for range 0x01e6_ */ ++ 0x12b1, 0x0000, 0x12b9, 0x0000, 0x12c1, 0x0000, 0x12c9, 0x0000, ++ 0x12d1, 0x0000, 0x12d9, 0x0000, 0x12e1, 0x0000, 0x12e9, 0x0000, ++ /* Node for range 0x01e7_ */ ++ 0x12f1, 0x0000, 0x12f9, 0x0000, 0x1301, 0x0000, 0x1309, 0x0000, ++ 0x1311, 0x0000, 0x1319, 0x0000, 0x1321, 0x0000, 0x1329, 0x0000, ++ /* Node for range 0x01e8_ */ ++ 0x1331, 0x0000, 0x1339, 0x0000, 0x1341, 0x0000, 0x1349, 0x0000, ++ 0x1351, 0x0000, 0x1359, 0x0000, 0x1361, 0x0000, 0x1369, 0x0000, ++ /* Node for range 0x01e9_ */ ++ 0x1371, 0x0000, 0x1379, 0x0000, 0x1381, 0x0000, 0x138a, 0x139a, ++ 0x13aa, 0x13ba, 0x13ca, 0x13d9, 0x0000, 0x0000, 0x13e2, 0x0000, ++ /* Node for range 0x01ea_ */ ++ 0x13f1, 0x0000, 0x13f9, 0x0000, 0x1401, 0x0000, 0x1409, 0x0000, ++ 0x1411, 0x0000, 0x1419, 0x0000, 0x1421, 0x0000, 0x1429, 0x0000, ++ /* Node for range 0x01eb_ */ ++ 0x1431, 0x0000, 0x1439, 0x0000, 0x1441, 0x0000, 0x1449, 0x0000, ++ 0x1451, 0x0000, 0x1459, 0x0000, 0x1461, 0x0000, 0x1469, 0x0000, ++ /* Node for range 0x01ec_ */ ++ 0x1471, 0x0000, 0x1479, 0x0000, 0x1481, 0x0000, 0x1489, 0x0000, ++ 0x1491, 0x0000, 0x1499, 0x0000, 0x14a1, 0x0000, 0x14a9, 0x0000, ++ /* Node for range 0x01ed_ */ ++ 0x14b1, 0x0000, 0x14b9, 0x0000, 0x14c1, 0x0000, 0x14c9, 0x0000, ++ 0x14d1, 0x0000, 0x14d9, 0x0000, 0x14e1, 0x0000, 0x14e9, 0x0000, ++ /* Node for range 0x01ee_ */ ++ 0x14f1, 0x0000, 0x14f9, 0x0000, 0x1501, 0x0000, 0x1509, 0x0000, ++ 0x1511, 0x0000, 0x1519, 0x0000, 0x1521, 0x0000, 0x1529, 0x0000, ++ /* Node for range 0x01ef_ */ ++ 0x1531, 0x0000, 0x1539, 0x0000, 0x1541, 0x0000, 
0x1549, 0x0000, ++ 0x1551, 0x0000, 0x1559, 0x0000, 0x1561, 0x0000, 0x1569, 0x0000, ++ /* Node for range 0x01f0_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x1571, 0x1579, 0x1581, 0x1589, 0x1591, 0x1599, 0x15a1, 0x15a9, ++ /* Node for range 0x01f1_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x15b1, 0x15b9, 0x15c1, 0x15c9, 0x15d1, 0x15d9, 0x0000, 0x0000, ++ /* Node for range 0x01f2_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x15e1, 0x15e9, 0x15f1, 0x15f9, 0x1601, 0x1609, 0x1611, 0x1619, ++ /* Node for range 0x01f3_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x1621, 0x1629, 0x1631, 0x1639, 0x1641, 0x1649, 0x1651, 0x1659, ++ /* Node for range 0x01f4_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x1661, 0x1669, 0x1671, 0x1679, 0x1681, 0x1689, 0x0000, 0x0000, ++ /* Node for range 0x01f5_ */ ++ 0x1692, 0x0000, 0x16a3, 0x0000, 0x16bb, 0x0000, 0x16d3, 0x0000, ++ 0x0000, 0x16e9, 0x0000, 0x16f1, 0x0000, 0x16f9, 0x0000, 0x1701, ++ /* Node for range 0x01f6_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x1709, 0x1711, 0x1719, 0x1721, 0x1729, 0x1731, 0x1739, 0x1741, ++ /* Node for range 0x01f8_ */ ++ 0x174a, 0x175a, 0x176a, 0x177a, 0x178a, 0x179a, 0x17aa, 0x17ba, ++ 0x17ca, 0x17da, 0x17ea, 0x17fa, 0x180a, 0x181a, 0x182a, 0x183a, ++ /* Node for range 0x01f9_ */ ++ 0x184a, 0x185a, 0x186a, 0x187a, 0x188a, 0x189a, 0x18aa, 0x18ba, ++ 0x18ca, 0x18da, 0x18ea, 0x18fa, 0x190a, 0x191a, 0x192a, 0x193a, ++ /* Node for range 0x01fa_ */ ++ 0x194a, 0x195a, 0x196a, 0x197a, 0x198a, 0x199a, 0x19aa, 0x19ba, ++ 0x19ca, 0x19da, 0x19ea, 0x19fa, 0x1a0a, 0x1a1a, 0x1a2a, 0x1a3a, ++ /* Node for range 0x01fb_ */ ++ 0x0000, 0x0000, 0x1a4a, 0x1a5a, 0x1a6a, 0x0000, 0x1a7a, 0x1a8b, ++ 0x1aa1, 0x1aa9, 0x1ab1, 0x1ab9, 0x1ac2, 0x0000, 0x1ad1, 0x0000, ++ /* Node for range 0x01fc_ */ ++ 0x0000, 0x0000, 0x1ada, 0x1aea, 0x1afa, 0x0000, 0x1b0a, 
0x1b1b, ++ 0x1b31, 0x1b39, 0x1b41, 0x1b49, 0x1b52, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01fd_ */ ++ 0x0000, 0x0000, 0x1b63, 0x1b7b, 0x0000, 0x0000, 0x1b92, 0x1ba3, ++ 0x1bb9, 0x1bc1, 0x1bc9, 0x1bd1, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01fe_ */ ++ 0x0000, 0x0000, 0x1bdb, 0x1bf3, 0x1c0a, 0x0000, 0x1c1a, 0x1c2b, ++ 0x1c41, 0x1c49, 0x1c51, 0x1c59, 0x1c61, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x01ff_ */ ++ 0x0000, 0x0000, 0x1c6a, 0x1c7a, 0x1c8a, 0x0000, 0x1c9a, 0x1cab, ++ 0x1cc1, 0x1cc9, 0x1cd1, 0x1cd9, 0x1ce2, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0212_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1cf1, 0x0000, ++ 0x0000, 0x0000, 0x1cf9, 0x1d01, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0213_ */ ++ 0x0000, 0x0000, 0x1d09, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0216_ */ ++ 0x1d11, 0x1d19, 0x1d21, 0x1d29, 0x1d31, 0x1d39, 0x1d41, 0x1d49, ++ 0x1d51, 0x1d59, 0x1d61, 0x1d69, 0x1d71, 0x1d79, 0x1d81, 0x1d89, ++ /* Node for range 0x0218_ */ ++ 0x0000, 0x0000, 0x0000, 0x1d91, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x024b_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x1d99, 0x1da1, ++ 0x1da9, 0x1db1, 0x1db9, 0x1dc1, 0x1dc9, 0x1dd1, 0x1dd9, 0x1de1, ++ /* Node for range 0x024c_ */ ++ 0x1de9, 0x1df1, 0x1df9, 0x1e01, 0x1e09, 0x1e11, 0x1e19, 0x1e21, ++ 0x1e29, 0x1e31, 0x1e39, 0x1e41, 0x1e49, 0x1e51, 0x1e59, 0x1e61, ++ /* Node for range 0x02c0_ */ ++ 0x1e69, 0x1e71, 0x1e79, 0x1e81, 0x1e89, 0x1e91, 0x1e99, 0x1ea1, ++ 0x1ea9, 0x1eb1, 0x1eb9, 0x1ec1, 0x1ec9, 0x1ed1, 0x1ed9, 0x1ee1, ++ /* Node for range 0x02c1_ */ ++ 0x1ee9, 0x1ef1, 0x1ef9, 0x1f01, 0x1f09, 0x1f11, 0x1f19, 0x1f21, ++ 0x1f29, 0x1f31, 0x1f39, 0x1f41, 0x1f49, 0x1f51, 0x1f59, 0x1f61, ++ /* Node for range 0x02c2_ */ ++ 0x1f69, 0x1f71, 0x1f79, 0x1f81, 0x1f89, 0x1f91, 0x1f99, 0x1fa1, 
++ 0x1fa9, 0x1fb1, 0x1fb9, 0x1fc1, 0x1fc9, 0x1fd1, 0x1fd9, 0x0000, ++ /* Node for range 0x02c6_ */ ++ 0x1fe1, 0x0000, 0x1fe9, 0x1ff1, 0x1ff9, 0x0000, 0x0000, 0x2001, ++ 0x0000, 0x2009, 0x0000, 0x2011, 0x0000, 0x2019, 0x2021, 0x2029, ++ /* Node for range 0x02c7_ */ ++ 0x2031, 0x0000, 0x2039, 0x0000, 0x0000, 0x2041, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2049, 0x2051, ++ /* Node for range 0x02c8_ */ ++ 0x2059, 0x0000, 0x2061, 0x0000, 0x2069, 0x0000, 0x2071, 0x0000, ++ 0x2079, 0x0000, 0x2081, 0x0000, 0x2089, 0x0000, 0x2091, 0x0000, ++ /* Node for range 0x02c9_ */ ++ 0x2099, 0x0000, 0x20a1, 0x0000, 0x20a9, 0x0000, 0x20b1, 0x0000, ++ 0x20b9, 0x0000, 0x20c1, 0x0000, 0x20c9, 0x0000, 0x20d1, 0x0000, ++ /* Node for range 0x02ca_ */ ++ 0x20d9, 0x0000, 0x20e1, 0x0000, 0x20e9, 0x0000, 0x20f1, 0x0000, ++ 0x20f9, 0x0000, 0x2101, 0x0000, 0x2109, 0x0000, 0x2111, 0x0000, ++ /* Node for range 0x02cb_ */ ++ 0x2119, 0x0000, 0x2121, 0x0000, 0x2129, 0x0000, 0x2131, 0x0000, ++ 0x2139, 0x0000, 0x2141, 0x0000, 0x2149, 0x0000, 0x2151, 0x0000, ++ /* Node for range 0x02cc_ */ ++ 0x2159, 0x0000, 0x2161, 0x0000, 0x2169, 0x0000, 0x2171, 0x0000, ++ 0x2179, 0x0000, 0x2181, 0x0000, 0x2189, 0x0000, 0x2191, 0x0000, ++ /* Node for range 0x02cd_ */ ++ 0x2199, 0x0000, 0x21a1, 0x0000, 0x21a9, 0x0000, 0x21b1, 0x0000, ++ 0x21b9, 0x0000, 0x21c1, 0x0000, 0x21c9, 0x0000, 0x21d1, 0x0000, ++ /* Node for range 0x02ce_ */ ++ 0x21d9, 0x0000, 0x21e1, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x21e9, 0x0000, 0x21f1, 0x0000, 0x0000, ++ /* Node for range 0x02cf_ */ ++ 0x0000, 0x0000, 0x21f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0a64_ */ ++ 0x2201, 0x0000, 0x2209, 0x0000, 0x2211, 0x0000, 0x2219, 0x0000, ++ 0x2221, 0x0000, 0x2229, 0x0000, 0x2231, 0x0000, 0x2239, 0x0000, ++ /* Node for range 0x0a65_ */ ++ 0x2241, 0x0000, 0x2249, 0x0000, 0x2251, 0x0000, 0x2259, 0x0000, ++ 
0x2261, 0x0000, 0x2269, 0x0000, 0x2271, 0x0000, 0x2279, 0x0000, ++ /* Node for range 0x0a66_ */ ++ 0x2281, 0x0000, 0x2289, 0x0000, 0x2291, 0x0000, 0x2299, 0x0000, ++ 0x22a1, 0x0000, 0x22a9, 0x0000, 0x22b1, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0a68_ */ ++ 0x22b9, 0x0000, 0x22c1, 0x0000, 0x22c9, 0x0000, 0x22d1, 0x0000, ++ 0x22d9, 0x0000, 0x22e1, 0x0000, 0x22e9, 0x0000, 0x22f1, 0x0000, ++ /* Node for range 0x0a69_ */ ++ 0x22f9, 0x0000, 0x2301, 0x0000, 0x2309, 0x0000, 0x2311, 0x0000, ++ 0x2319, 0x0000, 0x2321, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0a72_ */ ++ 0x0000, 0x0000, 0x2329, 0x0000, 0x2331, 0x0000, 0x2339, 0x0000, ++ 0x2341, 0x0000, 0x2349, 0x0000, 0x2351, 0x0000, 0x2359, 0x0000, ++ /* Node for range 0x0a73_ */ ++ 0x0000, 0x0000, 0x2361, 0x0000, 0x2369, 0x0000, 0x2371, 0x0000, ++ 0x2379, 0x0000, 0x2381, 0x0000, 0x2389, 0x0000, 0x2391, 0x0000, ++ /* Node for range 0x0a74_ */ ++ 0x2399, 0x0000, 0x23a1, 0x0000, 0x23a9, 0x0000, 0x23b1, 0x0000, ++ 0x23b9, 0x0000, 0x23c1, 0x0000, 0x23c9, 0x0000, 0x23d1, 0x0000, ++ /* Node for range 0x0a75_ */ ++ 0x23d9, 0x0000, 0x23e1, 0x0000, 0x23e9, 0x0000, 0x23f1, 0x0000, ++ 0x23f9, 0x0000, 0x2401, 0x0000, 0x2409, 0x0000, 0x2411, 0x0000, ++ /* Node for range 0x0a76_ */ ++ 0x2419, 0x0000, 0x2421, 0x0000, 0x2429, 0x0000, 0x2431, 0x0000, ++ 0x2439, 0x0000, 0x2441, 0x0000, 0x2449, 0x0000, 0x2451, 0x0000, ++ /* Node for range 0x0a77_ */ ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x2459, 0x0000, 0x2461, 0x0000, 0x2469, 0x2471, 0x0000, ++ /* Node for range 0x0a78_ */ ++ 0x2479, 0x0000, 0x2481, 0x0000, 0x2489, 0x0000, 0x2491, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x2499, 0x0000, 0x24a1, 0x0000, 0x0000, ++ /* Node for range 0x0a79_ */ ++ 0x24a9, 0x0000, 0x24b1, 0x0000, 0x0000, 0x0000, 0x24b9, 0x0000, ++ 0x24c1, 0x0000, 0x24c9, 0x0000, 0x24d1, 0x0000, 0x24d9, 0x0000, ++ /* Node for range 0x0a7a_ */ ++ 0x24e1, 0x0000, 0x24e9, 0x0000, 0x24f1, 0x0000, 0x24f9, 0x0000, ++ 0x2501, 
0x0000, 0x2509, 0x2511, 0x2519, 0x2521, 0x2529, 0x0000, ++ /* Node for range 0x0a7b_ */ ++ 0x2531, 0x2539, 0x2541, 0x2549, 0x2551, 0x0000, 0x2559, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0ab7_ */ ++ 0x2561, 0x2569, 0x2571, 0x2579, 0x2581, 0x2589, 0x2591, 0x2599, ++ 0x25a1, 0x25a9, 0x25b1, 0x25b9, 0x25c1, 0x25c9, 0x25d1, 0x25d9, ++ /* Node for range 0x0ab8_ */ ++ 0x25e1, 0x25e9, 0x25f1, 0x25f9, 0x2601, 0x2609, 0x2611, 0x2619, ++ 0x2621, 0x2629, 0x2631, 0x2639, 0x2641, 0x2649, 0x2651, 0x2659, ++ /* Node for range 0x0ab9_ */ ++ 0x2661, 0x2669, 0x2671, 0x2679, 0x2681, 0x2689, 0x2691, 0x2699, ++ 0x26a1, 0x26a9, 0x26b1, 0x26b9, 0x26c1, 0x26c9, 0x26d1, 0x26d9, ++ /* Node for range 0x0aba_ */ ++ 0x26e1, 0x26e9, 0x26f1, 0x26f9, 0x2701, 0x2709, 0x2711, 0x2719, ++ 0x2721, 0x2729, 0x2731, 0x2739, 0x2741, 0x2749, 0x2751, 0x2759, ++ /* Node for range 0x0abb_ */ ++ 0x2761, 0x2769, 0x2771, 0x2779, 0x2781, 0x2789, 0x2791, 0x2799, ++ 0x27a1, 0x27a9, 0x27b1, 0x27b9, 0x27c1, 0x27c9, 0x27d1, 0x27d9, ++ /* Node for range 0x0fb0_ */ ++ 0x27e2, 0x27f2, 0x2802, 0x2813, 0x282b, 0x2842, 0x2852, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0fb1_ */ ++ 0x0000, 0x0000, 0x0000, 0x2862, 0x2872, 0x2882, 0x2892, 0x28a2, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x0ff2_ */ ++ 0x0000, 0x28b1, 0x28b9, 0x28c1, 0x28c9, 0x28d1, 0x28d9, 0x28e1, ++ 0x28e9, 0x28f1, 0x28f9, 0x2901, 0x2909, 0x2911, 0x2919, 0x2921, ++ /* Node for range 0x0ff3_ */ ++ 0x2929, 0x2931, 0x2939, 0x2941, 0x2949, 0x2951, 0x2959, 0x2961, ++ 0x2969, 0x2971, 0x2979, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x1040_ */ ++ 0x2981, 0x2989, 0x2991, 0x2999, 0x29a1, 0x29a9, 0x29b1, 0x29b9, ++ 0x29c1, 0x29c9, 0x29d1, 0x29d9, 0x29e1, 0x29e9, 0x29f1, 0x29f9, ++ /* Node for range 0x1041_ */ ++ 0x2a01, 0x2a09, 0x2a11, 0x2a19, 0x2a21, 0x2a29, 0x2a31, 0x2a39, ++ 0x2a41, 0x2a49, 
0x2a51, 0x2a59, 0x2a61, 0x2a69, 0x2a71, 0x2a79, ++ /* Node for range 0x1042_ */ ++ 0x2a81, 0x2a89, 0x2a91, 0x2a99, 0x2aa1, 0x2aa9, 0x2ab1, 0x2ab9, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x104b_ */ ++ 0x2ac1, 0x2ac9, 0x2ad1, 0x2ad9, 0x2ae1, 0x2ae9, 0x2af1, 0x2af9, ++ 0x2b01, 0x2b09, 0x2b11, 0x2b19, 0x2b21, 0x2b29, 0x2b31, 0x2b39, ++ /* Node for range 0x104c_ */ ++ 0x2b41, 0x2b49, 0x2b51, 0x2b59, 0x2b61, 0x2b69, 0x2b71, 0x2b79, ++ 0x2b81, 0x2b89, 0x2b91, 0x2b99, 0x2ba1, 0x2ba9, 0x2bb1, 0x2bb9, ++ /* Node for range 0x104d_ */ ++ 0x2bc1, 0x2bc9, 0x2bd1, 0x2bd9, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x10c8_ */ ++ 0x2be1, 0x2be9, 0x2bf1, 0x2bf9, 0x2c01, 0x2c09, 0x2c11, 0x2c19, ++ 0x2c21, 0x2c29, 0x2c31, 0x2c39, 0x2c41, 0x2c49, 0x2c51, 0x2c59, ++ /* Node for range 0x10c9_ */ ++ 0x2c61, 0x2c69, 0x2c71, 0x2c79, 0x2c81, 0x2c89, 0x2c91, 0x2c99, ++ 0x2ca1, 0x2ca9, 0x2cb1, 0x2cb9, 0x2cc1, 0x2cc9, 0x2cd1, 0x2cd9, ++ /* Node for range 0x10ca_ */ ++ 0x2ce1, 0x2ce9, 0x2cf1, 0x2cf9, 0x2d01, 0x2d09, 0x2d11, 0x2d19, ++ 0x2d21, 0x2d29, 0x2d31, 0x2d39, 0x2d41, 0x2d49, 0x2d51, 0x2d59, ++ /* Node for range 0x10cb_ */ ++ 0x2d61, 0x2d69, 0x2d71, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ /* Node for range 0x118a_ */ ++ 0x2d79, 0x2d81, 0x2d89, 0x2d91, 0x2d99, 0x2da1, 0x2da9, 0x2db1, ++ 0x2db9, 0x2dc1, 0x2dc9, 0x2dd1, 0x2dd9, 0x2de1, 0x2de9, 0x2df1, ++ /* Node for range 0x118b_ */ ++ 0x2df9, 0x2e01, 0x2e09, 0x2e11, 0x2e19, 0x2e21, 0x2e29, 0x2e31, ++ 0x2e39, 0x2e41, 0x2e49, 0x2e51, 0x2e59, 0x2e61, 0x2e69, 0x2e71, ++ /* Node for range 0x1e90_ */ ++ 0x2e79, 0x2e81, 0x2e89, 0x2e91, 0x2e99, 0x2ea1, 0x2ea9, 0x2eb1, ++ 0x2eb9, 0x2ec1, 0x2ec9, 0x2ed1, 0x2ed9, 0x2ee1, 0x2ee9, 0x2ef1, ++ /* Node for range 0x1e91_ */ ++ 0x2ef9, 0x2f01, 0x2f09, 0x2f11, 0x2f19, 0x2f21, 0x2f29, 0x2f31, ++ 0x2f39, 0x2f41, 0x2f49, 
0x2f51, 0x2f59, 0x2f61, 0x2f69, 0x2f71, ++ /* Node for range 0x1e92_ */ ++ 0x2f79, 0x2f81, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, ++}; ++ ++static unicode_t apfs_cf[] = { ++ 0x000061, 0x000062, 0x000063, 0x000064, 0x000065, 0x000066, ++ 0x000067, 0x000068, 0x000069, 0x00006a, 0x00006b, 0x00006c, ++ 0x00006d, 0x00006e, 0x00006f, 0x000070, 0x000071, 0x000072, ++ 0x000073, 0x000074, 0x000075, 0x000076, 0x000077, 0x000078, ++ 0x000079, 0x00007a, 0x0003bc, 0x0000e0, 0x0000e1, 0x0000e2, ++ 0x0000e3, 0x0000e4, 0x0000e5, 0x0000e6, 0x0000e7, 0x0000e8, ++ 0x0000e9, 0x0000ea, 0x0000eb, 0x0000ec, 0x0000ed, 0x0000ee, ++ 0x0000ef, 0x0000f0, 0x0000f1, 0x0000f2, 0x0000f3, 0x0000f4, ++ 0x0000f5, 0x0000f6, 0x0000f8, 0x0000f9, 0x0000fa, 0x0000fb, ++ 0x0000fc, 0x0000fd, 0x0000fe, 0x000073, 0x000073, 0x000101, ++ 0x000103, 0x000105, 0x000107, 0x000109, 0x00010b, 0x00010d, ++ 0x00010f, 0x000111, 0x000113, 0x000115, 0x000117, 0x000119, ++ 0x00011b, 0x00011d, 0x00011f, 0x000121, 0x000123, 0x000125, ++ 0x000127, 0x000129, 0x00012b, 0x00012d, 0x00012f, 0x000069, ++ 0x000307, 0x000133, 0x000135, 0x000137, 0x00013a, 0x00013c, ++ 0x00013e, 0x000140, 0x000142, 0x000144, 0x000146, 0x000148, ++ 0x0002bc, 0x00006e, 0x00014b, 0x00014d, 0x00014f, 0x000151, ++ 0x000153, 0x000155, 0x000157, 0x000159, 0x00015b, 0x00015d, ++ 0x00015f, 0x000161, 0x000163, 0x000165, 0x000167, 0x000169, ++ 0x00016b, 0x00016d, 0x00016f, 0x000171, 0x000173, 0x000175, ++ 0x000177, 0x0000ff, 0x00017a, 0x00017c, 0x00017e, 0x000073, ++ 0x000253, 0x000183, 0x000185, 0x000254, 0x000188, 0x000256, ++ 0x000257, 0x00018c, 0x0001dd, 0x000259, 0x00025b, 0x000192, ++ 0x000260, 0x000263, 0x000269, 0x000268, 0x000199, 0x00026f, ++ 0x000272, 0x000275, 0x0001a1, 0x0001a3, 0x0001a5, 0x000280, ++ 0x0001a8, 0x000283, 0x0001ad, 0x000288, 0x0001b0, 0x00028a, ++ 0x00028b, 0x0001b4, 0x0001b6, 0x000292, 0x0001b9, 0x0001bd, ++ 0x0001c6, 0x0001c6, 0x0001c9, 0x0001c9, 
0x0001cc, 0x0001cc, ++ 0x0001ce, 0x0001d0, 0x0001d2, 0x0001d4, 0x0001d6, 0x0001d8, ++ 0x0001da, 0x0001dc, 0x0001df, 0x0001e1, 0x0001e3, 0x0001e5, ++ 0x0001e7, 0x0001e9, 0x0001eb, 0x0001ed, 0x0001ef, 0x00006a, ++ 0x00030c, 0x0001f3, 0x0001f3, 0x0001f5, 0x000195, 0x0001bf, ++ 0x0001f9, 0x0001fb, 0x0001fd, 0x0001ff, 0x000201, 0x000203, ++ 0x000205, 0x000207, 0x000209, 0x00020b, 0x00020d, 0x00020f, ++ 0x000211, 0x000213, 0x000215, 0x000217, 0x000219, 0x00021b, ++ 0x00021d, 0x00021f, 0x00019e, 0x000223, 0x000225, 0x000227, ++ 0x000229, 0x00022b, 0x00022d, 0x00022f, 0x000231, 0x000233, ++ 0x002c65, 0x00023c, 0x00019a, 0x002c66, 0x000242, 0x000180, ++ 0x000289, 0x00028c, 0x000247, 0x000249, 0x00024b, 0x00024d, ++ 0x00024f, 0x0003b9, 0x000371, 0x000373, 0x000377, 0x0003f3, ++ 0x0003ac, 0x0003ad, 0x0003ae, 0x0003af, 0x0003cc, 0x0003cd, ++ 0x0003ce, 0x0003b9, 0x000308, 0x000301, 0x0003b1, 0x0003b2, ++ 0x0003b3, 0x0003b4, 0x0003b5, 0x0003b6, 0x0003b7, 0x0003b8, ++ 0x0003b9, 0x0003ba, 0x0003bb, 0x0003bc, 0x0003bd, 0x0003be, ++ 0x0003bf, 0x0003c0, 0x0003c1, 0x0003c3, 0x0003c4, 0x0003c5, ++ 0x0003c6, 0x0003c7, 0x0003c8, 0x0003c9, 0x0003ca, 0x0003cb, ++ 0x0003c5, 0x000308, 0x000301, 0x0003c3, 0x0003d7, 0x0003b2, ++ 0x0003b8, 0x0003c6, 0x0003c0, 0x0003d9, 0x0003db, 0x0003dd, ++ 0x0003df, 0x0003e1, 0x0003e3, 0x0003e5, 0x0003e7, 0x0003e9, ++ 0x0003eb, 0x0003ed, 0x0003ef, 0x0003ba, 0x0003c1, 0x0003b8, ++ 0x0003b5, 0x0003f8, 0x0003f2, 0x0003fb, 0x00037b, 0x00037c, ++ 0x00037d, 0x000450, 0x000451, 0x000452, 0x000453, 0x000454, ++ 0x000455, 0x000456, 0x000457, 0x000458, 0x000459, 0x00045a, ++ 0x00045b, 0x00045c, 0x00045d, 0x00045e, 0x00045f, 0x000430, ++ 0x000431, 0x000432, 0x000433, 0x000434, 0x000435, 0x000436, ++ 0x000437, 0x000438, 0x000439, 0x00043a, 0x00043b, 0x00043c, ++ 0x00043d, 0x00043e, 0x00043f, 0x000440, 0x000441, 0x000442, ++ 0x000443, 0x000444, 0x000445, 0x000446, 0x000447, 0x000448, ++ 0x000449, 0x00044a, 0x00044b, 0x00044c, 0x00044d, 0x00044e, ++ 0x00044f, 0x000461, 
0x000463, 0x000465, 0x000467, 0x000469, ++ 0x00046b, 0x00046d, 0x00046f, 0x000471, 0x000473, 0x000475, ++ 0x000477, 0x000479, 0x00047b, 0x00047d, 0x00047f, 0x000481, ++ 0x00048b, 0x00048d, 0x00048f, 0x000491, 0x000493, 0x000495, ++ 0x000497, 0x000499, 0x00049b, 0x00049d, 0x00049f, 0x0004a1, ++ 0x0004a3, 0x0004a5, 0x0004a7, 0x0004a9, 0x0004ab, 0x0004ad, ++ 0x0004af, 0x0004b1, 0x0004b3, 0x0004b5, 0x0004b7, 0x0004b9, ++ 0x0004bb, 0x0004bd, 0x0004bf, 0x0004cf, 0x0004c2, 0x0004c4, ++ 0x0004c6, 0x0004c8, 0x0004ca, 0x0004cc, 0x0004ce, 0x0004d1, ++ 0x0004d3, 0x0004d5, 0x0004d7, 0x0004d9, 0x0004db, 0x0004dd, ++ 0x0004df, 0x0004e1, 0x0004e3, 0x0004e5, 0x0004e7, 0x0004e9, ++ 0x0004eb, 0x0004ed, 0x0004ef, 0x0004f1, 0x0004f3, 0x0004f5, ++ 0x0004f7, 0x0004f9, 0x0004fb, 0x0004fd, 0x0004ff, 0x000501, ++ 0x000503, 0x000505, 0x000507, 0x000509, 0x00050b, 0x00050d, ++ 0x00050f, 0x000511, 0x000513, 0x000515, 0x000517, 0x000519, ++ 0x00051b, 0x00051d, 0x00051f, 0x000521, 0x000523, 0x000525, ++ 0x000527, 0x000529, 0x00052b, 0x00052d, 0x00052f, 0x000561, ++ 0x000562, 0x000563, 0x000564, 0x000565, 0x000566, 0x000567, ++ 0x000568, 0x000569, 0x00056a, 0x00056b, 0x00056c, 0x00056d, ++ 0x00056e, 0x00056f, 0x000570, 0x000571, 0x000572, 0x000573, ++ 0x000574, 0x000575, 0x000576, 0x000577, 0x000578, 0x000579, ++ 0x00057a, 0x00057b, 0x00057c, 0x00057d, 0x00057e, 0x00057f, ++ 0x000580, 0x000581, 0x000582, 0x000583, 0x000584, 0x000585, ++ 0x000586, 0x000565, 0x000582, 0x002d00, 0x002d01, 0x002d02, ++ 0x002d03, 0x002d04, 0x002d05, 0x002d06, 0x002d07, 0x002d08, ++ 0x002d09, 0x002d0a, 0x002d0b, 0x002d0c, 0x002d0d, 0x002d0e, ++ 0x002d0f, 0x002d10, 0x002d11, 0x002d12, 0x002d13, 0x002d14, ++ 0x002d15, 0x002d16, 0x002d17, 0x002d18, 0x002d19, 0x002d1a, ++ 0x002d1b, 0x002d1c, 0x002d1d, 0x002d1e, 0x002d1f, 0x002d20, ++ 0x002d21, 0x002d22, 0x002d23, 0x002d24, 0x002d25, 0x002d27, ++ 0x002d2d, 0x0013f0, 0x0013f1, 0x0013f2, 0x0013f3, 0x0013f4, ++ 0x0013f5, 0x000432, 0x000434, 0x00043e, 0x000441, 0x000442, ++ 
0x000442, 0x00044a, 0x000463, 0x00a64b, 0x001e01, 0x001e03, ++ 0x001e05, 0x001e07, 0x001e09, 0x001e0b, 0x001e0d, 0x001e0f, ++ 0x001e11, 0x001e13, 0x001e15, 0x001e17, 0x001e19, 0x001e1b, ++ 0x001e1d, 0x001e1f, 0x001e21, 0x001e23, 0x001e25, 0x001e27, ++ 0x001e29, 0x001e2b, 0x001e2d, 0x001e2f, 0x001e31, 0x001e33, ++ 0x001e35, 0x001e37, 0x001e39, 0x001e3b, 0x001e3d, 0x001e3f, ++ 0x001e41, 0x001e43, 0x001e45, 0x001e47, 0x001e49, 0x001e4b, ++ 0x001e4d, 0x001e4f, 0x001e51, 0x001e53, 0x001e55, 0x001e57, ++ 0x001e59, 0x001e5b, 0x001e5d, 0x001e5f, 0x001e61, 0x001e63, ++ 0x001e65, 0x001e67, 0x001e69, 0x001e6b, 0x001e6d, 0x001e6f, ++ 0x001e71, 0x001e73, 0x001e75, 0x001e77, 0x001e79, 0x001e7b, ++ 0x001e7d, 0x001e7f, 0x001e81, 0x001e83, 0x001e85, 0x001e87, ++ 0x001e89, 0x001e8b, 0x001e8d, 0x001e8f, 0x001e91, 0x001e93, ++ 0x001e95, 0x000068, 0x000331, 0x000074, 0x000308, 0x000077, ++ 0x00030a, 0x000079, 0x00030a, 0x000061, 0x0002be, 0x001e61, ++ 0x000073, 0x000073, 0x001ea1, 0x001ea3, 0x001ea5, 0x001ea7, ++ 0x001ea9, 0x001eab, 0x001ead, 0x001eaf, 0x001eb1, 0x001eb3, ++ 0x001eb5, 0x001eb7, 0x001eb9, 0x001ebb, 0x001ebd, 0x001ebf, ++ 0x001ec1, 0x001ec3, 0x001ec5, 0x001ec7, 0x001ec9, 0x001ecb, ++ 0x001ecd, 0x001ecf, 0x001ed1, 0x001ed3, 0x001ed5, 0x001ed7, ++ 0x001ed9, 0x001edb, 0x001edd, 0x001edf, 0x001ee1, 0x001ee3, ++ 0x001ee5, 0x001ee7, 0x001ee9, 0x001eeb, 0x001eed, 0x001eef, ++ 0x001ef1, 0x001ef3, 0x001ef5, 0x001ef7, 0x001ef9, 0x001efb, ++ 0x001efd, 0x001eff, 0x001f00, 0x001f01, 0x001f02, 0x001f03, ++ 0x001f04, 0x001f05, 0x001f06, 0x001f07, 0x001f10, 0x001f11, ++ 0x001f12, 0x001f13, 0x001f14, 0x001f15, 0x001f20, 0x001f21, ++ 0x001f22, 0x001f23, 0x001f24, 0x001f25, 0x001f26, 0x001f27, ++ 0x001f30, 0x001f31, 0x001f32, 0x001f33, 0x001f34, 0x001f35, ++ 0x001f36, 0x001f37, 0x001f40, 0x001f41, 0x001f42, 0x001f43, ++ 0x001f44, 0x001f45, 0x0003c5, 0x000313, 0x0003c5, 0x000313, ++ 0x000300, 0x0003c5, 0x000313, 0x000301, 0x0003c5, 0x000313, ++ 0x000342, 0x001f51, 0x001f53, 0x001f55, 
0x001f57, 0x001f60, ++ 0x001f61, 0x001f62, 0x001f63, 0x001f64, 0x001f65, 0x001f66, ++ 0x001f67, 0x001f00, 0x0003b9, 0x001f01, 0x0003b9, 0x001f02, ++ 0x0003b9, 0x001f03, 0x0003b9, 0x001f04, 0x0003b9, 0x001f05, ++ 0x0003b9, 0x001f06, 0x0003b9, 0x001f07, 0x0003b9, 0x001f00, ++ 0x0003b9, 0x001f01, 0x0003b9, 0x001f02, 0x0003b9, 0x001f03, ++ 0x0003b9, 0x001f04, 0x0003b9, 0x001f05, 0x0003b9, 0x001f06, ++ 0x0003b9, 0x001f07, 0x0003b9, 0x001f20, 0x0003b9, 0x001f21, ++ 0x0003b9, 0x001f22, 0x0003b9, 0x001f23, 0x0003b9, 0x001f24, ++ 0x0003b9, 0x001f25, 0x0003b9, 0x001f26, 0x0003b9, 0x001f27, ++ 0x0003b9, 0x001f20, 0x0003b9, 0x001f21, 0x0003b9, 0x001f22, ++ 0x0003b9, 0x001f23, 0x0003b9, 0x001f24, 0x0003b9, 0x001f25, ++ 0x0003b9, 0x001f26, 0x0003b9, 0x001f27, 0x0003b9, 0x001f60, ++ 0x0003b9, 0x001f61, 0x0003b9, 0x001f62, 0x0003b9, 0x001f63, ++ 0x0003b9, 0x001f64, 0x0003b9, 0x001f65, 0x0003b9, 0x001f66, ++ 0x0003b9, 0x001f67, 0x0003b9, 0x001f60, 0x0003b9, 0x001f61, ++ 0x0003b9, 0x001f62, 0x0003b9, 0x001f63, 0x0003b9, 0x001f64, ++ 0x0003b9, 0x001f65, 0x0003b9, 0x001f66, 0x0003b9, 0x001f67, ++ 0x0003b9, 0x001f70, 0x0003b9, 0x0003b1, 0x0003b9, 0x0003ac, ++ 0x0003b9, 0x0003b1, 0x000342, 0x0003b1, 0x000342, 0x0003b9, ++ 0x001fb0, 0x001fb1, 0x001f70, 0x001f71, 0x0003b1, 0x0003b9, ++ 0x0003b9, 0x001f74, 0x0003b9, 0x0003b7, 0x0003b9, 0x0003ae, ++ 0x0003b9, 0x0003b7, 0x000342, 0x0003b7, 0x000342, 0x0003b9, ++ 0x001f72, 0x001f73, 0x001f74, 0x001f75, 0x0003b7, 0x0003b9, ++ 0x0003b9, 0x000308, 0x000300, 0x0003b9, 0x000308, 0x000301, ++ 0x0003b9, 0x000342, 0x0003b9, 0x000308, 0x000342, 0x001fd0, ++ 0x001fd1, 0x001f76, 0x001f77, 0x0003c5, 0x000308, 0x000300, ++ 0x0003c5, 0x000308, 0x000301, 0x0003c1, 0x000313, 0x0003c5, ++ 0x000342, 0x0003c5, 0x000308, 0x000342, 0x001fe0, 0x001fe1, ++ 0x001f7a, 0x001f7b, 0x001fe5, 0x001f7c, 0x0003b9, 0x0003c9, ++ 0x0003b9, 0x0003ce, 0x0003b9, 0x0003c9, 0x000342, 0x0003c9, ++ 0x000342, 0x0003b9, 0x001f78, 0x001f79, 0x001f7c, 0x001f7d, ++ 0x0003c9, 0x0003b9, 
0x0003c9, 0x00006b, 0x0000e5, 0x00214e, ++ 0x002170, 0x002171, 0x002172, 0x002173, 0x002174, 0x002175, ++ 0x002176, 0x002177, 0x002178, 0x002179, 0x00217a, 0x00217b, ++ 0x00217c, 0x00217d, 0x00217e, 0x00217f, 0x002184, 0x0024d0, ++ 0x0024d1, 0x0024d2, 0x0024d3, 0x0024d4, 0x0024d5, 0x0024d6, ++ 0x0024d7, 0x0024d8, 0x0024d9, 0x0024da, 0x0024db, 0x0024dc, ++ 0x0024dd, 0x0024de, 0x0024df, 0x0024e0, 0x0024e1, 0x0024e2, ++ 0x0024e3, 0x0024e4, 0x0024e5, 0x0024e6, 0x0024e7, 0x0024e8, ++ 0x0024e9, 0x002c30, 0x002c31, 0x002c32, 0x002c33, 0x002c34, ++ 0x002c35, 0x002c36, 0x002c37, 0x002c38, 0x002c39, 0x002c3a, ++ 0x002c3b, 0x002c3c, 0x002c3d, 0x002c3e, 0x002c3f, 0x002c40, ++ 0x002c41, 0x002c42, 0x002c43, 0x002c44, 0x002c45, 0x002c46, ++ 0x002c47, 0x002c48, 0x002c49, 0x002c4a, 0x002c4b, 0x002c4c, ++ 0x002c4d, 0x002c4e, 0x002c4f, 0x002c50, 0x002c51, 0x002c52, ++ 0x002c53, 0x002c54, 0x002c55, 0x002c56, 0x002c57, 0x002c58, ++ 0x002c59, 0x002c5a, 0x002c5b, 0x002c5c, 0x002c5d, 0x002c5e, ++ 0x002c61, 0x00026b, 0x001d7d, 0x00027d, 0x002c68, 0x002c6a, ++ 0x002c6c, 0x000251, 0x000271, 0x000250, 0x000252, 0x002c73, ++ 0x002c76, 0x00023f, 0x000240, 0x002c81, 0x002c83, 0x002c85, ++ 0x002c87, 0x002c89, 0x002c8b, 0x002c8d, 0x002c8f, 0x002c91, ++ 0x002c93, 0x002c95, 0x002c97, 0x002c99, 0x002c9b, 0x002c9d, ++ 0x002c9f, 0x002ca1, 0x002ca3, 0x002ca5, 0x002ca7, 0x002ca9, ++ 0x002cab, 0x002cad, 0x002caf, 0x002cb1, 0x002cb3, 0x002cb5, ++ 0x002cb7, 0x002cb9, 0x002cbb, 0x002cbd, 0x002cbf, 0x002cc1, ++ 0x002cc3, 0x002cc5, 0x002cc7, 0x002cc9, 0x002ccb, 0x002ccd, ++ 0x002ccf, 0x002cd1, 0x002cd3, 0x002cd5, 0x002cd7, 0x002cd9, ++ 0x002cdb, 0x002cdd, 0x002cdf, 0x002ce1, 0x002ce3, 0x002cec, ++ 0x002cee, 0x002cf3, 0x00a641, 0x00a643, 0x00a645, 0x00a647, ++ 0x00a649, 0x00a64b, 0x00a64d, 0x00a64f, 0x00a651, 0x00a653, ++ 0x00a655, 0x00a657, 0x00a659, 0x00a65b, 0x00a65d, 0x00a65f, ++ 0x00a661, 0x00a663, 0x00a665, 0x00a667, 0x00a669, 0x00a66b, ++ 0x00a66d, 0x00a681, 0x00a683, 0x00a685, 0x00a687, 0x00a689, ++ 
0x00a68b, 0x00a68d, 0x00a68f, 0x00a691, 0x00a693, 0x00a695, ++ 0x00a697, 0x00a699, 0x00a69b, 0x00a723, 0x00a725, 0x00a727, ++ 0x00a729, 0x00a72b, 0x00a72d, 0x00a72f, 0x00a733, 0x00a735, ++ 0x00a737, 0x00a739, 0x00a73b, 0x00a73d, 0x00a73f, 0x00a741, ++ 0x00a743, 0x00a745, 0x00a747, 0x00a749, 0x00a74b, 0x00a74d, ++ 0x00a74f, 0x00a751, 0x00a753, 0x00a755, 0x00a757, 0x00a759, ++ 0x00a75b, 0x00a75d, 0x00a75f, 0x00a761, 0x00a763, 0x00a765, ++ 0x00a767, 0x00a769, 0x00a76b, 0x00a76d, 0x00a76f, 0x00a77a, ++ 0x00a77c, 0x001d79, 0x00a77f, 0x00a781, 0x00a783, 0x00a785, ++ 0x00a787, 0x00a78c, 0x000265, 0x00a791, 0x00a793, 0x00a797, ++ 0x00a799, 0x00a79b, 0x00a79d, 0x00a79f, 0x00a7a1, 0x00a7a3, ++ 0x00a7a5, 0x00a7a7, 0x00a7a9, 0x000266, 0x00025c, 0x000261, ++ 0x00026c, 0x00026a, 0x00029e, 0x000287, 0x00029d, 0x00ab53, ++ 0x00a7b5, 0x00a7b7, 0x0013a0, 0x0013a1, 0x0013a2, 0x0013a3, ++ 0x0013a4, 0x0013a5, 0x0013a6, 0x0013a7, 0x0013a8, 0x0013a9, ++ 0x0013aa, 0x0013ab, 0x0013ac, 0x0013ad, 0x0013ae, 0x0013af, ++ 0x0013b0, 0x0013b1, 0x0013b2, 0x0013b3, 0x0013b4, 0x0013b5, ++ 0x0013b6, 0x0013b7, 0x0013b8, 0x0013b9, 0x0013ba, 0x0013bb, ++ 0x0013bc, 0x0013bd, 0x0013be, 0x0013bf, 0x0013c0, 0x0013c1, ++ 0x0013c2, 0x0013c3, 0x0013c4, 0x0013c5, 0x0013c6, 0x0013c7, ++ 0x0013c8, 0x0013c9, 0x0013ca, 0x0013cb, 0x0013cc, 0x0013cd, ++ 0x0013ce, 0x0013cf, 0x0013d0, 0x0013d1, 0x0013d2, 0x0013d3, ++ 0x0013d4, 0x0013d5, 0x0013d6, 0x0013d7, 0x0013d8, 0x0013d9, ++ 0x0013da, 0x0013db, 0x0013dc, 0x0013dd, 0x0013de, 0x0013df, ++ 0x0013e0, 0x0013e1, 0x0013e2, 0x0013e3, 0x0013e4, 0x0013e5, ++ 0x0013e6, 0x0013e7, 0x0013e8, 0x0013e9, 0x0013ea, 0x0013eb, ++ 0x0013ec, 0x0013ed, 0x0013ee, 0x0013ef, 0x000066, 0x000066, ++ 0x000066, 0x000069, 0x000066, 0x00006c, 0x000066, 0x000066, ++ 0x000069, 0x000066, 0x000066, 0x00006c, 0x000073, 0x000074, ++ 0x000073, 0x000074, 0x000574, 0x000576, 0x000574, 0x000565, ++ 0x000574, 0x00056b, 0x00057e, 0x000576, 0x000574, 0x00056d, ++ 0x00ff41, 0x00ff42, 0x00ff43, 0x00ff44, 
0x00ff45, 0x00ff46, ++ 0x00ff47, 0x00ff48, 0x00ff49, 0x00ff4a, 0x00ff4b, 0x00ff4c, ++ 0x00ff4d, 0x00ff4e, 0x00ff4f, 0x00ff50, 0x00ff51, 0x00ff52, ++ 0x00ff53, 0x00ff54, 0x00ff55, 0x00ff56, 0x00ff57, 0x00ff58, ++ 0x00ff59, 0x00ff5a, 0x010428, 0x010429, 0x01042a, 0x01042b, ++ 0x01042c, 0x01042d, 0x01042e, 0x01042f, 0x010430, 0x010431, ++ 0x010432, 0x010433, 0x010434, 0x010435, 0x010436, 0x010437, ++ 0x010438, 0x010439, 0x01043a, 0x01043b, 0x01043c, 0x01043d, ++ 0x01043e, 0x01043f, 0x010440, 0x010441, 0x010442, 0x010443, ++ 0x010444, 0x010445, 0x010446, 0x010447, 0x010448, 0x010449, ++ 0x01044a, 0x01044b, 0x01044c, 0x01044d, 0x01044e, 0x01044f, ++ 0x0104d8, 0x0104d9, 0x0104da, 0x0104db, 0x0104dc, 0x0104dd, ++ 0x0104de, 0x0104df, 0x0104e0, 0x0104e1, 0x0104e2, 0x0104e3, ++ 0x0104e4, 0x0104e5, 0x0104e6, 0x0104e7, 0x0104e8, 0x0104e9, ++ 0x0104ea, 0x0104eb, 0x0104ec, 0x0104ed, 0x0104ee, 0x0104ef, ++ 0x0104f0, 0x0104f1, 0x0104f2, 0x0104f3, 0x0104f4, 0x0104f5, ++ 0x0104f6, 0x0104f7, 0x0104f8, 0x0104f9, 0x0104fa, 0x0104fb, ++ 0x010cc0, 0x010cc1, 0x010cc2, 0x010cc3, 0x010cc4, 0x010cc5, ++ 0x010cc6, 0x010cc7, 0x010cc8, 0x010cc9, 0x010cca, 0x010ccb, ++ 0x010ccc, 0x010ccd, 0x010cce, 0x010ccf, 0x010cd0, 0x010cd1, ++ 0x010cd2, 0x010cd3, 0x010cd4, 0x010cd5, 0x010cd6, 0x010cd7, ++ 0x010cd8, 0x010cd9, 0x010cda, 0x010cdb, 0x010cdc, 0x010cdd, ++ 0x010cde, 0x010cdf, 0x010ce0, 0x010ce1, 0x010ce2, 0x010ce3, ++ 0x010ce4, 0x010ce5, 0x010ce6, 0x010ce7, 0x010ce8, 0x010ce9, ++ 0x010cea, 0x010ceb, 0x010cec, 0x010ced, 0x010cee, 0x010cef, ++ 0x010cf0, 0x010cf1, 0x010cf2, 0x0118c0, 0x0118c1, 0x0118c2, ++ 0x0118c3, 0x0118c4, 0x0118c5, 0x0118c6, 0x0118c7, 0x0118c8, ++ 0x0118c9, 0x0118ca, 0x0118cb, 0x0118cc, 0x0118cd, 0x0118ce, ++ 0x0118cf, 0x0118d0, 0x0118d1, 0x0118d2, 0x0118d3, 0x0118d4, ++ 0x0118d5, 0x0118d6, 0x0118d7, 0x0118d8, 0x0118d9, 0x0118da, ++ 0x0118db, 0x0118dc, 0x0118dd, 0x0118de, 0x0118df, 0x01e922, ++ 0x01e923, 0x01e924, 0x01e925, 0x01e926, 0x01e927, 0x01e928, ++ 0x01e929, 0x01e92a, 
0x01e92b, 0x01e92c, 0x01e92d, 0x01e92e, ++ 0x01e92f, 0x01e930, 0x01e931, 0x01e932, 0x01e933, 0x01e934, ++ 0x01e935, 0x01e936, 0x01e937, 0x01e938, 0x01e939, 0x01e93a, ++ 0x01e93b, 0x01e93c, 0x01e93d, 0x01e93e, 0x01e93f, 0x01e940, ++ 0x01e941, 0x01e942, 0x01e943, ++}; ++ ++static u8 apfs_ccc_trie[] = { ++ /* Node for range 0x_____ */ ++ 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0____ */ ++ 0x03, 0x04, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x08, ++ /* Node for range 0x1____ */ ++ 0x09, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, ++ 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0d, 0x0e, 0x00, ++ /* Node for range 0x00___ */ ++ 0x00, 0x00, 0x00, 0x0f, 0x10, 0x11, 0x12, 0x13, ++ 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, ++ /* Node for range 0x01___ */ ++ 0x1c, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x1e, ++ 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x00, 0x00, ++ /* Node for range 0x02___ */ ++ 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x26, 0x27, 0x00, 0x00, ++ /* Node for range 0x03___ */ ++ 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a___ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x00, ++ 0x2a, 0x2b, 0x2c, 0x2d, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0f___ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x2f, 0x00, ++ /* Node for range 0x10___ */ ++ 0x00, 0x30, 0x31, 0x32, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x11___ */ ++ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, ++ 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, ++ /* Node for range 0x16___ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x3d, 0x3e, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1b___ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 
0x00, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, ++ /* Node for range 0x1d___ */ ++ 0x00, 0x40, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1e___ */ ++ 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x43, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x003__ */ ++ 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x004__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x4c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x005__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x4d, 0x4e, 0x4f, 0x50, 0x00, 0x00, 0x00, ++ /* Node for range 0x006__ */ ++ 0x00, 0x51, 0x00, 0x00, 0x52, 0x53, 0x00, 0x54, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x56, 0x00, ++ /* Node for range 0x007__ */ ++ 0x00, 0x57, 0x00, 0x58, 0x59, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5a, 0x5b, ++ /* Node for range 0x008__ */ ++ 0x00, 0x5c, 0x5d, 0x00, 0x00, 0x5e, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x60, 0x61, ++ /* Node for range 0x009__ */ ++ 0x00, 0x00, 0x00, 0x62, 0x63, 0x64, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x65, 0x66, 0x00, 0x00, 0x00, ++ /* Node for range 0x00a__ */ ++ 0x00, 0x00, 0x00, 0x67, 0x68, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x69, 0x6a, 0x00, 0x00, 0x00, ++ /* Node for range 0x00b__ */ ++ 0x00, 0x00, 0x00, 0x6b, 0x6c, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x6d, 0x00, 0x00, 0x00, ++ /* Node for range 0x00c__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x6e, 0x6f, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x70, 0x71, 0x00, 0x00, 0x00, ++ /* Node for range 0x00d__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, ++ /* Node for range 0x00e__ */ ++ 0x00, 0x00, 0x00, 0x74, 0x75, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x76, 0x77, 0x00, 0x00, 0x00, ++ /* Node for range 0x00f__ */ ++ 0x00, 0x78, 0x00, 0x79, 0x00, 0x00, 
0x00, 0x7a, ++ 0x7b, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, 0x00, ++ /* Node for range 0x010__ */ ++ 0x00, 0x00, 0x00, 0x7d, 0x00, 0x00, 0x00, 0x00, ++ 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x013__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x017__ */ ++ 0x00, 0x80, 0x00, 0x81, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, ++ /* Node for range 0x018__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x019__ */ ++ 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01a__ */ ++ 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, 0x86, 0x87, ++ 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01b__ */ ++ 0x00, 0x00, 0x00, 0x89, 0x8a, 0x00, 0x8b, 0x8c, ++ 0x00, 0x00, 0x8d, 0x00, 0x00, 0x00, 0x8e, 0x8f, ++ /* Node for range 0x01c__ */ ++ 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x91, 0x92, 0x93, ++ /* Node for range 0x01d__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x94, 0x95, 0x96, 0x97, ++ /* Node for range 0x020__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x98, 0x99, 0x9a, ++ /* Node for range 0x02c__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9b, 0x9c, ++ /* Node for range 0x02d__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9d, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, 0x9f, ++ /* Node for range 0x030__ */ ++ 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0xa1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a6__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa2, 0xa3, ++ 0x00, 0xa4, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa5, ++ /* Node for range 0x0a8__ */ ++ 0xa6, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0xa7, 0x00, 0xa8, 0xa9, ++ /* Node for range 0x0a9__ */ ++ 0x00, 0x00, 0xaa, 0x00, 0x00, 0xab, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xac, 0xad, 0x00, 0x00, 0x00, ++ /* Node for range 0x0aa__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xae, 0xaf, 0x00, 0x00, 0xb0, ++ /* Node for range 0x0ab__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb1, 0x00, ++ /* Node for range 0x0fb__ */ ++ 0x00, 0xb2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0fe__ */ ++ 0x00, 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x101__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, ++ /* Node for range 0x102__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb5, 0x00, ++ /* Node for range 0x103__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x10a__ */ ++ 0xb7, 0x00, 0x00, 0xb8, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb9, 0x00, ++ /* Node for range 0x110__ */ ++ 0x00, 0x00, 0x00, 0x00, 0xba, 0x00, 0x00, 0xbb, ++ 0x00, 0x00, 0x00, 0xbc, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x111__ */ ++ 0xbd, 0x00, 0x00, 0xbe, 0x00, 0x00, 0x00, 0xbf, ++ 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, ++ /* Node for range 0x112__ */ ++ 0x00, 0x00, 0x00, 0xc1, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, ++ /* Node for range 0x113__ */ ++ 0x00, 0x00, 0x00, 0xc3, 0xc4, 0x00, 0xc5, 0xc6, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x114__ */ ++ 0x00, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, ++ /* Node for range 0x115__ */ ++ 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xc9, 0xca, 0x00, 0x00, 0x00, ++ /* Node for range 0x116__ */ ++ 0x00, 0x00, 0x00, 0xcb, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xcc, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x117__ */ ++ 0x00, 0x00, 0xcd, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x11c__ */ ++ 0x00, 0x00, 0x00, 0xce, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x16a__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcf, ++ /* Node for range 0x16b__ */ ++ 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1bc__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0xd1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1d1__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd2, 0xd3, ++ 0xd4, 0x00, 0xd5, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1d2__ */ ++ 0x00, 0x00, 0x00, 0x00, 0xd6, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1e0__ */ ++ 0xd7, 0xd8, 0xd9, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1e8__ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xda, 0x00, 0x00, ++ /* Node for range 0x1e9__ */ ++ 0x00, 0x00, 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0030_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0031_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe8, 0xdc, 0xdc, ++ 0xdc, 0xdc, 0xe8, 0xd8, 0xdc, 0xdc, 0xdc, 0xdc, ++ /* Node for range 0x0032_ */ ++ 0xdc, 0xca, 0xca, 0xdc, 0xdc, 0xdc, 0xdc, 0xca, ++ 0xca, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, ++ /* Node for range 0x0033_ */ ++ 
0xdc, 0xdc, 0xdc, 0xdc, 0x01, 0x01, 0x01, 0x01, ++ 0x01, 0xdc, 0xdc, 0xdc, 0xdc, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0034_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xf0, 0xe6, 0xdc, ++ 0xdc, 0xdc, 0xe6, 0xe6, 0xe6, 0xdc, 0xdc, 0x00, ++ /* Node for range 0x0035_ */ ++ 0xe6, 0xe6, 0xe6, 0xdc, 0xdc, 0xdc, 0xdc, 0xe6, ++ 0xe8, 0xdc, 0xdc, 0xe6, 0xe9, 0xea, 0xea, 0xe9, ++ /* Node for range 0x0036_ */ ++ 0xea, 0xea, 0xe9, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0048_ */ ++ 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0059_ */ ++ 0x00, 0xdc, 0xe6, 0xe6, 0xe6, 0xe6, 0xdc, 0xe6, ++ 0xe6, 0xe6, 0xde, 0xdc, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x005a_ */ ++ 0xe6, 0xe6, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, ++ 0xe6, 0xe6, 0xdc, 0xe6, 0xe6, 0xde, 0xe4, 0xe6, ++ /* Node for range 0x005b_ */ ++ 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, ++ 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x00, 0x17, ++ /* Node for range 0x005c_ */ ++ 0x00, 0x18, 0x19, 0x00, 0xe6, 0xdc, 0x00, 0x12, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0061_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0x1e, 0x1f, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0064_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, ++ /* Node for range 0x0065_ */ ++ 0x20, 0x21, 0x22, 0xe6, 0xe6, 0xdc, 0xdc, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xdc, 0xe6, 0xe6, 0xdc, ++ /* Node for range 0x0067_ */ ++ 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x006d_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0xe6, ++ /* Node for range 0x006e_ */ ++ 0xe6, 0xe6, 0xe6, 0xdc, 0xe6, 0x00, 0x00, 0xe6, ++ 0xe6, 0x00, 0xdc, 0xe6, 0xe6, 0xdc, 0x00, 0x00, ++ /* Node for range 
0x0071_ */ ++ 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0073_ */ ++ 0xe6, 0xdc, 0xe6, 0xe6, 0xdc, 0xe6, 0xe6, 0xdc, ++ 0xdc, 0xdc, 0xe6, 0xdc, 0xdc, 0xe6, 0xdc, 0xe6, ++ /* Node for range 0x0074_ */ ++ 0xe6, 0xe6, 0xdc, 0xe6, 0xdc, 0xe6, 0xdc, 0xe6, ++ 0xdc, 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x007e_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x007f_ */ ++ 0xe6, 0xe6, 0xdc, 0xe6, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0081_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0082_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0xe6, 0xe6, 0xe6, ++ 0x00, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, ++ /* Node for range 0x0085_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0xdc, 0xdc, 0xdc, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x008d_ */ ++ 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x008e_ */ ++ 0xe6, 0xe6, 0x00, 0xdc, 0xe6, 0xe6, 0xdc, 0xe6, ++ 0xe6, 0xdc, 0xe6, 0xe6, 0xe6, 0xdc, 0xdc, 0xdc, ++ /* Node for range 0x008f_ */ ++ 0x1b, 0x1c, 0x1d, 0xe6, 0xe6, 0xe6, 0xdc, 0xe6, ++ 0xe6, 0xdc, 0xdc, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0093_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node for range 0x0094_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x0095_ */ ++ 0x00, 0xe6, 0xdc, 0xe6, 0xe6, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x009b_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node 
for range 0x009c_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00a3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node for range 0x00a4_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00ab_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node for range 0x00ac_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00b3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node for range 0x00b4_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00bc_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00c4_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00c5_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x5b, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00cb_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node for range 0x00cc_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00d4_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x00dc_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00e3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x67, 0x67, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 
++ /* Node for range 0x00e4_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x6b, 0x6b, 0x6b, 0x6b, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00eb_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x76, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00ec_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x7a, 0x7a, 0x7a, 0x7a, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00f1_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0xdc, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00f3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, 0xdc, ++ 0x00, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00f7_ */ ++ 0x00, 0x81, 0x82, 0x00, 0x84, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x82, 0x82, 0x82, 0x82, 0x00, 0x00, ++ /* Node for range 0x00f8_ */ ++ 0x82, 0x00, 0xe6, 0xe6, 0x09, 0x00, 0xe6, 0xe6, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x00fc_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0103_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, ++ 0x00, 0x09, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0108_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, ++ /* Node for range 0x0135_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0171_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0173_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x017d_ */ ++ 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0x00, 0x00, ++ /* Node for range 0x018a_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, ++ /* Node for range 0x0193_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0xde, 0xe6, 0xdc, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01a1_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, ++ 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01a6_ */ ++ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01a7_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0xdc, ++ /* Node for range 0x01ab_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xdc, 0xdc, 0xdc, ++ 0xdc, 0xdc, 0xdc, 0xe6, 0xe6, 0xdc, 0x00, 0x00, ++ /* Node for range 0x01b3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01b4_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01b6_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xe6, 0xdc, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x01b7_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01ba_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x09, 0x09, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01be_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01bf_ */ ++ 0x00, 0x00, 0x09, 0x09, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01c3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01cd_ */ ++ 0xe6, 0xe6, 0xe6, 0x00, 0x01, 0xdc, 0xdc, 0xdc, ++ 0xdc, 0xdc, 0xe6, 0xe6, 0xdc, 0xdc, 0xdc, 0xdc, ++ /* Node for range 0x01ce_ */ ++ 0xe6, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, ++ 0x01, 0x00, 0x00, 0x00, 
0x00, 0xdc, 0x00, 0x00, ++ /* Node for range 0x01cf_ */ ++ 0x00, 0x00, 0x00, 0x00, 0xe6, 0x00, 0x00, 0x00, ++ 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x01dc_ */ ++ 0xe6, 0xe6, 0xdc, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xdc, 0xe6, 0xe6, 0xea, 0xd6, 0xdc, ++ /* Node for range 0x01dd_ */ ++ 0xca, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x01de_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x01df_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xe6, 0xe9, 0xdc, 0xe6, 0xdc, ++ /* Node for range 0x020d_ */ ++ 0xe6, 0xe6, 0x01, 0x01, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0x01, 0x01, 0x01, 0xe6, 0xe6, 0x00, 0x00, 0x00, ++ /* Node for range 0x020e_ */ ++ 0x00, 0xe6, 0x00, 0x00, 0x00, 0x01, 0x01, 0xe6, ++ 0xdc, 0xe6, 0x01, 0x01, 0xdc, 0xdc, 0xdc, 0xdc, ++ /* Node for range 0x020f_ */ ++ 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x02ce_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, ++ /* Node for range 0x02cf_ */ ++ 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x02d7_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, ++ /* Node for range 0x02de_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x02df_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0302_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0xda, 0xe4, 0xe8, 0xde, 0xe0, 0xe0, ++ /* Node for range 0x0309_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x08, 
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a66_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, ++ /* Node for range 0x0a67_ */ ++ 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, ++ /* Node for range 0x0a69_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, ++ /* Node for range 0x0a6f_ */ ++ 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a80_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a8c_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a8e_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x0a8f_ */ ++ 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a92_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xdc, 0xdc, 0xdc, 0x00, 0x00, ++ /* Node for range 0x0a95_ */ ++ 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a9b_ */ ++ 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0a9c_ */ ++ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0aab_ */ ++ 0xe6, 0x00, 0xe6, 0xe6, 0xdc, 0x00, 0x00, 0xe6, ++ 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, ++ /* Node for range 0x0aac_ */ ++ 0x00, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0aaf_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, ++ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x0abe_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x0fb1_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x00, ++ /* Node for range 0x0fe2_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xdc, ++ 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xe6, 0xe6, ++ /* Node for range 0x101f_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, ++ /* Node for range 0x102e_ */ ++ 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1037_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x10a0_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, 0xe6, ++ /* Node for range 0x10a3_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0xe6, 0x01, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x09, ++ /* Node for range 0x10ae_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xdc, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1104_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1107_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, ++ /* Node for range 0x110b_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x09, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1110_ */ ++ 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1113_ */ ++ 0x00, 0x00, 0x00, 0x09, 0x09, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1117_ */ ++ 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 
0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x111c_ */ ++ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1123_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x07, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x112e_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1133_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, ++ /* Node for range 0x1134_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, ++ /* Node for range 0x1136_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, ++ /* Node for range 0x1137_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1144_ */ ++ 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x07, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x114c_ */ ++ 0x00, 0x00, 0x09, 0x07, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x115b_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, ++ /* Node for range 0x115c_ */ ++ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1163_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, ++ /* Node for range 0x116b_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x07, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1172_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x11c3_ */ ++ 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, ++ /* Node for range 0x16af_ */ ++ 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x16b3_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1bc9_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, ++ /* Node for range 0x1d16_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0xd8, 0x01, ++ 0x01, 0x01, 0x00, 0x00, 0x00, 0xe2, 0xd8, 0xd8, ++ /* Node for range 0x1d17_ */ ++ 0xd8, 0xd8, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, ++ /* Node for range 0x1d18_ */ ++ 0xdc, 0xdc, 0xdc, 0x00, 0x00, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xdc, 0xdc, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1d1a_ */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, 0x00, ++ /* Node for range 0x1d24_ */ ++ 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1e00_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0x00, ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x1e01_ */ ++ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, ++ /* Node for range 0x1e02_ */ ++ 0xe6, 0xe6, 0x00, 0xe6, 0xe6, 0x00, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1e8d_ */ ++ 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0xdc, 0x00, ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ++ /* Node for range 0x1e94_ */ ++ 0x00, 0x00, 0x00, 0x00, 0xe6, 0xe6, 0xe6, 0xe6, ++ 0xe6, 0xe6, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, ++}; +diff --git a/fs/apfs/unicode.h b/fs/apfs/unicode.h +new file mode 100644 +index 000000000..e3b7edc51 +--- /dev/null ++++ b/fs/apfs/unicode.h +@@ -0,0 +1,27 @@ ++/* 
SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#ifndef _APFS_UNICODE_H ++#define _APFS_UNICODE_H ++ ++#include ++ ++/* ++ * This structure helps apfs_normalize_next() to retrieve one normalized ++ * (and case-folded) UTF-32 character at a time from a UTF-8 string. ++ */ ++struct apfs_unicursor { ++ const char *utf8curr; /* Start of UTF-8 to decompose and reorder */ ++ unsigned int total_len; /* Length of the whole UTF-8 string */ ++ int length; /* Length of normalization until next starter */ ++ int last_pos; /* Offset in substring of last char returned */ ++ u8 last_ccc; /* CCC of the last character returned */ ++}; ++ ++extern void apfs_init_unicursor(struct apfs_unicursor *cursor, const char *utf8str, unsigned int total_len); ++extern unicode_t apfs_normalize_next(struct apfs_unicursor *cursor, ++ bool case_fold); ++ ++#endif /* _APFS_UNICODE_H */ +diff --git a/fs/apfs/xattr.c b/fs/apfs/xattr.c +new file mode 100644 +index 000000000..0de1db140 +--- /dev/null ++++ b/fs/apfs/xattr.c +@@ -0,0 +1,922 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2018 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_xattr_from_query - Read the xattr record found by a successful query ++ * @query: the query that found the record ++ * @xattr: Return parameter. The xattr record found. ++ * ++ * Reads the xattr record into @xattr and performs some basic sanity checks ++ * as a protection against crafted filesystems. Returns 0 on success or ++ * -EFSCORRUPTED otherwise. ++ * ++ * The caller must not free @query while @xattr is in use, because @xattr->name ++ * and @xattr->xdata point to data on disk. 
++ */ ++static int apfs_xattr_from_query(struct apfs_query *query, ++ struct apfs_xattr *xattr) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_xattr_val *xattr_val; ++ struct apfs_xattr_key *xattr_key; ++ char *raw = query->node->object.data; ++ int datalen = query->len - sizeof(*xattr_val); ++ int namelen = query->key_len - sizeof(*xattr_key); ++ ++ if (namelen < 1 || datalen < 0) { ++ apfs_err(sb, "bad length of name (%d) or data (%d)", namelen, datalen); ++ return -EFSCORRUPTED; ++ } ++ ++ xattr_val = (struct apfs_xattr_val *)(raw + query->off); ++ xattr_key = (struct apfs_xattr_key *)(raw + query->key_off); ++ ++ if (namelen != le16_to_cpu(xattr_key->name_len)) { ++ apfs_err(sb, "inconsistent name length (%d vs %d)", namelen, le16_to_cpu(xattr_key->name_len)); ++ return -EFSCORRUPTED; ++ } ++ ++ /* The xattr name must be NULL-terminated */ ++ if (xattr_key->name[namelen - 1] != 0) { ++ apfs_err(sb, "null termination missing"); ++ return -EFSCORRUPTED; ++ } ++ ++ xattr->has_dstream = le16_to_cpu(xattr_val->flags) & ++ APFS_XATTR_DATA_STREAM; ++ ++ if (xattr->has_dstream && datalen != sizeof(struct apfs_xattr_dstream)) { ++ apfs_err(sb, "bad data length (%d)", datalen); ++ return -EFSCORRUPTED; ++ } ++ if (!xattr->has_dstream && datalen != le16_to_cpu(xattr_val->xdata_len)) { ++ apfs_err(sb, "inconsistent data length (%d vs %d)", datalen, le16_to_cpu(xattr_val->xdata_len)); ++ return -EFSCORRUPTED; ++ } ++ ++ xattr->name = xattr_key->name; ++ xattr->name_len = namelen - 1; /* Don't count the NULL termination */ ++ xattr->xdata = xattr_val->xdata; ++ xattr->xdata_len = datalen; ++ return 0; ++} ++ ++/** ++ * apfs_dstream_from_xattr - Get the dstream info for a dstream xattr ++ * @sb: filesystem superblock ++ * @xattr: in-memory xattr record (already sanity-checked) ++ * @dstream: on return, the data stream info ++ */ ++static void apfs_dstream_from_xattr(struct super_block *sb, struct apfs_xattr *xattr, struct apfs_dstream_info *dstream) 
++{ ++ struct apfs_xattr_dstream *xdata = (void *)xattr->xdata; ++ ++ dstream->ds_sb = sb; ++ dstream->ds_inode = NULL; ++ dstream->ds_id = le64_to_cpu(xdata->xattr_obj_id); ++ dstream->ds_size = le64_to_cpu(xdata->dstream.size); ++ dstream->ds_sparse_bytes = 0; /* Irrelevant for xattrs */ ++ ++ dstream->ds_cached_ext.len = 0; ++ dstream->ds_ext_dirty = false; ++ spin_lock_init(&dstream->ds_ext_lock); ++ ++ /* Xattrs can't be cloned */ ++ dstream->ds_shared = false; ++} ++ ++/** ++ * apfs_xattr_extents_read - Read the value of a xattr from its extents ++ * @parent: inode the attribute belongs to ++ * @xattr: the xattr structure ++ * @buffer: where to copy the attribute value ++ * @size: size of @buffer ++ * @only_whole: are partial reads banned? ++ * ++ * Copies the value of @xattr to @buffer, if provided. If @buffer is NULL, just ++ * computes the size of the buffer required. ++ * ++ * Returns the number of bytes used/required, or a negative error code in case ++ * of failure. ++ */ ++static int apfs_xattr_extents_read(struct inode *parent, ++ struct apfs_xattr *xattr, ++ void *buffer, size_t size, bool only_whole) ++{ ++ struct super_block *sb = parent->i_sb; ++ struct apfs_dstream_info *dstream; ++ int length, ret; ++ ++ dstream = kzalloc(sizeof(*dstream), GFP_KERNEL); ++ if (!dstream) ++ return -ENOMEM; ++ apfs_dstream_from_xattr(sb, xattr, dstream); ++ ++ length = dstream->ds_size; ++ if (length < 0 || length < dstream->ds_size) { ++ /* TODO: avoid overflow here for huge compressed files */ ++ apfs_warn(sb, "xattr is too big to read on linux (0x%llx)", dstream->ds_size); ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ if (!buffer) { ++ /* All we want is the length */ ++ ret = length; ++ goto out; ++ } ++ ++ if (only_whole) { ++ if (length > size) { ++ /* xattr won't fit in the buffer */ ++ ret = -ERANGE; ++ goto out; ++ } ++ } else { ++ if (length > size) ++ length = size; ++ } ++ ++ ret = apfs_nonsparse_dstream_read(dstream, buffer, length, 0 /* offset */); ++ if 
(ret == 0) ++ ret = length; ++ ++out: ++ kfree(dstream); ++ return ret; ++} ++ ++/** ++ * apfs_xattr_inline_read - Read the value of an inline xattr ++ * @xattr: the xattr structure ++ * @buffer: where to copy the attribute value ++ * @size: size of @buffer ++ * @only_whole: are partial reads banned? ++ * ++ * Copies the inline value of @xattr to @buffer, if provided. If @buffer is ++ * NULL, just computes the size of the buffer required. ++ * ++ * Returns the number of bytes used/required, or a negative error code in case ++ * of failure. ++ */ ++static int apfs_xattr_inline_read(struct apfs_xattr *xattr, void *buffer, size_t size, bool only_whole) ++{ ++ int length = xattr->xdata_len; ++ ++ if (!buffer) /* All we want is the length */ ++ return length; ++ if (only_whole) { ++ if (length > size) /* xattr won't fit in the buffer */ ++ return -ERANGE; ++ } else { ++ if (length > size) ++ length = size; ++ } ++ memcpy(buffer, xattr->xdata, length); ++ return length; ++} ++ ++/** ++ * apfs_xattr_get_compressed_data - Get the compressed data in a named attribute ++ * @inode: inode the attribute belongs to ++ * @name: name of the attribute ++ * @cdata: compressed data struct to set on return ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_xattr_get_compressed_data(struct inode *inode, const char *name, struct apfs_compressed_data *cdata) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_xattr xattr; ++ u64 cnid = apfs_ino(inode); ++ int ret; ++ ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_xattr_key(cnid, name, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx (%s)", cnid, name); ++ goto done; ++ } ++ ++ ret = apfs_xattr_from_query(query, &xattr); ++ if (ret) { ++ apfs_err(sb, "bad xattr record in inode 0x%llx", cnid); ++ goto done; ++ } ++ ++ cdata->has_dstream = xattr.has_dstream; ++ if (cdata->has_dstream) { ++ struct apfs_dstream_info *dstream = NULL; ++ ++ dstream = kzalloc(sizeof(*dstream), GFP_KERNEL); ++ if (!dstream) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ apfs_dstream_from_xattr(sb, &xattr, dstream); ++ ++ cdata->dstream = dstream; ++ cdata->size = dstream->ds_size; ++ } else { ++ void *data = NULL; ++ int len; ++ ++ len = xattr.xdata_len; ++ if (len > APFS_XATTR_MAX_EMBEDDED_SIZE) { ++ apfs_err(sb, "inline xattr too big"); ++ ret = -EFSCORRUPTED; ++ goto done; ++ } ++ data = kzalloc(len, GFP_KERNEL); ++ if (!data) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ memcpy(data, xattr.xdata, len); ++ ++ cdata->data = data; ++ cdata->size = len; ++ } ++ ret = 0; ++ ++done: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_release_compressed_data - Clean up a compressed data struct ++ * @cdata: struct to clean up (but not free) ++ */ ++void apfs_release_compressed_data(struct apfs_compressed_data *cdata) ++{ ++ if (!cdata) ++ return; ++ ++ if (cdata->has_dstream) { ++ kfree(cdata->dstream); ++ cdata->dstream = NULL; ++ } else { ++ kfree(cdata->data); ++ cdata->data = NULL; ++ } ++} ++ ++/** ++ * 
apfs_compressed_data_read - Read from a compressed data struct ++ * @cdata: compressed data struct ++ * @buf: destination buffer ++ * @count: exact number of bytes to read ++ * @offset: dstream offset to read from ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++int apfs_compressed_data_read(struct apfs_compressed_data *cdata, void *buf, size_t count, u64 offset) ++{ ++ if (cdata->has_dstream) ++ return apfs_nonsparse_dstream_read(cdata->dstream, buf, count, offset); ++ ++ if (offset > cdata->size || count > cdata->size - offset) { ++ apfs_err(NULL, "reading past the end (0x%llx-0x%llx)", offset, (unsigned long long)count); ++ /* No caller is expected to legitimately read out-of-bounds */ ++ return -EFSCORRUPTED; ++ } ++ memcpy(buf, cdata->data + offset, count); ++ return 0; ++} ++ ++/** ++ * __apfs_xattr_get - Find and read a named attribute ++ * @inode: inode the attribute belongs to ++ * @name: name of the attribute ++ * @buffer: where to copy the attribute value ++ * @size: size of @buffer ++ * ++ * This does the same as apfs_xattr_get(), but without taking any locks. 
++ */ ++int __apfs_xattr_get(struct inode *inode, const char *name, void *buffer, ++ size_t size) ++{ ++ return ____apfs_xattr_get(inode, name, buffer, size, true /* only_whole */); ++} ++ ++/** ++ * ____apfs_xattr_get - Find and read a named attribute, optionally header only ++ * @inode: inode the attribute belongs to ++ * @name: name of the attribute ++ * @buffer: where to copy the attribute value ++ * @size: size of @buffer ++ * @only_whole: must read complete (no partial header read allowed) ++ */ ++int ____apfs_xattr_get(struct inode *inode, const char *name, void *buffer, ++ size_t size, bool only_whole) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ struct apfs_xattr xattr; ++ u64 cnid = apfs_ino(inode); ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_xattr_key(cnid, name, &query->key); ++ query->flags |= APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ if (ret != -ENODATA) ++ apfs_err(sb, "query failed for id 0x%llx (%s)", cnid, name); ++ goto done; ++ } ++ ++ ret = apfs_xattr_from_query(query, &xattr); ++ if (ret) { ++ apfs_err(sb, "bad xattr record in inode 0x%llx", cnid); ++ goto done; ++ } ++ ++ if (xattr.has_dstream) ++ ret = apfs_xattr_extents_read(inode, &xattr, buffer, size, only_whole); ++ else ++ ret = apfs_xattr_inline_read(&xattr, buffer, size, only_whole); ++ ++done: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_xattr_get - Find and read a named attribute ++ * @inode: inode the attribute belongs to ++ * @name: name of the attribute ++ * @buffer: where to copy the attribute value ++ * @size: size of @buffer ++ * ++ * Finds an extended attribute and copies its value to @buffer, if provided. If ++ * @buffer is NULL, just computes the size of the buffer required. 
++ * ++ * Returns the number of bytes used/required, or a negative error code in case ++ * of failure. ++ */ ++static int apfs_xattr_get(struct inode *inode, const char *name, void *buffer, size_t size) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(inode->i_sb); ++ int ret; ++ ++ down_read(&nxi->nx_big_sem); ++ ret = __apfs_xattr_get(inode, name, buffer, size); ++ up_read(&nxi->nx_big_sem); ++ if (ret > XATTR_SIZE_MAX) { ++ apfs_warn(inode->i_sb, "xattr is too big to read on linux (%d)", ret); ++ return -E2BIG; ++ } ++ return ret; ++} ++ ++static int apfs_xattr_osx_get(const struct xattr_handler *handler, ++ struct dentry *unused, struct inode *inode, ++ const char *name, void *buffer, size_t size) ++{ ++ /* Ignore the fake 'osx' prefix */ ++ return apfs_xattr_get(inode, name, buffer, size); ++} ++ ++/** ++ * apfs_delete_xattr - Delete an extended attribute ++ * @query: successful query pointing to the xattr to delete ++ * ++ * Returns 0 on success or a negative error code in case of failure. ++ */ ++static int apfs_delete_xattr(struct apfs_query *query) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_xattr xattr; ++ struct apfs_dstream_info *dstream; ++ int err; ++ ++ err = apfs_xattr_from_query(query, &xattr); ++ if (err) { ++ apfs_err(sb, "bad xattr record"); ++ return err; ++ } ++ ++ if (!xattr.has_dstream) ++ return apfs_btree_remove(query); ++ ++ dstream = kzalloc(sizeof(*dstream), GFP_KERNEL); ++ if (!dstream) ++ return -ENOMEM; ++ apfs_dstream_from_xattr(sb, &xattr, dstream); ++ ++ /* ++ * Remove the xattr record before truncation, because truncation creates ++ * new queries and makes ours invalid. This stuff is all too subtle, I ++ * really need to add some assertions (TODO). 
++ */ ++ err = apfs_btree_remove(query); ++ if (err) { ++ apfs_err(sb, "removal failed"); ++ goto fail; ++ } ++ err = apfs_truncate(dstream, 0); ++ if (err) ++ apfs_err(sb, "truncation failed for dstream 0x%llx", dstream->ds_id); ++ ++fail: ++ kfree(dstream); ++ return err; ++} ++ ++/** ++ * apfs_delete_any_xattr - Delete any single xattr for a given inode ++ * @inode: the vfs inode ++ * ++ * Intended to be called repeatedly, to delete all the xattrs one by one. ++ * Returns -EAGAIN on success until the process is complete, then it returns ++ * 0. Returns other negative error codes in case of failure. ++ */ ++static int apfs_delete_any_xattr(struct inode *inode) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query; ++ int ret; ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) ++ return -ENOMEM; ++ apfs_init_xattr_key(apfs_ino(inode), NULL /* name */, &query->key); ++ query->flags = APFS_QUERY_CAT | APFS_QUERY_ANY_NAME | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ if (ret == -ENODATA) ++ ret = 0; /* No more xattrs, we are done */ ++ else ++ apfs_err(sb, "query failed for ino 0x%llx", apfs_ino(inode)); ++ goto out; ++ } ++ ++ ret = apfs_delete_xattr(query); ++ if (!ret) ++ ret = -EAGAIN; ++ else ++ apfs_err(sb, "xattr deletion failed"); ++ ++out: ++ apfs_free_query(query); ++ return ret; ++} ++ ++/** ++ * apfs_delete_all_xattrs - Delete all xattrs for a given inode ++ * @inode: the vfs inode ++ * ++ * Returns 0 on success or a negative error code in case of failure. 
++ */ ++int apfs_delete_all_xattrs(struct inode *inode) ++{ ++ struct apfs_nxsb_info *nxi = APFS_NXI(inode->i_sb); ++ int ret; ++ ++ lockdep_assert_held_write(&nxi->nx_big_sem); ++ ++ do { ++ ret = apfs_delete_any_xattr(inode); ++ } while (ret == -EAGAIN); ++ ++ return ret; ++} ++ ++/** ++ * apfs_build_xattr_key - Allocate and initialize the key for a xattr record ++ * @name: xattr name ++ * @ino: inode number for xattr's owner ++ * @key_p: on return, a pointer to the new on-disk key structure ++ * ++ * Returns the length of the key, or a negative error code in case of failure. ++ */ ++static int apfs_build_xattr_key(const char *name, u64 ino, struct apfs_xattr_key **key_p) ++{ ++ struct apfs_xattr_key *key; ++ u16 namelen = strlen(name) + 1; /* We count the null-termination */ ++ int key_len; ++ ++ key_len = sizeof(*key) + namelen; ++ key = kmalloc(key_len, GFP_KERNEL); ++ if (!key) ++ return -ENOMEM; ++ ++ apfs_key_set_hdr(APFS_TYPE_XATTR, ino, key); ++ key->name_len = cpu_to_le16(namelen); ++ strscpy(key->name, name, namelen); ++ ++ *key_p = key; ++ return key_len; ++} ++ ++/** ++ * apfs_build_dstream_xattr_val - Allocate and init value for a dstream xattr ++ * @dstream: data stream info ++ * @val_p: on return, a pointer to the new on-disk value structure ++ * ++ * Returns the length of the value, or a negative error code in case of failure. 
++ */ ++static int apfs_build_dstream_xattr_val(struct apfs_dstream_info *dstream, struct apfs_xattr_val **val_p) ++{ ++ struct apfs_xattr_val *val; ++ struct apfs_xattr_dstream *dstream_raw; ++ int val_len; ++ ++ val_len = sizeof(*val) + sizeof(*dstream_raw); ++ val = kzalloc(val_len, GFP_KERNEL); ++ if (!val) ++ return -ENOMEM; ++ ++ val->flags = cpu_to_le16(APFS_XATTR_DATA_STREAM); ++ val->xdata_len = cpu_to_le16(sizeof(*dstream_raw)); ++ ++ dstream_raw = (void *)val->xdata; ++ dstream_raw->xattr_obj_id = cpu_to_le64(dstream->ds_id); ++ dstream_raw->dstream.size = cpu_to_le64(dstream->ds_size); ++ dstream_raw->dstream.alloced_size = cpu_to_le64(apfs_alloced_size(dstream)); ++ if (apfs_vol_is_encrypted(dstream->ds_sb)) ++ dstream_raw->dstream.default_crypto_id = cpu_to_le64(dstream->ds_id); ++ ++ *val_p = val; ++ return val_len; ++} ++ ++/** ++ * apfs_build_inline_xattr_val - Allocate and init value for an inline xattr ++ * @value: content of the xattr ++ * @size: size of @value ++ * @val_p: on return, a pointer to the new on-disk value structure ++ * ++ * Returns the length of the value, or a negative error code in case of failure. ++ */ ++static int apfs_build_inline_xattr_val(const void *value, size_t size, struct apfs_xattr_val **val_p) ++{ ++ struct apfs_xattr_val *val; ++ int val_len; ++ ++ val_len = sizeof(*val) + size; ++ val = kmalloc(val_len, GFP_KERNEL); ++ if (!val) ++ return -ENOMEM; ++ ++ val->flags = cpu_to_le16(APFS_XATTR_DATA_EMBEDDED); ++ val->xdata_len = cpu_to_le16(size); ++ memcpy(val->xdata, value, size); ++ ++ *val_p = val; ++ return val_len; ++} ++ ++/** ++ * apfs_create_xattr_dstream - Create the extents for a dstream xattr ++ * @sb: filesystem superblock ++ * @value: value for the attribute ++ * @size: sizeo of @value ++ * ++ * Returns the info for the created data stream, or an error pointer in case ++ * of failure. 
++ */ ++static struct apfs_dstream_info *apfs_create_xattr_dstream(struct super_block *sb, const void *value, size_t size) ++{ ++ struct apfs_superblock *vsb_raw = APFS_SB(sb)->s_vsb_raw; ++ struct apfs_dstream_info *dstream; ++ int blkcnt, i; ++ int err; ++ ++ dstream = kzalloc(sizeof(*dstream), GFP_KERNEL); ++ if (!dstream) ++ return ERR_PTR(-ENOMEM); ++ dstream->ds_sb = sb; ++ spin_lock_init(&dstream->ds_ext_lock); ++ ++ apfs_assert_in_transaction(sb, &vsb_raw->apfs_o); ++ dstream->ds_id = le64_to_cpu(vsb_raw->apfs_next_obj_id); ++ le64_add_cpu(&vsb_raw->apfs_next_obj_id, 1); ++ ++ blkcnt = (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; ++ for (i = 0; i < blkcnt; i++) { ++ struct buffer_head *bh; ++ int off, tocopy; ++ u64 bno; ++ ++ err = apfs_dstream_get_new_bno(dstream, i, &bno); ++ if (err) { ++ apfs_err(sb, "failed to get new block in dstream 0x%llx", dstream->ds_id); ++ goto fail; ++ } ++ bh = apfs_sb_bread(sb, bno); ++ if (!bh) { ++ apfs_err(sb, "failed to read new block"); ++ err = -EIO; ++ goto fail; ++ } ++ ++ err = apfs_transaction_join(sb, bh); ++ if (err) { ++ brelse(bh); ++ goto fail; ++ } ++ ++ off = i << sb->s_blocksize_bits; ++ tocopy = min(sb->s_blocksize, (unsigned long)(size - off)); ++ memcpy(bh->b_data, value + off, tocopy); ++ if (tocopy < sb->s_blocksize) ++ memset(bh->b_data + tocopy, 0, sb->s_blocksize - tocopy); ++ brelse(bh); ++ ++ dstream->ds_size += tocopy; ++ } ++ ++ err = apfs_flush_extent_cache(dstream); ++ if (err) { ++ apfs_err(sb, "extent cache flush failed for dstream 0x%llx", dstream->ds_id); ++ goto fail; ++ } ++ return dstream; ++ ++fail: ++ kfree(dstream); ++ return ERR_PTR(err); ++} ++ ++/** ++ * apfs_xattr_dstream_from_query - Extract the dstream from a xattr record ++ * @query: the query that found the record ++ * @dstream_p: on return, the newly allocated dstream info (or NULL if none) ++ * ++ * Returns 0 on success, or a negative error code in case of failure. 
++ */ ++static int apfs_xattr_dstream_from_query(struct apfs_query *query, struct apfs_dstream_info **dstream_p) ++{ ++ struct super_block *sb = query->node->object.sb; ++ struct apfs_dstream_info *dstream = NULL; ++ struct apfs_xattr xattr; ++ int err; ++ ++ err = apfs_xattr_from_query(query, &xattr); ++ if (err) { ++ apfs_err(sb, "bad xattr record"); ++ return err; ++ } ++ ++ if (xattr.has_dstream) { ++ dstream = kzalloc(sizeof(*dstream), GFP_KERNEL); ++ if (!dstream) ++ return -ENOMEM; ++ apfs_dstream_from_xattr(sb, &xattr, dstream); ++ } ++ *dstream_p = dstream; ++ return 0; ++} ++ ++/** ++ * apfs_xattr_set - Write a named attribute ++ * @inode: inode the attribute will belong to ++ * @name: name for the attribute ++ * @value: value for the attribute ++ * @size: size of @value ++ * @flags: XATTR_REPLACE and XATTR_CREATE ++ * ++ * Returns 0 on success, or a negative error code in case of failure. ++ */ ++int apfs_xattr_set(struct inode *inode, const char *name, const void *value, ++ size_t size, int flags) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_query *query = NULL; ++ u64 cnid = apfs_ino(inode); ++ int key_len, val_len; ++ struct apfs_xattr_key *raw_key = NULL; ++ struct apfs_xattr_val *raw_val = NULL; ++ struct apfs_dstream_info *dstream = NULL; ++ struct apfs_dstream_info *old_dstream = NULL; ++ int ret; ++ ++ if (size > APFS_XATTR_MAX_EMBEDDED_SIZE) { ++ dstream = apfs_create_xattr_dstream(sb, value, size); ++ if (IS_ERR(dstream)) { ++ apfs_err(sb, "failed to set xattr dstream for ino 0x%llx", apfs_ino(inode)); ++ return PTR_ERR(dstream); ++ } ++ } ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ apfs_init_xattr_key(cnid, name, &query->key); ++ query->flags = APFS_QUERY_CAT | APFS_QUERY_EXACT; ++ ++ ret = apfs_btree_query(sb, &query); ++ if (ret) { ++ if (ret != -ENODATA) { ++ apfs_err(sb, "query failed for id 0x%llx 
(%s)", cnid, name); ++ goto done; ++ } else if (flags & XATTR_REPLACE) { ++ goto done; ++ } ++ } else if (flags & XATTR_CREATE) { ++ ret = -EEXIST; ++ goto done; ++ } else if (!value) { ++ ret = apfs_delete_xattr(query); ++ if (ret) ++ apfs_err(sb, "xattr deletion failed"); ++ goto done; ++ } else { ++ /* Remember the old dstream to clean it up later */ ++ ret = apfs_xattr_dstream_from_query(query, &old_dstream); ++ if (ret) { ++ apfs_err(sb, "failed to get the old dstream"); ++ goto done; ++ } ++ } ++ ++ key_len = apfs_build_xattr_key(name, cnid, &raw_key); ++ if (key_len < 0) { ++ ret = key_len; ++ goto done; ++ } ++ ++ if (dstream) ++ val_len = apfs_build_dstream_xattr_val(dstream, &raw_val); ++ else ++ val_len = apfs_build_inline_xattr_val(value, size, &raw_val); ++ if (val_len < 0) { ++ ret = val_len; ++ goto done; ++ } ++ ++ /* For now this is the only system xattr we support */ ++ if (strcmp(name, APFS_XATTR_NAME_SYMLINK) == 0) ++ raw_val->flags |= cpu_to_le16(APFS_XATTR_FILE_SYSTEM_OWNED); ++ ++ if (ret) ++ ret = apfs_btree_insert(query, raw_key, key_len, raw_val, val_len); ++ else ++ ret = apfs_btree_replace(query, raw_key, key_len, raw_val, val_len); ++ if (ret) { ++ apfs_err(sb, "insertion/update failed for id 0x%llx (%s)", cnid, name); ++ goto done; ++ } ++ ++ if (old_dstream) { ++ ret = apfs_truncate(old_dstream, 0); ++ if (ret) ++ apfs_err(sb, "truncation failed for dstream 0x%llx", old_dstream->ds_id); ++ } ++ ++done: ++ kfree(dstream); ++ kfree(old_dstream); ++ kfree(raw_val); ++ kfree(raw_key); ++ apfs_free_query(query); ++ return ret; ++} ++int APFS_XATTR_SET_MAXOPS(void) ++{ ++ return 1; ++} ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) ++static int apfs_xattr_osx_set(const struct xattr_handler *handler, ++ struct dentry *unused, struct inode *inode, const char *name, ++ const void *value, size_t size, int flags) ++#elif LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) ++static int apfs_xattr_osx_set(const struct xattr_handler *handler, ++ 
struct user_namespace *mnt_userns, struct dentry *unused, ++ struct inode *inode, const char *name, const void *value, ++ size_t size, int flags) ++#else ++static int apfs_xattr_osx_set(const struct xattr_handler *handler, ++ struct mnt_idmap *idmap, struct dentry *unused, ++ struct inode *inode, const char *name, const void *value, ++ size_t size, int flags) ++#endif ++{ ++ struct super_block *sb = inode->i_sb; ++ struct apfs_max_ops maxops; ++ int err; ++ ++ maxops.cat = APFS_XATTR_SET_MAXOPS(); ++ maxops.blks = 0; ++ ++ err = apfs_transaction_start(sb, maxops); ++ if (err) ++ return err; ++ ++ /* Ignore the fake 'osx' prefix */ ++ err = apfs_xattr_set(inode, name, value, size, flags); ++ if (err) ++ goto fail; ++ ++ err = apfs_transaction_commit(sb); ++ if (!err) ++ return 0; ++ ++fail: ++ apfs_transaction_abort(sb); ++ return err; ++} ++ ++static const struct xattr_handler apfs_xattr_osx_handler = { ++ .prefix = XATTR_MAC_OSX_PREFIX, ++ .get = apfs_xattr_osx_get, ++ .set = apfs_xattr_osx_set, ++}; ++ ++/* On-disk xattrs have no namespace; use a fake 'osx' prefix in the kernel */ ++const struct xattr_handler *apfs_xattr_handlers[] = { ++ &apfs_xattr_osx_handler, ++ NULL ++}; ++ ++ssize_t apfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ++{ ++ struct inode *inode = d_inode(dentry); ++ struct super_block *sb = inode->i_sb; ++ struct apfs_sb_info *sbi = APFS_SB(sb); ++ struct apfs_nxsb_info *nxi = APFS_NXI(sb); ++ struct apfs_query *query; ++ u64 cnid = apfs_ino(inode); ++ size_t free = size; ++ ssize_t ret; ++ ++ down_read(&nxi->nx_big_sem); ++ ++ query = apfs_alloc_query(sbi->s_cat_root, NULL /* parent */); ++ if (!query) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ /* We want all the xattrs for the cnid, regardless of the name */ ++ apfs_init_xattr_key(cnid, NULL /* name */, &query->key); ++ query->flags = APFS_QUERY_CAT | APFS_QUERY_MULTIPLE | APFS_QUERY_EXACT; ++ ++ while (1) { ++ struct apfs_xattr xattr; ++ ++ ret = apfs_btree_query(sb, 
&query); ++ if (ret == -ENODATA) { /* Got all the xattrs */ ++ ret = size - free; ++ break; ++ } ++ if (ret) { ++ apfs_err(sb, "query failed for id 0x%llx", cnid); ++ break; ++ } ++ ++ ret = apfs_xattr_from_query(query, &xattr); ++ if (ret) { ++ apfs_err(sb, "bad xattr key in inode %llx", cnid); ++ break; ++ } ++ ++ if (buffer) { ++ /* Prepend the fake 'osx' prefix before listing */ ++ if (xattr.name_len + XATTR_MAC_OSX_PREFIX_LEN + 1 > ++ free) { ++ ret = -ERANGE; ++ break; ++ } ++ memcpy(buffer, XATTR_MAC_OSX_PREFIX, ++ XATTR_MAC_OSX_PREFIX_LEN); ++ buffer += XATTR_MAC_OSX_PREFIX_LEN; ++ memcpy(buffer, xattr.name, xattr.name_len + 1); ++ buffer += xattr.name_len + 1; ++ } ++ free -= xattr.name_len + XATTR_MAC_OSX_PREFIX_LEN + 1; ++ } ++ ++fail: ++ apfs_free_query(query); ++ up_read(&nxi->nx_big_sem); ++ return ret; ++} +diff --git a/fs/apfs/xfield.c b/fs/apfs/xfield.c +new file mode 100644 +index 000000000..b8cbe17fd +--- /dev/null ++++ b/fs/apfs/xfield.c +@@ -0,0 +1,171 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2019 Ernesto A. Fernández ++ */ ++ ++#include ++#include ++#include "apfs.h" ++ ++/** ++ * apfs_find_xfield - Find an extended field value in an inode or dentry record ++ * @xfields: pointer to the on-disk xfield collection for the record ++ * @len: length of the collection ++ * @xtype: type of the xfield to retrieve ++ * @xval: on return, a pointer to the wanted on-disk xfield value ++ * ++ * Returns the length of @xval on success, or 0 if no matching xfield was found; ++ * the caller must check that the expected structures fit before casting @xval. 
++ */ ++int apfs_find_xfield(u8 *xfields, int len, u8 xtype, char **xval) ++{ ++ struct apfs_xf_blob *blob; ++ struct apfs_x_field *xfield; ++ int count; ++ int rest = len; ++ int i; ++ ++ if (!len) ++ return 0; /* No xfield data */ ++ ++ rest -= sizeof(*blob); ++ if (rest < 0) ++ return 0; /* Corruption */ ++ blob = (struct apfs_xf_blob *)xfields; ++ ++ count = le16_to_cpu(blob->xf_num_exts); ++ rest -= count * sizeof(*xfield); ++ if (rest < 0) ++ return 0; /* Corruption */ ++ xfield = (struct apfs_x_field *)blob->xf_data; ++ ++ for (i = 0; i < count; ++i) { ++ int xlen; ++ ++ /* Attribute length is padded to a multiple of 8 */ ++ xlen = round_up(le16_to_cpu(xfield[i].x_size), 8); ++ if (xlen > rest) ++ return 0; /* Corruption */ ++ ++ if (xfield[i].x_type == xtype) { ++ *xval = (char *)xfields + len - rest; ++ return xlen; ++ } ++ rest -= xlen; ++ } ++ return 0; ++} ++ ++/** ++ * apfs_init_xfields - Set an empty collection of xfields in a buffer ++ * @buffer: buffer to hold the xfields ++ * @buflen: length of the buffer; should be enough to fit an xfield blob ++ * ++ * Returns 0 on success, or -1 if the buffer isn't long enough. ++ */ ++int apfs_init_xfields(u8 *buffer, int buflen) ++{ ++ struct apfs_xf_blob *blob; ++ ++ if (buflen < sizeof(*blob)) ++ return -1; ++ blob = (struct apfs_xf_blob *)buffer; ++ ++ blob->xf_num_exts = 0; ++ blob->xf_used_data = 0; ++ return 0; ++} ++ ++/** ++ * apfs_insert_xfield - Add a new xfield to an in-memory collection ++ * @buffer: buffer holding the collection of xfields ++ * @buflen: length of the buffer; should be enough to fit the new xfield ++ * @xkey: metadata for the new xfield ++ * @xval: value for the new xfield ++ * ++ * Returns the new length of the collection, or 0 if it the allocation would ++ * overflow @buffer. 
++ */ ++int apfs_insert_xfield(u8 *buffer, int buflen, const struct apfs_x_field *xkey, ++ const void *xval) ++{ ++ struct apfs_xf_blob *blob; ++ struct apfs_x_field *curr_xkey; ++ void *curr_xval; ++ int count; ++ int rest = buflen; ++ u16 used_data; ++ int xlen, padded_xlen; ++ int meta_len, total_len; ++ int i; ++ ++ xlen = le16_to_cpu(xkey->x_size); ++ padded_xlen = round_up(xlen, 8); ++ ++ if (!buflen) ++ return 0; ++ ++ rest -= sizeof(*blob); ++ if (rest < 0) ++ return 0; ++ blob = (struct apfs_xf_blob *)buffer; ++ used_data = le16_to_cpu(blob->xf_used_data); ++ ++ count = le16_to_cpu(blob->xf_num_exts); ++ rest -= count * sizeof(*curr_xkey); ++ if (rest < 0) ++ return 0; ++ meta_len = buflen - rest; ++ curr_xkey = (struct apfs_x_field *)blob->xf_data; ++ ++ for (i = 0; i < count; ++i, ++curr_xkey) { ++ int curr_xlen; ++ ++ /* Attribute length is padded to a multiple of 8 */ ++ curr_xlen = round_up(le16_to_cpu(curr_xkey->x_size), 8); ++ if (curr_xlen > rest) ++ return 0; ++ if (curr_xkey->x_type != xkey->x_type) { ++ rest -= curr_xlen; ++ continue; ++ } ++ ++ /* The xfield already exists, so just resize it and set it */ ++ memcpy(curr_xkey, xkey, sizeof(*curr_xkey)); ++ if (padded_xlen > rest) ++ return 0; ++ curr_xval = buffer + buflen - rest; ++ rest -= max(padded_xlen, curr_xlen); ++ memmove(curr_xval + padded_xlen, curr_xval + curr_xlen, rest); ++ memcpy(curr_xval, xval, xlen); ++ memset(curr_xval + xlen, 0, padded_xlen - xlen); ++ used_data += padded_xlen - curr_xlen; ++ ++ goto done; ++ } ++ ++ /* Create a metadata entry for the new xfield */ ++ rest -= sizeof(*curr_xkey); ++ if (rest < 0) ++ return 0; ++ meta_len += sizeof(*curr_xkey); ++ memmove(curr_xkey + 1, curr_xkey, buflen - meta_len); ++ memcpy(curr_xkey, xkey, sizeof(*curr_xkey)); ++ ++count; ++ ++ /* Now set the xfield value */ ++ if (padded_xlen > rest) ++ return 0; ++ curr_xval = buffer + buflen - rest; ++ memcpy(curr_xval, xval, xlen); ++ memset(curr_xval + xlen, 0, padded_xlen - xlen); ++ 
used_data += padded_xlen; ++ ++done: ++ total_len = used_data + meta_len; ++ if (total_len > buflen) ++ return 0; ++ blob->xf_num_exts = cpu_to_le16(count); ++ blob->xf_used_data = cpu_to_le16(used_data); ++ return total_len; ++} +-- +2.43.0 + diff --git a/8002-Necessary-modifications-to-build-APFS-with-the-kerne.patch b/8002-Necessary-modifications-to-build-APFS-with-the-kerne.patch new file mode 100644 index 0000000..a6ed91e --- /dev/null +++ b/8002-Necessary-modifications-to-build-APFS-with-the-kerne.patch @@ -0,0 +1,92 @@ +From bdc9881b548752150cdbf9f952c58747fc256b9d Mon Sep 17 00:00:00 2001 +From: Aditya Garg +Date: Sat, 8 Jun 2024 17:48:28 +0530 +Subject: [PATCH] Necessary modifications to build APFS with the kernel + +--- + fs/Kconfig | 1 + + fs/Makefile | 1 + + fs/apfs/Kconfig | 16 ++++++++++++++++ + fs/apfs/Makefile | 17 ++--------------- + 4 files changed, 20 insertions(+), 15 deletions(-) + create mode 100644 fs/apfs/Kconfig + +diff --git a/fs/Kconfig b/fs/Kconfig +index a46b0cbc4..d914838fd 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -317,6 +317,7 @@ source "fs/affs/Kconfig" + source "fs/ecryptfs/Kconfig" + source "fs/hfs/Kconfig" + source "fs/hfsplus/Kconfig" ++source "fs/apfs/Kconfig" + source "fs/befs/Kconfig" + source "fs/bfs/Kconfig" + source "fs/efs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index 6ecc9b0a5..0cfeacc98 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -120,6 +120,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/ + obj-$(CONFIG_TRACING) += tracefs/ + obj-$(CONFIG_OCFS2_FS) += ocfs2/ + obj-$(CONFIG_BTRFS_FS) += btrfs/ ++obj-$(CONFIG_APFS_FS) += apfs/ + obj-$(CONFIG_GFS2_FS) += gfs2/ + obj-$(CONFIG_F2FS_FS) += f2fs/ + obj-$(CONFIG_BCACHEFS_FS) += bcachefs/ +diff --git a/fs/apfs/Kconfig b/fs/apfs/Kconfig +new file mode 100644 +index 000000000..3c209463d +--- /dev/null ++++ b/fs/apfs/Kconfig +@@ -0,0 +1,16 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++config APFS_FS ++ tristate "Apple file system support" ++ select CRYPTO ++ select 
CRYPTO_CRC32C ++ select LIBCRC32C ++ select CRYPTO_XXHASH ++ select CRYPTO_SHA256 ++ select CRYPTO_BLAKE2B ++ select BUFFER_HEAD ++ select FS_IOMAP ++ select LEGACY_DIRECT_IO ++ help ++ If you say Y here, you will be able to mount APFS partitions ++ with read-only access. Write access is experimental and will ++ corrupt your container. +diff --git a/fs/apfs/Makefile b/fs/apfs/Makefile +index ab4c49d55..bc7bc8cc5 100644 +--- a/fs/apfs/Makefile ++++ b/fs/apfs/Makefile +@@ -1,23 +1,10 @@ + # SPDX-License-Identifier: GPL-2.0-only + # +-# Makefile for the out-of-tree Linux APFS module. ++# Makefile for the Linux APFS module. + # + +-KERNELRELEASE ?= $(shell uname -r) +-KERNEL_DIR ?= /lib/modules/$(KERNELRELEASE)/build +-PWD := $(shell pwd) +- +-obj-m = apfs.o ++obj-$(CONFIG_APFS_FS) = apfs.o + apfs-y := btree.o compress.o dir.o extents.o file.o inode.o key.o libzbitmap.o \ + lzfse/lzfse_decode.o lzfse/lzfse_decode_base.o lzfse/lzfse_fse.o \ + lzfse/lzvn_decode_base.o message.o namei.o node.o object.o snapshot.o \ + spaceman.o super.o symlink.o transaction.o unicode.o xattr.o xfield.o +- +-default: +- ./genver.sh +- make -C $(KERNEL_DIR) M=$(PWD) +-install: +- make -C $(KERNEL_DIR) M=$(PWD) modules_install +-clean: +- rm -f version.h +- make -C $(KERNEL_DIR) M=$(PWD) clean +-- +2.43.0 + diff --git a/extra_config b/extra_config index c8c81ed..ae3d6bb 100644 --- a/extra_config +++ b/extra_config @@ -14,3 +14,4 @@ CONFIG_HID_APPLE_MAGIC_BACKLIGHT=m CONFIG_HID_SENSOR_ALS=m CONFIG_SND_PCM=m CONFIG_STAGING=y +CONFIG_APFS_FS=m