From 6a81fea8782ec18f99f894a775b95f2bc7096768 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 5 Nov 2024 09:25:06 +0000 Subject: [PATCH 1/4] CP-52226 - oxenstored: Implement partial directory call for dom0 with per-connection cache Implements XS_DIRECTORY_PART (originally specified in https://lore.kernel.org/xen-devel/20161205074853.13268-1-jgross@suse.com/). Like in CXenstored, it's limited to dom0. It is intended to list directories larger than the size of a single payload (4096 bytes) over several calls (not necessarily inside a single transaction). To alleviate the impact of the differences in how node's children are represented between C and OCaml Xenstoreds, implements a per-connection cache allowing the string representation of node's children to be generated once and reused later as long as the underlying list of children doesn't change (and the connection is reused). Whereas CXenstore uses a global generation count, modified at transaction creation and node changes, the OCaml version relies on comparisons of immutable trees. A per-node generation count is instead faked in the connection cache - it allows clients to know that the underlying information has changed and they should restart the sequence of partial directory calls to get a consistent picture. (i.e., while the generation count returned by CXenstore is globally unique, this is not useful for this particular function, since generation count is only compared against the results of previous invocations of partial directory calls on the same node) The cache is refreshed by OXenstored itself, and is limited to 8 entries (by default, configurable). A direct translation of the C code when it comes to string handling and allocation. Signed-off-by: Andrii Sultanov --- oxenstored/connection.ml | 30 +++++++++++++++++ oxenstored/logging.ml | 2 ++ oxenstored/process.ml | 60 ++++++++++++++++++++++++++++++++++ oxenstored/store.ml | 12 ++++--- oxenstored/transaction.ml | 69 ++++++++++++++++++++++++++++++++++++++- xenbus/op.ml | 5 +++ xenbus/op.mli | 1 + xenbus/xb.mli | 1 + 8 files changed, 174 insertions(+), 6 deletions(-) diff --git a/oxenstored/connection.ml b/oxenstored/connection.ml index 3341d7e..885f47f 100644 --- a/oxenstored/connection.ml +++ b/oxenstored/connection.ml @@ -103,6 +103,25 @@ and t = { xb: Xenbus.Xb.t ; dom: Domain.t option ; transactions: (int, Transaction.t) Hashtbl.t + ; directory_cache: + (Store.Path.t, Store.Node.t * int64 * string * float) Hashtbl.t option + (* Used for partial_directory calls, maps + path -> (node, generation_count, children, timestamp) + + Generation count is used by the client to determine if the node has + changed inbetween partial directory calls (OXenstored itself uses + physical comparison between nodes, this exists for compatibility + with the original protocol implemented by CXenstored). Documentation + specifies that " being the same for multiple reads guarantees + the node hasn't changed", so we fake a per-node generation count + (CXenstored uses a global generation count that gets modified on every + node write, incremented on transaction creation instead) + + This needs to be per-connection rather than per-transaction because + clients might not necessarily create a transaction for all partial + directory calls, we should keep the cache inbetween the transactions + as well. + *) ; mutable next_tid: int ; watches: (string, watch list) Hashtbl.t ; mutable nb_watches: int @@ -205,11 +224,22 @@ let create xbcon dom = | Some _ -> 0 in + let directory_cache = + match dom with + | Some dom -> + if Domain.get_id dom = 0 then + Some (Hashtbl.create 8) + else + None + | None -> + Some (Hashtbl.create 8) + in let con = { xb= xbcon ; dom ; transactions= Hashtbl.create 5 + ; directory_cache ; next_tid= initial_next_tid ; watches= Hashtbl.create 8 ; nb_watches= 0 diff --git a/oxenstored/logging.ml b/oxenstored/logging.ml index c7360a1..29165e7 100644 --- a/oxenstored/logging.ml +++ b/oxenstored/logging.ml @@ -283,6 +283,8 @@ let string_of_access_type = function "debug " | Xenbus.Xb.Op.Directory -> "directory" + | Xenbus.Xb.Op.Directory_part -> + "directory_partial" | Xenbus.Xb.Op.Read -> "read " | Xenbus.Xb.Op.Getperms -> diff --git a/oxenstored/process.ml b/oxenstored/process.ml index 676d920..44b6bba 100644 --- a/oxenstored/process.ml +++ b/oxenstored/process.ml @@ -341,6 +341,63 @@ let do_directory con t _domains _cons data = else "" +let do_directory_part con t _domains _cons data = + (* Call only available to Dom0 *) + if not (Connection.is_dom0 con) then + raise Domain_not_match ; + + let directory_cache = Option.get con.Connection.directory_cache in + + let split_two_args data con = + let args = split (Some 3) '\000' data in + match args with + | path :: offset :: _ -> + (Store.Path.create path (Connection.get_path con), int_of_string offset) + | _ -> + raise Invalid_Cmd_Args + in + + (* First arg is node name. Second arg is childlist offset. *) + let path, offset = split_two_args data con in + + let generation, children = + Transaction.ls_partial t (Connection.get_perm con) path directory_cache + in + + let generation_s = Int64.to_string generation ^ "\000" in + let genlen = String.length generation_s in + let children_length = String.length children in + + (* Offset behind list: just return a list with an empty string. *) + if offset >= children_length then + generation_s ^ "\000" + else + let buffer_length = ref 0 in + let maxlen = Connection.xenstore_payload_max - genlen - 1 in + let child_length = ref 0 in + let i = ref offset in + let cache_length = String.length children in + while !buffer_length + !child_length < maxlen && !i < cache_length do + child_length := !child_length + 1 ; + + if children.[!i] = '\000' then ( + if !buffer_length + !child_length < maxlen then + buffer_length := !buffer_length + !child_length ; + + child_length := 0 + ) ; + i := !i + 1 + done ; + let last_chunk = offset + !buffer_length = children_length in + let buffer = + Bytes.create (!buffer_length + genlen + if last_chunk then 1 else 0) + in + String.blit generation_s 0 buffer 0 genlen ; + String.blit children offset buffer genlen !buffer_length ; + if last_chunk then + Bytes.set buffer (!buffer_length + genlen) '\000' ; + Bytes.to_string buffer + let do_read con t _domains _cons data = let path = split_one_path data con in Transaction.read t (Connection.get_perm con) path @@ -482,6 +539,8 @@ let function_of_type_simple_op ty = raise (Invalid_argument (Xenbus.Xb.Op.to_string ty)) | Xenbus.Xb.Op.Directory -> reply_data do_directory + | Xenbus.Xb.Op.Directory_part -> + reply_data do_directory_part | Xenbus.Xb.Op.Read -> reply_data do_read | Xenbus.Xb.Op.Getperms -> @@ -819,6 +878,7 @@ let retain_op_in_history ty = true | Xenbus.Xb.Op.Debug | Xenbus.Xb.Op.Directory + | Xenbus.Xb.Op.Directory_part | Xenbus.Xb.Op.Read | Xenbus.Xb.Op.Getperms | Xenbus.Xb.Op.Watch diff --git a/oxenstored/store.ml b/oxenstored/store.ml index d851352..dc48cb7 100644 --- a/oxenstored/store.ml +++ b/oxenstored/store.ml @@ -168,6 +168,8 @@ module Path = struct let to_string t = "/" ^ String.concat "/" t + let compare a b = String.compare (to_string a) (to_string b) + let to_string_list x = x let get_parent t = if t = [] then [] else List.rev (List.tl (List.rev t)) @@ -361,19 +363,19 @@ let read store perm path = Path.apply store.root path do_read let ls store perm path = - let children = + let node, children = if path = [] then ( Node.check_perm store.root perm Perms.READ ; - Node.get_children store.root + (store.root, Node.get_children store.root) ) else let do_ls node name = let cnode = Node.find node name in Node.check_perm cnode perm Perms.READ ; - cnode.Node.children + (cnode, cnode.Node.children) in Path.apply store.root path do_ls in - SymbolMap.fold (fun k _ accu -> Symbol.to_string k :: accu) children [] + (node, SymbolMap.fold (fun k _ accu -> Symbol.to_string k :: accu) children []) let getperms store perm path = if path = [] then ( @@ -516,7 +518,7 @@ type ops = { ; mkdir: Path.t -> unit ; rm: Path.t -> unit ; setperms: Path.t -> Perms.Node.t -> unit - ; ls: Path.t -> string list + ; ls: Path.t -> Node.t * string list ; read: Path.t -> string ; getperms: Path.t -> Perms.Node.t ; path_exists: Path.t -> bool diff --git a/oxenstored/transaction.ml b/oxenstored/transaction.ml index 1c5211b..1c840e4 100644 --- a/oxenstored/transaction.ml +++ b/oxenstored/transaction.ml @@ -219,9 +219,76 @@ let rm t perm path = add_wop t Xenbus.Xb.Op.Rm path let ls t perm path = - let r = Store.ls t.store perm path in + let _node, r = Store.ls t.store perm path in set_read_lowpath t path ; r +let ls_partial t perm path directory_cache = + let configurable_cache_size = 8 in + let check_hashtable_bounds ht = + if Hashtbl.length ht >= configurable_cache_size then + let key, _min_timestamp = + Hashtbl.fold + (fun key (_, _, _, timestamp) (min_key, min_timestamp) -> + if timestamp < min_timestamp then + (Some key, timestamp) + else + (min_key, min_timestamp) + ) + ht (None, infinity) + in + Hashtbl.remove ht (Option.get key) + in + + (* Return the cached buffer if the node hasn't changed; otherwise + create or refresh the cache *) + let ( let* ) = Option.bind in + let cache_opt = Hashtbl.find_opt directory_cache path in + let cache = + let* cached_node, gen_count, cache, _timestamp = cache_opt in + + let* cur_node = Store.get_node t.store path in + (* Regenerate the cache entry if the underlying node changed *) + if cached_node == cur_node then + (* TODO check_parents_perms_identical oldroot currentroot path*) + Some (cached_node, gen_count, cache) + else + None + in + let ret = + match cache with + | Some (cached_node, gen_count, cache) -> + (* Update the timestamp in the cache *) + Hashtbl.replace directory_cache path + (cached_node, gen_count, cache, Unix.gettimeofday ()) ; + (gen_count, cache) + | None -> + let node, entries = Store.ls t.store perm path in + let r = + if List.length entries > 0 then + Utils.join_by_null entries ^ "\000" + else + "" + in + (* We need to increment the generation count if the cache existed + but needed to be refreshed *) + let generation_count = + match cache_opt with + | Some (_, generation_count, _, _) -> + Int64.add generation_count 1L + | None -> + 1L + (* Start the generation count with 1, since 0 is a special + NULL value in CXenstored *) + in + (* Check the bounds, evict something from the hashtable + to keep its size bounded *) + check_hashtable_bounds directory_cache ; + Hashtbl.replace directory_cache path + (node, generation_count, r, Unix.gettimeofday ()) ; + (generation_count, r) + in + set_read_lowpath t path ; ret + let read t perm path = let r = Store.read t.store perm path in set_read_lowpath t path ; r diff --git a/xenbus/op.ml b/xenbus/op.ml index 62969ed..17e2bc1 100644 --- a/xenbus/op.ml +++ b/xenbus/op.ml @@ -36,8 +36,10 @@ type operation = | Resume | Set_target | Reset_watches + | Directory_part | Invalid +(* See xs_wire.h for the required order *) let operation_c_mapping = [| Debug @@ -62,6 +64,7 @@ let operation_c_mapping = ; Set_target ; Invalid ; Reset_watches + ; Directory_part |] let size = Array.length operation_c_mapping @@ -126,5 +129,7 @@ let to_string ty = "SET_TARGET" | Reset_watches -> "RESET_WATCHES" + | Directory_part -> + "DIRECTORY_PART" | Invalid -> "INVALID" diff --git a/xenbus/op.mli b/xenbus/op.mli index dae3c07..9e10d76 100644 --- a/xenbus/op.mli +++ b/xenbus/op.mli @@ -20,6 +20,7 @@ type operation = | Resume | Set_target | Reset_watches + | Directory_part | Invalid val operation_c_mapping : operation array diff --git a/xenbus/xb.mli b/xenbus/xb.mli index 0792e5d..8f2d2de 100644 --- a/xenbus/xb.mli +++ b/xenbus/xb.mli @@ -21,6 +21,7 @@ module Op : sig | Resume | Set_target | Reset_watches + | Directory_part | Invalid val operation_c_mapping : operation array From bccbb74e3fd722e7ba3d076c3fef5dac0aea8794 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 12 Nov 2024 13:41:11 +0000 Subject: [PATCH 2/4] CP-52226 - oxenstored: Return XS_ERROR (E2BIG) on socket connections too Original XSA fix only returns XS_ERROR payload with E2BIG inside for domU connections. libxenstore, however, expects an E2BIG response for proper handling of partial directory calls, for dom0 as well. In practice this removes the possibility for a "large packets" patch: https://github.com/xenserver/xen.pg/blob/9cde7213128d777c4856d719c10945c052df0ce1/patches/oxenstore-large-packets.patch It should be now superseded with the proper partial directory support. Signed-off-by: Andrii Sultanov --- oxenstored/connection.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oxenstored/connection.ml b/oxenstored/connection.ml index 885f47f..ac1e398 100644 --- a/oxenstored/connection.ml +++ b/oxenstored/connection.ml @@ -276,8 +276,8 @@ let set_target con target_domid = let is_backend_mmap con = Xenbus.Xb.is_mmap con.xb -let packet_of con tid rid ty data = - if String.length data > xenstore_payload_max && is_backend_mmap con then +let packet_of _con tid rid ty data = + if String.length data > xenstore_payload_max then Xenbus.Xb.Packet.create tid rid Xenbus.Xb.Op.Error "E2BIG\000" else Xenbus.Xb.Packet.create tid rid ty data From 0fea4922b8d925fc930a4079e4a12934f83e5978 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 12 Nov 2024 14:18:33 +0000 Subject: [PATCH 3/4] fixup! CP-52226 - oxenstored: Implement partial directory call for dom0 with per-connection cache Signed-off-by: Andrii Sultanov --- oxenstored/store.ml | 2 -- 1 file changed, 2 deletions(-) diff --git a/oxenstored/store.ml b/oxenstored/store.ml index dc48cb7..fd218f6 100644 --- a/oxenstored/store.ml +++ b/oxenstored/store.ml @@ -168,8 +168,6 @@ module Path = struct let to_string t = "/" ^ String.concat "/" t - let compare a b = String.compare (to_string a) (to_string b) - let to_string_list x = x let get_parent t = if t = [] then [] else List.rev (List.tl (List.rev t)) From 680ccc8f765c2fe54286b94a8acec4c1e2f95f12 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 12 Nov 2024 14:30:38 +0000 Subject: [PATCH 4/4] Change List.length l > 0 to l <> [], avoiding an unnecessary iteration Signed-off-by: Andrii Sultanov --- oxenstored/connection.ml | 2 +- oxenstored/process.ml | 2 +- oxenstored/transaction.ml | 4 ++-- oxenstored/xenstored.ml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/oxenstored/connection.ml b/oxenstored/connection.ml index ac1e398..71f4ee2 100644 --- a/oxenstored/connection.ml +++ b/oxenstored/connection.ml @@ -337,7 +337,7 @@ let del_watch con path token = let ws = Hashtbl.find con.watches apath in let w = List.find (fun w -> w.token = token) ws in let filtered = Utils.list_remove w ws in - if List.length filtered > 0 then + if filtered <> [] then Hashtbl.replace con.watches apath filtered else Hashtbl.remove con.watches apath ; diff --git a/oxenstored/process.ml b/oxenstored/process.ml index 44b6bba..ca692b0 100644 --- a/oxenstored/process.ml +++ b/oxenstored/process.ml @@ -336,7 +336,7 @@ let do_debug con t _domains cons data = let do_directory con t _domains _cons data = let path = split_one_path data con in let entries = Transaction.ls t (Connection.get_perm con) path in - if List.length entries > 0 then + if entries <> [] then Utils.join_by_null entries ^ "\000" else "" diff --git a/oxenstored/transaction.ml b/oxenstored/transaction.ml index 1c840e4..94be2b9 100644 --- a/oxenstored/transaction.ml +++ b/oxenstored/transaction.ml @@ -264,7 +264,7 @@ let ls_partial t perm path directory_cache = | None -> let node, entries = Store.ls t.store perm path in let r = - if List.length entries > 0 then + if entries <> [] then Utils.join_by_null entries ^ "\000" else "" @@ -298,7 +298,7 @@ let getperms t perm path = set_read_lowpath t path ; r let commit ~con t = - let has_write_ops = List.length t.paths > 0 in + let has_write_ops = t.paths <> [] in let has_coalesced = ref false in let has_commited = match t.ty with diff --git a/oxenstored/xenstored.ml b/oxenstored/xenstored.ml index 789cdf9..e0f63e8 100644 --- a/oxenstored/xenstored.ml +++ b/oxenstored/xenstored.ml @@ -687,10 +687,10 @@ let () = with Unix.Unix_error (Unix.EINTR, _, _) -> ([], [], []) in let sfds, cfds = List.partition (fun fd -> List.mem fd spec_fds) rset in - if List.length sfds > 0 then + if sfds <> [] then process_special_fds sfds ; - if List.length cfds > 0 || List.length wset > 0 then + if cfds <> [] || wset <> [] then process_connection_fds store cons domains cfds wset ; ( if timeout <> 0. then let now = Unix.gettimeofday () in