From 1301bef46b89f7c06f8cf61b3a1238ec1dd10c28 Mon Sep 17 00:00:00 2001 From: Thibaut Mattio Date: Sat, 13 May 2023 20:07:23 +0200 Subject: [PATCH] Add encoding primitives and a compression middleware --- dream.opam | 1 + example/m-compress/README.md | 25 ++++ example/m-compress/compress.ml | 3 + example/m-compress/dune | 5 + example/m-compress/dune-project | 1 + example/m-compress/esy.json | 18 +++ src/dream.ml | 11 ++ src/dream.mli | 65 +++++++++++ src/server/accept.ml | 169 +++++++++++++++++++++++++++ src/server/accept.mli | 41 +++++++ src/server/dune | 2 + src/unix/encoding.ml | 201 ++++++++++++++++++++++++++++++++ 12 files changed, 542 insertions(+) create mode 100644 example/m-compress/README.md create mode 100644 example/m-compress/compress.ml create mode 100644 example/m-compress/dune create mode 100644 example/m-compress/dune-project create mode 100644 example/m-compress/esy.json create mode 100644 src/server/accept.ml create mode 100644 src/server/accept.mli create mode 100644 src/unix/encoding.ml diff --git a/dream.opam b/dream.opam index 07e14a0b..5f13a313 100644 --- a/dream.opam +++ b/dream.opam @@ -54,6 +54,7 @@ depends: [ "caqti-lwt" "conf-libev" {os != "win32"} "cstruct" {>= "6.0.0"} + "decompress" {>= "1.4.1"} "dream-httpaf" {>= "1.0.0~alpha2"} "dream-pure" {>= "1.0.0~alpha2"} "dune" {>= "2.7.0"} # --instrument-with. diff --git a/example/m-compress/README.md b/example/m-compress/README.md new file mode 100644 index 00000000..3d156d81 --- /dev/null +++ b/example/m-compress/README.md @@ -0,0 +1,25 @@ +# `m-compress` + +
+ +Demonstrate how to compress responses using the `Dream.compress` middleware. + +```ocaml +let () = + Dream.run + @@ Dream.logger + @@ Dream.compress + @@ Dream.router [ Dream.get "/" (fun _ -> Dream.html "Hello World!") ] +``` + +The middleware will parse the `Accept-Encoding` header from the requests and compress the responses accordingly. + +## Limitation + +As of now, the only supported encoding directives are `gzip` and `deflate`. + +Support for more compression methods will come when they are supported in `decompress`, the underlying compression library used in `dream-encoding`. + +
+ +[Up to the tutorial index](../#readme) diff --git a/example/m-compress/compress.ml b/example/m-compress/compress.ml new file mode 100644 index 00000000..8c1d0875 --- /dev/null +++ b/example/m-compress/compress.ml @@ -0,0 +1,3 @@ +let () = + Dream.run @@ Dream.logger @@ Dream.compress + @@ Dream.router [ Dream.get "/" (fun _ -> Dream.html "Hello World!") ] \ No newline at end of file diff --git a/example/m-compress/dune b/example/m-compress/dune new file mode 100644 index 00000000..354c6dc2 --- /dev/null +++ b/example/m-compress/dune @@ -0,0 +1,5 @@ +(executable + (name compress) + (libraries dream)) + +(data_only_dirs _esy esy.lock lib node_modules) diff --git a/example/m-compress/dune-project b/example/m-compress/dune-project new file mode 100644 index 00000000..929c696e --- /dev/null +++ b/example/m-compress/dune-project @@ -0,0 +1 @@ +(lang dune 2.0) diff --git a/example/m-compress/esy.json b/example/m-compress/esy.json new file mode 100644 index 00000000..b15b44f0 --- /dev/null +++ b/example/m-compress/esy.json @@ -0,0 +1,18 @@ +{ + "dependencies": { + "@opam/conf-libssl": "3", + "@opam/dream": "1.0.0~alpha5", + "@opam/dune": "^3.0", + "ocaml": "^4.14.0" + }, + "devDependencies": { + "@opam/ocaml-lsp-server": "*" + }, + "resolutions": { + "@opam/conf-libev": "esy-packages/libev:package.json#0b5eb6685b688649045aceac55dc559f6f21b829", + "esy-openssl": "esy-packages/esy-openssl#619ae2d46ca981ec26ab3287487ad98b157a01d1" + }, + "scripts": { + "start": "dune exec --root . ./compress.exe" + } +} diff --git a/src/dream.ml b/src/dream.ml index 7409ac00..c5c54ffc 100644 --- a/src/dream.ml +++ b/src/dream.ml @@ -10,6 +10,7 @@ module Cipher = Dream__cipher.Cipher module Cookie = Dream__server.Cookie module Csrf = Dream__server.Csrf module Echo = Dream__server.Echo +module Encoding = Dream__unix.Encoding module Error_handler = Dream__http.Error_handler module Flash = Dream__server.Flash module Form = Dream__server.Form @@ -185,6 +186,14 @@ let receive_fragment = Helpers.receive_fragment let close_websocket = Message.close_websocket +(* Encoding *) + +let with_encoded_body = Encoding.with_encoded_body +let accepted_encodings = Encoding.accepted_encodings +let accepted_encodings_with_weights = Encoding.accepted_encodings_with_weights +let content_encodings = Encoding.content_encodings +let preferred_content_encoding = Encoding.preferred_content_encoding + (* JSON *) @@ -216,6 +225,8 @@ let csrf_tag = Tag.csrf_tag ~now let no_middleware = Message.no_middleware let pipeline = Message.pipeline +let compress = Encoding.compress +let decompress = Encoding.decompress diff --git a/src/dream.mli b/src/dream.mli index 4d122e2f..74a9df22 100644 --- a/src/dream.mli +++ b/src/dream.mli @@ -898,6 +898,56 @@ val close_websocket : ?code:int -> websocket -> unit promise {{:https://tools.ietf.org/html/rfc6455#section-7.4} RFC 6455 ยง7.4}. *) +(** {1 Encoding} *) + +val with_encoded_body : + ?algorithm:[`Deflate | `Gzip] -> string -> response -> response +(** [with_encoded_body ?algorithm body response] replaces the body of the + response with [body] compressed with [algorithm] and adds the corresponding + [Content-Encoding] header. + + [algorithm] defaults to [`Deflate]. *) + +val accepted_encodings : + 'a message -> + [`Gzip | `Compress | `Deflate | `Identity | `Any | `Unknown of string] list + option +(** Retrieve the list of accepted encoding directives from the [Accept-Encoding] + header, ordered by quality weight in decreasing order. + + If the request does not have an [Accept-Encoding] header, this returns + [None]. *) + +val accepted_encodings_with_weights : + 'a message -> + ([`Gzip | `Compress | `Deflate | `Identity | `Any | `Unknown of string] * int) + list + option +(** Same as [accepted_encoding], but returns the quality weights associated to + the encoding directive. *) + +val content_encodings : + 'a message -> + [`Gzip | `Compress | `Deflate | `Identity | `Any | `Unknown of string] list + option +(** Retrieve the list of content encoding directives from the [Content-Encoding] + header. + + If the request does not have an [Content-Encoding] header, this returns + [None]. *) + +val preferred_content_encoding : 'a message -> [`Deflate | `Gzip] option +(** Retrieve preferred encoding directive from the [Accept-Encoding]. + + The preferred encoding directive is the first supported algorithm in the + list of accepted directives sorted by quality weight. + + If [*] is given as the preferred encoding, [`Gzip] is returned. This is to + be on par with the behavior of [compress]. + + If no algorithm is supported, or if the request does not have an + [Accept-Encoding] header, this returns [None]. *) + (** {1 JSON} @@ -1315,6 +1365,21 @@ Dream.pipeline [middleware_1; middleware_2] @@ handler middleware_1 @@ middleware_2 @@ handler v} *) +val compress : middleware +(** Middleware that reads the [Accept-Encoding] header of the request and + compresses the responses with the preferred supported algorithm. *) + +val decompress : middleware +(** Middleware that reads the [Content-Encoding] of the request and decompresses + the body if all of the directives of the header are supported. + + If one or more of the directive is not supported, an HTTP response + [415 Unsupported Media Type] is returned to the client. + + Note that although HTTP supports encoding requests, it is rarely used in + practice. See [compress] to for a middleware that compresses the responses + instead. *) + (* TODO Need a way to create fresh streams. *) (** {2 Stream transformers} diff --git a/src/server/accept.ml b/src/server/accept.ml new file mode 100644 index 00000000..ae36b319 --- /dev/null +++ b/src/server/accept.ml @@ -0,0 +1,169 @@ +(* From + https://github.com/lyrm/ocaml-httpadapter/blob/master/src-httpaf/accept.ml + + Copyright (c) 2019 Carine Morel + + Permission to use, copy, modify, and distribute this software for any purpose + with or without fee is hereby granted, provided that the above copyright + notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. *) + +open Angstrom +open Printf + +type encoding = + | Encoding of string + | Gzip + | Compress + | Deflate + | Identity + | Any + +type p = string * string +type q = int +type 'a qlist = (q * 'a) list + +(** Lexer *) +let is_space = function + | ' ' | '\t' -> true + | _ -> false + +let is_token = function + | '\000' .. '\031' + | '\127' + | ')' + | '(' + | '<' + | '>' + | '@' + | ',' + | ';' + | ':' + | '"' + | '/' + | '[' + | ']' + | '?' + | '=' + | '{' + | '}' + | ' ' -> false + | _s -> true + +let ows = skip is_space <|> return () +let token = take_while1 is_token +let sep_by1_comma value_parser = sep_by1 (char ',') value_parser <* end_of_input + +let eval_parser parser default_value = function + | None -> [(1000, default_value)] + | Some str -> ( + match parse_string ~consume:Angstrom.Consume.All parser str with + | Ok v -> v + | Error msg -> failwith msg) + +(** Parser for header parameters like defined in rfc + https://tools.ietf.org/html/rfc7231#section-5.3.2 *) +type param = + | Q of int + | Kv of p + +let q_of_string s = truncate (1000. *. float_of_string s) + +(* More restrictive than cohttp counterpart *) +let qs = char '"' *> token <* char '"' + +(* a header parameter can be : OWS ; OWS q=[value] OWS ; OWS [name]=[value] OWS + ; OWS [name]="[value]" *) +let param : param t = + ows + *> char ';' + *> ows + *> (* OWS ; OWS q=[value] OWS ; OWS [name]=[value]*) + (lift2 + (fun n v -> if n = "q" then Q (q_of_string v) else Kv (n, v)) + token + (char '=' *> token) + <|> (* OWS ; OWS [name]="[value]" *) + lift2 (fun n v -> Kv (n, v)) token (char '=' *> qs)) + +let params = many param + +let rec get_q params = + match params with + | [] -> 1000 + | Q q :: _ -> q + | _ :: r -> get_q r + +(** Parser for values of Accept-encoding header. Example: Accept-Encoding: + compress, gzip Accept-Encoding: Accept-Encoding: * Accept-Encoding: + compress;q=0.5, gzip;q=1.0 Accept-Encoding: gzip;q=1.0, identity; q=0.5, + *;q=0 *) +let encoding_value_parser = + ows + *> (char '*' *> return (Any : encoding) + <|> lift + (fun s -> + match String.lowercase_ascii s with + | "gzip" -> Gzip + | "compress" -> Compress + | "deflate" -> Deflate + | "identity" -> Identity + | enc -> Encoding enc) + token) + +let encoding_parser = + lift2 (fun value q -> (q, value)) encoding_value_parser (lift get_q params) + +let encodings_parser = sep_by1_comma encoding_parser +let encodings = eval_parser encodings_parser Any + +(** Other functions (from Cohttp.Accept) *) +let rec string_of_pl = function + | [] -> "" + | (k, v) :: r -> + let e = Stringext.quote v in + if v = e then + sprintf ";%s=%s%s" k v (string_of_pl r) + else + sprintf ";%s=\"%s\"%s" k e (string_of_pl r) + +let string_of_q = function + | q when q < 0 -> invalid_arg (Printf.sprintf "qvalue %d must be positive" q) + | q when q > 1000 -> + invalid_arg (Printf.sprintf "qvalue %d must be less than 1000" q) + | 1000 -> "1" + | q -> Printf.sprintf "0.%03d" q + +let accept_el ?q el pl = + match q with + | Some q -> sprintf "%s;q=%s%s" el (string_of_q q) (string_of_pl pl) + | None -> el + +let string_of_encoding ?q = function + | Encoding e -> accept_el ?q e [] + | Gzip -> accept_el ?q "gzip" [] + | Compress -> accept_el ?q "compress" [] + | Deflate -> accept_el ?q "deflate" [] + | Identity -> accept_el ?q "identity" [] + | Any -> accept_el ?q "*" [] + +let string_of_list s_of_el = + let rec aux s = function + | [(q, el)] -> s ^ s_of_el el q + | [] -> s + | (q, el) :: r -> aux (s ^ s_of_el el q ^ ",") r + in + aux "" + +let string_of_encodings = string_of_list (fun el q -> string_of_encoding ~q el) + +let qsort l = + let compare ((i : int), _) (i', _) = compare i' i in + List.stable_sort compare l diff --git a/src/server/accept.mli b/src/server/accept.mli new file mode 100644 index 00000000..544dbe18 --- /dev/null +++ b/src/server/accept.mli @@ -0,0 +1,41 @@ +(* From https://github.com/lyrm/ocaml-httpadapter/blob/master/src/http.mli + + Copyright (c) 2019 Carine Morel + + Permission to use, copy, modify, and distribute this software for any purpose + with or without fee is hereby granted, provided that the above copyright + notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. *) + +type p = string * string + +type encoding = + | Encoding of string + | Gzip + | Compress + | Deflate + | Identity + | Any + +(** Accept-Encoding HTTP header parsing and generation *) + +type q = int +(** Qualities are integers between 0 and 1000. A header with ["q=0.7"] + corresponds to a quality of [700]. *) + +type 'a qlist = (q * 'a) list +(** Lists, annotated with qualities. *) + +val qsort : 'a qlist -> 'a qlist +(** Sort by quality, biggest first. Respect the initial ordering. *) + +val encodings : string option -> encoding qlist +val string_of_encoding : ?q:q -> encoding -> string +val string_of_encodings : encoding qlist -> string \ No newline at end of file diff --git a/src/server/dune b/src/server/dune index d7dd67b7..c40164ae 100644 --- a/src/server/dune +++ b/src/server/dune @@ -2,6 +2,8 @@ (public_name dream.server) (name dream__server) (libraries + decompress.de + decompress.gz digestif dream.cipher dream-pure diff --git a/src/unix/encoding.ml b/src/unix/encoding.ml new file mode 100644 index 00000000..8aabf30b --- /dev/null +++ b/src/unix/encoding.ml @@ -0,0 +1,201 @@ +(* This file is part of Dream, released under the MIT license. See LICENSE.md + for details, or visit https://github.com/aantron/dream. + + Copyright 2023 Thibaut Mattio *) + +module Accept = Dream__server.Accept +module Helpers = Dream__server.Helpers +module Log = Dream__server.Log +module Message = Dream_pure.Message + +let log = Log.sub_log "dream.encoding" + +let inflate_string_de str = + let i = De.bigstring_create De.io_buffer_size in + let o = De.bigstring_create De.io_buffer_size in + let w = De.make_window ~bits:15 in + let r = Buffer.create 0x1000 in + let p = ref 0 in + let refill buf = + let len = min (String.length str - !p) De.io_buffer_size in + Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; + p := !p + len; + len + in + let flush buf len = + let str = Bigstringaf.substring buf ~off:0 ~len in + Buffer.add_string r str + in + match De.Higher.uncompress ~w ~refill ~flush i o with + | Ok () -> Ok (Buffer.contents r) + | Error _ as err -> err + +let deflate_string_de str = + let i = De.bigstring_create De.io_buffer_size in + let o = De.bigstring_create De.io_buffer_size in + let w = De.Lz77.make_window ~bits:15 in + let q = De.Queue.create 0x1000 in + let r = Buffer.create 0x1000 in + let p = ref 0 in + let refill buf = + (* assert (buf == i); *) + let len = min (String.length str - !p) De.io_buffer_size in + Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; + p := !p + len; + len + in + let flush buf len = + (* assert (buf == o); *) + let str = Bigstringaf.substring buf ~off:0 ~len in + Buffer.add_string r str + in + De.Higher.compress ~w ~q ~refill ~flush i o; + Buffer.contents r + +let inflate_string_gz str = + let i = De.bigstring_create De.io_buffer_size in + let o = De.bigstring_create De.io_buffer_size in + let r = Buffer.create 0x1000 in + let p = ref 0 in + let refill buf = + let len = min (String.length str - !p) De.io_buffer_size in + Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; + p := !p + len; + len + in + let flush buf len = + let str = Bigstringaf.substring buf ~off:0 ~len in + Buffer.add_string r str + in + match Gz.Higher.uncompress ~refill ~flush i o with + | Ok _ -> Ok (Buffer.contents r) + | Error _ as err -> err + +let time () = Int32.of_float (Unix.gettimeofday ()) + +let deflate_string_gz ?(level = 4) str = + let i = De.bigstring_create De.io_buffer_size in + let o = De.bigstring_create De.io_buffer_size in + let w = De.Lz77.make_window ~bits:15 in + let q = De.Queue.create 0x1000 in + let r = Buffer.create 0x1000 in + let p = ref 0 in + let cfg = Gz.Higher.configuration Gz.Unix time in + let refill buf = + let len = min (String.length str - !p) De.io_buffer_size in + Bigstringaf.blit_from_string str ~src_off:!p buf ~dst_off:0 ~len; + p := !p + len; + len + in + let flush buf len = + let str = Bigstringaf.substring buf ~off:0 ~len in + Buffer.add_string r str + in + Gz.Higher.compress ~w ~q ~level ~refill ~flush () cfg i o; + Buffer.contents r + +let inflate_string ~algorithm str = + match algorithm with + | `Deflate -> inflate_string_de str + | `Gzip -> inflate_string_gz str + +let deflate_string ~algorithm str = + match algorithm with + | `Deflate -> deflate_string_de str + | `Gzip -> deflate_string_gz str + +let encoding_of_string = function + | "deflate" -> `Deflate + | "gzip" -> `Gzip + | s -> `Unknown s + +let content_encodings request = + match Message.header request "content-encoding" with + | None -> None + | Some s -> + String.split_on_char ',' s + |> List.map (fun x -> x |> String.trim |> String.lowercase_ascii) + |> List.map encoding_of_string + |> Option.some + +let accepted_encodings_with_weights request = + match Message.header request "accept-encoding" with + | None -> None + | Some s -> + let encodings = Accept.encodings (Some s) |> Accept.qsort in + Some + (List.map + (fun (a, b) -> + ( (match b with + | Accept.Any -> `Any + | Accept.Gzip -> `Gzip + | Accept.Compress -> `Compress + | Accept.Deflate -> `Deflate + | Accept.Identity -> `Identity + | Accept.Encoding s -> `Unknown s), + a )) + encodings) + +let accepted_encodings request = + match accepted_encodings_with_weights request with + | None -> None + | Some encodings -> Some (List.map (fun (a, _) -> a) encodings) + +let preferred_content_encoding request = + match accepted_encodings request with + | None -> None + | Some l -> + let rec aux = function + | [] -> None + | `Any :: _rest -> Some `Gzip + | `Deflate :: _rest -> Some `Deflate + | `Gzip :: _rest -> Some `Gzip + | _ :: rest -> aux rest + in + aux l + +let algorithm_to_string = function + | `Deflate -> "deflate" + | `Gzip -> "gzip" + +let with_encoded_body ?(algorithm = `Deflate) body response = + match body with + | "" -> response + | _ -> + let encoded_body = deflate_string ~algorithm body in + Message.set_body response encoded_body; + Message.set_header response "Content-Encoding" + (algorithm_to_string algorithm); + response + +let compress handler req = + let%lwt response = handler req in + let preferred_algorithm = preferred_content_encoding req in + match preferred_algorithm with + | None -> Lwt.return response + | Some algorithm -> + log.info (fun log -> + log ~request:req "Compressing request with algorithm: %s" + (algorithm_to_string algorithm)); + let%lwt body = Message.body response in + Lwt.return @@ with_encoded_body ~algorithm body response + +let decompress handler req = + let rec aux algorithms content = + match algorithms with + | [] -> Ok content + | (`Deflate as el) :: rest | (`Gzip as el) :: rest -> + Result.bind (inflate_string ~algorithm:el content) (aux rest) + | _ :: _rest -> Error (`Msg "Unsopported encoding directive") + in + let algorithms = content_encodings req in + match algorithms with + | None -> handler req + | Some algorithms -> ( + let%lwt body = Message.body req in + let body = aux algorithms body in + match body with + | Ok body -> + Message.set_body req body; + handler req + | Error (`Msg err) -> Helpers.respond ~status:`Unsupported_Media_Type err)