From 79aa0ebc2bfb6f917178b10f7fc730aaa71f16f4 Mon Sep 17 00:00:00 2001 From: Lucas Pickering Date: Fri, 16 Feb 2024 22:09:47 -0500 Subject: [PATCH] Improve content parsing This makes content parsing a bit more generic, to set us up for more content types. Still needs some work though, not 100% ready. --- docs/src/SUMMARY.md | 2 + docs/src/api/chain.md | 11 +- docs/src/api/content_type.md | 11 ++ docs/src/user_guide/filter_query.md | 66 ++++++++++++ src/collection/models.rs | 12 ++- src/factory.rs | 1 + src/http/parse.rs | 161 ++++++++++++++++++++-------- src/http/record.rs | 6 +- src/template.rs | 18 +++- src/template/error.rs | 7 ++ src/template/render.rs | 94 +++++++++++----- src/util.rs | 15 +++ 12 files changed, 321 insertions(+), 83 deletions(-) create mode 100644 docs/src/api/content_type.md create mode 100644 docs/src/user_guide/filter_query.md diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index f41daed2..89b9a551 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -11,6 +11,7 @@ - [Command Line Interface (CLI)](./user_guide/cli.md) - [Templates](./user_guide/templates.md) - [Collection Reuse & Inheritance](./user_guide/inheritance.md) +- [Data Filtering & Querying](./user_guide/filter_query.md) # API Reference @@ -22,3 +23,4 @@ - [Chain](./api/chain.md) - [Chain Source](./api/chain_source.md) - [Template](./api/template.md) + - [Content Type](./api/content_type.md) diff --git a/docs/src/api/chain.md b/docs/src/api/chain.md index e4df4ac9..e292f2b3 100644 --- a/docs/src/api/chain.md +++ b/docs/src/api/chain.md @@ -6,11 +6,12 @@ To use a chain in a template, reference it as `{{chains.}}`. 
## Fields -| Field | Type | Description | Default | -| ----------- | -------------------------------------------------------------------------------------- | ------------------------------------------------------- | -------- | -| `source` | [`ChainSource`](./chain_source.md) | Source of the chained value | Required | -| `sensitive` | `boolean` | Should the value be hidden in the UI? | `false` | -| `selector` | [`JSONPath`](https://www.ietf.org/archive/id/draft-goessner-dispatch-jsonpath-00.html) | Selector to narrow down results in a chained JSON value | `null` | +| Field | Type | Description | Default | +| -------------- | -------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | -------- | +| `source` | [`ChainSource`](./chain_source.md) | Source of the chained value | Required | +| `sensitive` | `boolean` | Should the value be hidden in the UI? | `false` | +| `selector` | [`JSONPath`](https://www.ietf.org/archive/id/draft-goessner-dispatch-jsonpath-00.html) | Selector to transform/narrow down results in a chained value. See [Filtering & Querying](../user_guide/filter_query.md) | `null` | +| `content_type` | [`ContentType`](./content_type.md) | Force content type. Not required for `request` and `file` chains, as long as the `Content-Type` header/file extension matches the data | | See the [`ChainSource`](./chain_source.md) docs for more detail. diff --git a/docs/src/api/content_type.md b/docs/src/api/content_type.md new file mode 100644 index 00000000..a0e815e8 --- /dev/null +++ b/docs/src/api/content_type.md @@ -0,0 +1,11 @@ +# Content Type + +Content type defines the various data formats that Slumber recognizes and can manipulate. 
Slumber is capable of displaying any text-based data format, but only specific formats support additional features such as [querying](../user_guide/filter_query.md) and formatting. + +For chained requests, Slumber uses the [HTTP `Content-Type` header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type) to detect the content type. For chained files, it uses the file extension. For other [chain sources](./chain_source.md), or if the `Content-Type` header/file extension is missing or incorrect, you'll have to manually provide the content type via the [chain](./chain.md) `content_type` field. + +## Supported Content Types + +| Content Type | HTTP Header | File Extension(s) | +| ------------ | ------------------ | ----------------- | +| JSON | `application/json` | `json` | diff --git a/docs/src/user_guide/filter_query.md b/docs/src/user_guide/filter_query.md new file mode 100644 index 00000000..b2d9f95e --- /dev/null +++ b/docs/src/user_guide/filter_query.md @@ -0,0 +1,66 @@ +# Data Filtering & Querying + +Slumber supports querying data structures to transform or reduce response data. + +There are two main use cases for querying: + +- In [chained template values](../api/chain.md), to extract data + - Provided via chain's `selector` argument +- In the TUI response body browser, to limit the response data shown + +**Regardless of data format, querying is done via [JSONPath](https://www.ietf.org/archive/id/draft-goessner-dispatch-jsonpath-00.html).** For non-JSON formats, the data will be converted to JSON, queried, and converted back. This keeps querying simple and uniform across data types. + +## Querying Chained Values + +Here are some examples of using queries to extract data from a chained value. Let's say you have two chained value sources. The first is a JSON file, called `creds.json`.
It has the following contents: + +```json +{ "user": "fishman", "pw": "hunter2" } +``` + +We'll use these credentials to log in and get an API token, so the second data source is the login response, which looks like so: + +```json +{ "token": "abcdef123" } +``` + +```yaml +chains: + username: + source: !file ./creds.json + selector: $.user + password: + source: !file ./creds.json + selector: $.pw + auth_token: + source: !request login + selector: $.token + +# Use YAML anchors for de-duplication +base: &base + headers: + Accept: application/json + Content-Type: application/json + +requests: + login: + <<: *base + method: POST + url: "https://myfishes.fish/anything/login" + body: | + { + "username": "{{chains.username}}", + "password": "{{chains.password}}" + } + + get_user: + <<: *base + method: GET + url: "https://myfishes.fish/anything/current-user" + query: + auth: "{{chains.auth_token}}" +``` + +While this example simply extracts inner fields, JSONPath can be used for much more powerful transformations. See the [JSONPath docs](https://www.ietf.org/archive/id/draft-goessner-dispatch-jsonpath-00.html) for more examples. + + diff --git a/src/collection/models.rs b/src/collection/models.rs index 4012fb88..d3e9af13 100644 --- a/src/collection/models.rs +++ b/src/collection/models.rs @@ -1,6 +1,6 @@ //! The plain data types that make up a request collection -use crate::{collection::cereal, template::Template}; +use crate::{collection::cereal, http::ContentType, template::Template}; use derive_more::{Deref, Display, From}; use equivalent::Equivalent; use indexmap::IndexMap; @@ -125,8 +125,16 @@ pub struct Chain { /// Mask chained value in the UI #[serde(default)] pub sensitive: bool, - /// JSONpath to extract a value from the response. For JSON data only. + /// Selector to extract a value from the response. This uses JSONPath + /// regardless of the content type. Non-JSON values will be converted to + /// JSON, then converted back. See [ResponseContent::select].
pub selector: Option, + /// Hard-code the content type of the response. Only needed if a selector + /// is given and the content type can't be dynamically determined + /// correctly. This is needed if the chain source is neither an HTTP + /// response nor a file (e.g. a command) **or** if the `Content-Type` + /// header/file extension is incorrect. + pub content_type: Option, } /// Unique ID for a chain. Takes a generic param so we can create these during diff --git a/src/factory.rs b/src/factory.rs index 3d8df8c7..3a0b0c84 100644 --- a/src/factory.rs +++ b/src/factory.rs @@ -63,6 +63,7 @@ factori!(Chain, { source = ChainSource::Request(RecipeId::default()), sensitive = false, selector = None, + content_type = None, } }); diff --git a/src/http/parse.rs b/src/http/parse.rs index 41be65a5..ffc8a2fc 100644 --- a/src/http/parse.rs +++ b/src/http/parse.rs @@ -1,16 +1,30 @@ //! Utilities for parsing response bodies into a variety of known content types. //! Each supported content type has its own struct which implements -//! [ContentType]. If you want to parse as a statically known content type, just -//! use that struct. If you're want to parse dynamically based on the response's -//! metadata, use [parse_body]. +//! [ResponseContent]. If you want to parse as a statically known content type, +//! just use that struct. If you just need to refer to the content _type_, and +//! not a value, use [ContentType]. If you want to parse dynamically based on +//! the response's metadata, use [ContentType::parse_response]. use crate::http::Response; use anyhow::{anyhow, Context}; -use derive_more::Deref; -use std::fmt::Debug; +use derive_more::{Deref, Display}; +use serde::{de::IntoDeserializer, Deserialize, Serialize}; +use std::{borrow::Cow, ffi::OsStr, fmt::Debug, path::Path, str::FromStr}; + +/// All supported content types. Each variant should have a corresponding +/// implementation of [ResponseContent].
+#[derive(Copy, Clone, Debug, Serialize, Deserialize)] +#[cfg_attr(test, derive(PartialEq))] +pub enum ContentType { + // Primary serialization string here should match the HTTP Content-Type + // header. Others are for file extensions. + #[serde(rename = "application/json", alias = "json")] + Json, +} -/// A response content type that we know how to parse. -pub trait ContentType: Debug { +/// A response content type that we know how to parse. This is defined as a +/// trait rather than an enum because it breaks apart the logic more clearly. +pub trait ResponseContent: Debug + Display { /// Parse the response body as this type fn parse(body: &str) -> anyhow::Result where @@ -21,19 +35,19 @@ pub trait ContentType: Debug { /// though! fn prettify(&self) -> String; + /// Convert the content to JSON. JSON is the common language used for + /// querying internally, so everything needs to be convertible to/from JSON. + fn to_json(&self) -> Cow<'_, serde_json::Value>; + /// Facilitate downcasting generic parsed bodies to concrete types for tests #[cfg(test)] fn as_any(&self) -> &dyn std::any::Any; } -#[derive(Debug, Deref, PartialEq)] +#[derive(Debug, Display, Deref, PartialEq)] pub struct Json(serde_json::Value); -impl Json { - pub const HEADER: &'static str = "application/json"; -} - -impl ContentType for Json { +impl ResponseContent for Json { fn parse(body: &str) -> anyhow::Result { Ok(Self(serde_json::from_str(body)?)) } @@ -43,41 +57,84 @@ impl ContentType for Json { serde_json::to_string_pretty(&self.0).unwrap() } + fn to_json(&self) -> Cow<'_, serde_json::Value> { + Cow::Borrowed(&self.0) + } + #[cfg(test)] fn as_any(&self) -> &dyn std::any::Any { self as &dyn std::any::Any } } -/// Helper for parsing the body of a response. Use [Response::parse_body] for -/// external usage. -pub(super) fn parse_body( - response: &Response, -) -> anyhow::Result> { - let body = &response.body; - match get_content_type(response)?
{ - Json::HEADER => Ok(Box::new(Json::parse(body.text())?)), - other => Err(anyhow!("Response has unknown content-type `{other}`",)), +impl ContentType { + /// Parse some content of this type. Return a dynamically dispatched content + /// object. + pub fn parse( + self, + content: &str, + ) -> anyhow::Result> { + match self { + Self::Json => Ok(Box::new(Json::parse(content)?)), + } + } + + /// Parse content from JSON into this format. Valid JSON should be valid + /// in any other format too, so this is infallible. + pub fn parse_json( + self, + content: &serde_json::Value, + ) -> Box { + match self { + Self::Json => Box::new(Json(content.clone())), + } + } + + /// Helper for parsing the body of a response. Use [Response::parse_body] + /// for external usage. + pub(super) fn parse_response( + response: &Response, + ) -> anyhow::Result> { + Self::from_header(response)?.parse(response.body.text()) + } + + /// Parse the content type from a file's extension + pub fn from_extension(path: &Path) -> anyhow::Result { + path.extension() + .and_then(OsStr::to_str) + .ok_or_else(|| anyhow!("Path {path:?} has no extension"))? + .parse() + } + + /// Parse the content type from a response's `Content-Type` header + pub fn from_header(response: &Response) -> anyhow::Result { + // If the header value isn't utf-8, we're hosed + let header_value = + std::str::from_utf8(response.content_type().ok_or_else(|| { + anyhow!("Response has no content-type header") + })?) + .context("content-type header is not valid utf-8")?; + + // Remove extra metadata from the header. It feels like there should be + // a helper for this in hyper or reqwest but I couldn't find it. 
+ // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type + let content_type = header_value + .split_once(';') + .map(|t| t.0) + .unwrap_or(header_value); + + content_type.parse() } } -/// Parse the content type from a response's headers -fn get_content_type(response: &Response) -> anyhow::Result<&str> { - // If the header value isn't utf-8, we're hosed - let header_value = std::str::from_utf8( - response - .content_type() - .ok_or_else(|| anyhow!("Response has no content-type header"))?, - ) - .context("content-type header is not valid utf-8")?; - - // Remove extra metadata from the header. It feels like there should be a - // helper for this in hyper or reqwest but I couldn't find it. - // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type - Ok(header_value - .split_once(';') - .map(|t| t.0) - .unwrap_or(header_value)) +impl FromStr for ContentType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + // Lean on serde for parsing + ContentType::deserialize(s.into_deserializer()) + .map_err(serde::de::value::Error::into) + } } #[cfg(test)] @@ -92,6 +149,24 @@ mod tests { use serde_json::json; use std::ops::Deref; + #[test] + fn test_from_extension() { + assert_eq!( + ContentType::from_extension(Path::new("turbo.json")).unwrap(), + ContentType::Json + ); + + // Errors + assert_err!( + ContentType::from_extension(Path::new("no_extension")), + "no extension" + ); + assert_err!( + ContentType::from_extension(Path::new("turbo.ohno")), + "unknown variant `ohno`" + ) + } + /// Test all content types #[rstest] #[case( @@ -105,7 +180,7 @@ mod tests { "{\"hello\": \"goodbye\"}", Json(json!({"hello": "goodbye"})) )] - fn test_parse_body( + fn test_parse_body( #[case] content_type: &str, #[case] body: String, #[case] expected: T, @@ -114,7 +189,7 @@ mod tests { Response, headers: headers(content_type), body: body.into() ); assert_eq!( - parse_body(&response) + ContentType::parse_response(&response) .unwrap() .deref() // 
Downcast the result to desired type @@ -128,7 +203,7 @@ mod tests { /// Test various failure cases #[rstest] #[case(None::<&str>, "", "no content-type header")] - #[case(Some("bad-header"), "", "unknown content-type")] + #[case(Some("bad-header"), "", "unknown variant `bad-header`")] #[case(Some(b"\xc3\x28".as_slice()), "", "not valid utf-8")] #[case(Some("application/json"), "not json!", "expected ident")] fn test_parse_body_error< @@ -143,7 +218,7 @@ mod tests { None => HeaderMap::new(), }; let response = create!(Response, headers: headers, body: body.into()); - assert_err!(parse_body(&response), expected_error); + assert_err!(ContentType::parse_response(&response), expected_error); } /// Create header map with the given value for the content-type header diff --git a/src/http/record.rs b/src/http/record.rs index 4cafb0b0..dcc74134 100644 --- a/src/http/record.rs +++ b/src/http/record.rs @@ -2,7 +2,7 @@ use crate::{ collection::{ProfileId, RecipeId}, - http::{parse, ContentType}, + http::{ContentType, ResponseContent}, util::ResultExt, }; use anyhow::Context; @@ -124,8 +124,8 @@ pub struct Response { impl Response { /// Parse the body of this response, based on its `content-type` header - pub fn parse_body(&self) -> anyhow::Result> { - parse::parse_body(self) + pub fn parse_body(&self) -> anyhow::Result> { + ContentType::parse_response(self) .context("Error parsing response body") .traced() } diff --git a/src/template.rs b/src/template.rs index d558744b..5250c106 100644 --- a/src/template.rs +++ b/src/template.rs @@ -154,7 +154,7 @@ mod tests { use crate::{ collection::{ChainSource, ProfileValue, RecipeId}, factory::*, - http::{Request, Response}, + http::{ContentType, Request, Response}, util::assert_err, }; use factori::create; @@ -292,6 +292,7 @@ mod tests { Chain, source: ChainSource::Request(recipe_id), selector: selector, + content_type: Some(ContentType::Json), )}; let context = create!( TemplateContext, database: database, chains: chains, @@ -324,6 +325,20 
@@ mod tests { create!(Request, recipe_id: "recipe1".into()), create!(Response, body: "not json!".into()), )), + "content type not provided", + )] + #[case( + "chain1", + create!( + Chain, + source: ChainSource::Request("recipe1".into()), + selector: Some("$.message".parse().unwrap()), + content_type: Some(ContentType::Json), + ), + Some(( + create!(Request, recipe_id: "recipe1".into()), + create!(Response, body: "not json!".into()), + )), "Error parsing response", )] #[case( @@ -332,6 +347,7 @@ mod tests { Chain, source: ChainSource::Request("recipe1".into()), selector: Some("$.*".parse().unwrap()), + content_type: Some(ContentType::Json), ), Some(( create!(Request, recipe_id: "recipe1".into()), diff --git a/src/template/error.rs b/src/template/error.rs index b0371f12..01fcdc37 100644 --- a/src/template/error.rs +++ b/src/template/error.rs @@ -1,6 +1,7 @@ use crate::{ collection::{ChainId, ProfileId}, template::Template, + util::doc_link, }; use nom::error::VerboseError; use serde_json_path::ExactlyOneError; @@ -78,6 +79,12 @@ pub enum ChainError { /// response #[error("No response available")] NoResponse, + #[error( + "Selector cannot be applied; content type not provided and could not \ + be determined from metadata. 
See docs for supported content types: {}", + doc_link("api/content_type") + )] + UnknownContentType, /// Failed to parse the response body before applying a selector #[error("Error parsing response")] ParseResponse { diff --git a/src/template/render.rs b/src/template/render.rs index 4747b70e..e31d0869 100644 --- a/src/template/render.rs +++ b/src/template/render.rs @@ -2,7 +2,7 @@ use crate::{ collection::{ChainId, ChainSource, ProfileValue, RecipeId}, - http::{ContentType, Json}, + http::{ContentType, Response}, template::{ parse::TemplateInputChunk, ChainError, Prompt, Template, TemplateChunk, TemplateContext, TemplateError, TemplateKey, @@ -228,29 +228,54 @@ impl<'a> TemplateSource<'a> for ChainTemplateSource<'a> { .get(&self.chain_id) .ok_or(ChainError::Unknown)?; - // Resolve the value based on the source type - let value = match &chain.source { + // Resolve the value based on the source type. Also resolve its + // content type. For responses this will come from its header. For + // anything else, we'll fall back to the content_type field defined + // by the user. + // + // We intentionally throw the content detection error away here, + // because it isn't that intuitive for users and is hard to plumb + let (value, content_type) = match &chain.source { ChainSource::Request(recipe_id) => { - self.render_request(context, recipe_id).await? + let response = + self.get_response(context, recipe_id).await?; + // Guess content type based on HTTP header + let content_type = ContentType::from_header(&response).ok(); + (response.body.into_text(), content_type) + } + ChainSource::File(path) => { + // Guess content type based on file extension + let content_type = ContentType::from_extension(path).ok(); + (self.render_file(path).await?, content_type) } - ChainSource::File(path) => self.render_file(path).await?, ChainSource::Command(command) => { - self.render_command(command).await? 
+ // No way to guess content type on this + (self.render_command(command).await?, None) } - ChainSource::Prompt(label) => { + ChainSource::Prompt(label) => ( self.render_prompt( context, label.as_deref(), chain.sensitive, ) - .await? - } + .await?, + // No way to guess content type on this + None, + ), }; + // If the user provided a content type, prefer that over the + // detected one + let content_type = chain.content_type.or(content_type); // If a selector path is present, filter down the value - let value = match &chain.selector { - Some(path) => self.apply_selector(&value, path)?, - None => value, + let value = if let Some(selector) = &chain.selector { + self.apply_selector( + content_type.ok_or(ChainError::UnknownContentType)?, + &value, + selector, + )? + } else { + value }; Ok(RenderedChunk { @@ -269,12 +294,12 @@ impl<'a> TemplateSource<'a> for ChainTemplateSource<'a> { } impl<'a> ChainTemplateSource<'a> { - /// Render a chained template value from a response - async fn render_request( + /// Get the response from the most recent request for a recipe + async fn get_response( &self, context: &'a TemplateContext, recipe_id: &RecipeId, - ) -> Result { + ) -> Result { let record = context .database .get_last_request( @@ -284,7 +309,7 @@ impl<'a> ChainTemplateSource<'a> { .map_err(ChainError::Database)? .ok_or(ChainError::NoResponse)?; - Ok(record.response.body.into_text()) + Ok(record.response) } /// Render a chained value from a file @@ -346,30 +371,41 @@ impl<'a> ChainTemplateSource<'a> { rx.await.map_err(|_| ChainError::PromptNoResponse) } - /// Apply a selector path to a string value to filter it down. Right now - /// this only supports JSONpath but we could add support for more in the - /// future. The string value will be parsed as a JSON value. + /// Apply a selector path to a string value to filter it down. The filtering + /// method will be determined based on the content type of the response. + /// See [ResponseContent].
fn apply_selector( &self, + content_type: ContentType, value: &str, selector: &JsonPath, ) -> Result { - // Parse the response as JSON. Intentionally ignore the - // content-type. If the user wants to treat it as JSON, we - // should allow that even if the server is wrong. - let json_value = Json::parse(value) + // Parse according to detected content type + let value = content_type + .parse(value) .map_err(|err| ChainError::ParseResponse { error: err })?; - // Apply the path to the json - let found_value = selector + // All content types get converted to JSON for formatting, then + // converted back. This is fucky but we need *some* common format + let json_value = value.to_json(); + let filtered = selector .query(&json_value) .exactly_one() .map_err(|error| ChainError::InvalidResult { error })?; - match found_value { - serde_json::Value::String(s) => Ok(s.clone()), - other => Ok(other.to_string()), - } + // If we got a scalar value, use that. Otherwise convert back to the + // input content type to re-stringify + let stringified = match filtered { + serde_json::Value::Null => "".into(), + serde_json::Value::Number(n) => n.to_string(), + serde_json::Value::Bool(b) => b.to_string(), + serde_json::Value::String(s) => s.clone(), + serde_json::Value::Array(_) | serde_json::Value::Object(_) => { + content_type.parse_json(filtered).to_string() + } + }; + + Ok(stringified) } } diff --git a/src/util.rs b/src/util.rs index 2dfdd8ce..8d04bfd4 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,6 +6,21 @@ use std::{ }; use tracing::error; +const WEBSITE: &str = "https://slumber.lucaspickering.me"; + +/// Get a link to a page on the doc website. This will append the doc prefix, +/// as well as the suffix. +/// +/// ``` +/// assert_eq!( +/// doc_link("api/chain"), +/// "https://slumber.lucaspickering.me/book/api/chain.html", +/// ); +/// ``` +pub fn doc_link(path: &str) -> String { + format!("{WEBSITE}/book/{path}.html") +} + /// Parse bytes (probably from a file) into YAML. 
This will merge any /// anchors/aliases. pub fn parse_yaml(bytes: &[u8]) -> serde_yaml::Result {