diff --git a/src/cli.rs b/src/cli.rs index b9cef38b..e5d9d8ec 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -9,6 +9,8 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; +use anyhow::anyhow; +use encoding_rs::Encoding; use reqwest::{Method, Url}; use serde::{Deserialize, Serialize}; use structopt::clap::{self, arg_enum, AppSettings, Error, ErrorKind, Result}; @@ -65,6 +67,20 @@ pub struct Cli { #[structopt(short = "s", long, value_name = "THEME", possible_values = &Theme::variants(), case_insensitive = true)] pub style: Option, + /// Override the response encoding for terminal display purposes. + /// + /// Example: `--response-charset=latin1` + /// {n}{n}{n} + #[structopt(long, value_name = "ENCODING", parse(try_from_str = parse_encoding))] + pub response_charset: Option<&'static Encoding>, + + /// Override the response mime type for coloring and formatting for the terminal + /// + /// Example: `--response-mime=application/json` + /// {n}{n}{n} + #[structopt(long, value_name = "MIME_TYPE")] + pub response_mime: Option, + /// String specifying what the output should contain. 
/// /// Use `H` and `B` for request header and body respectively, @@ -793,8 +809,8 @@ impl Print { } impl FromStr for Print { - type Err = Error; - fn from_str(s: &str) -> Result { + type Err = anyhow::Error; + fn from_str(s: &str) -> anyhow::Result { let mut request_headers = false; let mut request_body = false; let mut response_headers = false; @@ -806,12 +822,7 @@ impl FromStr for Print { 'B' => request_body = true, 'h' => response_headers = true, 'b' => response_body = true, - char => { - return Err(Error::with_description( - &format!("{:?} is not a valid value", char), - ErrorKind::InvalidValue, - )) - } + char => return Err(anyhow!("{:?} is not a valid value", char)), } } @@ -835,17 +846,12 @@ impl Timeout { } impl FromStr for Timeout { - type Err = Error; + type Err = anyhow::Error; - fn from_str(sec: &str) -> Result { + fn from_str(sec: &str) -> anyhow::Result { let pos_sec: f64 = match sec.parse::() { Ok(sec) if sec.is_sign_positive() => sec, - _ => { - return Err(Error::with_description( - "Invalid seconds as connection timeout", - ErrorKind::InvalidValue, - )) - } + _ => return Err(anyhow!("Invalid seconds as connection timeout")), }; let dur = Duration::from_secs_f64(pos_sec); @@ -861,19 +867,18 @@ pub enum Proxy { } impl FromStr for Proxy { - type Err = Error; + type Err = anyhow::Error; - fn from_str(s: &str) -> Result { + fn from_str(s: &str) -> anyhow::Result { let split_arg: Vec<&str> = s.splitn(2, ':').collect(); match split_arg[..] 
{ [protocol, url] => { let url = reqwest::Url::try_from(url).map_err(|e| { - Error::with_description( - &format!( - "Invalid proxy URL '{}' for protocol '{}': {}", - url, protocol, e - ), - ErrorKind::InvalidValue, + anyhow!( + "Invalid proxy URL '{}' for protocol '{}': {}", + url, + protocol, + e ) })?; @@ -881,15 +886,11 @@ impl FromStr for Proxy { "http" => Ok(Proxy::Http(url)), "https" => Ok(Proxy::Https(url)), "all" => Ok(Proxy::All(url)), - _ => Err(Error::with_description( - &format!("Unknown protocol to set a proxy for: {}", protocol), - ErrorKind::InvalidValue, - )), + _ => Err(anyhow!("Unknown protocol to set a proxy for: {}", protocol)), } } - _ => Err(Error::with_description( - "The value passed to --proxy should be formatted as :", - ErrorKind::InvalidValue, + _ => Err(anyhow!( + "The value passed to --proxy should be formatted as :" )), } } @@ -957,6 +958,49 @@ impl FromStr for HttpVersion { } } +// HTTPie recognizes some encoding names that encoding_rs doesn't, e.g. utf16 has to be spelled as utf-16. +// There are also some encodings which encoding_rs doesn't support but HTTPie does, e.g. utf-7. 
+// See https://github.com/ducaale/xh/pull/184#pullrequestreview-787528027 +fn parse_encoding(encoding: &str) -> anyhow::Result<&'static Encoding> { + let normalized_encoding = encoding.to_lowercase().replace( + |c: char| (!c.is_alphanumeric() && c != '_' && c != '-' && c != ':'), + "", + ); + + match normalized_encoding.as_str() { + "u8" | "utf" => return Ok(encoding_rs::UTF_8), + "u16" => return Ok(encoding_rs::UTF_16LE), + _ => (), + } + + for encoding in &[ + &normalized_encoding, + &normalized_encoding.replace(&['-', '_'][..], ""), + &normalized_encoding.replace('_', "-"), + &normalized_encoding.replace('-', "_"), + ] { + if let Some(encoding) = Encoding::for_label(encoding.as_bytes()) { + return Ok(encoding); + } + } + + { + let mut encoding = normalized_encoding.replace(&['-', '_'][..], ""); + if let Some(first_digit_index) = encoding.find(|c: char| c.is_digit(10)) { + encoding.insert(first_digit_index, '-'); + if let Some(encoding) = Encoding::for_label(encoding.as_bytes()) { + return Ok(encoding); + } + } + } + + Err(anyhow::anyhow!( + "{} is not a supported encoding, please refer to https://encoding.spec.whatwg.org/#names-and-labels \ + for supported encodings", + encoding + )) +} + /// Based on the function used by clap to abort fn safe_exit() -> ! 
{ let _ = std::io::stdout().lock().flush(); @@ -1283,4 +1327,30 @@ mod tests { let cli = parse(&["--no-check-status", "--check-status", ":"]).unwrap(); assert_eq!(cli.check_status, Some(true)); } + + #[test] + fn parse_encoding_label() { + let test_cases = vec![ + ("~~~~UtF////16@@", encoding_rs::UTF_16LE), + ("utf16", encoding_rs::UTF_16LE), + ("utf_16_be", encoding_rs::UTF_16BE), + ("utf16be", encoding_rs::UTF_16BE), + ("utf-16-be", encoding_rs::UTF_16BE), + ("utf_8", encoding_rs::UTF_8), + ("utf8", encoding_rs::UTF_8), + ("utf-8", encoding_rs::UTF_8), + ("u8", encoding_rs::UTF_8), + ("iso8859_6", encoding_rs::ISO_8859_6), + ("iso_8859-2:1987", encoding_rs::ISO_8859_2), + ("l1", encoding_rs::WINDOWS_1252), + ("elot-928", encoding_rs::ISO_8859_7), + ]; + + for (input, output) in test_cases { + assert_eq!(parse_encoding(input).unwrap(), output) + } + + assert_eq!(parse_encoding("notreal").is_err(), true); + assert_eq!(parse_encoding("").is_err(), true); + } } diff --git a/src/main.rs b/src/main.rs index ac042c1b..a41f96dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -398,6 +398,9 @@ fn run(args: Cli) -> Result { let pretty = args.pretty.unwrap_or_else(|| buffer.guess_pretty()); let mut printer = Printer::new(print.clone(), pretty, args.style, args.stream, buffer); + let response_charset = args.response_charset; + let response_mime = args.response_mime.as_deref(); + printer.print_request_headers(&request, &*cookie_jar)?; printer.print_request_body(&mut request)?; @@ -411,7 +414,7 @@ fn run(args: Cli) -> Result { if args.all { client.on_redirect(|prev_response, next_request| { printer.print_response_headers(&prev_response)?; - printer.print_response_body(prev_response)?; + printer.print_response_body(prev_response, response_charset, response_mime)?; printer.print_separator()?; printer.print_request_headers(next_request, &*cookie_jar)?; printer.print_request_body(next_request)?; @@ -450,7 +453,7 @@ fn run(args: Cli) -> Result { )?; } } else { - 
printer.print_response_body(response)?; + printer.print_response_body(response, response_charset, response_mime)?; } } diff --git a/src/printer.rs b/src/printer.rs index 706cf7f0..2f0c0b56 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -393,12 +393,21 @@ impl Printer { Ok(()) } - pub fn print_response_body(&mut self, mut response: Response) -> anyhow::Result<()> { + pub fn print_response_body( + &mut self, + mut response: Response, + encoding: Option<&'static Encoding>, + mime: Option<&str>, + ) -> anyhow::Result<()> { if !self.print.response_body { return Ok(()); } - let content_type = get_content_type(response.headers()); + let content_type = mime + .map(ContentType::from) + .unwrap_or_else(|| get_content_type(response.headers())); + let encoding = encoding.unwrap_or_else(|| guess_encoding(&response)); + if !self.buffer.is_terminal() { if (self.color || self.indent_json) && content_type.is_text() { // The user explicitly asked for formatting even though this is @@ -414,9 +423,13 @@ impl Printer { // Unconditionally decoding is not an option because the body // might not be text at all if self.stream { - self.print_body_stream(content_type, &mut decode_stream(&mut response))?; + self.print_body_stream( + content_type, + &mut decode_stream(&mut response, encoding), + )?; } else { - let text = response.text()?; + let bytes = response.bytes()?; + let (text, _, _) = encoding.decode(&bytes); self.print_body_text(content_type, &text)?; } } else if self.stream { @@ -426,7 +439,8 @@ impl Printer { self.buffer.print(&body)?; } } else if self.stream { - match self.print_body_stream(content_type, &mut decode_stream(&mut response)) { + match self.print_body_stream(content_type, &mut decode_stream(&mut response, encoding)) + { Ok(_) => { self.buffer.print("\n")?; } @@ -436,8 +450,9 @@ impl Printer { Err(err) => return Err(err.into()), } } else { - // Note that .text() behaves like String::from_utf8_lossy() - let text = response.text()?; + // Note that .decode() behaves 
like String::from_utf8_lossy() + let bytes = response.bytes()?; + let (text, _, _) = encoding.decode(&bytes); if text.contains('\0') { self.buffer.print(BINARY_SUPPRESSOR)?; return Ok(()); @@ -470,35 +485,39 @@ impl ContentType { } } +impl From<&str> for ContentType { + fn from(content_type: &str) -> Self { + if content_type.contains("json") { + ContentType::Json + } else if content_type.contains("html") { + ContentType::Html + } else if content_type.contains("xml") { + ContentType::Xml + } else if content_type.contains("multipart") { + ContentType::Multipart + } else if content_type.contains("x-www-form-urlencoded") { + ContentType::UrlencodedForm + } else if content_type.contains("javascript") { + ContentType::JavaScript + } else if content_type.contains("css") { + ContentType::Css + } else if content_type.contains("text") { + // We later check if this one's JSON + // HTTPie checks for "json", "javascript" and "text" in one place: + // https://github.com/httpie/httpie/blob/a32ad344dd/httpie/output/formatters/json.py#L14 + // We have it more spread out but it behaves more or less the same + ContentType::Text + } else { + ContentType::Unknown + } + } +} + pub fn get_content_type(headers: &HeaderMap) -> ContentType { headers .get(CONTENT_TYPE) .and_then(|value| value.to_str().ok()) - .and_then(|content_type| { - if content_type.contains("json") { - Some(ContentType::Json) - } else if content_type.contains("html") { - Some(ContentType::Html) - } else if content_type.contains("xml") { - Some(ContentType::Xml) - } else if content_type.contains("multipart") { - Some(ContentType::Multipart) - } else if content_type.contains("x-www-form-urlencoded") { - Some(ContentType::UrlencodedForm) - } else if content_type.contains("javascript") { - Some(ContentType::JavaScript) - } else if content_type.contains("css") { - Some(ContentType::Css) - } else if content_type.contains("text") { - // We later check if this one's JSON - // HTTPie checks for "json", "javascript" and "text" in 
one place: - // https://github.com/httpie/httpie/blob/a32ad344dd/httpie/output/formatters/json.py#L14 - // We have it more spread out but it behaves more or less the same - Some(ContentType::Text) - } else { - None - } - }) + .map(ContentType::from) .unwrap_or(ContentType::Unknown) } @@ -512,9 +531,7 @@ pub fn valid_json(text: &str) -> bool { /// but it makes no guarantees about outputting valid UTF-8 if the input is /// invalid UTF-8 (claiming to be UTF-8). So only pass data through here /// that's going to the terminal, and don't trust its output. -fn decode_stream(response: &mut Response) -> impl Read + '_ { - let encoding = guess_encoding(response); - +fn decode_stream<'a>(response: &'a mut Response, encoding: &'static Encoding) -> impl Read + 'a { DecodeReaderBytesBuilder::new() .encoding(Some(encoding)) .build(response) diff --git a/tests/cli.rs b/tests/cli.rs index f2575e2f..c0323000 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -2234,3 +2234,43 @@ fn http2() { .stdout(predicates::str::contains("GET / HTTP/2.0")) .stdout(predicates::str::contains("HTTP/2.0 200 OK")); } + +#[test] +fn override_response_charset() { + let server = MockServer::start(); + let mock = server.mock(|_when, then| { + then.header("Content-Type", "text/plain; charset=utf-8") + .body(b"\xe9"); + }); + + get_command() + .arg("--print=b") + .arg("--response-charset=latin1") + .arg(server.base_url()) + .assert() + .stdout("é\n"); + mock.assert(); +} + +#[test] +fn override_response_mime() { + let server = MockServer::start(); + let mock = server.mock(|_when, then| { + then.header("Content-Type", "text/html; charset=utf-8") + .body("{\"status\": \"ok\"}"); + }); + + get_command() + .arg("--print=b") + .arg("--response-mime=application/json") + .arg(server.base_url()) + .assert() + .stdout(indoc! {r#" + { + "status": "ok" + } + + + "#}); + mock.assert(); +}