From 8178f51d7ffc9cc9fec96e59f620fe13fa538e42 Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Wed, 28 Feb 2024 00:47:40 +0100 Subject: [PATCH] Gracefully handle UTF-8 errors. --- bindgen/ir/context.rs | 49 +++++++++++++++++++++++-------------------- bindgen/ir/var.rs | 13 +++++++++--- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs index fdeef01fef..86f041b143 100644 --- a/bindgen/ir/context.rs +++ b/bindgen/ir/context.rs @@ -2159,7 +2159,12 @@ If you encounter an error missing from this list, please file an issue or a PR!" let mut kind = ModuleKind::Normal; let mut looking_for_name = false; for token in cursor.tokens().iter() { - match token.spelling().to_str().unwrap() { + let spelling = token.spelling(); + let name = match spelling.to_str() { + Ok(name) => Cow::Borrowed(name), + Err(_) => spelling.to_string_lossy(), + }; + match name.as_ref() { "inline" => { debug_assert!( kind != ModuleKind::Inline, @@ -2185,29 +2190,27 @@ If you encounter an error missing from this list, please file an issue or a PR!" assert!(looking_for_name); break; } - name => { - if looking_for_name { - if module_name.is_none() { - module_name = Some(name.to_owned()); - } - break; - } else { - // This is _likely_, but not certainly, a macro that's - // been placed just before the namespace keyword. - // Unfortunately, clang tokens don't let us easily see - // through the ifdef tokens, so we don't know what this - // token should really be. Instead of panicking though, - // we warn the user that we assumed the token was blank, - // and then move on. - // - // See also https://github.com/rust-lang/rust-bindgen/issues/1676. 
- warn!( - "Ignored unknown namespace prefix '{}' at {:?} in {:?}", - name, - token, - cursor - ); + name if looking_for_name => { + if module_name.is_none() { + module_name = + Some(name.into_owned()); } + break; + } + name => { + // This is _likely_, but not certainly, a macro that's + // been placed just before the namespace keyword. + // Unfortunately, clang tokens don't let us easily see + // through the ifdef tokens, so we don't know what this + // token should really be. Instead of panicking though, + // we warn the user that we assumed the token was blank, + // and then move on. + // + // See also https://github.com/rust-lang/rust-bindgen/issues/1676. + warn!( + "Ignored unknown namespace prefix '{}' at {:?} in {:?}", + name, token, cursor + ); } } } diff --git a/bindgen/ir/var.rs b/bindgen/ir/var.rs index 1221cb7b35..ab6922f5ad 100644 --- a/bindgen/ir/var.rs +++ b/bindgen/ir/var.rs @@ -169,10 +169,17 @@ fn handle_function_macro( }; let tokens: Vec<_> = cursor.tokens().iter().collect(); if let Some(boundary) = tokens.iter().position(is_closing_paren) { - let mut tokens = tokens + let tokens: Result<Vec<_>, _> = tokens .iter() - .map(|token| token.spelling().to_str().unwrap()) - .collect::<Vec<_>>(); + .map(|token| token.spelling().to_str()) + .collect(); + + let mut tokens = if let Ok(tokens) = tokens { + tokens + } else { + // Skip macros containing invalid UTF-8. + return; + }; let name = tokens.remove(0); let args: Vec<_> = tokens