Skip to content

Commit

Permalink
New implementation of rust cli args parsing. (#20698)
Browse files Browse the repository at this point in the history
The old implementation didn't support two features
that the Python implementation does: 

1) goal scoping (`pants test --foo` vs `pants test --test-foo`)
2) short flag values without `=` (`-ldebug` vs `-l=debug`)

It also did a lot of temporary String allocation.

It was also somewhat hard to grok, especially its handling
of negation. 

This new implementation supports the features above, 
and removes support for adding the negation prefix to
short flags (`--no-l`) which we don't support in 
python, and which doesn't really make sense.

It also minimizes String allocation: it only allocates
when parsing the args, or when finding a match,
but not when comparing option ids to args, which
it now does via char iterators.

One big difference between this new Rust implementation
and the Python one is that the Python parser knows
when it encounters a goal on the CLI by looking up
strings (that don't start with a dash) in a list of known
scopes. This was originally done to avoid confusion with
targets, but in practice targets always contain a `/`
(we require root-level targets to be prefixed with `//:`)
so this isn't necessary, and it simplifies things to 
detect goal names via regex match.
  • Loading branch information
benjyw authored Mar 31, 2024
1 parent 762b64f commit d127c83
Show file tree
Hide file tree
Showing 10 changed files with 268 additions and 123 deletions.
3 changes: 2 additions & 1 deletion src/python/pants/option/subsystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class Subsystem(metaclass=_SubsystemMeta):
deprecated_options_scope: str | None = None
deprecated_options_scope_removal_version: str | None = None

# // Note: must be aligned with the regex in src/rust/engine/options/src/id.rs.
_scope_name_re = re.compile(r"^(?:[a-z0-9_])+(?:-(?:[a-z0-9_])+)*$")

_rules: ClassVar[Sequence[Rule] | None] = None
Expand Down Expand Up @@ -251,7 +252,7 @@ async def inner(*a, **k):

@classmethod
def is_valid_scope_name(cls, s: str) -> bool:
return s == "" or cls._scope_name_re.match(s) is not None
return s == "" or (cls._scope_name_re.match(s) is not None and s != "pants")

@classmethod
def validate_scope(cls) -> None:
Expand Down
1 change: 1 addition & 0 deletions src/python/pants/option/subsystem_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def check_false(s: str) -> None:
check_true("foo-bar0-1ba22z")
check_true("foo_bar")

check_false("pants")
check_false("Foo")
check_false("fOo")
check_false("foo.bar")
Expand Down
1 change: 1 addition & 0 deletions src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/rust/engine/options/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ authors = ["Pants Build <[email protected]>"]
publish = false

[dependencies]
itertools = { workspace = true }
lazy_static = { workspace = true }
log = { workspace = true }
maplit = { workspace = true }
Expand Down
256 changes: 173 additions & 83 deletions src/rust/engine/options/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,95 +3,162 @@

use std::env;

use super::id::{NameTransform, OptionId, Scope};
use super::id::{is_valid_scope_name, NameTransform, OptionId, Scope};
use super::{DictEdit, OptionsSource};
use crate::parse::{expand, expand_to_dict, expand_to_list, Parseable};
use crate::parse::{expand, expand_to_dict, expand_to_list, ParseError, Parseable};
use crate::ListEdit;
use std::collections::HashMap;

pub struct Args {
pub(crate) args: Vec<String>,
use core::iter::once;
use itertools::{chain, Itertools};

#[derive(Debug)]
struct Arg {
context: Scope,
flag: String,
value: Option<String>,
}

#[derive(PartialEq)]
enum Negate {
True,
False,
}
impl Arg {
/// Checks if this arg's flag is equal to the provided strings concatenated with dashes.
/// E.g., "--foo-bar" matches ["-", "foo", "bar"].
fn _flag_match<'a>(&self, dash_separated_strs: impl Iterator<Item = &'a str>) -> bool {
#[allow(unstable_name_collisions)]
// intersperse is provided by itertools::Itertools, but is also in the Rust nightly
// as an experimental feature of standard Iterator. If/when that becomes standard we
// can use it, but for now we must squelch the name collision.
itertools::equal(
self.flag.chars(),
dash_separated_strs
.map(str::chars)
.intersperse("-".chars())
.flatten(),
)
}

impl Args {
pub fn new(args: Vec<String>) -> Self {
Self { args }
fn _prefix<'a>(negate: bool) -> impl Iterator<Item = &'a str> {
if negate {
once("--no")
} else {
once("-")
}
}

pub fn argv() -> Self {
Self::new(env::args().collect::<Vec<_>>())
// Check if --scope-flag matches.
fn _matches_explicit_scope(&self, id: &OptionId, negate: bool) -> bool {
self._flag_match(chain![
Self::_prefix(negate),
once(id.scope.name()),
id.name_components_strs()
])
}

fn arg_name(id: &OptionId, negate: Negate) -> String {
format!(
"--{}{}{}",
match negate {
Negate::False => "",
Negate::True => "no-",
},
match &id.scope {
Scope::Global => "".to_string(),
Scope::Scope(scope) => format!("{}-", scope.to_ascii_lowercase()),
},
id.name("-", NameTransform::ToLower)
)
// Check if --flag matches in the context of the current goal's scope.
fn _matches_implicit_scope(&self, id: &OptionId, negate: bool) -> bool {
self.context == id.scope
&& self._flag_match(chain![Self::_prefix(negate), id.name_components_strs()])
}

fn arg_names(id: &OptionId, negate: Negate) -> HashMap<String, bool> {
let mut arg_names = HashMap::new();
if let Some(short_name) = &id.short_name {
arg_names.insert(format!("-{short_name}"), false);
if negate == Negate::True {
arg_names.insert(format!("--no-{short_name}"), true);
}
// Check if -s matches for a short name s, if any.
fn _matches_short(&self, id: &OptionId) -> bool {
if let Some(sn) = &id.short_name {
self._flag_match(chain![once(""), once(sn.as_ref())])
} else {
false
}
arg_names.insert(Self::arg_name(id, Negate::False), false);
if negate == Negate::True {
arg_names.insert(Self::arg_name(id, Negate::True), true);
}

/// Checks if this arg provides a value for the specified option, either negated or not.
fn _matches(&self, id: &OptionId, negate: bool) -> bool {
self._matches_explicit_scope(id, negate)
|| self._matches_implicit_scope(id, negate)
|| self._matches_short(id)
}

fn matches(&self, id: &OptionId) -> bool {
self._matches(id, false)
}

fn matches_negation(&self, id: &OptionId) -> bool {
self._matches(id, true)
}

fn to_bool(&self) -> Result<Option<bool>, ParseError> {
// An arg can represent a bool either by having an explicit value parseable as a bool,
// or by having no value (in which case it represents true).
match &self.value {
Some(value) => match expand(value.to_string())? {
Some(s) => bool::parse(&s).map(Some),
_ => Ok(None),
},
None => Ok(Some(true)),
}
arg_names
}
}

fn find_flag(
&self,
flag_names: HashMap<String, bool>,
) -> Result<Option<(String, String, bool)>, String> {
for arg in self.args.iter().rev() {
let mut components = arg.as_str().splitn(2, '=');
if let Some(name) = components.next() {
if let Some(negated) = flag_names.get(name) {
return Ok(Some((
name.to_owned(),
components.next().unwrap_or("").to_owned(),
*negated,
)));
#[derive(Debug)]
pub struct Args {
args: Vec<Arg>,
}

impl Args {
// Create an Args instance with the provided args, which must *not* include the
// argv[0] process name.
pub fn new<I: IntoIterator<Item = String>>(arg_strs: I) -> Self {
let mut args: Vec<Arg> = vec![];
let mut scope = Scope::Global;
for arg_str in arg_strs.into_iter() {
if arg_str.starts_with("--") {
let mut components = arg_str.splitn(2, '=');
let flag = components.next().unwrap();
if flag.is_empty() {
// We've hit the passthrough args delimiter (`--`), so don't look further.
break;
} else {
args.push(Arg {
context: scope.clone(),
flag: flag.to_string(),
value: components.next().map(str::to_string),
});
}
} else if arg_str.starts_with('-') && arg_str.len() >= 2 {
let (flag, mut value) = arg_str.split_at(2);
// We support -ldebug and -l=debug, so strip that extraneous equals sign.
if let Some(stripped) = value.strip_prefix('=') {
value = stripped;
}
args.push(Arg {
context: scope.clone(),
flag: flag.to_string(),
value: if value.is_empty() {
None
} else {
Some(value.to_string())
},
});
} else if is_valid_scope_name(&arg_str) {
scope = Scope::Scope(arg_str)
}
}
Ok(None)

Self { args }
}

pub fn argv() -> Self {
let mut args = env::args().collect::<Vec<_>>().into_iter();
args.next(); // Consume the process name (argv[0]).
Self::new(env::args().collect::<Vec<_>>())
}

fn get_list<T: Parseable>(&self, id: &OptionId) -> Result<Option<Vec<ListEdit<T>>>, String> {
let arg_names = Self::arg_names(id, Negate::False);
let mut edits = vec![];
for arg in &self.args {
let mut components = arg.as_str().splitn(2, '=');
if let Some(name) = components.next() {
if arg_names.contains_key(name) {
let value = components.next().ok_or_else(|| {
format!("Expected string list option {name} to have a value.")
})?;
if let Some(es) =
expand_to_list::<T>(value.to_string()).map_err(|e| e.render(name))?
{
edits.extend(es);
}
if arg.matches(id) {
let value = arg.value.as_ref().ok_or_else(|| {
format!("Expected list option {} to have a value.", self.display(id))
})?;
if let Some(es) =
expand_to_list::<T>(value.to_string()).map_err(|e| e.render(&arg.flag))?
{
edits.extend(es);
}
}
}
Expand All @@ -105,28 +172,44 @@ impl Args {

impl OptionsSource for Args {
fn display(&self, id: &OptionId) -> String {
Self::arg_name(id, Negate::False)
format!(
"--{}{}",
match &id.scope {
Scope::Global => "".to_string(),
Scope::Scope(scope) => format!("{}-", scope.to_ascii_lowercase()),
},
id.name("-", NameTransform::ToLower)
)
}

fn get_string(&self, id: &OptionId) -> Result<Option<String>, String> {
match self.find_flag(Self::arg_names(id, Negate::False))? {
Some((name, value, _)) => expand(value).map_err(|e| e.render(name)),
_ => Ok(None),
// We iterate in reverse so that the rightmost arg wins in case an option
// is specified multiple times.
for arg in self.args.iter().rev() {
if arg.matches(id) {
return expand(arg.value.clone().ok_or_else(|| {
format!("Expected list option {} to have a value.", self.display(id))
})?)
.map_err(|e| e.render(&arg.flag));
};
}
Ok(None)
}

fn get_bool(&self, id: &OptionId) -> Result<Option<bool>, String> {
let arg_names = Self::arg_names(id, Negate::True);
match self.find_flag(arg_names)? {
Some((_, s, negated)) if s.as_str() == "" => Ok(Some(!negated)),
Some((name, value, negated)) => match expand(value).map_err(|e| e.render(&name))? {
Some(value) => bool::parse(&value)
.map(|b| Some(b ^ negated))
.map_err(|e| e.render(&name)),
_ => Ok(None),
},
_ => Ok(None),
// We iterate in reverse so that the rightmost arg wins in case an option
// is specified multiple times.
for arg in self.args.iter().rev() {
if arg.matches(id) {
return arg.to_bool().map_err(|e| e.render(&arg.flag));
} else if arg.matches_negation(id) {
return arg
.to_bool()
.map(|ob| ob.map(|b| b ^ true))
.map_err(|e| e.render(&arg.flag));
}
}
Ok(None)
}

fn get_bool_list(&self, id: &OptionId) -> Result<Option<Vec<ListEdit<bool>>>, String> {
Expand All @@ -146,9 +229,16 @@ impl OptionsSource for Args {
}

fn get_dict(&self, id: &OptionId) -> Result<Option<DictEdit>, String> {
match self.find_flag(Self::arg_names(id, Negate::False))? {
Some((name, value, _)) => expand_to_dict(value).map_err(|e| e.render(name)),
None => Ok(None),
// We iterate in reverse so that the rightmost arg wins in case an option
// is specified multiple times.
for arg in self.args.iter().rev() {
if arg.matches(id) {
return expand_to_dict(arg.value.clone().ok_or_else(|| {
format!("Expected list option {} to have a value.", self.display(id))
})?)
.map_err(|e| e.render(&arg.flag));
}
}
Ok(None)
}
}
Loading

0 comments on commit d127c83

Please sign in to comment.