From d2e7c0554b1130eb7e7d30b1e89ede526eb39711 Mon Sep 17 00:00:00 2001
From: Andrew Gallant
Date: Mon, 29 Apr 2024 14:03:17 -0400
Subject: [PATCH] uv-resolver: add initial version of universal lock file
 format (#3314)

This is meant to be a base on which to build. Some parts are implicitly
incomplete and others are explicitly incomplete; the latter are indicated by
TODO comments. Here is a non-exhaustive list of incomplete things. In many
cases, they are incomplete simply because the data isn't present in a
`ResolutionGraph`, and future work will need to refactor our resolver so that
this data is correctly passed down.

* Not all wheels are included. Only the "selected" wheel for the current
  distribution is included.
* Marker expressions are always absent.
* We don't emit hashes for certain kinds of distributions (direct URLs, git,
  and path).
* We don't capture git information from a dependency specification. Right
  now, we just always emit "default branch."

There are perhaps also other, more cosmetic changes we might want to make to
the format. Right now, all arrays are encoded using whatever the `toml` crate
decides to do, but we might want to exert more control over this, for example
by using inline tables or by squashing more things into strings (like I did
for `Source` and `Hash`; see the short sketch below). I think the main
trade-off here is that table arrays are somewhat difficult to read
(especially without indentation), whereas squashing things down into a more
condensed format potentially makes future-compatible additions harder.

I also went lighter on the documentation here than I normally would,
primarily because this code is going to go through some evolution and I
didn't want to spend too much time documenting something that is likely to
change.

Finally, here's an example of the lock file format in TOML for the `anyio`
dependency, preceded by a small hand-written sketch of the `Source`/`Hash`
encoding trade-off mentioned above.
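To make that trade-off concrete, here is a small hand-written sketch (not
output generated by this change) of the same distribution encoded two ways:
first with the squashed-string `Source` encoding this change emits
(`{kind}+{url}`; `Hash` is similarly squashed into `{algorithm}:{digest}`),
and then with a hypothetical inline-table alternative whose `kind` and `url`
keys are illustrative only:

```toml
# Squashed-string encoding (what this change currently emits).
[[distribution]]
name = "idna"
version = "3.7"
source = "registry+https://pypi.org/simple"

# A hypothetical inline-table alternative (not emitted by this change; the
# `kind` and `url` keys here are illustrative only).
[[distribution]]
name = "idna"
version = "3.7"
source = { kind = "registry", url = "https://pypi.org/simple" }
```

The string form keeps each entry compact and easy to scan, while the
structured form is more verbose but leaves more room for adding fields
compatibly later, which is the trade-off described above.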
I generated it with the following command:

```
cargo run -p uv -- pip compile -p3.10 ~/astral/tmp/reqs/anyio.in --unstable-uv-lock-file
```

And that writes out a `uv.lock` file:

```toml
version = 1

[[distribution]]
name = "anyio"
version = "4.3.0"
source = "registry+https://pypi.org/simple"

[[distribution.wheel]]
url = "https://files.pythonhosted.org/packages/14/fd/2f20c40b45e4fb4324834aea24bd4afdf1143390242c0b33774da0e2e34f/anyio-4.3.0-py3-none-any.whl"
hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"

[[distribution.dependencies]]
name = "exceptiongroup"
version = "1.2.1"
source = "registry+https://pypi.org/simple"

[[distribution.dependencies]]
name = "idna"
version = "3.7"
source = "registry+https://pypi.org/simple"

[[distribution.dependencies]]
name = "sniffio"
version = "1.3.1"
source = "registry+https://pypi.org/simple"

[[distribution.dependencies]]
name = "typing-extensions"
version = "4.11.0"
source = "registry+https://pypi.org/simple"

[[distribution]]
name = "exceptiongroup"
version = "1.2.1"
source = "registry+https://pypi.org/simple"

[[distribution.wheel]]
url = "https://files.pythonhosted.org/packages/01/90/79fe92dd413a9cab314ef5c591b5aa9b9ba787ae4cadab75055b0ae00b33/exceptiongroup-1.2.1-py3-none-any.whl"
hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"

[[distribution]]
name = "idna"
version = "3.7"
source = "registry+https://pypi.org/simple"

[[distribution.wheel]]
url = "https://files.pythonhosted.org/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl"
hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"

[[distribution]]
name = "sniffio"
version = "1.3.1"
source = "registry+https://pypi.org/simple"

[[distribution.wheel]]
url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl"
hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"

[[distribution]]
name = "typing-extensions"
version = "4.11.0"
source = "registry+https://pypi.org/simple"

[[distribution.wheel]]
url = "https://files.pythonhosted.org/packages/01/f3/936e209267d6ef7510322191003885de524fc48d1b43269810cd589ceaf5/typing_extensions-4.11.0-py3-none-any.whl"
hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"
```
---
 Cargo.lock                            |   1 +
 crates/distribution-types/src/file.rs |  98 ++++
 crates/uv-resolver/src/lib.rs         |   2 +
 crates/uv-resolver/src/lock.rs        | 753 ++++++++++++++++++++++++++
 crates/uv-resolver/src/resolution.rs  |  16 +
 crates/uv/Cargo.toml                  |   1 +
 crates/uv/src/cli.rs                  |   6 +
 crates/uv/src/commands/pip_compile.rs |   8 +
 crates/uv/src/main.rs                 |   1 +
 crates/uv/src/settings.rs             |   4 +
 10 files changed, 890 insertions(+)
 create mode 100644 crates/uv-resolver/src/lock.rs

diff --git a/Cargo.lock b/Cargo.lock
index 519885d01150..9f13b43670f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4457,6 +4457,7 @@ dependencies = [
  "thiserror",
  "tikv-jemallocator",
  "tokio",
+ "toml",
  "tracing",
  "tracing-durations-export",
  "tracing-subscriber",
diff --git a/crates/distribution-types/src/file.rs b/crates/distribution-types/src/file.rs
index 05777daeecfb..4021ad2f98e6 100644
--- a/crates/distribution-types/src/file.rs
+++ b/crates/distribution-types/src/file.rs
@@ -82,6 +82,52 @@ pub enum FileLocation {
     Path(#[with(rkyv::with::AsString)] PathBuf),
 }
 
+impl FileLocation {
+    /// Convert this location to a URL.
+    ///
+    /// A relative URL has its base joined to the path.
An absolute URL is + /// parsed as-is. And a path location is turned into a URL via the `file` + /// protocol. + /// + /// # Errors + /// + /// This returns an error if any of the URL parsing fails, or if, for + /// example, the location is a path and the path isn't valid UTF-8. + /// (Because URLs must be valid UTF-8.) + pub fn to_url(&self) -> Result { + match *self { + FileLocation::RelativeUrl(ref base, ref path) => { + let base_url = Url::parse(base).map_err(|err| ToUrlError::InvalidBase { + base: base.clone(), + err, + })?; + let joined = base_url.join(path).map_err(|err| ToUrlError::InvalidJoin { + base: base.clone(), + path: path.clone(), + err, + })?; + Ok(joined) + } + FileLocation::AbsoluteUrl(ref absolute) => { + let url = Url::parse(absolute).map_err(|err| ToUrlError::InvalidAbsolute { + absolute: absolute.clone(), + err, + })?; + Ok(url) + } + FileLocation::Path(ref path) => { + let path = path + .to_str() + .ok_or_else(|| ToUrlError::PathNotUtf8 { path: path.clone() })?; + let url = Url::from_file_path(path).map_err(|()| ToUrlError::InvalidPath { + path: path.to_string(), + })?; + Ok(url) + } + } + } +} + impl Display for FileLocation { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -91,3 +137,55 @@ impl Display for FileLocation { } } } + +/// An error that occurs when a `FileLocation` is not a valid URL. +#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)] +pub enum ToUrlError { + /// An error that occurs when the base URL in `FileLocation::Relative` + /// could not be parsed as a valid URL. + #[error("could not parse base URL `{base}` as a valid URL")] + InvalidBase { + /// The base URL that could not be parsed as a valid URL. + base: String, + /// The underlying URL parse error. + #[source] + err: url::ParseError, + }, + /// An error that occurs when the base URL could not be joined with + /// the relative path in a `FileLocation::Relative`. + #[error("could not join base URL `{base}` to relative path `{path}`")] + InvalidJoin { + /// The base URL that could not be parsed as a valid URL. + base: String, + /// The relative path segment. + path: String, + /// The underlying URL parse error. + #[source] + err: url::ParseError, + }, + /// An error that occurs when the absolute URL in `FileLocation::Absolute` + /// could not be parsed as a valid URL. + #[error("could not parse absolute URL `{absolute}` as a valid URL")] + InvalidAbsolute { + /// The absolute URL that could not be parsed as a valid URL. + absolute: String, + /// The underlying URL parse error. + #[source] + err: url::ParseError, + }, + /// An error that occurs when the file path in `FileLocation::Path` is + /// not valid UTF-8. We need paths to be valid UTF-8 to be transformed + /// into URLs, which must also be UTF-8. + #[error("could not build URL from file path `{path}` because it is not valid UTF-8")] + PathNotUtf8 { + /// The original path that was not valid UTF-8. + path: PathBuf, + }, + /// An error that occurs when the file URL created from a file path is not + /// a valid URL. + #[error("could not parse file path `{path}` as a valid URL")] + InvalidPath { + /// The file path URL that could not be parsed as a valid URL. 
+ path: String, + }, +} diff --git a/crates/uv-resolver/src/lib.rs b/crates/uv-resolver/src/lib.rs index c695c9e4dde4..2e6127f8ab5b 100644 --- a/crates/uv-resolver/src/lib.rs +++ b/crates/uv-resolver/src/lib.rs @@ -3,6 +3,7 @@ pub use error::ResolveError; pub use exclude_newer::ExcludeNewer; pub use exclusions::Exclusions; pub use flat_index::FlatIndex; +pub use lock::{Lock, LockError}; pub use manifest::Manifest; pub use options::{Options, OptionsBuilder}; pub use preferences::{Preference, PreferenceError}; @@ -28,6 +29,7 @@ mod error; mod exclude_newer; mod exclusions; mod flat_index; +mod lock; mod manifest; mod options; mod pins; diff --git a/crates/uv-resolver/src/lock.rs b/crates/uv-resolver/src/lock.rs new file mode 100644 index 000000000000..80695a508b32 --- /dev/null +++ b/crates/uv-resolver/src/lock.rs @@ -0,0 +1,753 @@ +// Temporarily allowed because this module is still in a state of flux +// as we build out universal locking. +#![allow(dead_code, unreachable_code)] + +use distribution_types::{ + BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, Dist, DistributionMetadata, GitSourceDist, + IndexUrl, Name, PathBuiltDist, PathSourceDist, RegistryBuiltDist, RegistrySourceDist, + ResolvedDist, ToUrlError, VersionOrUrl, +}; +use pep440_rs::Version; +use pypi_types::HashDigest; +use url::Url; + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +pub struct Lock { + version: u32, + #[serde(rename = "distribution")] + distributions: Vec, +} + +impl Lock { + pub(crate) fn new(mut distributions: Vec) -> Result { + for dist in &mut distributions { + dist.dependencies.sort(); + for windows in dist.dependencies.windows(2) { + let (dep1, dep2) = (&windows[0], &windows[1]); + if dep1.id == dep2.id { + return Err(LockError::duplicate_dependency( + dist.id.clone(), + dep1.id.clone(), + )); + } + } + } + distributions.sort_by(|dist1, dist2| dist1.id.cmp(&dist2.id)); + for window in distributions.windows(2) { + let (dist1, dist2) = (&window[0], &window[1]); + if dist1.id == dist2.id { + return Err(LockError::duplicate_distribution(dist1.id.clone())); + } + } + Ok(Lock { + version: 1, + distributions, + }) + } +} + +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +pub(crate) struct Distribution { + #[serde(flatten)] + pub(crate) id: DistributionId, + pub(crate) marker: Option, + pub(crate) sourcedist: Option, + #[serde(rename = "wheel", skip_serializing_if = "Vec::is_empty")] + pub(crate) wheels: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub(crate) dependencies: Vec, +} + +impl Distribution { + pub(crate) fn from_resolved_dist( + resolved_dist: &ResolvedDist, + ) -> Result { + let id = DistributionId::from_resolved_dist(resolved_dist); + let mut sourcedist = None; + let mut wheels = vec![]; + if let Some(wheel) = Wheel::from_resolved_dist(resolved_dist)? { + wheels.push(wheel); + } else if let Some(sdist) = SourceDist::from_resolved_dist(resolved_dist)? { + sourcedist = Some(sdist); + } + Ok(Distribution { + id, + // TODO: Refactoring is needed to get the marker expressions for a + // particular resolved dist to this point. 
+ marker: None, + sourcedist, + wheels, + dependencies: vec![], + }) + } + + pub(crate) fn add_dependency(&mut self, resolved_dist: &ResolvedDist) { + self.dependencies + .push(Dependency::from_resolved_dist(resolved_dist)); + } +} + +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize)] +pub(crate) struct DistributionId { + pub(crate) name: String, + pub(crate) version: Version, + pub(crate) source: Source, +} + +impl DistributionId { + fn from_resolved_dist(resolved_dist: &ResolvedDist) -> DistributionId { + let name = resolved_dist.name().to_string(); + let version = match resolved_dist.version_or_url() { + VersionOrUrl::Version(v) => v.clone(), + // TODO: We need a way to thread the version number for these + // cases down into this routine. The version number isn't yet in a + // `ResolutionGraph`, so this will require a bit of refactoring. + VersionOrUrl::Url(_) => todo!(), + }; + let source = Source::from_resolved_dist(resolved_dist); + DistributionId { + name, + version, + source, + } + } +} + +impl std::fmt::Display for DistributionId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} {} {}", self.name, self.version, self.source) + } +} + +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +pub(crate) struct Source { + kind: SourceKind, + url: Url, +} + +impl Source { + fn from_resolved_dist(resolved_dist: &ResolvedDist) -> Source { + match *resolved_dist { + // TODO: Do we want to try to lock already-installed distributions? + // Or should we return an error? + ResolvedDist::Installed(_) => todo!(), + ResolvedDist::Installable(ref dist) => Source::from_dist(dist), + } + } + + fn from_dist(dist: &Dist) -> Source { + match *dist { + Dist::Built(ref built_dist) => Source::from_built_dist(built_dist), + Dist::Source(ref source_dist) => Source::from_source_dist(source_dist), + } + } + + fn from_built_dist(built_dist: &BuiltDist) -> Source { + match *built_dist { + BuiltDist::Registry(ref reg_dist) => Source::from_registry_built_dist(reg_dist), + BuiltDist::DirectUrl(ref direct_dist) => Source::from_direct_built_dist(direct_dist), + BuiltDist::Path(ref path_dist) => Source::from_path_built_dist(path_dist), + } + } + + fn from_source_dist(source_dist: &distribution_types::SourceDist) -> Source { + match *source_dist { + distribution_types::SourceDist::Registry(ref reg_dist) => { + Source::from_registry_source_dist(reg_dist) + } + distribution_types::SourceDist::DirectUrl(ref direct_dist) => { + Source::from_direct_source_dist(direct_dist) + } + distribution_types::SourceDist::Git(ref git_dist) => Source::from_git_dist(git_dist), + distribution_types::SourceDist::Path(ref path_dist) => { + Source::from_path_source_dist(path_dist) + } + } + } + + fn from_registry_built_dist(reg_dist: &RegistryBuiltDist) -> Source { + Source::from_index_url(®_dist.index) + } + + fn from_registry_source_dist(reg_dist: &RegistrySourceDist) -> Source { + Source::from_index_url(®_dist.index) + } + + fn from_direct_built_dist(direct_dist: &DirectUrlBuiltDist) -> Source { + Source { + kind: SourceKind::Direct, + url: direct_dist.url.to_url(), + } + } + + fn from_direct_source_dist(direct_dist: &DirectUrlSourceDist) -> Source { + Source { + kind: SourceKind::Direct, + url: direct_dist.url.to_url(), + } + } + + fn from_path_built_dist(path_dist: &PathBuiltDist) -> Source { + Source { + kind: SourceKind::Path, + url: path_dist.url.to_url(), + } + } + + fn from_path_source_dist(path_dist: &PathSourceDist) -> Source { + Source { + kind: 
SourceKind::Path, + url: path_dist.url.to_url(), + } + } + + fn from_index_url(index_url: &IndexUrl) -> Source { + match *index_url { + IndexUrl::Pypi(ref verbatim_url) => Source { + kind: SourceKind::Registry, + url: verbatim_url.to_url(), + }, + IndexUrl::Url(ref verbatim_url) => Source { + kind: SourceKind::Registry, + url: verbatim_url.to_url(), + }, + IndexUrl::Path(ref verbatim_url) => Source { + kind: SourceKind::Path, + url: verbatim_url.to_url(), + }, + } + } + + fn from_git_dist(git_dist: &GitSourceDist) -> Source { + // FIXME: Fill in the git revision details here. They aren't in + // `GitSourceDist`, so this will likely need some refactoring. + Source { + kind: SourceKind::Git(GitSource { + precise: None, + kind: GitSourceKind::DefaultBranch, + }), + url: git_dist.url.to_url(), + } + } +} + +impl std::str::FromStr for Source { + type Err = SourceParseError; + + fn from_str(s: &str) -> Result { + let (kind, url) = s + .split_once('+') + .ok_or_else(|| SourceParseError::no_plus(s))?; + let mut url = Url::parse(url).map_err(|err| SourceParseError::invalid_url(s, err))?; + match kind { + "registry" => Ok(Source { + kind: SourceKind::Registry, + url, + }), + "git" => Ok(Source { + kind: SourceKind::Git(GitSource::from_url(&mut url)), + url, + }), + "direct" => Ok(Source { + kind: SourceKind::Direct, + url, + }), + "path" => Ok(Source { + kind: SourceKind::Path, + url, + }), + name => Err(SourceParseError::unrecognized_source_name(s, name)), + } + } +} + +impl std::fmt::Display for Source { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}+{}", self.kind.name(), self.url) + } +} + +impl serde::Serialize for Source { + fn serialize(&self, s: S) -> Result + where + S: serde::ser::Serializer, + { + s.collect_str(self) + } +} + +impl<'de> serde::Deserialize<'de> for Source { + fn deserialize(d: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + let string = String::deserialize(d)?; + string.parse().map_err(serde::de::Error::custom) + } +} + +/// NOTE: Care should be taken when adding variants to this enum. Namely, new +/// variants should be added without changing the relative ordering of other +/// variants. Otherwise, this could cause the lock file to have a different +/// canonical ordering of distributions. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "kebab-case")] +pub(crate) enum SourceKind { + Registry, + Git(GitSource), + Direct, + Path, +} + +impl SourceKind { + fn name(&self) -> &str { + match *self { + SourceKind::Registry => "registry", + SourceKind::Git(_) => "git", + SourceKind::Direct => "direct", + SourceKind::Path => "path", + } + } +} + +/// NOTE: Care should be taken when adding variants to this enum. Namely, new +/// variants should be added without changing the relative ordering of other +/// variants. Otherwise, this could cause the lock file to have a different +/// canonical ordering of distributions. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize)] +pub(crate) struct GitSource { + precise: Option, + kind: GitSourceKind, +} + +impl GitSource { + /// Extracts a git source reference from the query pairs and the hash + /// fragment in the given URL. + /// + /// This also removes the query pairs and hash fragment from the given + /// URL in place. 
+ fn from_url(url: &mut Url) -> GitSource { + let mut kind = GitSourceKind::DefaultBranch; + for (key, val) in url.query_pairs() { + kind = match &*key { + "tag" => GitSourceKind::Tag(val.into_owned()), + "branch" => GitSourceKind::Branch(val.into_owned()), + "rev" => GitSourceKind::Rev(val.into_owned()), + _ => continue, + }; + } + let precise = url.fragment().map(ToString::to_string); + url.set_query(None); + url.set_fragment(None); + GitSource { precise, kind } + } +} + +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize)] +enum GitSourceKind { + Tag(String), + Branch(String), + Rev(String), + DefaultBranch, +} + +/// Inspired by: +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +pub(crate) struct SourceDist { + /// A URL or file path (via `file://`) where the source dist that was + /// locked against was found. The location does not need to exist in the + /// future, so this should be treated as only a hint to where to look + /// and/or recording where the source dist file originally came from. + url: Url, + /// A hash of the source distribution. + hash: Hash, +} + +impl SourceDist { + fn from_resolved_dist(resolved_dist: &ResolvedDist) -> Result, LockError> { + match *resolved_dist { + // TODO: Do we want to try to lock already-installed distributions? + // Or should we return an error? + ResolvedDist::Installed(_) => todo!(), + ResolvedDist::Installable(ref dist) => SourceDist::from_dist(dist), + } + } + + fn from_dist(dist: &Dist) -> Result, LockError> { + match *dist { + Dist::Built(_) => Ok(None), + Dist::Source(ref source_dist) => SourceDist::from_source_dist(source_dist).map(Some), + } + } + + fn from_source_dist( + source_dist: &distribution_types::SourceDist, + ) -> Result { + match *source_dist { + distribution_types::SourceDist::Registry(ref reg_dist) => { + SourceDist::from_registry_dist(reg_dist) + } + distribution_types::SourceDist::DirectUrl(ref direct_dist) => { + Ok(SourceDist::from_direct_dist(direct_dist)) + } + distribution_types::SourceDist::Git(ref git_dist) => { + Ok(SourceDist::from_git_dist(git_dist)) + } + distribution_types::SourceDist::Path(ref path_dist) => { + Ok(SourceDist::from_path_dist(path_dist)) + } + } + } + + fn from_registry_dist(reg_dist: &RegistrySourceDist) -> Result { + // FIXME: Is it guaranteed that there is at least one hash? + // If not, we probably need to make this fallible. + let url = reg_dist + .file + .url + .to_url() + .map_err(LockError::invalid_file_url)?; + let hash = Hash::from(reg_dist.file.hashes[0].clone()); + Ok(SourceDist { url, hash }) + } + + fn from_direct_dist(direct_dist: &DirectUrlSourceDist) -> SourceDist { + SourceDist { + url: direct_dist.url.to_url(), + // TODO: We want a hash for the artifact at the URL. + hash: todo!(), + } + } + + fn from_git_dist(git_dist: &GitSourceDist) -> SourceDist { + SourceDist { + url: git_dist.url.to_url(), + // TODO: We want a hash for the artifact at the URL. + hash: todo!(), + } + } + + fn from_path_dist(path_dist: &PathSourceDist) -> SourceDist { + SourceDist { + url: path_dist.url.to_url(), + // TODO: We want a hash for the artifact at the URL. + hash: todo!(), + } + } +} + +/// Inspired by: +#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)] +pub(crate) struct Wheel { + /// A URL or file path (via `file://`) where the wheel that was locked + /// against was found. 
The location does not need to exist in the future, + /// so this should be treated as only a hint to where to look and/or + /// recording where the wheel file originally came from. + url: Url, + /// A hash of the source distribution. + hash: Hash, + // THOUGHT: Would it be better to include a more structured representation + // of the wheel's filename in the lock file itself? e.g., All of the wheel + // tags. This would avoid needing to parse the wheel tags out of the URL, + // which is a potentially fallible operation. But, I think it is nice to + // have just the URL which is more succinct and doesn't result in encoding + // the same information twice. Probably the best thing to do here is to add + // the wheel tags fields here, but don't serialize them. +} + +impl Wheel { + fn from_resolved_dist(resolved_dist: &ResolvedDist) -> Result, LockError> { + match *resolved_dist { + // TODO: Do we want to try to lock already-installed distributions? + // Or should we return an error? + ResolvedDist::Installed(_) => todo!(), + ResolvedDist::Installable(ref dist) => Wheel::from_dist(dist), + } + } + + fn from_dist(dist: &Dist) -> Result, LockError> { + match *dist { + Dist::Built(ref built_dist) => Wheel::from_built_dist(built_dist).map(Some), + Dist::Source(_) => Ok(None), + } + } + + fn from_built_dist(built_dist: &BuiltDist) -> Result { + match *built_dist { + BuiltDist::Registry(ref reg_dist) => Wheel::from_registry_dist(reg_dist), + BuiltDist::DirectUrl(ref direct_dist) => Ok(Wheel::from_direct_dist(direct_dist)), + BuiltDist::Path(ref path_dist) => Ok(Wheel::from_path_dist(path_dist)), + } + } + + fn from_registry_dist(reg_dist: &RegistryBuiltDist) -> Result { + // FIXME: Is it guaranteed that there is at least one hash? + // If not, we probably need to make this fallible. + let url = reg_dist + .file + .url + .to_url() + .map_err(LockError::invalid_file_url)?; + let hash = Hash::from(reg_dist.file.hashes[0].clone()); + Ok(Wheel { url, hash }) + } + + fn from_direct_dist(direct_dist: &DirectUrlBuiltDist) -> Wheel { + Wheel { + url: direct_dist.url.to_url(), + // TODO: We want a hash for the artifact at the URL. + hash: todo!(), + } + } + + fn from_path_dist(path_dist: &PathBuiltDist) -> Wheel { + Wheel { + url: path_dist.url.to_url(), + // TODO: We want a hash for the artifact at the URL. + hash: todo!(), + } + } +} + +/// A single dependency of a distribution in a lock file. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize)] +pub(crate) struct Dependency { + #[serde(flatten)] + id: DistributionId, +} + +impl Dependency { + fn from_resolved_dist(resolved_dist: &ResolvedDist) -> Dependency { + let id = DistributionId::from_resolved_dist(resolved_dist); + Dependency { id } + } +} + +/// A single hash for a distribution artifact in a lock file. +/// +/// A hash is encoded as a single TOML string in the format +/// `{algorithm}:{digest}`. 
+#[derive(Clone, Debug)] +pub(crate) struct Hash(HashDigest); + +impl From for Hash { + fn from(hd: HashDigest) -> Hash { + Hash(hd) + } +} + +impl std::str::FromStr for Hash { + type Err = HashParseError; + + fn from_str(s: &str) -> Result { + let (algorithm, digest) = s.split_once(':').ok_or(HashParseError( + "expected '{algorithm}:{digest}', but found no ':' in hash digest", + ))?; + let algorithm = algorithm + .parse() + .map_err(|_| HashParseError("unrecognized hash algorithm"))?; + Ok(Hash(HashDigest { + algorithm, + digest: digest.into(), + })) + } +} + +impl std::fmt::Display for Hash { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}:{}", self.0.algorithm, self.0.digest) + } +} + +impl serde::Serialize for Hash { + fn serialize(&self, s: S) -> Result + where + S: serde::ser::Serializer, + { + s.collect_str(self) + } +} + +impl<'de> serde::Deserialize<'de> for Hash { + fn deserialize(d: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + let string = String::deserialize(d)?; + string.parse().map_err(serde::de::Error::custom) + } +} + +/// An error that occurs when generating a `Lock` data structure. +/// +/// These errors are sometimes the result of possible programming bugs. +/// For example, if there are two or more duplicative distributions given +/// to `Lock::new`, then an error is returned. It's likely that the fault +/// is with the caller somewhere in such cases. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct LockError { + kind: Box, +} + +impl LockError { + fn duplicate_distribution(id: DistributionId) -> LockError { + let kind = LockErrorKind::DuplicateDistribution { id }; + LockError { + kind: Box::new(kind), + } + } + + fn duplicate_dependency(id: DistributionId, dependency_id: DistributionId) -> LockError { + let kind = LockErrorKind::DuplicateDependency { id, dependency_id }; + LockError { + kind: Box::new(kind), + } + } + + fn invalid_file_url(err: ToUrlError) -> LockError { + let kind = LockErrorKind::InvalidFileUrl { err }; + LockError { + kind: Box::new(kind), + } + } +} + +impl std::error::Error for LockError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match *self.kind { + LockErrorKind::DuplicateDistribution { .. } => None, + LockErrorKind::DuplicateDependency { .. } => None, + LockErrorKind::InvalidFileUrl { ref err } => Some(err), + } + } +} + +impl std::fmt::Display for LockError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self.kind { + LockErrorKind::DuplicateDistribution { ref id } => { + write!(f, "found duplicate distribution {id}") + } + LockErrorKind::DuplicateDependency { + ref id, + ref dependency_id, + } => { + write!( + f, + "for distribution {id}, found duplicate dependency {dependency_id}" + ) + } + LockErrorKind::InvalidFileUrl { .. } => { + write!(f, "failed to parse wheel or source dist URL") + } + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +enum LockErrorKind { + /// An error that occurs when multiple distributions with the same + /// ID were found. + DuplicateDistribution { + /// The ID of the conflicting distributions. + id: DistributionId, + }, + /// An error that occurs when there are multiple dependencies for the + /// same distribution that have identical identifiers. + DuplicateDependency { + /// The ID of the distribution for which a duplicate dependency was + /// found. + id: DistributionId, + /// The ID of the conflicting dependency. 
+ dependency_id: DistributionId, + }, + /// An error that occurs when the URL to a file for a wheel or + /// source dist could not be converted to a structured `url::Url`. + InvalidFileUrl { + /// The underlying error that occurred. This includes the + /// errant URL in its error message. + err: ToUrlError, + }, +} + +/// An error that occurs when a source string could not be parsed. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct SourceParseError { + given: String, + kind: SourceParseErrorKind, +} + +impl SourceParseError { + fn no_plus(given: &str) -> SourceParseError { + let given = given.to_string(); + let kind = SourceParseErrorKind::NoPlus; + SourceParseError { given, kind } + } + + fn unrecognized_source_name(given: &str, name: &str) -> SourceParseError { + let given = given.to_string(); + let kind = SourceParseErrorKind::UnrecognizedSourceName { + name: name.to_string(), + }; + SourceParseError { given, kind } + } + + fn invalid_url(given: &str, err: url::ParseError) -> SourceParseError { + let given = given.to_string(); + let kind = SourceParseErrorKind::InvalidUrl { err }; + SourceParseError { given, kind } + } +} + +impl std::error::Error for SourceParseError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind { + SourceParseErrorKind::NoPlus | SourceParseErrorKind::UnrecognizedSourceName { .. } => { + None + } + SourceParseErrorKind::InvalidUrl { ref err } => Some(err), + } + } +} + +impl std::fmt::Display for SourceParseError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let given = &self.given; + match self.kind { + SourceParseErrorKind::NoPlus => write!(f, "could not find '+' in source `{given}`"), + SourceParseErrorKind::UnrecognizedSourceName { ref name } => { + write!(f, "unrecognized name `{name}` in source `{given}`") + } + SourceParseErrorKind::InvalidUrl { .. } => write!(f, "invalid URL in source `{given}`"), + } + } +} + +/// The kind of error that can occur when parsing a source string. +#[derive(Clone, Debug, Eq, PartialEq)] +enum SourceParseErrorKind { + /// An error that occurs when no '+' could be found. + NoPlus, + /// An error that occurs when the source name was unrecognized. + UnrecognizedSourceName { + /// The unrecognized name. + name: String, + }, + /// An error that occurs when the URL in the source is invalid. + InvalidUrl { + /// The URL parse error. + err: url::ParseError, + }, +} + +/// An error that occurs when a hash digest could not be parsed. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct HashParseError(&'static str); + +impl std::error::Error for HashParseError {} + +impl std::fmt::Display for HashParseError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/crates/uv-resolver/src/resolution.rs b/crates/uv-resolver/src/resolution.rs index 614697b8d53d..bd1e59e30595 100644 --- a/crates/uv-resolver/src/resolution.rs +++ b/crates/uv-resolver/src/resolution.rs @@ -25,6 +25,7 @@ use uv_normalize::{ExtraName, PackageName}; use crate::dependency_provider::UvDependencyProvider; use crate::editables::Editables; +use crate::lock::{self, Lock, LockError}; use crate::pins::FilePins; use crate::preferences::Preferences; use crate::pubgrub::{PubGrubDistribution, PubGrubPackage}; @@ -500,6 +501,21 @@ impl ResolutionGraph { } MarkerTree::And(conjuncts) } + + pub fn lock(&self) -> Result { + let mut locked_dists = vec![]; + for node_index in self.petgraph.node_indices() { + let dist = &self.petgraph[node_index]; + let mut locked_dist = lock::Distribution::from_resolved_dist(dist)?; + for edge in self.petgraph.neighbors(node_index) { + let dependency_dist = &self.petgraph[edge]; + locked_dist.add_dependency(dependency_dist); + } + locked_dists.push(locked_dist); + } + let lock = Lock::new(locked_dists)?; + Ok(lock) + } } /// A [`std::fmt::Display`] implementation for the resolution graph. diff --git a/crates/uv/Cargo.toml b/crates/uv/Cargo.toml index be4a1e5e3759..13dc5c6b5803 100644 --- a/crates/uv/Cargo.toml +++ b/crates/uv/Cargo.toml @@ -58,6 +58,7 @@ tempfile = { workspace = true } textwrap = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } +toml = { workspace = true } tracing = { workspace = true } tracing-durations-export = { workspace = true, features = ["plot"], optional = true } tracing-subscriber = { workspace = true, features = ["json"] } diff --git a/crates/uv/src/cli.rs b/crates/uv/src/cli.rs index 42d92003f050..1ad9bedb9274 100644 --- a/crates/uv/src/cli.rs +++ b/crates/uv/src/cli.rs @@ -601,6 +601,12 @@ pub(crate) struct PipCompileArgs { #[arg(long, overrides_with("emit_index_annotation"), hide = true)] pub(crate) no_emit_index_annotation: bool, + #[arg(long, overrides_with("no_unstable_uv_lock_file"), hide = true)] + pub(crate) unstable_uv_lock_file: bool, + + #[arg(long, overrides_with("unstable_uv_lock_file"), hide = true)] + pub(crate) no_unstable_uv_lock_file: bool, + #[command(flatten)] pub(crate) compat_args: compat::PipCompileCompatArgs, } diff --git a/crates/uv/src/commands/pip_compile.rs b/crates/uv/src/commands/pip_compile.rs index 434fb7164ef3..0828ab5b15dd 100644 --- a/crates/uv/src/commands/pip_compile.rs +++ b/crates/uv/src/commands/pip_compile.rs @@ -8,6 +8,7 @@ use std::str::FromStr; use anstream::{eprint, AutoStream, StripStream}; use anyhow::{anyhow, Context, Result}; +use fs_err as fs; use itertools::Itertools; use owo_colors::OwoColorize; use tempfile::tempdir_in; @@ -84,6 +85,7 @@ pub(crate) async fn pip_compile( link_mode: LinkMode, python: Option, system: bool, + uv_lock: bool, native_tls: bool, quiet: bool, cache: Cache, @@ -525,6 +527,12 @@ pub(crate) async fn pip_compile( writeln!(writer, "{}", format!("# {relevant_markers}").green())?; } + if uv_lock { + let lock = resolution.lock()?; + let encoded = toml::to_string_pretty(&lock)?; + fs::tokio::write("uv.lock", encoded.as_bytes()).await?; + } + // Write the index locations to the output channel. 
let mut wrote_index = false; diff --git a/crates/uv/src/main.rs b/crates/uv/src/main.rs index 87701452a431..836f2199c1d0 100644 --- a/crates/uv/src/main.rs +++ b/crates/uv/src/main.rs @@ -229,6 +229,7 @@ async fn run() -> Result { args.shared.link_mode, args.shared.python, args.shared.system, + args.uv_lock, globals.native_tls, globals.quiet, cache, diff --git a/crates/uv/src/settings.rs b/crates/uv/src/settings.rs index 745c6767d371..a55d88fe24f2 100644 --- a/crates/uv/src/settings.rs +++ b/crates/uv/src/settings.rs @@ -127,6 +127,7 @@ pub(crate) struct PipCompileSettings { pub(crate) r#override: Vec, pub(crate) refresh: Refresh, pub(crate) upgrade: Upgrade, + pub(crate) uv_lock: bool, // Shared settings. pub(crate) shared: PipSharedSettings, @@ -194,6 +195,8 @@ impl PipCompileSettings { no_emit_marker_expression, emit_index_annotation, no_emit_index_annotation, + unstable_uv_lock_file, + no_unstable_uv_lock_file, compat_args: _, } = args; @@ -207,6 +210,7 @@ impl PipCompileSettings { r#override, refresh: Refresh::from_args(refresh, refresh_package), upgrade: Upgrade::from_args(upgrade, upgrade_package), + uv_lock: flag(unstable_uv_lock_file, no_unstable_uv_lock_file).unwrap_or(false), // Shared settings. shared: PipSharedSettings::combine(