Diffstat
-rw-r--r-- | src/cargo/sources/registry/download.rs    | 130
-rw-r--r-- | src/cargo/sources/registry/http_remote.rs | 802
-rw-r--r-- | src/cargo/sources/registry/index.rs       | 889
-rw-r--r-- | src/cargo/sources/registry/local.rs       | 149
-rw-r--r-- | src/cargo/sources/registry/mod.rs         | 970
-rw-r--r-- | src/cargo/sources/registry/remote.rs      | 358
6 files changed, 3298 insertions, 0 deletions
diff --git a/src/cargo/sources/registry/download.rs b/src/cargo/sources/registry/download.rs new file mode 100644 index 0000000..723c55f --- /dev/null +++ b/src/cargo/sources/registry/download.rs @@ -0,0 +1,130 @@ +use anyhow::Context; +use cargo_util::Sha256; + +use crate::core::PackageId; +use crate::sources::registry::make_dep_prefix; +use crate::sources::registry::MaybeLock; +use crate::sources::registry::{ + RegistryConfig, CHECKSUM_TEMPLATE, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE, PREFIX_TEMPLATE, + VERSION_TEMPLATE, +}; +use crate::util::auth; +use crate::util::errors::CargoResult; +use crate::util::{Config, Filesystem}; +use std::fmt::Write as FmtWrite; +use std::fs::{self, File, OpenOptions}; +use std::io::prelude::*; +use std::io::SeekFrom; +use std::str; + +pub(super) fn filename(pkg: PackageId) -> String { + format!("{}-{}.crate", pkg.name(), pkg.version()) +} + +pub(super) fn download( + cache_path: &Filesystem, + config: &Config, + pkg: PackageId, + checksum: &str, + registry_config: RegistryConfig, +) -> CargoResult<MaybeLock> { + let filename = filename(pkg); + let path = cache_path.join(&filename); + let path = config.assert_package_cache_locked(&path); + + // Attempt to open a read-only copy first to avoid an exclusive write + // lock and also work with read-only filesystems. Note that we check the + // length of the file like below to handle interrupted downloads. + // + // If this fails then we fall through to the exclusive path where we may + // have to redownload the file. + if let Ok(dst) = File::open(path) { + let meta = dst.metadata()?; + if meta.len() > 0 { + return Ok(MaybeLock::Ready(dst)); + } + } + + let mut url = registry_config.dl; + if !url.contains(CRATE_TEMPLATE) + && !url.contains(VERSION_TEMPLATE) + && !url.contains(PREFIX_TEMPLATE) + && !url.contains(LOWER_PREFIX_TEMPLATE) + && !url.contains(CHECKSUM_TEMPLATE) + { + // Original format before customizing the download URL was supported. + write!( + url, + "/{}/{}/download", + pkg.name(), + pkg.version().to_string() + ) + .unwrap(); + } else { + let prefix = make_dep_prefix(&*pkg.name()); + url = url + .replace(CRATE_TEMPLATE, &*pkg.name()) + .replace(VERSION_TEMPLATE, &pkg.version().to_string()) + .replace(PREFIX_TEMPLATE, &prefix) + .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase()) + .replace(CHECKSUM_TEMPLATE, checksum); + } + + let authorization = if registry_config.auth_required { + Some(auth::auth_token(config, &pkg.source_id(), None, None)?) 
+ } else { + None + }; + + Ok(MaybeLock::Download { + url, + descriptor: pkg.to_string(), + authorization: authorization, + }) +} + +pub(super) fn finish_download( + cache_path: &Filesystem, + config: &Config, + pkg: PackageId, + checksum: &str, + data: &[u8], +) -> CargoResult<File> { + // Verify what we just downloaded + let actual = Sha256::new().update(data).finish_hex(); + if actual != checksum { + anyhow::bail!("failed to verify the checksum of `{}`", pkg) + } + + let filename = filename(pkg); + cache_path.create_dir()?; + let path = cache_path.join(&filename); + let path = config.assert_package_cache_locked(&path); + let mut dst = OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(&path) + .with_context(|| format!("failed to open `{}`", path.display()))?; + let meta = dst.metadata()?; + if meta.len() > 0 { + return Ok(dst); + } + + dst.write_all(data)?; + dst.seek(SeekFrom::Start(0))?; + Ok(dst) +} + +pub(super) fn is_crate_downloaded( + cache_path: &Filesystem, + config: &Config, + pkg: PackageId, +) -> bool { + let path = cache_path.join(filename(pkg)); + let path = config.assert_package_cache_locked(&path); + if let Ok(meta) = fs::metadata(path) { + return meta.len() > 0; + } + false +} diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs new file mode 100644 index 0000000..c3bcadf --- /dev/null +++ b/src/cargo/sources/registry/http_remote.rs @@ -0,0 +1,802 @@ +//! Access to a HTTP-based crate registry. +//! +//! See [`HttpRegistry`] for details. + +use crate::core::{PackageId, SourceId}; +use crate::ops::{self}; +use crate::sources::registry::download; +use crate::sources::registry::MaybeLock; +use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData}; +use crate::util::errors::{CargoResult, HttpNotSuccessful}; +use crate::util::network::Retry; +use crate::util::{auth, Config, Filesystem, IntoUrl, Progress, ProgressStyle}; +use anyhow::Context; +use cargo_util::paths; +use curl::easy::{HttpVersion, List}; +use curl::multi::{EasyHandle, Multi}; +use log::{debug, trace, warn}; +use std::cell::RefCell; +use std::collections::{HashMap, HashSet}; +use std::fs::{self, File}; +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; +use std::str; +use std::task::{ready, Poll}; +use std::time::Duration; +use url::Url; + +// HTTP headers +const ETAG: &'static str = "etag"; +const LAST_MODIFIED: &'static str = "last-modified"; +const WWW_AUTHENTICATE: &'static str = "www-authenticate"; +const IF_NONE_MATCH: &'static str = "if-none-match"; +const IF_MODIFIED_SINCE: &'static str = "if-modified-since"; + +const UNKNOWN: &'static str = "Unknown"; + +/// A registry served by the HTTP-based registry API. +/// +/// This type is primarily accessed through the [`RegistryData`] trait. +/// +/// `HttpRegistry` implements the HTTP-based registry API outlined in [RFC 2789]. Read the RFC for +/// the complete protocol, but _roughly_ the implementation loads each index file (e.g., +/// config.json or re/ge/regex) from an HTTP service rather than from a locally cloned git +/// repository. The remote service can more or less be a static file server that simply serves the +/// contents of the origin git repository. +/// +/// Implemented naively, this leads to a significant amount of network traffic, as a lookup of any +/// index file would need to check with the remote backend if the index file has changed. 
This +/// cost is somewhat mitigated by the use of HTTP conditional fetches (`If-Modified-Since` and +/// `If-None-Match` for `ETag`s) which can be efficiently handled by HTTP/2. +/// +/// [RFC 2789]: https://github.com/rust-lang/rfcs/pull/2789 +pub struct HttpRegistry<'cfg> { + index_path: Filesystem, + cache_path: Filesystem, + source_id: SourceId, + config: &'cfg Config, + + /// Store the server URL without the protocol prefix (sparse+) + url: Url, + + /// HTTP multi-handle for asynchronous/parallel requests. + multi: Multi, + + /// Has the client requested a cache update? + /// + /// Only if they have do we double-check the freshness of each locally-stored index file. + requested_update: bool, + + /// State for currently pending index downloads. + downloads: Downloads<'cfg>, + + /// Does the config say that we can use HTTP multiplexing? + multiplexing: bool, + + /// What paths have we already fetched since the last index update? + /// + /// We do not need to double-check any of these index files since we have already done so. + fresh: HashSet<PathBuf>, + + /// Have we started to download any index files? + fetch_started: bool, + + /// Cached registry configuration. + registry_config: Option<RegistryConfig>, + + /// Should we include the authorization header? + auth_required: bool, + + /// Url to get a token for the registry. + login_url: Option<Url>, +} + +/// Helper for downloading crates. +pub struct Downloads<'cfg> { + /// When a download is started, it is added to this map. The key is a + /// "token" (see `Download::token`). It is removed once the download is + /// finished. + pending: HashMap<usize, (Download<'cfg>, EasyHandle)>, + /// Set of paths currently being downloaded. + /// This should stay in sync with `pending`. + pending_paths: HashSet<PathBuf>, + /// The final result of each download. + results: HashMap<PathBuf, CargoResult<CompletedDownload>>, + /// The next ID to use for creating a token (see `Download::token`). + next: usize, + /// Progress bar. + progress: RefCell<Option<Progress<'cfg>>>, + /// Number of downloads that have successfully finished. + downloads_finished: usize, + /// Number of times the caller has requested blocking. This is used for + /// an estimate of progress. + blocking_calls: usize, +} + +struct Download<'cfg> { + /// The token for this download, used as the key of the `Downloads::pending` map + /// and stored in `EasyHandle` as well. + token: usize, + + /// The path of the package that we're downloading. + path: PathBuf, + + /// Actual downloaded data, updated throughout the lifetime of this download. + data: RefCell<Vec<u8>>, + + /// HTTP headers. + header_map: RefCell<Headers>, + + /// Logic used to track retrying this download if it's a spurious failure. + retry: Retry<'cfg>, +} + +#[derive(Default)] +struct Headers { + last_modified: Option<String>, + etag: Option<String>, + www_authenticate: Vec<String>, +} + +enum StatusCode { + Success, + NotModified, + NotFound, + Unauthorized, +} + +struct CompletedDownload { + response_code: StatusCode, + data: Vec<u8>, + header_map: Headers, +} + +impl<'cfg> HttpRegistry<'cfg> { + pub fn new( + source_id: SourceId, + config: &'cfg Config, + name: &str, + ) -> CargoResult<HttpRegistry<'cfg>> { + let url = source_id.url().as_str(); + // Ensure the url ends with a slash so we can concatenate paths. 
+ if !url.ends_with('/') { + anyhow::bail!("sparse registry url must end in a slash `/`: {url}") + } + assert!(source_id.is_sparse()); + let url = url + .strip_prefix("sparse+") + .expect("sparse registry needs sparse+ prefix") + .into_url() + .expect("a url with the sparse+ stripped should still be valid"); + + Ok(HttpRegistry { + index_path: config.registry_index_path().join(name), + cache_path: config.registry_cache_path().join(name), + source_id, + config, + url, + multi: Multi::new(), + multiplexing: false, + downloads: Downloads { + next: 0, + pending: HashMap::new(), + pending_paths: HashSet::new(), + results: HashMap::new(), + progress: RefCell::new(Some(Progress::with_style( + "Fetch", + ProgressStyle::Indeterminate, + config, + ))), + downloads_finished: 0, + blocking_calls: 0, + }, + fresh: HashSet::new(), + requested_update: false, + fetch_started: false, + registry_config: None, + auth_required: false, + login_url: None, + }) + } + + fn handle_http_header(buf: &[u8]) -> Option<(&str, &str)> { + if buf.is_empty() { + return None; + } + let buf = std::str::from_utf8(buf).ok()?.trim_end(); + // Don't let server sneak extra lines anywhere. + if buf.contains('\n') { + return None; + } + let (tag, value) = buf.split_once(':')?; + let value = value.trim(); + Some((tag, value)) + } + + fn start_fetch(&mut self) -> CargoResult<()> { + if self.fetch_started { + // We only need to run the setup code once. + return Ok(()); + } + self.fetch_started = true; + + // We've enabled the `http2` feature of `curl` in Cargo, so treat + // failures here as fatal as it would indicate a build-time problem. + self.multiplexing = self.config.http_config()?.multiplexing.unwrap_or(true); + + self.multi + .pipelining(false, self.multiplexing) + .with_context(|| "failed to enable multiplexing/pipelining in curl")?; + + // let's not flood the server with connections + self.multi.set_max_host_connections(2)?; + + self.config + .shell() + .status("Updating", self.source_id.display_index())?; + + Ok(()) + } + + fn handle_completed_downloads(&mut self) -> CargoResult<()> { + assert_eq!( + self.downloads.pending.len(), + self.downloads.pending_paths.len() + ); + + // Collect the results from the Multi handle. 
+ let results = { + let mut results = Vec::new(); + let pending = &mut self.downloads.pending; + self.multi.messages(|msg| { + let token = msg.token().expect("failed to read token"); + let (_, handle) = &pending[&token]; + if let Some(result) = msg.result_for(handle) { + results.push((token, result)); + }; + }); + results + }; + for (token, result) in results { + let (mut download, handle) = self.downloads.pending.remove(&token).unwrap(); + let mut handle = self.multi.remove(handle)?; + let data = download.data.take(); + let url = self.full_url(&download.path); + let result = match download.retry.r#try(|| { + result.with_context(|| format!("failed to download from `{}`", url))?; + let code = handle.response_code()?; + // Keep this list of expected status codes in sync with the codes handled in `load` + let code = match code { + 200 => StatusCode::Success, + 304 => StatusCode::NotModified, + 401 => StatusCode::Unauthorized, + 404 | 410 | 451 => StatusCode::NotFound, + code => { + let url = handle.effective_url()?.unwrap_or(&url); + return Err(HttpNotSuccessful { + code, + url: url.to_owned(), + body: data, + } + .into()); + } + }; + Ok((data, code)) + }) { + Ok(Some((data, code))) => Ok(CompletedDownload { + response_code: code, + data, + header_map: download.header_map.take(), + }), + Ok(None) => { + // retry the operation + let handle = self.multi.add(handle)?; + self.downloads.pending.insert(token, (download, handle)); + continue; + } + Err(e) => Err(e), + }; + + assert!(self.downloads.pending_paths.remove(&download.path)); + self.downloads.results.insert(download.path, result); + self.downloads.downloads_finished += 1; + } + + self.downloads.tick()?; + + Ok(()) + } + + fn full_url(&self, path: &Path) -> String { + // self.url always ends with a slash. + format!("{}{}", self.url, path.display()) + } + + fn is_fresh(&self, path: &Path) -> bool { + if !self.requested_update { + trace!( + "using local {} as user did not request update", + path.display() + ); + true + } else if self.config.cli_unstable().no_index_update { + trace!("using local {} in no_index_update mode", path.display()); + true + } else if self.config.offline() { + trace!("using local {} in offline mode", path.display()); + true + } else if self.fresh.contains(path) { + trace!("using local {} as it was already fetched", path.display()); + true + } else { + debug!("checking freshness of {}", path.display()); + false + } + } + + /// Get the cached registry configuration, if it exists. + fn config_cached(&mut self) -> CargoResult<Option<&RegistryConfig>> { + if self.registry_config.is_some() { + return Ok(self.registry_config.as_ref()); + } + let config_json_path = self + .assert_index_locked(&self.index_path) + .join("config.json"); + match fs::read(&config_json_path) { + Ok(raw_data) => match serde_json::from_slice(&raw_data) { + Ok(json) => { + self.registry_config = Some(json); + } + Err(e) => log::debug!("failed to decode cached config.json: {}", e), + }, + Err(e) => { + if e.kind() != ErrorKind::NotFound { + log::debug!("failed to read config.json cache: {}", e) + } + } + } + Ok(self.registry_config.as_ref()) + } + + /// Get the registry configuration. 
+ fn config(&mut self) -> Poll<CargoResult<&RegistryConfig>> { + debug!("loading config"); + let index_path = self.assert_index_locked(&self.index_path); + let config_json_path = index_path.join("config.json"); + if self.is_fresh(Path::new("config.json")) && self.config_cached()?.is_some() { + return Poll::Ready(Ok(self.registry_config.as_ref().unwrap())); + } + + match ready!(self.load(Path::new(""), Path::new("config.json"), None)?) { + LoadResponse::Data { + raw_data, + index_version: _, + } => { + trace!("config loaded"); + self.registry_config = Some(serde_json::from_slice(&raw_data)?); + if paths::create_dir_all(&config_json_path.parent().unwrap()).is_ok() { + if let Err(e) = fs::write(&config_json_path, &raw_data) { + log::debug!("failed to write config.json cache: {}", e); + } + } + Poll::Ready(Ok(self.registry_config.as_ref().unwrap())) + } + LoadResponse::NotFound => { + Poll::Ready(Err(anyhow::anyhow!("config.json not found in registry"))) + } + LoadResponse::CacheValid => Poll::Ready(Err(crate::util::internal( + "config.json is never stored in the index cache", + ))), + } + } +} + +impl<'cfg> RegistryData for HttpRegistry<'cfg> { + fn prepare(&self) -> CargoResult<()> { + Ok(()) + } + + fn index_path(&self) -> &Filesystem { + &self.index_path + } + + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { + self.config.assert_package_cache_locked(path) + } + + fn is_updated(&self) -> bool { + self.requested_update + } + + fn load( + &mut self, + _root: &Path, + path: &Path, + index_version: Option<&str>, + ) -> Poll<CargoResult<LoadResponse>> { + trace!("load: {}", path.display()); + if let Some(_token) = self.downloads.pending_paths.get(path) { + debug!("dependency is still pending: {}", path.display()); + return Poll::Pending; + } + + if let Some(index_version) = index_version { + trace!( + "local cache of {} is available at version `{}`", + path.display(), + index_version + ); + if self.is_fresh(path) { + return Poll::Ready(Ok(LoadResponse::CacheValid)); + } + } else if self.fresh.contains(path) { + // We have no cached copy of this file, and we already downloaded it. + debug!( + "cache did not contain previously downloaded file {}", + path.display() + ); + return Poll::Ready(Ok(LoadResponse::NotFound)); + } + + if let Some(result) = self.downloads.results.remove(path) { + let result = + result.with_context(|| format!("download of {} failed", path.display()))?; + + assert!( + self.fresh.insert(path.to_path_buf()), + "downloaded the index file `{}` twice", + path.display() + ); + + // The status handled here need to be kept in sync with the codes handled + // in `handle_completed_downloads` + match result.response_code { + StatusCode::Success => { + let response_index_version = if let Some(etag) = result.header_map.etag { + format!("{}: {}", ETAG, etag) + } else if let Some(lm) = result.header_map.last_modified { + format!("{}: {}", LAST_MODIFIED, lm) + } else { + UNKNOWN.to_string() + }; + trace!("index file version: {}", response_index_version); + return Poll::Ready(Ok(LoadResponse::Data { + raw_data: result.data, + index_version: Some(response_index_version), + })); + } + StatusCode::NotModified => { + // Not Modified: the data in the cache is still the latest. + if index_version.is_none() { + return Poll::Ready(Err(anyhow::anyhow!( + "server said not modified (HTTP 304) when no local cache exists" + ))); + } + return Poll::Ready(Ok(LoadResponse::CacheValid)); + } + StatusCode::NotFound => { + // The crate was not found or deleted from the registry. 
+ return Poll::Ready(Ok(LoadResponse::NotFound)); + } + StatusCode::Unauthorized + if !self.auth_required + && path == Path::new("config.json") + && self.config.cli_unstable().registry_auth => + { + debug!("re-attempting request for config.json with authorization included."); + self.fresh.remove(path); + self.auth_required = true; + + // Look for a `www-authenticate` header with the `Cargo` scheme. + for header in &result.header_map.www_authenticate { + for challenge in http_auth::ChallengeParser::new(header) { + match challenge { + Ok(challenge) if challenge.scheme.eq_ignore_ascii_case("Cargo") => { + // Look for the `login_url` parameter. + for (param, value) in challenge.params { + if param.eq_ignore_ascii_case("login_url") { + self.login_url = Some(value.to_unescaped().into_url()?); + } + } + } + Ok(challenge) => { + debug!("ignoring non-Cargo challenge: {}", challenge.scheme) + } + Err(e) => debug!("failed to parse challenge: {}", e), + } + } + } + } + StatusCode::Unauthorized => { + let err = Err(HttpNotSuccessful { + code: 401, + body: result.data, + url: self.full_url(path), + } + .into()); + if self.auth_required { + return Poll::Ready(err.context(auth::AuthorizationError { + sid: self.source_id.clone(), + login_url: self.login_url.clone(), + reason: auth::AuthorizationErrorReason::TokenRejected, + })); + } else { + return Poll::Ready(err); + } + } + } + } + + if path != Path::new("config.json") { + self.auth_required = ready!(self.config()?).auth_required; + } else if !self.auth_required { + // Check if there's a cached config that says auth is required. + // This allows avoiding the initial unauthenticated request to probe. + if let Some(config) = self.config_cached()? { + self.auth_required = config.auth_required; + } + } + + if !self.config.cli_unstable().registry_auth { + self.auth_required = false; + } + + // Looks like we're going to have to do a network request. + self.start_fetch()?; + + let mut handle = ops::http_handle(self.config)?; + let full_url = self.full_url(path); + debug!("fetch {}", full_url); + handle.get(true)?; + handle.url(&full_url)?; + handle.follow_location(true)?; + + // Enable HTTP/2 if possible. + if self.multiplexing { + crate::try_old_curl!(handle.http_version(HttpVersion::V2), "HTTP2"); + } else { + handle.http_version(HttpVersion::V11)?; + } + + // This is an option to `libcurl` which indicates that if there's a + // bunch of parallel requests to the same host they all wait until the + // pipelining status of the host is known. This means that we won't + // initiate dozens of connections to crates.io, but rather only one. + // Once the main one is opened we realized that pipelining is possible + // and multiplexing is possible with static.crates.io. All in all this + // reduces the number of connections done to a more manageable state. + crate::try_old_curl!(handle.pipewait(true), "pipewait"); + + let mut headers = List::new(); + // Include a header to identify the protocol. This allows the server to + // know that Cargo is attempting to use the sparse protocol. + headers.append("cargo-protocol: version=1")?; + headers.append("accept: text/plain")?; + + // If we have a cached copy of the file, include IF_NONE_MATCH or IF_MODIFIED_SINCE header. + if let Some(index_version) = index_version { + if let Some((key, value)) = index_version.split_once(':') { + match key { + ETAG => headers.append(&format!("{}: {}", IF_NONE_MATCH, value.trim()))?, + LAST_MODIFIED => { + headers.append(&format!("{}: {}", IF_MODIFIED_SINCE, value.trim()))? 
+ } + _ => debug!("unexpected index version: {}", index_version), + } + } + } + if self.auth_required { + let authorization = + auth::auth_token(self.config, &self.source_id, self.login_url.as_ref(), None)?; + headers.append(&format!("Authorization: {}", authorization))?; + trace!("including authorization for {}", full_url); + } + handle.http_headers(headers)?; + + // We're going to have a bunch of downloads all happening "at the same time". + // So, we need some way to track what headers/data/responses are for which request. + // We do that through this token. Each request (and associated response) gets one. + let token = self.downloads.next; + self.downloads.next += 1; + debug!("downloading {} as {}", path.display(), token); + assert!( + self.downloads.pending_paths.insert(path.to_path_buf()), + "path queued for download more than once" + ); + + // Each write should go to self.downloads.pending[&token].data. + // Since the write function must be 'static, we access downloads through a thread-local. + // That thread-local is set up in `block_until_ready` when it calls self.multi.perform, + // which is what ultimately calls this method. + handle.write_function(move |buf| { + trace!("{} - {} bytes of data", token, buf.len()); + tls::with(|downloads| { + if let Some(downloads) = downloads { + downloads.pending[&token] + .0 + .data + .borrow_mut() + .extend_from_slice(buf); + } + }); + Ok(buf.len()) + })?; + + // And ditto for the header function. + handle.header_function(move |buf| { + if let Some((tag, value)) = Self::handle_http_header(buf) { + tls::with(|downloads| { + if let Some(downloads) = downloads { + let mut header_map = downloads.pending[&token].0.header_map.borrow_mut(); + match tag.to_ascii_lowercase().as_str() { + LAST_MODIFIED => header_map.last_modified = Some(value.to_string()), + ETAG => header_map.etag = Some(value.to_string()), + WWW_AUTHENTICATE => header_map.www_authenticate.push(value.to_string()), + _ => {} + } + } + }); + } + + true + })?; + + let dl = Download { + token, + path: path.to_path_buf(), + data: RefCell::new(Vec::new()), + header_map: Default::default(), + retry: Retry::new(self.config)?, + }; + + // Finally add the request we've lined up to the pool of requests that cURL manages. + let mut handle = self.multi.add(handle)?; + handle.set_token(token)?; + self.downloads.pending.insert(dl.token, (dl, handle)); + + Poll::Pending + } + + fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>> { + let mut cfg = ready!(self.config()?).clone(); + if !self.config.cli_unstable().registry_auth { + cfg.auth_required = false; + } + Poll::Ready(Ok(Some(cfg))) + } + + fn invalidate_cache(&mut self) { + // Actually updating the index is more or less a no-op for this implementation. + // All it does is ensure that a subsequent load will double-check files with the + // server rather than rely on a locally cached copy of the index files. + debug!("invalidated index cache"); + self.fresh.clear(); + self.requested_update = true; + } + + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> { + let registry_config = loop { + match self.config()? 
{ + Poll::Pending => self.block_until_ready()?, + Poll::Ready(cfg) => break cfg.to_owned(), + } + }; + download::download( + &self.cache_path, + &self.config, + pkg, + checksum, + registry_config, + ) + } + + fn finish_download( + &mut self, + pkg: PackageId, + checksum: &str, + data: &[u8], + ) -> CargoResult<File> { + download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) + } + + fn is_crate_downloaded(&self, pkg: PackageId) -> bool { + download::is_crate_downloaded(&self.cache_path, &self.config, pkg) + } + + fn block_until_ready(&mut self) -> CargoResult<()> { + trace!( + "block_until_ready: {} transfers pending", + self.downloads.pending.len() + ); + self.downloads.blocking_calls += 1; + + loop { + self.handle_completed_downloads()?; + + let remaining_in_multi = tls::set(&self.downloads, || { + self.multi + .perform() + .with_context(|| "failed to perform http requests") + })?; + trace!("{} transfers remaining", remaining_in_multi); + + if remaining_in_multi == 0 { + return Ok(()); + } + + // We have no more replies to provide the caller with, + // so we need to wait until cURL has something new for us. + let timeout = self + .multi + .get_timeout()? + .unwrap_or_else(|| Duration::new(1, 0)); + self.multi + .wait(&mut [], timeout) + .with_context(|| "failed to wait on curl `Multi`")?; + } + } +} + +impl<'cfg> Downloads<'cfg> { + fn tick(&self) -> CargoResult<()> { + let mut progress = self.progress.borrow_mut(); + let progress = progress.as_mut().unwrap(); + + // Since the sparse protocol discovers dependencies as it goes, + // it's not possible to get an accurate progress indication. + // + // As an approximation, we assume that the depth of the dependency graph + // is fixed, and base the progress on how many times the caller has asked + // for blocking. If there are actually additional dependencies, the progress + // bar will get stuck. If there are fewer dependencies, it will disappear + // early. It will never go backwards. + // + // The status text also contains the number of completed & pending requests, which + // gives an better indication of forward progress. + let approximate_tree_depth = 10; + + progress.tick( + self.blocking_calls.min(approximate_tree_depth), + approximate_tree_depth + 1, + &format!( + " {} complete; {} pending", + self.downloads_finished, + self.pending.len() + ), + ) + } +} + +mod tls { + use super::Downloads; + use std::cell::Cell; + + thread_local!(static PTR: Cell<usize> = Cell::new(0)); + + pub(crate) fn with<R>(f: impl FnOnce(Option<&Downloads<'_>>) -> R) -> R { + let ptr = PTR.with(|p| p.get()); + if ptr == 0 { + f(None) + } else { + // Safety: * `ptr` is only set by `set` below which ensures the type is correct. + let ptr = unsafe { &*(ptr as *const Downloads<'_>) }; + f(Some(ptr)) + } + } + + pub(crate) fn set<R>(dl: &Downloads<'_>, f: impl FnOnce() -> R) -> R { + struct Reset<'a, T: Copy>(&'a Cell<T>, T); + + impl<'a, T: Copy> Drop for Reset<'a, T> { + fn drop(&mut self) { + self.0.set(self.1); + } + } + + PTR.with(|p| { + let _reset = Reset(p, p.get()); + p.set(dl as *const Downloads<'_> as usize); + f() + }) + } +} diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs new file mode 100644 index 0000000..633ed74 --- /dev/null +++ b/src/cargo/sources/registry/index.rs @@ -0,0 +1,889 @@ +//! Management of the index of a registry source +//! +//! This module contains management of the index and various operations, such as +//! actually parsing the index, looking for crates, etc. 
This is intended to be +//! abstract over remote indices (downloaded via git) and local registry indices +//! (which are all just present on the filesystem). +//! +//! ## Index Performance +//! +//! One important aspect of the index is that we want to optimize the "happy +//! path" as much as possible. Whenever you type `cargo build` Cargo will +//! *always* reparse the registry and learn about dependency information. This +//! is done because Cargo needs to learn about the upstream crates.io crates +//! that you're using and ensure that the preexisting `Cargo.lock` still matches +//! the current state of the world. +//! +//! Consequently, Cargo "null builds" (the index that Cargo adds to each build +//! itself) need to be fast when accessing the index. The primary performance +//! optimization here is to avoid parsing JSON blobs from the registry if we +//! don't need them. Most secondary optimizations are centered around removing +//! allocations and such, but avoiding parsing JSON is the #1 optimization. +//! +//! When we get queries from the resolver we're given a `Dependency`. This +//! dependency in turn has a version requirement, and with lock files that +//! already exist these version requirements are exact version requirements +//! `=a.b.c`. This means that we in theory only need to parse one line of JSON +//! per query in the registry, the one that matches version `a.b.c`. +//! +//! The crates.io index, however, is not amenable to this form of query. Instead +//! the crates.io index simply is a file where each line is a JSON blob. To +//! learn about the versions in each JSON blob we would need to parse the JSON, +//! defeating the purpose of trying to parse as little as possible. +//! +//! > Note that as a small aside even *loading* the JSON from the registry is +//! > actually pretty slow. For crates.io and remote registries we don't +//! > actually check out the git index on disk because that takes quite some +//! > time and is quite large. Instead we use `libgit2` to read the JSON from +//! > the raw git objects. This in turn can be slow (aka show up high in +//! > profiles) because libgit2 has to do deflate decompression and such. +//! +//! To solve all these issues a strategy is employed here where Cargo basically +//! creates an index into the index. The first time a package is queried about +//! (first time being for an entire computer) Cargo will load the contents +//! (slowly via libgit2) from the registry. It will then (slowly) parse every +//! single line to learn about its versions. Afterwards, however, Cargo will +//! emit a new file (a cache) which is amenable for speedily parsing in future +//! invocations. +//! +//! This cache file is currently organized by basically having the semver +//! version extracted from each JSON blob. That way Cargo can quickly and easily +//! parse all versions contained and which JSON blob they're associated with. +//! The JSON blob then doesn't actually need to get parsed unless the version is +//! parsed. +//! +//! Altogether the initial measurements of this shows a massive improvement for +//! Cargo null build performance. It's expected that the improvements earned +//! here will continue to grow over time in the sense that the previous +//! implementation (parse all lines each time) actually continues to slow down +//! over time as new versions of a crate are published. In any case when first +//! implemented a null build of Cargo itself would parse 3700 JSON blobs from +//! the registry and load 150 blobs from git. 
Afterwards it parses 150 JSON +//! blobs and loads 0 files git. Removing 200ms or more from Cargo's startup +//! time is certainly nothing to sneeze at! +//! +//! Note that this is just a high-level overview, there's of course lots of +//! details like invalidating caches and whatnot which are handled below, but +//! hopefully those are more obvious inline in the code itself. + +use crate::core::dependency::Dependency; +use crate::core::{PackageId, SourceId, Summary}; +use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX}; +use crate::util::interning::InternedString; +use crate::util::{internal, CargoResult, Config, Filesystem, OptVersionReq, ToSemver}; +use anyhow::bail; +use cargo_util::{paths, registry::make_dep_path}; +use log::{debug, info}; +use semver::Version; +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::io::ErrorKind; +use std::path::Path; +use std::str; +use std::task::{ready, Poll}; + +/// Crates.io treats hyphen and underscores as interchangeable, but the index and old Cargo do not. +/// Therefore, the index must store uncanonicalized version of the name so old Cargo's can find it. +/// This loop tries all possible combinations of switching hyphen and underscores to find the +/// uncanonicalized one. As all stored inputs have the correct spelling, we start with the spelling +/// as-provided. +struct UncanonicalizedIter<'s> { + input: &'s str, + num_hyphen_underscore: u32, + hyphen_combination_num: u16, +} + +impl<'s> UncanonicalizedIter<'s> { + fn new(input: &'s str) -> Self { + let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32; + UncanonicalizedIter { + input, + num_hyphen_underscore, + hyphen_combination_num: 0, + } + } +} + +impl<'s> Iterator for UncanonicalizedIter<'s> { + type Item = String; + + fn next(&mut self) -> Option<Self::Item> { + if self.hyphen_combination_num > 0 + && self.hyphen_combination_num.trailing_zeros() >= self.num_hyphen_underscore + { + return None; + } + + let ret = Some( + self.input + .chars() + .scan(0u16, |s, c| { + // the check against 15 here's to prevent + // shift overflow on inputs with more than 15 hyphens + if (c == '_' || c == '-') && *s <= 15 { + let switch = (self.hyphen_combination_num & (1u16 << *s)) > 0; + let out = if (c == '_') ^ switch { '_' } else { '-' }; + *s += 1; + Some(out) + } else { + Some(c) + } + }) + .collect(), + ); + self.hyphen_combination_num += 1; + ret + } +} + +#[test] +fn no_hyphen() { + assert_eq!( + UncanonicalizedIter::new("test").collect::<Vec<_>>(), + vec!["test".to_string()] + ) +} + +#[test] +fn two_hyphen() { + assert_eq!( + UncanonicalizedIter::new("te-_st").collect::<Vec<_>>(), + vec![ + "te-_st".to_string(), + "te__st".to_string(), + "te--st".to_string(), + "te_-st".to_string() + ] + ) +} + +#[test] +fn overflow_hyphen() { + assert_eq!( + UncanonicalizedIter::new("te-_-_-_-_-_-_-_-_-st") + .take(100) + .count(), + 100 + ) +} + +/// Manager for handling the on-disk index. +/// +/// Note that local and remote registries store the index differently. Local +/// is a simple on-disk tree of files of the raw index. Remote registries are +/// stored as a raw git repository. The different means of access are handled +/// via the [`RegistryData`] trait abstraction. +/// +/// This transparently handles caching of the index in a more efficient format. +pub struct RegistryIndex<'cfg> { + source_id: SourceId, + /// Root directory of the index for the registry. + path: Filesystem, + /// Cache of summary data. 
+ /// + /// This is keyed off the package name. The [`Summaries`] value handles + /// loading the summary data. It keeps an optimized on-disk representation + /// of the JSON files, which is created in an as-needed fashion. If it + /// hasn't been cached already, it uses [`RegistryData::load`] to access + /// to JSON files from the index, and the creates the optimized on-disk + /// summary cache. + summaries_cache: HashMap<InternedString, Summaries>, + /// [`Config`] reference for convenience. + config: &'cfg Config, +} + +/// An internal cache of summaries for a particular package. +/// +/// A list of summaries are loaded from disk via one of two methods: +/// +/// 1. Primarily Cargo will parse the corresponding file for a crate in the +/// upstream crates.io registry. That's just a JSON blob per line which we +/// can parse, extract the version, and then store here. +/// +/// 2. Alternatively, if Cargo has previously run, we'll have a cached index of +/// dependencies for the upstream index. This is a file that Cargo maintains +/// lazily on the local filesystem and is much faster to parse since it +/// doesn't involve parsing all of the JSON. +/// +/// The outward-facing interface of this doesn't matter too much where it's +/// loaded from, but it's important when reading the implementation to note that +/// we try to parse as little as possible! +#[derive(Default)] +struct Summaries { + /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end + /// fields are indexes into. If a `Summaries` is loaded from the crates.io + /// index then this field will be empty since nothing is `Unparsed`. + raw_data: Vec<u8>, + + /// All known versions of a crate, keyed from their `Version` to the + /// possibly parsed or unparsed version of the full summary. + versions: HashMap<Version, MaybeIndexSummary>, +} + +/// A lazily parsed `IndexSummary`. +enum MaybeIndexSummary { + /// A summary which has not been parsed, The `start` and `end` are pointers + /// into `Summaries::raw_data` which this is an entry of. + Unparsed { start: usize, end: usize }, + + /// An actually parsed summary. + Parsed(IndexSummary), +} + +/// A parsed representation of a summary from the index. +/// +/// In addition to a full `Summary` we have information on whether it is `yanked`. +pub struct IndexSummary { + pub summary: Summary, + pub yanked: bool, + /// Schema version, see [`RegistryPackage`]. + v: u32, +} + +/// A representation of the cache on disk that Cargo maintains of summaries. +/// Cargo will initially parse all summaries in the registry and will then +/// serialize that into this form and place it in a new location on disk, +/// ensuring that access in the future is much speedier. +#[derive(Default)] +struct SummariesCache<'a> { + versions: Vec<(Version, &'a [u8])>, + index_version: &'a str, +} + +impl<'cfg> RegistryIndex<'cfg> { + pub fn new( + source_id: SourceId, + path: &Filesystem, + config: &'cfg Config, + ) -> RegistryIndex<'cfg> { + RegistryIndex { + source_id, + path: path.clone(), + summaries_cache: HashMap::new(), + config, + } + } + + /// Returns the hash listed for a specified `PackageId`. + pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> Poll<CargoResult<&str>> { + let req = OptVersionReq::exact(pkg.version()); + let summary = self.summaries(pkg.name(), &req, load)?; + let summary = ready!(summary).next(); + Poll::Ready(Ok(summary + .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))? 
+ .summary + .checksum() + .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?)) + } + + /// Load a list of summaries for `name` package in this registry which + /// match `req` + /// + /// This function will semantically parse the on-disk index, match all + /// versions, and then return an iterator over all summaries which matched. + /// Internally there's quite a few layer of caching to amortize this cost + /// though since this method is called quite a lot on null builds in Cargo. + pub fn summaries<'a, 'b>( + &'a mut self, + name: InternedString, + req: &'b OptVersionReq, + load: &mut dyn RegistryData, + ) -> Poll<CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b>> + where + 'a: 'b, + { + let source_id = self.source_id; + let config = self.config; + + // First up actually parse what summaries we have available. If Cargo + // has run previously this will parse a Cargo-specific cache file rather + // than the registry itself. In effect this is intended to be a quite + // cheap operation. + let summaries = ready!(self.load_summaries(name, load)?); + + // Iterate over our summaries, extract all relevant ones which match our + // version requirement, and then parse all corresponding rows in the + // registry. As a reminder this `summaries` method is called for each + // entry in a lock file on every build, so we want to absolutely + // minimize the amount of work being done here and parse as little as + // necessary. + let raw_data = &summaries.raw_data; + Poll::Ready(Ok(summaries + .versions + .iter_mut() + .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None }) + .filter_map( + move |maybe| match maybe.parse(config, raw_data, source_id) { + Ok(summary) => Some(summary), + Err(e) => { + info!("failed to parse `{}` registry package: {}", name, e); + None + } + }, + ) + .filter(move |is| { + if is.v > INDEX_V_MAX { + debug!( + "unsupported schema version {} ({} {})", + is.v, + is.summary.name(), + is.summary.version() + ); + false + } else { + true + } + }))) + } + + fn load_summaries( + &mut self, + name: InternedString, + load: &mut dyn RegistryData, + ) -> Poll<CargoResult<&mut Summaries>> { + // If we've previously loaded what versions are present for `name`, just + // return that since our cache should still be valid. + if self.summaries_cache.contains_key(&name) { + return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap())); + } + + // Prepare the `RegistryData` which will lazily initialize internal data + // structures. + load.prepare()?; + + let root = load.assert_index_locked(&self.path); + let cache_root = root.join(".cache"); + + // See module comment in `registry/mod.rs` for why this is structured + // the way it is. + let fs_name = name + .chars() + .flat_map(|c| c.to_lowercase()) + .collect::<String>(); + let raw_path = make_dep_path(&fs_name, false); + + let mut any_pending = false; + // Attempt to handle misspellings by searching for a chain of related + // names to the original `raw_path` name. Only return summaries + // associated with the first hit, however. The resolver will later + // reject any candidates that have the wrong name, and with this it'll + // along the way produce helpful "did you mean?" suggestions. 
+ for (i, path) in UncanonicalizedIter::new(&raw_path).take(1024).enumerate() { + let summaries = Summaries::parse( + root, + &cache_root, + path.as_ref(), + self.source_id, + load, + self.config, + )?; + if summaries.is_pending() { + if i == 0 { + // If we have not herd back about the name as requested + // then don't ask about other spellings yet. + // This prevents us spamming all the variations in the + // case where we have the correct spelling. + return Poll::Pending; + } + any_pending = true; + } + if let Poll::Ready(Some(summaries)) = summaries { + self.summaries_cache.insert(name, summaries); + return Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap())); + } + } + + if any_pending { + return Poll::Pending; + } + + // If nothing was found then this crate doesn't exists, so just use an + // empty `Summaries` list. + self.summaries_cache.insert(name, Summaries::default()); + Poll::Ready(Ok(self.summaries_cache.get_mut(&name).unwrap())) + } + + /// Clears the in-memory summaries cache. + pub fn clear_summaries_cache(&mut self) { + self.summaries_cache.clear(); + } + + pub fn query_inner( + &mut self, + dep: &Dependency, + load: &mut dyn RegistryData, + yanked_whitelist: &HashSet<PackageId>, + f: &mut dyn FnMut(Summary), + ) -> Poll<CargoResult<()>> { + if self.config.offline() { + // This should only return `Poll::Ready(Ok(()))` if there is at least 1 match. + // + // If there are 0 matches it should fall through and try again with online. + // This is necessary for dependencies that are not used (such as + // target-cfg or optional), but are not downloaded. Normally the + // build should succeed if they are not downloaded and not used, + // but they still need to resolve. If they are actually needed + // then cargo will fail to download and an error message + // indicating that the required dependency is unavailable while + // offline will be displayed. + if ready!(self.query_inner_with_online(dep, load, yanked_whitelist, f, false)?) > 0 { + return Poll::Ready(Ok(())); + } + } + self.query_inner_with_online(dep, load, yanked_whitelist, f, true) + .map_ok(|_| ()) + } + + fn query_inner_with_online( + &mut self, + dep: &Dependency, + load: &mut dyn RegistryData, + yanked_whitelist: &HashSet<PackageId>, + f: &mut dyn FnMut(Summary), + online: bool, + ) -> Poll<CargoResult<usize>> { + let source_id = self.source_id; + + let summaries = ready!(self.summaries(dep.package_name(), dep.version_req(), load))?; + + let summaries = summaries + // First filter summaries for `--offline`. If we're online then + // everything is a candidate, otherwise if we're offline we're only + // going to consider candidates which are actually present on disk. + // + // Note: This particular logic can cause problems with + // optional dependencies when offline. If at least 1 version + // of an optional dependency is downloaded, but that version + // does not satisfy the requirements, then resolution will + // fail. Unfortunately, whether or not something is optional + // is not known here. + .filter(|s| (online || load.is_crate_downloaded(s.summary.package_id()))) + // Next filter out all yanked packages. Some yanked packages may + // leak through if they're in a whitelist (aka if they were + // previously in `Cargo.lock` + .filter(|s| !s.yanked || yanked_whitelist.contains(&s.summary.package_id())) + .map(|s| s.summary.clone()); + + // Handle `cargo update --precise` here. 
If specified, our own source + // will have a precise version listed of the form + // `<pkg>=<p_req>o-><f_req>` where `<pkg>` is the name of a crate on + // this source, `<p_req>` is the version installed and `<f_req> is the + // version requested (argument to `--precise`). + let name = dep.package_name().as_str(); + let precise = match source_id.precise() { + Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => { + let mut vers = p[name.len() + 1..].splitn(2, "->"); + let current_vers = vers.next().unwrap().to_semver().unwrap(); + let requested_vers = vers.next().unwrap().to_semver().unwrap(); + Some((current_vers, requested_vers)) + } + _ => None, + }; + let summaries = summaries.filter(|s| match &precise { + Some((current, requested)) => { + if dep.version_req().matches(current) { + // Unfortunately crates.io allows versions to differ only + // by build metadata. This shouldn't be allowed, but since + // it is, this will honor it if requested. However, if not + // specified, then ignore it. + let s_vers = s.version(); + match (s_vers.build.is_empty(), requested.build.is_empty()) { + (true, true) => s_vers == requested, + (true, false) => false, + (false, true) => { + // Strip out the metadata. + s_vers.major == requested.major + && s_vers.minor == requested.minor + && s_vers.patch == requested.patch + && s_vers.pre == requested.pre + } + (false, false) => s_vers == requested, + } + } else { + true + } + } + None => true, + }); + + let mut count = 0; + for summary in summaries { + f(summary); + count += 1; + } + Poll::Ready(Ok(count)) + } + + pub fn is_yanked( + &mut self, + pkg: PackageId, + load: &mut dyn RegistryData, + ) -> Poll<CargoResult<bool>> { + let req = OptVersionReq::exact(pkg.version()); + let found = self + .summaries(pkg.name(), &req, load) + .map_ok(|mut p| p.any(|summary| summary.yanked)); + found + } +} + +impl Summaries { + /// Parse out a `Summaries` instances from on-disk state. + /// + /// This will attempt to prefer parsing a previous cache file that already + /// exists from a previous invocation of Cargo (aka you're typing `cargo + /// build` again after typing it previously). If parsing fails or the cache + /// isn't found, then we take a slower path which loads the full descriptor + /// for `relative` from the underlying index (aka typically libgit2 with + /// crates.io) and then parse everything in there. + /// + /// * `root` - this is the root argument passed to `load` + /// * `cache_root` - this is the root on the filesystem itself of where to + /// store cache files. + /// * `relative` - this is the file we're loading from cache or the index + /// data + /// * `source_id` - the registry's SourceId used when parsing JSON blobs to + /// create summaries. + /// * `load` - the actual index implementation which may be very slow to + /// call. We avoid this if we can. + pub fn parse( + root: &Path, + cache_root: &Path, + relative: &Path, + source_id: SourceId, + load: &mut dyn RegistryData, + config: &Config, + ) -> Poll<CargoResult<Option<Summaries>>> { + // First up, attempt to load the cache. This could fail for all manner + // of reasons, but consider all of them non-fatal and just log their + // occurrence in case anyone is debugging anything. 
+ let cache_path = cache_root.join(relative); + let mut cached_summaries = None; + let mut index_version = None; + match fs::read(&cache_path) { + Ok(contents) => match Summaries::parse_cache(contents) { + Ok((s, v)) => { + cached_summaries = Some(s); + index_version = Some(v); + } + Err(e) => { + log::debug!("failed to parse {:?} cache: {}", relative, e); + } + }, + Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e), + } + + let response = ready!(load.load(root, relative, index_version.as_deref())?); + + match response { + LoadResponse::CacheValid => { + log::debug!("fast path for registry cache of {:?}", relative); + return Poll::Ready(Ok(cached_summaries)); + } + LoadResponse::NotFound => { + if let Err(e) = fs::remove_file(cache_path) { + if e.kind() != ErrorKind::NotFound { + log::debug!("failed to remove from cache: {}", e); + } + } + return Poll::Ready(Ok(None)); + } + LoadResponse::Data { + raw_data, + index_version, + } => { + // This is the fallback path where we actually talk to the registry backend to load + // information. Here we parse every single line in the index (as we need + // to find the versions) + log::debug!("slow path for {:?}", relative); + let mut cache = SummariesCache::default(); + let mut ret = Summaries::default(); + ret.raw_data = raw_data; + for line in split(&ret.raw_data, b'\n') { + // Attempt forwards-compatibility on the index by ignoring + // everything that we ourselves don't understand, that should + // allow future cargo implementations to break the + // interpretation of each line here and older cargo will simply + // ignore the new lines. + let summary = match IndexSummary::parse(config, line, source_id) { + Ok(summary) => summary, + Err(e) => { + // This should only happen when there is an index + // entry from a future version of cargo that this + // version doesn't understand. Hopefully, those future + // versions of cargo correctly set INDEX_V_MAX and + // CURRENT_CACHE_VERSION, otherwise this will skip + // entries in the cache preventing those newer + // versions from reading them (that is, until the + // cache is rebuilt). + log::info!("failed to parse {:?} registry package: {}", relative, e); + continue; + } + }; + let version = summary.summary.package_id().version().clone(); + cache.versions.push((version.clone(), line)); + ret.versions.insert(version, summary.into()); + } + if let Some(index_version) = index_version { + log::trace!("caching index_version {}", index_version); + let cache_bytes = cache.serialize(index_version.as_str()); + // Once we have our `cache_bytes` which represents the `Summaries` we're + // about to return, write that back out to disk so future Cargo + // invocations can use it. + // + // This is opportunistic so we ignore failure here but are sure to log + // something in case of error. + if paths::create_dir_all(cache_path.parent().unwrap()).is_ok() { + let path = Filesystem::new(cache_path.clone()); + config.assert_package_cache_locked(&path); + if let Err(e) = fs::write(cache_path, &cache_bytes) { + log::info!("failed to write cache: {}", e); + } + } + + // If we've got debug assertions enabled read back in the cached values + // and assert they match the expected result. 
+ #[cfg(debug_assertions)] + { + let readback = SummariesCache::parse(&cache_bytes) + .expect("failed to parse cache we just wrote"); + assert_eq!( + readback.index_version, index_version, + "index_version mismatch" + ); + assert_eq!(readback.versions, cache.versions, "versions mismatch"); + } + } + Poll::Ready(Ok(Some(ret))) + } + } + } + + /// Parses an open `File` which represents information previously cached by + /// Cargo. + pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> { + let cache = SummariesCache::parse(&contents)?; + let index_version = InternedString::new(cache.index_version); + let mut ret = Summaries::default(); + for (version, summary) in cache.versions { + let (start, end) = subslice_bounds(&contents, summary); + ret.versions + .insert(version, MaybeIndexSummary::Unparsed { start, end }); + } + ret.raw_data = contents; + return Ok((ret, index_version)); + + // Returns the start/end offsets of `inner` with `outer`. Asserts that + // `inner` is a subslice of `outer`. + fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) { + let outer_start = outer.as_ptr() as usize; + let outer_end = outer_start + outer.len(); + let inner_start = inner.as_ptr() as usize; + let inner_end = inner_start + inner.len(); + assert!(inner_start >= outer_start); + assert!(inner_end <= outer_end); + (inner_start - outer_start, inner_end - outer_start) + } + } +} + +// Implementation of serializing/deserializing the cache of summaries on disk. +// Currently the format looks like: +// +// +--------------------+----------------------+-------------+---+ +// | cache version byte | index format version | git sha rev | 0 | +// +--------------------+----------------------+-------------+---+ +// +// followed by... +// +// +----------------+---+------------+---+ +// | semver version | 0 | JSON blob | 0 | ... +// +----------------+---+------------+---+ +// +// The idea is that this is a very easy file for Cargo to parse in future +// invocations. The read from disk should be quite fast and then afterwards all +// we need to know is what versions correspond to which JSON blob. +// +// The leading version byte is intended to ensure that there's some level of +// future compatibility against changes to this cache format so if different +// versions of Cargo share the same cache they don't get too confused. The git +// sha lets us know when the file needs to be regenerated (it needs regeneration +// whenever the index itself updates). +// +// Cache versions: +// * `1`: The original version. +// * `2`: Added the "index format version" field so that if the index format +// changes, different versions of cargo won't get confused reading each +// other's caches. +// * `3`: Bumped the version to work around an issue where multiple versions of +// a package were published that differ only by semver metadata. For +// example, openssl-src 110.0.0 and 110.0.0+1.1.0f. Previously, the cache +// would be incorrectly populated with two entries, both 110.0.0. After +// this, the metadata will be correctly included. This isn't really a format +// change, just a version bump to clear the incorrect cache entries. Note: +// the index shouldn't allow these, but unfortunately crates.io doesn't +// check it. 
+ +const CURRENT_CACHE_VERSION: u8 = 3; + +impl<'a> SummariesCache<'a> { + fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> { + // NB: keep this method in sync with `serialize` below + let (first_byte, rest) = data + .split_first() + .ok_or_else(|| anyhow::format_err!("malformed cache"))?; + if *first_byte != CURRENT_CACHE_VERSION { + bail!("looks like a different Cargo's cache, bailing out"); + } + let index_v_bytes = rest + .get(..4) + .ok_or_else(|| anyhow::anyhow!("cache expected 4 bytes for index version"))?; + let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap()); + if index_v != INDEX_V_MAX { + bail!( + "index format version {} doesn't match the version I know ({})", + index_v, + INDEX_V_MAX + ); + } + let rest = &rest[4..]; + + let mut iter = split(rest, 0); + let last_index_update = if let Some(update) = iter.next() { + str::from_utf8(update)? + } else { + bail!("malformed file"); + }; + let mut ret = SummariesCache::default(); + ret.index_version = last_index_update; + while let Some(version) = iter.next() { + let version = str::from_utf8(version)?; + let version = Version::parse(version)?; + let summary = iter.next().unwrap(); + ret.versions.push((version, summary)); + } + Ok(ret) + } + + fn serialize(&self, index_version: &str) -> Vec<u8> { + // NB: keep this method in sync with `parse` above + let size = self + .versions + .iter() + .map(|(_version, data)| (10 + data.len())) + .sum(); + let mut contents = Vec::with_capacity(size); + contents.push(CURRENT_CACHE_VERSION); + contents.extend(&u32::to_le_bytes(INDEX_V_MAX)); + contents.extend_from_slice(index_version.as_bytes()); + contents.push(0); + for (version, data) in self.versions.iter() { + contents.extend_from_slice(version.to_string().as_bytes()); + contents.push(0); + contents.extend_from_slice(data); + contents.push(0); + } + contents + } +} + +impl MaybeIndexSummary { + /// Parses this "maybe a summary" into a `Parsed` for sure variant. + /// + /// Does nothing if this is already `Parsed`, and otherwise the `raw_data` + /// passed in is sliced with the bounds in `Unparsed` and then actually + /// parsed. + fn parse( + &mut self, + config: &Config, + raw_data: &[u8], + source_id: SourceId, + ) -> CargoResult<&IndexSummary> { + let (start, end) = match self { + MaybeIndexSummary::Unparsed { start, end } => (*start, *end), + MaybeIndexSummary::Parsed(summary) => return Ok(summary), + }; + let summary = IndexSummary::parse(config, &raw_data[start..end], source_id)?; + *self = MaybeIndexSummary::Parsed(summary); + match self { + MaybeIndexSummary::Unparsed { .. } => unreachable!(), + MaybeIndexSummary::Parsed(summary) => Ok(summary), + } + } +} + +impl From<IndexSummary> for MaybeIndexSummary { + fn from(summary: IndexSummary) -> MaybeIndexSummary { + MaybeIndexSummary::Parsed(summary) + } +} + +impl IndexSummary { + /// Parses a line from the registry's index file into an `IndexSummary` for + /// a package. + /// + /// The `line` provided is expected to be valid JSON. + fn parse(config: &Config, line: &[u8], source_id: SourceId) -> CargoResult<IndexSummary> { + // ****CAUTION**** Please be extremely careful with returning errors + // from this function. Entries that error are not included in the + // index cache, and can cause cargo to get confused when switching + // between different versions that understand the index differently. + // Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION + // values carefully when making changes here. 
+ let RegistryPackage { + name, + vers, + cksum, + deps, + mut features, + features2, + yanked, + links, + v, + } = serde_json::from_slice(line)?; + let v = v.unwrap_or(1); + log::trace!("json parsed registry {}/{}", name, vers); + let pkgid = PackageId::new(name, &vers, source_id)?; + let deps = deps + .into_iter() + .map(|dep| dep.into_dep(source_id)) + .collect::<CargoResult<Vec<_>>>()?; + if let Some(features2) = features2 { + for (name, values) in features2 { + features.entry(name).or_default().extend(values); + } + } + let mut summary = Summary::new(config, pkgid, deps, &features, links)?; + summary.set_checksum(cksum); + Ok(IndexSummary { + summary, + yanked: yanked.unwrap_or(false), + v, + }) + } +} + +fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> { + struct Split<'a> { + haystack: &'a [u8], + needle: u8, + } + + impl<'a> Iterator for Split<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option<&'a [u8]> { + if self.haystack.is_empty() { + return None; + } + let (ret, remaining) = match memchr::memchr(self.needle, self.haystack) { + Some(pos) => (&self.haystack[..pos], &self.haystack[pos + 1..]), + None => (self.haystack, &[][..]), + }; + self.haystack = remaining; + Some(ret) + } + } + + Split { haystack, needle } +} diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs new file mode 100644 index 0000000..a4b57a9 --- /dev/null +++ b/src/cargo/sources/registry/local.rs @@ -0,0 +1,149 @@ +use crate::core::PackageId; +use crate::sources::registry::{LoadResponse, MaybeLock, RegistryConfig, RegistryData}; +use crate::util::errors::CargoResult; +use crate::util::{Config, Filesystem}; +use cargo_util::{paths, Sha256}; +use std::fs::File; +use std::io::SeekFrom; +use std::io::{self, prelude::*}; +use std::path::Path; +use std::task::Poll; + +/// A local registry is a registry that lives on the filesystem as a set of +/// `.crate` files with an `index` directory in the same format as a remote +/// registry. +pub struct LocalRegistry<'cfg> { + index_path: Filesystem, + root: Filesystem, + src_path: Filesystem, + config: &'cfg Config, + updated: bool, +} + +impl<'cfg> LocalRegistry<'cfg> { + pub fn new(root: &Path, config: &'cfg Config, name: &str) -> LocalRegistry<'cfg> { + LocalRegistry { + src_path: config.registry_source_path().join(name), + index_path: Filesystem::new(root.join("index")), + root: Filesystem::new(root.to_path_buf()), + config, + updated: false, + } + } +} + +impl<'cfg> RegistryData for LocalRegistry<'cfg> { + fn prepare(&self) -> CargoResult<()> { + Ok(()) + } + + fn index_path(&self) -> &Filesystem { + &self.index_path + } + + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { + // Note that the `*_unlocked` variant is used here since we're not + // modifying the index and it's required to be externally synchronized. 
+ path.as_path_unlocked() + } + + fn load( + &mut self, + root: &Path, + path: &Path, + _index_version: Option<&str>, + ) -> Poll<CargoResult<LoadResponse>> { + if self.updated { + let raw_data = match paths::read_bytes(&root.join(path)) { + Err(e) + if e.downcast_ref::<io::Error>() + .map_or(false, |ioe| ioe.kind() == io::ErrorKind::NotFound) => + { + return Poll::Ready(Ok(LoadResponse::NotFound)); + } + r => r, + }?; + Poll::Ready(Ok(LoadResponse::Data { + raw_data, + index_version: None, + })) + } else { + Poll::Pending + } + } + + fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>> { + // Local registries don't have configuration for remote APIs or anything + // like that + Poll::Ready(Ok(None)) + } + + fn block_until_ready(&mut self) -> CargoResult<()> { + if self.updated { + return Ok(()); + } + // Nothing to update, we just use what's on disk. Verify it actually + // exists though. We don't use any locks as we're just checking whether + // these directories exist. + let root = self.root.clone().into_path_unlocked(); + if !root.is_dir() { + anyhow::bail!("local registry path is not a directory: {}", root.display()); + } + let index_path = self.index_path.clone().into_path_unlocked(); + if !index_path.is_dir() { + anyhow::bail!( + "local registry index path is not a directory: {}", + index_path.display() + ); + } + self.updated = true; + Ok(()) + } + + fn invalidate_cache(&mut self) { + // Local registry has no cache - just reads from disk. + } + + fn is_updated(&self) -> bool { + self.updated + } + + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> { + let crate_file = format!("{}-{}.crate", pkg.name(), pkg.version()); + + // Note that the usage of `into_path_unlocked` here is because the local + // crate files here never change in that we're not the one writing them, + // so it's not our responsibility to synchronize access to them. + let path = self.root.join(&crate_file).into_path_unlocked(); + let mut crate_file = paths::open(&path)?; + + // If we've already got an unpacked version of this crate, then skip the + // checksum below as it is in theory already verified. + let dst = format!("{}-{}", pkg.name(), pkg.version()); + if self.src_path.join(dst).into_path_unlocked().exists() { + return Ok(MaybeLock::Ready(crate_file)); + } + + self.config.shell().status("Unpacking", pkg)?; + + // We don't actually need to download anything per-se, we just need to + // verify the checksum matches the .crate file itself. + let actual = Sha256::new().update_file(&crate_file)?.finish_hex(); + if actual != checksum { + anyhow::bail!("failed to verify the checksum of `{}`", pkg) + } + + crate_file.seek(SeekFrom::Start(0))?; + + Ok(MaybeLock::Ready(crate_file)) + } + + fn finish_download( + &mut self, + _pkg: PackageId, + _checksum: &str, + _data: &[u8], + ) -> CargoResult<File> { + panic!("this source doesn't download") + } +} diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs new file mode 100644 index 0000000..930a42f --- /dev/null +++ b/src/cargo/sources/registry/mod.rs @@ -0,0 +1,970 @@ +//! A `Source` for registry-based packages. +//! +//! # What's a Registry? +//! +//! Registries are central locations where packages can be uploaded to, +//! discovered, and searched for. The purpose of a registry is to have a +//! location that serves as permanent storage for versions of a crate over time. +//! +//! Compared to git sources, a registry provides many packages as well as many +//! versions simultaneously. 
Git sources can also have commits deleted through +//! rebasings where registries cannot have their versions deleted. +//! +//! # The Index of a Registry +//! +//! One of the major difficulties with a registry is that hosting so many +//! packages may quickly run into performance problems when dealing with +//! dependency graphs. It's infeasible for cargo to download the entire contents +//! of the registry just to resolve one package's dependencies, for example. As +//! a result, cargo needs some efficient method of querying what packages are +//! available on a registry, what versions are available, and what the +//! dependencies for each version is. +//! +//! One method of doing so would be having the registry expose an HTTP endpoint +//! which can be queried with a list of packages and a response of their +//! dependencies and versions is returned. This is somewhat inefficient however +//! as we may have to hit the endpoint many times and we may have already +//! queried for much of the data locally already (for other packages, for +//! example). This also involves inventing a transport format between the +//! registry and Cargo itself, so this route was not taken. +//! +//! Instead, Cargo communicates with registries through a git repository +//! referred to as the Index. The Index of a registry is essentially an easily +//! query-able version of the registry's database for a list of versions of a +//! package as well as a list of dependencies for each version. +//! +//! Using git to host this index provides a number of benefits: +//! +//! * The entire index can be stored efficiently locally on disk. This means +//! that all queries of a registry can happen locally and don't need to touch +//! the network. +//! +//! * Updates of the index are quite efficient. Using git buys incremental +//! updates, compressed transmission, etc for free. The index must be updated +//! each time we need fresh information from a registry, but this is one +//! update of a git repository that probably hasn't changed a whole lot so +//! it shouldn't be too expensive. +//! +//! Additionally, each modification to the index is just appending a line at +//! the end of a file (the exact format is described later). This means that +//! the commits for an index are quite small and easily applied/compressible. +//! +//! ## The format of the Index +//! +//! The index is a store for the list of versions for all packages known, so its +//! format on disk is optimized slightly to ensure that `ls registry` doesn't +//! produce a list of all packages ever known. The index also wants to ensure +//! that there's not a million files which may actually end up hitting +//! filesystem limits at some point. To this end, a few decisions were made +//! about the format of the registry: +//! +//! 1. Each crate will have one file corresponding to it. Each version for a +//! crate will just be a line in this file. +//! 2. There will be two tiers of directories for crate names, under which +//! crates corresponding to those tiers will be located. +//! +//! As an example, this is an example hierarchy of an index: +//! +//! ```notrust +//! . +//! ├── 3 +//! │  └── u +//! │  └── url +//! ├── bz +//! │  └── ip +//! │  └── bzip2 +//! ├── config.json +//! ├── en +//! │  └── co +//! │  └── encoding +//! └── li +//!   ├── bg +//!   │  └── libgit2 +//!   └── nk +//!   └── link-config +//! ``` +//! +//! The root of the index contains a `config.json` file with a few entries +//! corresponding to the registry (see [`RegistryConfig`] below). 
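+//!
+//! For illustration, a `config.json` has roughly this shape (the URLs here
+//! are invented; real registries substitute their own endpoints, and the
+//! exact fields are described by [`RegistryConfig`]):
+//!
+//! ```notrust
+//! {
+//!     "dl": "https://example-registry.test/api/v1/crates",
+//!     "api": "https://example-registry.test"
+//! }
+//! ```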
+//! +//! Otherwise, there are three numbered directories (1, 2, 3) for crates with +//! names 1, 2, and 3 characters in length. The 1/2 directories simply have the +//! crate files underneath them, while the 3 directory is sharded by the first +//! letter of the crate name. +//! +//! Otherwise the top-level directory contains many two-letter directory names, +//! each of which has many sub-folders with two letters. At the end of all these +//! are the actual crate files themselves. +//! +//! The purpose of this layout is to hopefully cut down on `ls` sizes as well as +//! efficient lookup based on the crate name itself. +//! +//! ## Crate files +//! +//! Each file in the index is the history of one crate over time. Each line in +//! the file corresponds to one version of a crate, stored in JSON format (see +//! the `RegistryPackage` structure below). +//! +//! As new versions are published, new lines are appended to this file. The only +//! modifications to this file that should happen over time are yanks of a +//! particular version. +//! +//! # Downloading Packages +//! +//! The purpose of the Index was to provide an efficient method to resolve the +//! dependency graph for a package. So far we only required one network +//! interaction to update the registry's repository (yay!). After resolution has +//! been performed, however we need to download the contents of packages so we +//! can read the full manifest and build the source code. +//! +//! To accomplish this, this source's `download` method will make an HTTP +//! request per-package requested to download tarballs into a local cache. These +//! tarballs will then be unpacked into a destination folder. +//! +//! Note that because versions uploaded to the registry are frozen forever that +//! the HTTP download and unpacking can all be skipped if the version has +//! already been downloaded and unpacked. This caching allows us to only +//! download a package when absolutely necessary. +//! +//! # Filesystem Hierarchy +//! +//! Overall, the `$HOME/.cargo` looks like this when talking about the registry: +//! +//! ```notrust +//! # A folder under which all registry metadata is hosted (similar to +//! # $HOME/.cargo/git) +//! $HOME/.cargo/registry/ +//! +//! # For each registry that cargo knows about (keyed by hostname + hash) +//! # there is a folder which is the checked out version of the index for +//! # the registry in this location. Note that this is done so cargo can +//! # support multiple registries simultaneously +//! index/ +//! registry1-<hash>/ +//! registry2-<hash>/ +//! ... +//! +//! # This folder is a cache for all downloaded tarballs from a registry. +//! # Once downloaded and verified, a tarball never changes. +//! cache/ +//! registry1-<hash>/<pkg>-<version>.crate +//! ... +//! +//! # Location in which all tarballs are unpacked. Each tarball is known to +//! # be frozen after downloading, so transitively this folder is also +//! # frozen once its unpacked (it's never unpacked again) +//! src/ +//! registry1-<hash>/<pkg>-<version>/... +//! ... +//! 
``` + +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::collections::HashSet; +use std::fs::{File, OpenOptions}; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::task::Poll; + +use anyhow::Context as _; +use cargo_util::paths::{self, exclude_from_backups_and_indexing}; +use flate2::read::GzDecoder; +use log::debug; +use semver::Version; +use serde::Deserialize; +use tar::Archive; + +use crate::core::dependency::{DepKind, Dependency}; +use crate::core::source::MaybePackage; +use crate::core::{Package, PackageId, QueryKind, Source, SourceId, Summary}; +use crate::sources::PathSource; +use crate::util::hex; +use crate::util::interning::InternedString; +use crate::util::into_url::IntoUrl; +use crate::util::network::PollExt; +use crate::util::{ + restricted_names, CargoResult, Config, Filesystem, LimitErrorReader, OptVersionReq, +}; + +const PACKAGE_SOURCE_LOCK: &str = ".cargo-ok"; +pub const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index"; +pub const CRATES_IO_HTTP_INDEX: &str = "sparse+https://index.crates.io/"; +pub const CRATES_IO_REGISTRY: &str = "crates-io"; +pub const CRATES_IO_DOMAIN: &str = "crates.io"; +const CRATE_TEMPLATE: &str = "{crate}"; +const VERSION_TEMPLATE: &str = "{version}"; +const PREFIX_TEMPLATE: &str = "{prefix}"; +const LOWER_PREFIX_TEMPLATE: &str = "{lowerprefix}"; +const CHECKSUM_TEMPLATE: &str = "{sha256-checksum}"; +const MAX_UNPACK_SIZE: u64 = 512 * 1024 * 1024; +const MAX_COMPRESSION_RATIO: usize = 20; // 20:1 + +/// A "source" for a local (see `local::LocalRegistry`) or remote (see +/// `remote::RemoteRegistry`) registry. +/// +/// This contains common functionality that is shared between the two registry +/// kinds, with the registry-specific logic implemented as part of the +/// [`RegistryData`] trait referenced via the `ops` field. +pub struct RegistrySource<'cfg> { + source_id: SourceId, + /// The path where crate files are extracted (`$CARGO_HOME/registry/src/$REG-HASH`). + src_path: Filesystem, + /// Local reference to [`Config`] for convenience. + config: &'cfg Config, + /// Abstraction for interfacing to the different registry kinds. + ops: Box<dyn RegistryData + 'cfg>, + /// Interface for managing the on-disk index. + index: index::RegistryIndex<'cfg>, + /// A set of packages that should be allowed to be used, even if they are + /// yanked. + /// + /// This is populated from the entries in `Cargo.lock` to ensure that + /// `cargo update -p somepkg` won't unlock yanked entries in `Cargo.lock`. + /// Otherwise, the resolver would think that those entries no longer + /// exist, and it would trigger updates to unrelated packages. + yanked_whitelist: HashSet<PackageId>, +} + +/// The `config.json` file stored in the index. +#[derive(Deserialize, Debug, Clone)] +#[serde(rename_all = "kebab-case")] +pub struct RegistryConfig { + /// Download endpoint for all crates. + /// + /// The string is a template which will generate the download URL for the + /// tarball of a specific version of a crate. The substrings `{crate}` and + /// `{version}` will be replaced with the crate's name and version + /// respectively. The substring `{prefix}` will be replaced with the + /// crate's prefix directory name, and the substring `{lowerprefix}` will + /// be replaced with the crate's prefix directory name converted to + /// lowercase. The substring `{sha256-checksum}` will be replaced with the + /// crate's sha256 checksum. 
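+ ///
+ /// For illustration (the host name is invented): a template of
+ /// `https://dl.example.test/{prefix}/{crate}-{version}.crate` expands for
+ /// `Bzip2 0.1.0` to `https://dl.example.test/Bz/ip/Bzip2-0.1.0.crate`,
+ /// while `{lowerprefix}` in the same position would yield `bz/ip`.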
+ /// + /// For backwards compatibility, if the string does not contain any + /// markers (`{crate}`, `{version}`, `{prefix}`, or ``{lowerprefix}`), it + /// will be extended with `/{crate}/{version}/download` to + /// support registries like crates.io which were created before the + /// templating setup was created. + pub dl: String, + + /// API endpoint for the registry. This is what's actually hit to perform + /// operations like yanks, owner modifications, publish new crates, etc. + /// If this is None, the registry does not support API commands. + pub api: Option<String>, + + /// Whether all operations require authentication. + #[serde(default)] + pub auth_required: bool, +} + +/// The maximum version of the `v` field in the index this version of cargo +/// understands. +pub(crate) const INDEX_V_MAX: u32 = 2; + +/// A single line in the index representing a single version of a package. +#[derive(Deserialize)] +pub struct RegistryPackage<'a> { + name: InternedString, + vers: Version, + #[serde(borrow)] + deps: Vec<RegistryDependency<'a>>, + features: BTreeMap<InternedString, Vec<InternedString>>, + /// This field contains features with new, extended syntax. Specifically, + /// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`). + /// + /// This is separated from `features` because versions older than 1.19 + /// will fail to load due to not being able to parse the new syntax, even + /// with a `Cargo.lock` file. + features2: Option<BTreeMap<InternedString, Vec<InternedString>>>, + cksum: String, + /// If `true`, Cargo will skip this version when resolving. + /// + /// This was added in 2014. Everything in the crates.io index has this set + /// now, so this probably doesn't need to be an option anymore. + yanked: Option<bool>, + /// Native library name this package links to. + /// + /// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>), + /// can be `None` if published before then. + links: Option<InternedString>, + /// The schema version for this entry. + /// + /// If this is None, it defaults to version 1. Entries with unknown + /// versions are ignored. + /// + /// Version `2` format adds the `features2` field. + /// + /// This provides a method to safely introduce changes to index entries + /// and allow older versions of cargo to ignore newer entries it doesn't + /// understand. This is honored as of 1.51, so unfortunately older + /// versions will ignore it, and potentially misinterpret version 2 and + /// newer entries. + /// + /// The intent is that versions older than 1.51 will work with a + /// pre-existing `Cargo.lock`, but they may not correctly process `cargo + /// update` or build a lock from scratch. In that case, cargo may + /// incorrectly select a new package that uses a new index format. A + /// workaround is to downgrade any packages that are incompatible with the + /// `--precise` flag of `cargo update`. 
+ v: Option<u32>, +} + +#[test] +fn escaped_char_in_json() { + let _: RegistryPackage<'_> = serde_json::from_str( + r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#, + ) + .unwrap(); + let _: RegistryPackage<'_> = serde_json::from_str( + r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"# + ).unwrap(); + + // Now we add escaped cher all the places they can go + // these are not valid, but it should error later than json parsing + let _: RegistryPackage<'_> = serde_json::from_str( + r#"{ + "name":"This name has a escaped cher in it \n\t\" ", + "vers":"0.0.1", + "deps":[{ + "name": " \n\t\" ", + "req": " \n\t\" ", + "features": [" \n\t\" "], + "optional": true, + "default_features": true, + "target": " \n\t\" ", + "kind": " \n\t\" ", + "registry": " \n\t\" " + }], + "cksum":"bae3", + "features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]}, + "links":" \n\t\" "}"#, + ) + .unwrap(); +} + +/// A dependency as encoded in the index JSON. +#[derive(Deserialize)] +struct RegistryDependency<'a> { + name: InternedString, + #[serde(borrow)] + req: Cow<'a, str>, + features: Vec<InternedString>, + optional: bool, + default_features: bool, + target: Option<Cow<'a, str>>, + kind: Option<Cow<'a, str>>, + registry: Option<Cow<'a, str>>, + package: Option<InternedString>, + public: Option<bool>, +} + +impl<'a> RegistryDependency<'a> { + /// Converts an encoded dependency in the registry to a cargo dependency + pub fn into_dep(self, default: SourceId) -> CargoResult<Dependency> { + let RegistryDependency { + name, + req, + mut features, + optional, + default_features, + target, + kind, + registry, + package, + public, + } = self; + + let id = if let Some(registry) = ®istry { + SourceId::for_registry(®istry.into_url()?)? + } else { + default + }; + + let mut dep = Dependency::parse(package.unwrap_or(name), Some(&req), id)?; + if package.is_some() { + dep.set_explicit_name_in_toml(name); + } + let kind = match kind.as_deref().unwrap_or("") { + "dev" => DepKind::Development, + "build" => DepKind::Build, + _ => DepKind::Normal, + }; + + let platform = match target { + Some(target) => Some(target.parse()?), + None => None, + }; + + // All dependencies are private by default + let public = public.unwrap_or(false); + + // Unfortunately older versions of cargo and/or the registry ended up + // publishing lots of entries where the features array contained the + // empty feature, "", inside. This confuses the resolution process much + // later on and these features aren't actually valid, so filter them all + // out here. + features.retain(|s| !s.is_empty()); + + // In index, "registry" is null if it is from the same index. + // In Cargo.toml, "registry" is None if it is from the default + if !id.is_crates_io() { + dep.set_registry_id(id); + } + + dep.set_optional(optional) + .set_default_features(default_features) + .set_features(features) + .set_platform(platform) + .set_kind(kind) + .set_public(public); + + Ok(dep) + } +} + +/// Result from loading data from a registry. +pub enum LoadResponse { + /// The cache is valid. The cached data should be used. + CacheValid, + + /// The cache is out of date. Returned data should be used. + Data { + raw_data: Vec<u8>, + index_version: Option<String>, + }, + + /// The requested crate was found. + NotFound, +} + +/// An abstract interface to handle both a local (see `local::LocalRegistry`) +/// and remote (see `remote::RemoteRegistry`) registry. 
+/// +/// This allows [`RegistrySource`] to abstractly handle both registry kinds. +pub trait RegistryData { + /// Performs initialization for the registry. + /// + /// This should be safe to call multiple times, the implementation is + /// expected to not do any work if it is already prepared. + fn prepare(&self) -> CargoResult<()>; + + /// Returns the path to the index. + /// + /// Note that different registries store the index in different formats + /// (remote=git, local=files). + fn index_path(&self) -> &Filesystem; + + /// Loads the JSON for a specific named package from the index. + /// + /// * `root` is the root path to the index. + /// * `path` is the relative path to the package to load (like `ca/rg/cargo`). + /// * `index_version` is the version of the requested crate data currently in cache. + fn load( + &mut self, + root: &Path, + path: &Path, + index_version: Option<&str>, + ) -> Poll<CargoResult<LoadResponse>>; + + /// Loads the `config.json` file and returns it. + /// + /// Local registries don't have a config, and return `None`. + fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>>; + + /// Invalidates locally cached data. + fn invalidate_cache(&mut self); + + /// Is the local cached data up-to-date? + fn is_updated(&self) -> bool; + + /// Prepare to start downloading a `.crate` file. + /// + /// Despite the name, this doesn't actually download anything. If the + /// `.crate` is already downloaded, then it returns [`MaybeLock::Ready`]. + /// If it hasn't been downloaded, then it returns [`MaybeLock::Download`] + /// which contains the URL to download. The [`crate::core::package::Downloads`] + /// system handles the actual download process. After downloading, it + /// calls [`Self::finish_download`] to save the downloaded file. + /// + /// `checksum` is currently only used by local registries to verify the + /// file contents (because local registries never actually download + /// anything). Remote registries will validate the checksum in + /// `finish_download`. For already downloaded `.crate` files, it does not + /// validate the checksum, assuming the filesystem does not suffer from + /// corruption or manipulation. + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock>; + + /// Finish a download by saving a `.crate` file to disk. + /// + /// After [`crate::core::package::Downloads`] has finished a download, + /// it will call this to save the `.crate` file. This is only relevant + /// for remote registries. This should validate the checksum and save + /// the given data to the on-disk cache. + /// + /// Returns a [`File`] handle to the `.crate` file, positioned at the start. + fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8]) + -> CargoResult<File>; + + /// Returns whether or not the `.crate` file is already downloaded. + fn is_crate_downloaded(&self, _pkg: PackageId) -> bool { + true + } + + /// Validates that the global package cache lock is held. + /// + /// Given the [`Filesystem`], this will make sure that the package cache + /// lock is held. If not, it will panic. See + /// [`Config::acquire_package_cache_lock`] for acquiring the global lock. + /// + /// Returns the [`Path`] to the [`Filesystem`]. + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path; + + /// Block until all outstanding Poll::Pending requests are Poll::Ready. 
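+ ///
+ /// Callers typically drive this with a small poll loop, along these lines
+ /// (a sketch; the real loop lives in `RegistrySource::download` further
+ /// down in this file):
+ ///
+ /// ```ignore
+ /// let hash = loop {
+ ///     match index.hash(pkg, &mut *ops)? {
+ ///         Poll::Pending => ops.block_until_ready()?,
+ ///         Poll::Ready(hash) => break hash,
+ ///     }
+ /// };
+ /// ```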
+ fn block_until_ready(&mut self) -> CargoResult<()>; +} + +/// The status of [`RegistryData::download`] which indicates if a `.crate` +/// file has already been downloaded, or if not then the URL to download. +pub enum MaybeLock { + /// The `.crate` file is already downloaded. [`File`] is a handle to the + /// opened `.crate` file on the filesystem. + Ready(File), + /// The `.crate` file is not downloaded, here's the URL to download it from. + /// + /// `descriptor` is just a text string to display to the user of what is + /// being downloaded. + Download { + url: String, + descriptor: String, + authorization: Option<String>, + }, +} + +mod download; +mod http_remote; +mod index; +mod local; +mod remote; + +fn short_name(id: SourceId) -> String { + let hash = hex::short_hash(&id); + let ident = id.url().host_str().unwrap_or("").to_string(); + format!("{}-{}", ident, hash) +} + +impl<'cfg> RegistrySource<'cfg> { + pub fn remote( + source_id: SourceId, + yanked_whitelist: &HashSet<PackageId>, + config: &'cfg Config, + ) -> CargoResult<RegistrySource<'cfg>> { + assert!(source_id.is_remote_registry()); + let name = short_name(source_id); + let ops = if source_id.is_sparse() { + Box::new(http_remote::HttpRegistry::new(source_id, config, &name)?) as Box<_> + } else { + Box::new(remote::RemoteRegistry::new(source_id, config, &name)) as Box<_> + }; + + Ok(RegistrySource::new( + source_id, + config, + &name, + ops, + yanked_whitelist, + )) + } + + pub fn local( + source_id: SourceId, + path: &Path, + yanked_whitelist: &HashSet<PackageId>, + config: &'cfg Config, + ) -> RegistrySource<'cfg> { + let name = short_name(source_id); + let ops = local::LocalRegistry::new(path, config, &name); + RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist) + } + + fn new( + source_id: SourceId, + config: &'cfg Config, + name: &str, + ops: Box<dyn RegistryData + 'cfg>, + yanked_whitelist: &HashSet<PackageId>, + ) -> RegistrySource<'cfg> { + RegistrySource { + src_path: config.registry_source_path().join(name), + config, + source_id, + index: index::RegistryIndex::new(source_id, ops.index_path(), config), + yanked_whitelist: yanked_whitelist.clone(), + ops, + } + } + + /// Decode the configuration stored within the registry. + /// + /// This requires that the index has been at least checked out. + pub fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>> { + self.ops.config() + } + + /// Unpacks a downloaded package into a location where it's ready to be + /// compiled. + /// + /// No action is taken if the source looks like it's already unpacked. + fn unpack_package(&self, pkg: PackageId, tarball: &File) -> CargoResult<PathBuf> { + // The `.cargo-ok` file is used to track if the source is already + // unpacked. + let package_dir = format!("{}-{}", pkg.name(), pkg.version()); + let dst = self.src_path.join(&package_dir); + let path = dst.join(PACKAGE_SOURCE_LOCK); + let path = self.config.assert_package_cache_locked(&path); + let unpack_dir = path.parent().unwrap(); + match path.metadata() { + Ok(meta) if meta.len() > 0 => return Ok(unpack_dir.to_path_buf()), + Ok(_meta) => { + // The `.cargo-ok` file is not in a state we expect it to be + // (with two bytes containing "ok"). + // + // Cargo has always included a `.cargo-ok` file to detect if + // extraction was interrupted, but it was originally empty. + // + // In 1.34, Cargo was changed to create the `.cargo-ok` file + // before it started extraction to implement fine-grained + // locking. 
After it was finished extracting, it wrote two + // bytes to indicate it was complete. It would use the length + // check to detect if it was possibly interrupted. + // + // In 1.36, Cargo changed to not use fine-grained locking, and + // instead used a global lock. The use of `.cargo-ok` was no + // longer needed for locking purposes, but was kept to detect + // when extraction was interrupted. + // + // In 1.49, Cargo changed to not create the `.cargo-ok` file + // before it started extraction to deal with `.crate` files + // that inexplicably had a `.cargo-ok` file in them. + // + // In 1.64, Cargo changed to detect `.crate` files with + // `.cargo-ok` files in them in response to CVE-2022-36113, + // which dealt with malicious `.crate` files making + // `.cargo-ok` a symlink causing cargo to write "ok" to any + // arbitrary file on the filesystem it has permission to. + // + // This is all a long-winded way of explaining the + // circumstances that might cause a directory to contain a + // `.cargo-ok` file that is empty or otherwise corrupted. + // Either this was extracted by a version of Rust before 1.34, + // in which case everything should be fine. However, an empty + // file created by versions 1.36 to 1.49 indicates that the + // extraction was interrupted and that we need to start again. + // + // Another possibility is that the filesystem is simply + // corrupted, in which case deleting the directory might be + // the safe thing to do. That is probably unlikely, though. + // + // To be safe, this deletes the directory and starts over + // again. + log::warn!("unexpected length of {path:?}, clearing cache"); + paths::remove_dir_all(dst.as_path_unlocked())?; + } + Err(e) if e.kind() == io::ErrorKind::NotFound => {} + Err(e) => anyhow::bail!("failed to access package completion {path:?}: {e}"), + } + dst.create_dir()?; + let mut tar = { + let size_limit = max_unpack_size(self.config, tarball.metadata()?.len()); + let gz = GzDecoder::new(tarball); + let gz = LimitErrorReader::new(gz, size_limit); + Archive::new(gz) + }; + let prefix = unpack_dir.file_name().unwrap(); + let parent = unpack_dir.parent().unwrap(); + for entry in tar.entries()? { + let mut entry = entry.with_context(|| "failed to iterate over archive")?; + let entry_path = entry + .path() + .with_context(|| "failed to read entry path")? + .into_owned(); + + // We're going to unpack this tarball into the global source + // directory, but we want to make sure that it doesn't accidentally + // (or maliciously) overwrite source code from other crates. Cargo + // itself should never generate a tarball that hits this error, and + // crates.io should also block uploads with these sorts of tarballs, + // but be extra sure by adding a check here as well. + if !entry_path.starts_with(prefix) { + anyhow::bail!( + "invalid tarball downloaded, contains \ + a file at {:?} which isn't under {:?}", + entry_path, + prefix + ) + } + // Prevent unpacking the lockfile from the crate itself. 
+ if entry_path + .file_name() + .map_or(false, |p| p == PACKAGE_SOURCE_LOCK) + { + continue; + } + // Unpacking failed + let mut result = entry.unpack_in(parent).map_err(anyhow::Error::from); + if cfg!(windows) && restricted_names::is_windows_reserved_path(&entry_path) { + result = result.with_context(|| { + format!( + "`{}` appears to contain a reserved Windows path, \ + it cannot be extracted on Windows", + entry_path.display() + ) + }); + } + result + .with_context(|| format!("failed to unpack entry at `{}`", entry_path.display()))?; + } + + // Now that we've finished unpacking, create and write to the lock file to indicate that + // unpacking was successful. + let mut ok = OpenOptions::new() + .create_new(true) + .read(true) + .write(true) + .open(&path) + .with_context(|| format!("failed to open `{}`", path.display()))?; + write!(ok, "ok")?; + + Ok(unpack_dir.to_path_buf()) + } + + fn get_pkg(&mut self, package: PackageId, path: &File) -> CargoResult<Package> { + let path = self + .unpack_package(package, path) + .with_context(|| format!("failed to unpack package `{}`", package))?; + let mut src = PathSource::new(&path, self.source_id, self.config); + src.update()?; + let mut pkg = match src.download(package)? { + MaybePackage::Ready(pkg) => pkg, + MaybePackage::Download { .. } => unreachable!(), + }; + + // After we've loaded the package configure its summary's `checksum` + // field with the checksum we know for this `PackageId`. + let req = OptVersionReq::exact(package.version()); + let summary_with_cksum = self + .index + .summaries(package.name(), &req, &mut *self.ops)? + .expect("a downloaded dep now pending!?") + .map(|s| s.summary.clone()) + .next() + .expect("summary not found"); + if let Some(cksum) = summary_with_cksum.checksum() { + pkg.manifest_mut() + .summary_mut() + .set_checksum(cksum.to_string()); + } + + Ok(pkg) + } +} + +impl<'cfg> Source for RegistrySource<'cfg> { + fn query( + &mut self, + dep: &Dependency, + kind: QueryKind, + f: &mut dyn FnMut(Summary), + ) -> Poll<CargoResult<()>> { + // If this is a precise dependency, then it came from a lock file and in + // theory the registry is known to contain this version. If, however, we + // come back with no summaries, then our registry may need to be + // updated, so we fall back to performing a lazy update. + if kind == QueryKind::Exact && dep.source_id().precise().is_some() && !self.ops.is_updated() + { + debug!("attempting query without update"); + let mut called = false; + let pend = + self.index + .query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| { + if dep.matches(&s) { + called = true; + f(s); + } + })?; + if pend.is_pending() { + return Poll::Pending; + } + if called { + return Poll::Ready(Ok(())); + } else { + debug!("falling back to an update"); + self.invalidate_cache(); + return Poll::Pending; + } + } + + self.index + .query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| { + let matched = match kind { + QueryKind::Exact => dep.matches(&s), + QueryKind::Fuzzy => true, + }; + if matched { + f(s); + } + }) + } + + fn supports_checksums(&self) -> bool { + true + } + + fn requires_precise(&self) -> bool { + false + } + + fn source_id(&self) -> SourceId { + self.source_id + } + + fn invalidate_cache(&mut self) { + self.index.clear_summaries_cache(); + self.ops.invalidate_cache(); + } + + fn download(&mut self, package: PackageId) -> CargoResult<MaybePackage> { + let hash = loop { + match self.index.hash(package, &mut *self.ops)? 
{ + Poll::Pending => self.block_until_ready()?, + Poll::Ready(hash) => break hash, + } + }; + match self.ops.download(package, hash)? { + MaybeLock::Ready(file) => self.get_pkg(package, &file).map(MaybePackage::Ready), + MaybeLock::Download { + url, + descriptor, + authorization, + } => Ok(MaybePackage::Download { + url, + descriptor, + authorization, + }), + } + } + + fn finish_download(&mut self, package: PackageId, data: Vec<u8>) -> CargoResult<Package> { + let hash = loop { + match self.index.hash(package, &mut *self.ops)? { + Poll::Pending => self.block_until_ready()?, + Poll::Ready(hash) => break hash, + } + }; + let file = self.ops.finish_download(package, hash, &data)?; + self.get_pkg(package, &file) + } + + fn fingerprint(&self, pkg: &Package) -> CargoResult<String> { + Ok(pkg.package_id().version().to_string()) + } + + fn describe(&self) -> String { + self.source_id.display_index() + } + + fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) { + self.yanked_whitelist.extend(pkgs); + } + + fn is_yanked(&mut self, pkg: PackageId) -> Poll<CargoResult<bool>> { + self.index.is_yanked(pkg, &mut *self.ops) + } + + fn block_until_ready(&mut self) -> CargoResult<()> { + // Before starting to work on the registry, make sure that + // `<cargo_home>/registry` is marked as excluded from indexing and + // backups. Older versions of Cargo didn't do this, so we do it here + // regardless of whether `<cargo_home>` exists. + // + // This does not use `create_dir_all_excluded_from_backups_atomic` for + // the same reason: we want to exclude it even if the directory already + // exists. + // + // IO errors in creating and marking it are ignored, e.g. in case we're on a + // read-only filesystem. + let registry_base = self.config.registry_base_path(); + let _ = registry_base.create_dir(); + exclude_from_backups_and_indexing(®istry_base.into_path_unlocked()); + + self.ops.block_until_ready() + } +} + +/// Get the maximum upack size that Cargo permits +/// based on a given `size of your compressed file. +/// +/// Returns the larger one between `size * max compression ratio` +/// and a fixed max unpacked size. +/// +/// In reality, the compression ratio usually falls in the range of 2:1 to 10:1. +/// We choose 20:1 to cover almost all possible cases hopefully. +/// Any ratio higher than this is considered as a zip bomb. +/// +/// In the future we might want to introduce a configurable size. +/// +/// Some of the real world data from common compression algorithms: +/// +/// * <https://www.zlib.net/zlib_tech.html> +/// * <https://cran.r-project.org/web/packages/brotli/vignettes/brotli-2015-09-22.pdf> +/// * <https://blog.cloudflare.com/results-experimenting-brotli/> +/// * <https://tukaani.org/lzma/benchmarks.html> +fn max_unpack_size(config: &Config, size: u64) -> u64 { + const SIZE_VAR: &str = "__CARGO_TEST_MAX_UNPACK_SIZE"; + const RATIO_VAR: &str = "__CARGO_TEST_MAX_UNPACK_RATIO"; + let max_unpack_size = if cfg!(debug_assertions) && config.get_env(SIZE_VAR).is_ok() { + // For integration test only. + config + .get_env(SIZE_VAR) + .unwrap() + .parse() + .expect("a max unpack size in bytes") + } else { + MAX_UNPACK_SIZE + }; + let max_compression_ratio = if cfg!(debug_assertions) && config.get_env(RATIO_VAR).is_ok() { + // For integration test only. 
+ config + .get_env(RATIO_VAR) + .unwrap() + .parse() + .expect("a max compression ratio in bytes") + } else { + MAX_COMPRESSION_RATIO + }; + + u64::max(max_unpack_size, size * max_compression_ratio as u64) +} + +fn make_dep_prefix(name: &str) -> String { + match name.len() { + 1 => String::from("1"), + 2 => String::from("2"), + 3 => format!("3/{}", &name[..1]), + _ => format!("{}/{}", &name[0..2], &name[2..4]), + } +} + +#[cfg(test)] +mod tests { + use super::make_dep_prefix; + + #[test] + fn dep_prefix() { + assert_eq!(make_dep_prefix("a"), "1"); + assert_eq!(make_dep_prefix("ab"), "2"); + assert_eq!(make_dep_prefix("abc"), "3/a"); + assert_eq!(make_dep_prefix("Abc"), "3/A"); + assert_eq!(make_dep_prefix("AbCd"), "Ab/Cd"); + assert_eq!(make_dep_prefix("aBcDe"), "aB/cD"); + } +} diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs new file mode 100644 index 0000000..aa0ec90 --- /dev/null +++ b/src/cargo/sources/registry/remote.rs @@ -0,0 +1,358 @@ +use crate::core::{GitReference, PackageId, SourceId}; +use crate::sources::git; +use crate::sources::registry::download; +use crate::sources::registry::MaybeLock; +use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData}; +use crate::util::errors::CargoResult; +use crate::util::interning::InternedString; +use crate::util::{Config, Filesystem}; +use anyhow::Context as _; +use cargo_util::paths; +use lazycell::LazyCell; +use log::{debug, trace}; +use std::cell::{Cell, Ref, RefCell}; +use std::fs::File; +use std::mem; +use std::path::Path; +use std::str; +use std::task::{ready, Poll}; + +/// A remote registry is a registry that lives at a remote URL (such as +/// crates.io). The git index is cloned locally, and `.crate` files are +/// downloaded as needed and cached locally. +pub struct RemoteRegistry<'cfg> { + index_path: Filesystem, + /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/path/$REG-HASH`). + cache_path: Filesystem, + source_id: SourceId, + index_git_ref: GitReference, + config: &'cfg Config, + tree: RefCell<Option<git2::Tree<'static>>>, + repo: LazyCell<git2::Repository>, + head: Cell<Option<git2::Oid>>, + current_sha: Cell<Option<InternedString>>, + needs_update: bool, // Does this registry need to be updated? +} + +impl<'cfg> RemoteRegistry<'cfg> { + pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> RemoteRegistry<'cfg> { + RemoteRegistry { + index_path: config.registry_index_path().join(name), + cache_path: config.registry_cache_path().join(name), + source_id, + config, + // TODO: we should probably make this configurable + index_git_ref: GitReference::DefaultBranch, + tree: RefCell::new(None), + repo: LazyCell::new(), + head: Cell::new(None), + current_sha: Cell::new(None), + needs_update: false, + } + } + + fn repo(&self) -> CargoResult<&git2::Repository> { + self.repo.try_borrow_with(|| { + let path = self.config.assert_package_cache_locked(&self.index_path); + + // Fast path without a lock + if let Ok(repo) = git2::Repository::open(&path) { + trace!("opened a repo without a lock"); + return Ok(repo); + } + + // Ok, now we need to lock and try the whole thing over again. + trace!("acquiring registry index lock"); + match git2::Repository::open(&path) { + Ok(repo) => Ok(repo), + Err(_) => { + drop(paths::remove_dir_all(&path)); + paths::create_dir_all(&path)?; + + // Note that we'd actually prefer to use a bare repository + // here as we're not actually going to check anything out. 
+ // All versions of Cargo, though, share the same CARGO_HOME, + // so for compatibility with older Cargo which *does* do + // checkouts we make sure to initialize a new full + // repository (not a bare one). + // + // We should change this to `init_bare` whenever we feel + // like enough time has passed or if we change the directory + // that the folder is located in, such as by changing the + // hash at the end of the directory. + // + // Note that in the meantime we also skip `init.templatedir` + // as it can be misconfigured sometimes or otherwise add + // things that we don't want. + let mut opts = git2::RepositoryInitOptions::new(); + opts.external_template(false); + Ok(git2::Repository::init_opts(&path, &opts).with_context(|| { + format!("failed to initialize index git repository (in {:?})", path) + })?) + } + } + }) + } + + fn head(&self) -> CargoResult<git2::Oid> { + if self.head.get().is_none() { + let repo = self.repo()?; + let oid = self.index_git_ref.resolve(repo)?; + self.head.set(Some(oid)); + } + Ok(self.head.get().unwrap()) + } + + fn tree(&self) -> CargoResult<Ref<'_, git2::Tree<'_>>> { + { + let tree = self.tree.borrow(); + if tree.is_some() { + return Ok(Ref::map(tree, |s| s.as_ref().unwrap())); + } + } + let repo = self.repo()?; + let commit = repo.find_commit(self.head()?)?; + let tree = commit.tree()?; + + // Unfortunately in libgit2 the tree objects look like they've got a + // reference to the repository object which means that a tree cannot + // outlive the repository that it came from. Here we want to cache this + // tree, though, so to accomplish this we transmute it to a static + // lifetime. + // + // Note that we don't actually hand out the static lifetime, instead we + // only return a scoped one from this function. Additionally the repo + // we loaded from (above) lives as long as this object + // (`RemoteRegistry`) so we then just need to ensure that the tree is + // destroyed first in the destructor, hence the destructor on + // `RemoteRegistry` below. + let tree = unsafe { mem::transmute::<git2::Tree<'_>, git2::Tree<'static>>(tree) }; + *self.tree.borrow_mut() = Some(tree); + Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap())) + } + + fn current_version(&self) -> Option<InternedString> { + if let Some(sha) = self.current_sha.get() { + return Some(sha); + } + let sha = InternedString::new(&self.head().ok()?.to_string()); + self.current_sha.set(Some(sha)); + Some(sha) + } + + fn is_updated(&self) -> bool { + self.config.updated_sources().contains(&self.source_id) + } + + fn mark_updated(&self) { + self.config.updated_sources().insert(self.source_id); + } +} + +const LAST_UPDATED_FILE: &str = ".last-updated"; + +impl<'cfg> RegistryData for RemoteRegistry<'cfg> { + fn prepare(&self) -> CargoResult<()> { + self.repo()?; // create intermediate dirs and initialize the repo + Ok(()) + } + + fn index_path(&self) -> &Filesystem { + &self.index_path + } + + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { + self.config.assert_package_cache_locked(path) + } + + // `index_version` Is a string representing the version of the file used to construct the cached copy. + // Older versions of Cargo used the single value of the hash of the HEAD commit as a `index_version`. + // This is technically correct but a little too conservative. If a new commit is fetched all cached + // files need to be regenerated even if a particular file was not changed. 
+ // However if an old cargo has written such a file we still know how to read it, as long as we check for that hash value. + // + // Cargo now uses a hash of the file's contents as provided by git. + fn load( + &mut self, + _root: &Path, + path: &Path, + index_version: Option<&str>, + ) -> Poll<CargoResult<LoadResponse>> { + if self.needs_update { + return Poll::Pending; + } + // Check if the cache is valid. + let git_commit_hash = self.current_version(); + if index_version.is_some() && index_version == git_commit_hash.as_deref() { + // This file was written by an old version of cargo, but it is still up-to-date. + return Poll::Ready(Ok(LoadResponse::CacheValid)); + } + // Note that the index calls this method and the filesystem is locked + // in the index, so we don't need to worry about an `update_index` + // happening in a different process. + fn load_helper( + registry: &RemoteRegistry<'_>, + path: &Path, + index_version: Option<&str>, + ) -> CargoResult<LoadResponse> { + let repo = registry.repo()?; + let tree = registry.tree()?; + let entry = tree.get_path(path); + let entry = entry?; + let git_file_hash = Some(entry.id().to_string()); + + // Check if the cache is valid. + if index_version.is_some() && index_version == git_file_hash.as_deref() { + return Ok(LoadResponse::CacheValid); + } + + let object = entry.to_object(repo)?; + let blob = match object.as_blob() { + Some(blob) => blob, + None => anyhow::bail!("path `{}` is not a blob in the git repo", path.display()), + }; + + Ok(LoadResponse::Data { + raw_data: blob.content().to_vec(), + index_version: git_file_hash, + }) + } + + match load_helper(&self, path, index_version) { + Ok(result) => Poll::Ready(Ok(result)), + Err(_) if !self.is_updated() => { + // If git returns an error and we haven't updated the repo, return + // pending to allow an update to try again. + self.needs_update = true; + Poll::Pending + } + Err(e) + if e.downcast_ref::<git2::Error>() + .map(|e| e.code() == git2::ErrorCode::NotFound) + .unwrap_or_default() => + { + // The repo has been updated and the file does not exist. + Poll::Ready(Ok(LoadResponse::NotFound)) + } + Err(e) => Poll::Ready(Err(e)), + } + } + + fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>> { + debug!("loading config"); + self.prepare()?; + self.config.assert_package_cache_locked(&self.index_path); + match ready!(self.load(Path::new(""), Path::new("config.json"), None)?) { + LoadResponse::Data { raw_data, .. } => { + trace!("config loaded"); + let mut cfg: RegistryConfig = serde_json::from_slice(&raw_data)?; + if !self.config.cli_unstable().registry_auth { + cfg.auth_required = false; + } + Poll::Ready(Ok(Some(cfg))) + } + _ => Poll::Ready(Ok(None)), + } + } + + fn block_until_ready(&mut self) -> CargoResult<()> { + if !self.needs_update { + return Ok(()); + } + + self.needs_update = false; + + // Make sure the index is only updated once per session since it is an + // expensive operation. This generally only happens when the resolver + // is run multiple times, such as during `cargo publish`. + if self.is_updated() { + return Ok(()); + } + self.mark_updated(); + + if self.config.offline() { + return Ok(()); + } + if self.config.cli_unstable().no_index_update { + return Ok(()); + } + + debug!("updating the index"); + + // Ensure that we'll actually be able to acquire an HTTP handle later on + // once we start trying to download crates. This will weed out any + // problems with `.cargo/config` configuration related to HTTP. 
+ // + // This way if there's a problem the error gets printed before we even + // hit the index, which may not actually read this configuration. + self.config.http()?; + + self.prepare()?; + self.head.set(None); + *self.tree.borrow_mut() = None; + self.current_sha.set(None); + let path = self.config.assert_package_cache_locked(&self.index_path); + self.config + .shell() + .status("Updating", self.source_id.display_index())?; + + // Fetch the latest version of our `index_git_ref` into the index + // checkout. + let url = self.source_id.url(); + let repo = self.repo.borrow_mut().unwrap(); + git::fetch(repo, url.as_str(), &self.index_git_ref, self.config) + .with_context(|| format!("failed to fetch `{}`", url))?; + + // Create a dummy file to record the mtime for when we updated the + // index. + paths::create(&path.join(LAST_UPDATED_FILE))?; + + Ok(()) + } + + fn invalidate_cache(&mut self) { + // To fully invalidate, undo `mark_updated`s work + self.needs_update = true; + } + + fn is_updated(&self) -> bool { + self.is_updated() + } + + fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> { + let registry_config = loop { + match self.config()? { + Poll::Pending => self.block_until_ready()?, + Poll::Ready(cfg) => break cfg.unwrap(), + } + }; + + download::download( + &self.cache_path, + &self.config, + pkg, + checksum, + registry_config, + ) + } + + fn finish_download( + &mut self, + pkg: PackageId, + checksum: &str, + data: &[u8], + ) -> CargoResult<File> { + download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) + } + + fn is_crate_downloaded(&self, pkg: PackageId) -> bool { + download::is_crate_downloaded(&self.cache_path, &self.config, pkg) + } +} + +impl<'cfg> Drop for RemoteRegistry<'cfg> { + fn drop(&mut self) { + // Just be sure to drop this before our other fields + self.tree.borrow_mut().take(); + } +} |
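As a closing illustration of the unpack-size guard documented in `mod.rs` above: the cap on unpacked bytes is the larger of the fixed `MAX_UNPACK_SIZE` allowance and `MAX_COMPRESSION_RATIO` times the compressed `.crate` size, so small crates get the fixed 512 MiB allowance while very large crates are bounded by the 20:1 ratio. A standalone sketch of that arithmetic, without the test-only environment overrides (the sizes in the asserts are invented):

```rust
const MAX_UNPACK_SIZE: u64 = 512 * 1024 * 1024; // 512 MiB
const MAX_COMPRESSION_RATIO: u64 = 20; // 20:1

/// Mirrors the shape of `max_unpack_size` above: the larger of the fixed
/// allowance and the ratio-scaled compressed size.
fn max_unpack_size(compressed: u64) -> u64 {
    u64::max(MAX_UNPACK_SIZE, compressed * MAX_COMPRESSION_RATIO)
}

fn main() {
    // A 100 KiB `.crate` may unpack to at most 512 MiB: the fixed allowance wins.
    assert_eq!(max_unpack_size(100 * 1024), 512 * 1024 * 1024);
    // A 1 GiB `.crate` may unpack to at most 20 GiB: the 20:1 ratio wins.
    assert_eq!(max_unpack_size(1 << 30), 20 * (1 << 30));
}
```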