#![allow(clippy::all)] use std::collections::BTreeMap; use std::fs::File; use std::io::prelude::*; use std::io::{Cursor, SeekFrom}; use std::time::Instant; use curl::easy::{Easy, List}; use percent_encoding::{percent_encode, NON_ALPHANUMERIC}; use serde::{Deserialize, Serialize}; use url::Url; pub type Result = std::result::Result; pub struct Registry { /// The base URL for issuing API requests. host: String, /// Optional authorization token. /// If None, commands requiring authorization will fail. token: Option, /// Curl handle for issuing requests. handle: Easy, /// Whether to include the authorization token with all requests. auth_required: bool, } #[derive(PartialEq, Clone, Copy)] pub enum Auth { Authorized, Unauthorized, } #[derive(Deserialize)] pub struct Crate { pub name: String, pub description: Option, pub max_version: String, } #[derive(Serialize, Deserialize)] pub struct NewCrate { pub name: String, pub vers: String, pub deps: Vec, pub features: BTreeMap>, pub authors: Vec, pub description: Option, pub documentation: Option, pub homepage: Option, pub readme: Option, pub readme_file: Option, pub keywords: Vec, pub categories: Vec, pub license: Option, pub license_file: Option, pub repository: Option, pub badges: BTreeMap>, pub links: Option, pub rust_version: Option, } #[derive(Serialize, Deserialize)] pub struct NewCrateDependency { pub optional: bool, pub default_features: bool, pub name: String, pub features: Vec, pub version_req: String, pub target: Option, pub kind: String, #[serde(skip_serializing_if = "Option::is_none")] pub registry: Option, #[serde(skip_serializing_if = "Option::is_none")] pub explicit_name_in_toml: Option, #[serde(skip_serializing_if = "Option::is_none")] pub artifact: Option>, #[serde(skip_serializing_if = "Option::is_none")] pub bindep_target: Option, #[serde(default, skip_serializing_if = "is_false")] pub lib: bool, } fn is_false(x: &bool) -> bool { *x == false } #[derive(Deserialize)] pub struct User { pub id: u32, pub login: String, pub avatar: Option, pub email: Option, pub name: Option, } pub struct Warnings { pub invalid_categories: Vec, pub invalid_badges: Vec, pub other: Vec, } #[derive(Deserialize)] struct R { ok: bool, } #[derive(Deserialize)] struct OwnerResponse { ok: bool, msg: String, } #[derive(Deserialize)] struct ApiErrorList { errors: Vec, } #[derive(Deserialize)] struct ApiError { detail: String, } #[derive(Serialize)] struct OwnersReq<'a> { users: &'a [&'a str], } #[derive(Deserialize)] struct Users { users: Vec, } #[derive(Deserialize)] struct TotalCrates { total: u32, } #[derive(Deserialize)] struct Crates { crates: Vec, meta: TotalCrates, } /// Error returned when interacting with a registry. #[derive(Debug, thiserror::Error)] pub enum Error { /// Error from libcurl. #[error(transparent)] Curl(#[from] curl::Error), /// Error from seriailzing the request payload and deserializing the /// response body (like response body didn't match expected structure). #[error(transparent)] Json(#[from] serde_json::Error), /// Error from IO. Mostly from reading the tarball to upload. #[error("failed to seek tarball")] Io(#[from] std::io::Error), /// Response body was not valid utf8. #[error("invalid response body from server")] Utf8(#[from] std::string::FromUtf8Error), /// Error from API response containing JSON field `errors.details`. #[error( "the remote server responded with an error{}: {}", status(*code), errors.join(", "), )] Api { code: u32, headers: Vec, errors: Vec, }, /// Error from API response which didn't have pre-programmed `errors.details`. #[error( "failed to get a 200 OK response, got {code}\nheaders:\n\t{}\nbody:\n{body}", headers.join("\n\t"), )] Code { code: u32, headers: Vec, body: String, }, /// Reason why the token was invalid. #[error("{0}")] InvalidToken(&'static str), /// Server was unavailable and timeouted. Happened when uploading a way /// too large tarball to crates.io. #[error( "Request timed out after 30 seconds. If you're trying to \ upload a crate it may be too large. If the crate is under \ 10MB in size, you can email help@crates.io for assistance.\n\ Total size was {0}." )] Timeout(u64), } impl Registry { /// Creates a new `Registry`. /// /// ## Example /// /// ```rust /// use curl::easy::Easy; /// use crates_io::Registry; /// /// let mut handle = Easy::new(); /// // If connecting to crates.io, a user-agent is required. /// handle.useragent("my_crawler (example.com/info)"); /// let mut reg = Registry::new_handle(String::from("https://crates.io"), None, handle, true); /// ``` pub fn new_handle( host: String, token: Option, handle: Easy, auth_required: bool, ) -> Registry { Registry { host, token, handle, auth_required, } } pub fn set_token(&mut self, token: Option) { self.token = token; } fn token(&self) -> Result<&str> { let token = self.token.as_ref().ok_or_else(|| { Error::InvalidToken("no upload token found, please run `cargo login`") })?; check_token(token)?; Ok(token) } pub fn host(&self) -> &str { &self.host } pub fn host_is_crates_io(&self) -> bool { is_url_crates_io(&self.host) } pub fn add_owners(&mut self, krate: &str, owners: &[&str]) -> Result { let body = serde_json::to_string(&OwnersReq { users: owners })?; let body = self.put(&format!("/crates/{}/owners", krate), body.as_bytes())?; assert!(serde_json::from_str::(&body)?.ok); Ok(serde_json::from_str::(&body)?.msg) } pub fn remove_owners(&mut self, krate: &str, owners: &[&str]) -> Result<()> { let body = serde_json::to_string(&OwnersReq { users: owners })?; let body = self.delete(&format!("/crates/{}/owners", krate), Some(body.as_bytes()))?; assert!(serde_json::from_str::(&body)?.ok); Ok(()) } pub fn list_owners(&mut self, krate: &str) -> Result> { let body = self.get(&format!("/crates/{}/owners", krate))?; Ok(serde_json::from_str::(&body)?.users) } pub fn publish(&mut self, krate: &NewCrate, mut tarball: &File) -> Result { let json = serde_json::to_string(krate)?; // Prepare the body. The format of the upload request is: // // // (metadata for the package) // // // NOTE: This can be replaced with `stream_len` if it is ever stabilized. // // This checks the length using seeking instead of metadata, because // on some filesystems, getting the metadata will fail because // the file was renamed in ops::package. let tarball_len = tarball.seek(SeekFrom::End(0))?; tarball.seek(SeekFrom::Start(0))?; let header = { let mut w = Vec::new(); w.extend(&(json.len() as u32).to_le_bytes()); w.extend(json.as_bytes().iter().cloned()); w.extend(&(tarball_len as u32).to_le_bytes()); w }; let size = tarball_len as usize + header.len(); let mut body = Cursor::new(header).chain(tarball); let url = format!("{}/api/v1/crates/new", self.host); self.handle.put(true)?; self.handle.url(&url)?; self.handle.in_filesize(size as u64)?; let mut headers = List::new(); headers.append("Accept: application/json")?; headers.append(&format!("Authorization: {}", self.token()?))?; self.handle.http_headers(headers)?; let started = Instant::now(); let body = self .handle(&mut |buf| body.read(buf).unwrap_or(0)) .map_err(|e| match e { Error::Code { code, .. } if code == 503 && started.elapsed().as_secs() >= 29 && self.host_is_crates_io() => { Error::Timeout(tarball_len) } _ => e.into(), })?; let response = if body.is_empty() { "{}".parse()? } else { body.parse::()? }; let invalid_categories: Vec = response .get("warnings") .and_then(|j| j.get("invalid_categories")) .and_then(|j| j.as_array()) .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect()) .unwrap_or_else(Vec::new); let invalid_badges: Vec = response .get("warnings") .and_then(|j| j.get("invalid_badges")) .and_then(|j| j.as_array()) .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect()) .unwrap_or_else(Vec::new); let other: Vec = response .get("warnings") .and_then(|j| j.get("other")) .and_then(|j| j.as_array()) .map(|x| x.iter().flat_map(|j| j.as_str()).map(Into::into).collect()) .unwrap_or_else(Vec::new); Ok(Warnings { invalid_categories, invalid_badges, other, }) } pub fn search(&mut self, query: &str, limit: u32) -> Result<(Vec, u32)> { let formatted_query = percent_encode(query.as_bytes(), NON_ALPHANUMERIC); let body = self.req( &format!("/crates?q={}&per_page={}", formatted_query, limit), None, Auth::Unauthorized, )?; let crates = serde_json::from_str::(&body)?; Ok((crates.crates, crates.meta.total)) } pub fn yank(&mut self, krate: &str, version: &str) -> Result<()> { let body = self.delete(&format!("/crates/{}/{}/yank", krate, version), None)?; assert!(serde_json::from_str::(&body)?.ok); Ok(()) } pub fn unyank(&mut self, krate: &str, version: &str) -> Result<()> { let body = self.put(&format!("/crates/{}/{}/unyank", krate, version), &[])?; assert!(serde_json::from_str::(&body)?.ok); Ok(()) } fn put(&mut self, path: &str, b: &[u8]) -> Result { self.handle.put(true)?; self.req(path, Some(b), Auth::Authorized) } fn get(&mut self, path: &str) -> Result { self.handle.get(true)?; self.req(path, None, Auth::Authorized) } fn delete(&mut self, path: &str, b: Option<&[u8]>) -> Result { self.handle.custom_request("DELETE")?; self.req(path, b, Auth::Authorized) } fn req(&mut self, path: &str, body: Option<&[u8]>, authorized: Auth) -> Result { self.handle.url(&format!("{}/api/v1{}", self.host, path))?; let mut headers = List::new(); headers.append("Accept: application/json")?; headers.append("Content-Type: application/json")?; if self.auth_required || authorized == Auth::Authorized { headers.append(&format!("Authorization: {}", self.token()?))?; } self.handle.http_headers(headers)?; match body { Some(mut body) => { self.handle.upload(true)?; self.handle.in_filesize(body.len() as u64)?; self.handle(&mut |buf| body.read(buf).unwrap_or(0)) .map_err(|e| e.into()) } None => self.handle(&mut |_| 0).map_err(|e| e.into()), } } fn handle(&mut self, read: &mut dyn FnMut(&mut [u8]) -> usize) -> Result { let mut headers = Vec::new(); let mut body = Vec::new(); { let mut handle = self.handle.transfer(); handle.read_function(|buf| Ok(read(buf)))?; handle.write_function(|data| { body.extend_from_slice(data); Ok(data.len()) })?; handle.header_function(|data| { // Headers contain trailing \r\n, trim them to make it easier // to work with. let s = String::from_utf8_lossy(data).trim().to_string(); // Don't let server sneak extra lines anywhere. if s.contains('\n') { return true; } headers.push(s); true })?; handle.perform()?; } let body = String::from_utf8(body)?; let errors = serde_json::from_str::(&body) .ok() .map(|s| s.errors.into_iter().map(|s| s.detail).collect::>()); match (self.handle.response_code()?, errors) { (0, None) | (200, None) => Ok(body), (code, Some(errors)) => Err(Error::Api { code, headers, errors, }), (code, None) => Err(Error::Code { code, headers, body, }), } } } fn status(code: u32) -> String { if code == 200 { String::new() } else { let reason = reason(code); format!(" (status {code} {reason})") } } fn reason(code: u32) -> &'static str { // Taken from https://developer.mozilla.org/en-US/docs/Web/HTTP/Status match code { 100 => "Continue", 101 => "Switching Protocol", 103 => "Early Hints", 200 => "OK", 201 => "Created", 202 => "Accepted", 203 => "Non-Authoritative Information", 204 => "No Content", 205 => "Reset Content", 206 => "Partial Content", 300 => "Multiple Choice", 301 => "Moved Permanently", 302 => "Found", 303 => "See Other", 304 => "Not Modified", 307 => "Temporary Redirect", 308 => "Permanent Redirect", 400 => "Bad Request", 401 => "Unauthorized", 402 => "Payment Required", 403 => "Forbidden", 404 => "Not Found", 405 => "Method Not Allowed", 406 => "Not Acceptable", 407 => "Proxy Authentication Required", 408 => "Request Timeout", 409 => "Conflict", 410 => "Gone", 411 => "Length Required", 412 => "Precondition Failed", 413 => "Payload Too Large", 414 => "URI Too Long", 415 => "Unsupported Media Type", 416 => "Request Range Not Satisfiable", 417 => "Expectation Failed", 429 => "Too Many Requests", 431 => "Request Header Fields Too Large", 500 => "Internal Server Error", 501 => "Not Implemented", 502 => "Bad Gateway", 503 => "Service Unavailable", 504 => "Gateway Timeout", _ => "", } } /// Returns `true` if the host of the given URL is "crates.io". pub fn is_url_crates_io(url: &str) -> bool { Url::parse(url) .map(|u| u.host_str() == Some("crates.io")) .unwrap_or(false) } /// Checks if a token is valid or malformed. /// /// This check is necessary to prevent sending tokens which create an invalid HTTP request. /// It would be easier to check just for alphanumeric tokens, but we can't be sure that all /// registries only create tokens in that format so that is as less restricted as possible. pub fn check_token(token: &str) -> Result<()> { if token.is_empty() { return Err(Error::InvalidToken("please provide a non-empty token")); } if token.bytes().all(|b| { // This is essentially the US-ASCII limitation of // https://www.rfc-editor.org/rfc/rfc9110#name-field-values. That is, // visible ASCII characters (0x21-0x7e), space, and tab. We want to be // able to pass this in an HTTP header without encoding. b >= 32 && b < 127 || b == b'\t' }) { Ok(()) } else { Err(Error::InvalidToken( "token contains invalid characters.\nOnly printable ISO-8859-1 characters \ are allowed as it is sent in a HTTPS header.", )) } }