use std::{ collections::BTreeMap, convert::TryInto as _, env, fmt, fs, path::{Path, PathBuf}, process, str::FromStr, }; use chrono::{Datelike as _, Month, TimeZone as _, Utc}; use glob::glob; use regex::Regex; #[derive(Debug, Copy, Clone, PartialEq, Eq)] struct Date { year: u32, month: u32, } impl Date { fn months_since(self, other: Date) -> Option { let self_chrono = Utc.ymd(self.year.try_into().unwrap(), self.month, 1); let other_chrono = Utc.ymd(other.year.try_into().unwrap(), other.month, 1); let duration_since = self_chrono.signed_duration_since(other_chrono); let months_since = duration_since.num_days() / 30; if months_since < 0 { None } else { Some(months_since.try_into().unwrap()) } } } impl fmt::Display for Date { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:04}-{:02}", self.year, self.month) } } fn make_date_regex() -> Regex { Regex::new( r"(?x) # insignificant whitespace mode ( ) | (\s+ (?P[[:alpha:]]+)\s+ (?P\d{4})\b ) ", ) .unwrap() } fn collect_dates_from_file(date_regex: &Regex, text: &str) -> Vec<(usize, Date)> { let mut line = 1; let mut end_of_last_cap = 0; date_regex .captures_iter(text) .filter_map(|cap| { if let (Some(month), Some(year), None, None) | (None, None, Some(month), Some(year)) = ( cap.name("m1"), cap.name("y1"), cap.name("m2"), cap.name("y2"), ) { let year = year.as_str().parse().expect("year"); let month = Month::from_str(month.as_str()) .expect("month") .number_from_month(); Some((cap.get(0).expect("all").range(), Date { year, month })) } else { None } }) .map(|(byte_range, date)| { line += text[end_of_last_cap..byte_range.end] .chars() .filter(|c| *c == '\n') .count(); end_of_last_cap = byte_range.end; (line, date) }) .collect() } fn collect_dates(paths: impl Iterator) -> BTreeMap> { let date_regex = make_date_regex(); let mut data = BTreeMap::new(); for path in paths { let text = fs::read_to_string(&path).unwrap(); let dates = collect_dates_from_file(&date_regex, &text); if !dates.is_empty() { data.insert(path, dates); } } data } fn filter_dates( current_month: Date, min_months_since: u32, dates_by_file: impl Iterator)>, ) -> impl Iterator)> { dates_by_file .map(move |(path, dates)| { ( path, dates .into_iter() .filter(|(_, date)| { current_month .months_since(*date) .expect("found date that is after current month") >= min_months_since }) .collect::>(), ) }) .filter(|(_, dates)| !dates.is_empty()) } fn main() { let mut args = env::args(); if args.len() == 1 { eprintln!("error: expected root Markdown directory as CLI argument"); process::exit(1); } let root_dir = args.nth(1).unwrap(); let root_dir_path = Path::new(&root_dir); let glob_pat = format!("{}/**/*.md", root_dir); let today_chrono = Utc::today(); let current_month = Date { year: today_chrono.year_ce().1, month: today_chrono.month(), }; let dates_by_file = collect_dates(glob(&glob_pat).unwrap().map(Result::unwrap)); let dates_by_file: BTreeMap<_, _> = filter_dates(current_month, 6, dates_by_file.into_iter()).collect(); if dates_by_file.is_empty() { println!("empty"); } else { println!("Date Reference Triage for {}", current_month); println!("## Procedure"); println!(); println!( "Each of these dates should be checked to see if the docs they annotate are \ up-to-date. Each date should be updated (in the Markdown file where it appears) to \ use the current month ({current_month}), or removed if the docs it annotates are not \ expected to fall out of date quickly.", current_month = today_chrono.format("%B %Y"), ); println!(); println!( "Please check off each date once a PR to update it (and, if applicable, its \ surrounding docs) has been merged. Please also mention that you are working on a \ particular set of dates so duplicate work is avoided." ); println!(); println!("Finally, once all the dates have been updated, please close this issue."); println!(); println!("## Dates"); println!(); for (path, dates) in dates_by_file { println!( "- [ ] {}", path.strip_prefix(&root_dir_path).unwrap_or(&path).display(), ); for (line, date) in dates { println!(" - [ ] line {}: {}", line, date); } } println!(); } } #[cfg(test)] mod tests { use super::*; #[test] fn test_months_since() { let date1 = Date { year: 2020, month: 3, }; let date2 = Date { year: 2021, month: 1, }; assert_eq!(date2.months_since(date1), Some(10)); } #[test] fn test_date_regex() { let regex = &make_date_regex(); assert!(regex.is_match("")); assert!(regex.is_match("")); assert!(regex.is_match("")); assert!(regex.is_match("")); assert!(regex.is_match(" jan 2021")); assert!(regex.is_match(" january 2021")); assert!(regex.is_match(" Jan 2021")); assert!(regex.is_match(" January 2021")); assert!(regex.is_match(" jan 2021 ")); assert!(regex.is_match(" jan 2021.")); } #[test] fn test_date_regex_fail() { let regexes = &make_date_regex(); assert!(!regexes.is_match("")); assert!(!regexes.is_match("")); assert!(!regexes.is_match("")); assert!(!regexes.is_match(" jan 221")); assert!(!regexes.is_match(" jan 20222")); assert!(!regexes.is_match(" 01 2021")); } #[test] fn test_collect_dates_from_file() { let text = r" Test1 Test2 Foo Test3 Test4 FooBar Test5 Test6 Test7 Test8 Test1 jan 2021 Test2 Foo february 2021 Test3 Test4 Foo mar 2021 Bar apr 2021 Test5 Test6 Test7 may 2021 Test8 june 2021. "; assert_eq!( collect_dates_from_file(&make_date_regex(), text), vec![ ( 3, Date { year: 2021, month: 1, } ), ( 6, Date { year: 2021, month: 2, } ), ( 9, Date { year: 2021, month: 3, } ), ( 11, Date { year: 2021, month: 4, } ), ( 17, Date { year: 2021, month: 5, } ), ( 20, Date { year: 2021, month: 1, } ), ( 23, Date { year: 2021, month: 2, } ), ( 26, Date { year: 2021, month: 3, } ), ( 28, Date { year: 2021, month: 4, } ), ( 34, Date { year: 2021, month: 5, } ), ( 38, Date { year: 2021, month: 6, } ), ], ); } }