diff options
Diffstat (limited to 'third_party/rust/regex')
160 files changed, 30850 insertions, 0 deletions
diff --git a/third_party/rust/regex/.cargo-checksum.json b/third_party/rust/regex/.cargo-checksum.json new file mode 100644 index 0000000000..c0eab61942 --- /dev/null +++ b/third_party/rust/regex/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"920271ea6fdc901f01e8535d732370509714326874a4073ab75cd0c9d899fc14","Cargo.toml":"2913e983f1b366ef6be4ac7da62d3e2c8e4847cfea4257d5618a90e67363d26e","Cross.toml":"4a11d6c63ecc919016b59fa0fe23674eb05682fb91ffbe677a4a7077e9e684ff","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"932f36c0fc3ac869fcca51018e6c87c75278665d42e51d22834fa52ceec2c95c","UNICODE.md":"845fca1982e82e190109a784952579fce54faea120b702b7efd61164a12f601f","record/README.md":"02e6f85f8a43f18540e4a52a75d1001494df7aceac3873e9a13e3ceba190206d","record/compile-test/2023-04-19_1.7.3.csv":"460059ba2f10456175ff92bd75d4a365b14a1843e2b46e7b285d58da59e6d3ca","record/compile-test/2023-04-20_master.csv":"6b94df278e4ed82a3fd0d4bfe92a4614714e00435e983c7649ee9f54925f906e","record/compile-test/2023-07-05.csv":"cf00b4981b8c12980113810dba40e2063a8400354ad4dab16f7c212ff0b5db74","record/compile-test/README.md":"ba2b606993edd8d705ad1677ec954862614e52b028407e1908bb5dfb07767f2d","record/old-bench-log/01-lazy-dfa/dynamic":"dec9f74b8835403c71edc0c2d93bbdde0f5a0e37d46585e416c80496d5b14497","record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa":"c0ce02bef9ada8cd55672f0a9c3c5fc64f71e08bfb2b45978082a140b4fc111f","record/old-bench-log/01-lazy-dfa/native":"9de61ff787e36f5c6f1eaec68b8bb0583e57b0aad23712afe8c0048988c761b8","record/old-bench-log/01-lazy-dfa/nfa":"38c0be44a00b2caef17101bc425410fec2958e4df6da25d2ba5b6664f8bccad9","record/old-bench-log/01-lazy-dfa/pcre":"3b38026c24e4ca487ff62de83cc093ccb46b918f4875663249ff84ce27636942","record/old-bench-log/02-set/dynamic":"8ef5c00f0
ac42e5f008e4b6337669527b48fba38df94c50d3e683c6aac66a48c","record/old-bench-log/03-bytes/onig":"f32347a6e0f25f46ad1b0aa736c29eca47c25f90d32c8823ea0d14204859a35b","record/old-bench-log/03-bytes/pcre":"b90982575c0ad55617b2ce50c2e9853d090502bf07e1eb19edf9009d3c9f2987","record/old-bench-log/03-bytes/rust":"b1e70e5ae48a9c726d8cd8a98019c0efe5a1095563c61cf0ac75e24de32461b4","record/old-bench-log/03-bytes/rust-bytes":"fbf0e6cb8102c7ca8e59bd459bb0ae7f1feaf8103def70b8d4793c59e68e8736","record/old-bench-log/04/onig":"4e34e2ede0a806b8ee540e63e4babee38049e5a8ab3be99c4f5d8b02bbc653fd","record/old-bench-log/04/pcre1-jit":"736c4941e991ef94f76379cf2187d0ea2a41b052cf80c94d0dd0c9ea758a6491","record/old-bench-log/04/pcre2-jit":"00e7bbf7749904fca8dff9b441d15bbe670f37b427e385ddf740f7a49de3b1fb","record/old-bench-log/04/re2":"b8b8595f6b68da127b56dc7c61a9fd15548251fda1be9d2c50c2d48382e887b6","record/old-bench-log/04/rust":"c5a6b918e815294f0c4e3d37267c444d49692ff131c5a08f7462c24d0721fcec","record/old-bench-log/04/tcl":"c4d8d12b8cf48ff2017549e95e49dc95a90ea15483834cd70d2d7d7c237bbd32","record/old-bench-log/05/onig":"70a4da9aafaefa6493cd09d3a529dd5d2d9eacf390bb093681bc7be28a1f926c","record/old-bench-log/05/onig-vs-rust":"b942a79735b7330241437776c15b18f4db3eff01d3e6c35494f4a8732e74a23a","record/old-bench-log/05/pcre1":"b29b7efbe79b55ce0aaf24bbbecc376a865fa219a68d96124e3d95951cdb47f4","record/old-bench-log/05/pcre1-vs-rust":"a458e5c62f0500898e08757753c10981551649656432ec096f0c82b414ef8d82","record/old-bench-log/05/pcre2":"faa93937c3490cfdff88c32dc04e57f2ae881923b87781e5fe876535fd690770","record/old-bench-log/05/pcre2-vs-rust":"bf9faa6a679dd98e9452e52c0941d2eb84dcf0b6632c15507f8334ed7bc309da","record/old-bench-log/05/re2":"692866b28e1bc368c7a59f519b8dfe1da50a135946ce153298a0ab228a5ee59d","record/old-bench-log/05/re2-vs-rust":"55e4cb14c397574751aebe38068c429a4580a5e309857b2715047944903dca58","record/old-bench-log/05/rust":"aac6acda9f63e51613712d0a33bb7fb46dfc7adc425f76b9b71195be8c8a42e7","record/o
ld-bench-log/05/tcl":"f03e39eccd3252162cc6099bb0426014df669d299ba0ef79e89b8401886a5172","record/old-bench-log/05/tcl-vs-rust":"ae6ac4668573bf5488cc235c5da16ad9358d07b7644207d9bcea88ba6f5514a6","record/old-bench-log/06/dphobos-dmd":"473328306be335a0320c690d9c2dbefdf7f2f5a80e4ca69443c7ed2e81bb093f","record/old-bench-log/06/dphobos-dmd-ct":"60341c736382a6db21d9889369ea4617c521acbf30d4b3bf38bcd17f4f85b9b1","record/old-bench-log/06/dphobos-ldc":"ae60c2bed84afb89ae43615f26de4cc5d0042e179089b639507378518eed3252","record/old-bench-log/06/dphobos-ldc-ct":"a157ef450793b73de3a816fab1d93a6d11e90a817082bae5e3da02a66fcc833f","record/old-bench-log/06/pcre1":"ad10fd7db732e8670dd3d4eedb05f48f547b4782495aaadff8ec25a6ea1992a0","record/old-bench-log/06/pcre2":"f789a73bd41a0bc401bdebe7f10a03a8aa587de48643d88507d16181a8fa39d3","record/old-bench-log/06/re2":"203c273a110d71f5edf722630202a6142c39d6b7a9951686adf8b9c20c5db278","record/old-bench-log/06/rust":"6a642a16cd279c99ef5a580a25fb3a63ac6239cd73df9261c02912fa08145753","record/old-bench-log/07/boost":"255bc652c4d9e9d20aa9b22d8d86e952e7ec6c8b9fcde0c3d6e38c967e04d40e","record/old-bench-log/07/dphobos-dmd":"fb3ac60037050858611145ca3e71412164688dcdec52c022787d33304e022260","record/old-bench-log/07/dphobos-dmd-ct":"40a5088441f8ffe3dae0abaf31c105cedfbe3b56c06772f075947d504976d2ed","record/old-bench-log/07/oniguruma":"ae0cd60adb15845eb9ef706111d4ee0e6ad5a58f0276b787d68bd7d637f8f7c6","record/old-bench-log/07/pcre1":"a812d065ec248249f9bb3d6d970f15c18d342f6b443265ad4b07fa91b73575cc","record/old-bench-log/07/pcre2":"88230663eccd0b382cf5be81ce1ae6cfa3fa835a65a31c1eba4369d2e8de5d27","record/old-bench-log/07/re2":"e330ef21ce44351afc3c43821d862e9c625877606569f3af0ddbadcd7b21c602","record/old-bench-log/07/rust":"d8c6bd5c46f5df9d0ac222f7be7793527a8137d273c8826b3715c67e16209aac","record/old-bench-log/07/rust-bytes":"e21d02fa2ef1e5ed7204920b33ed24c9fb620e068ed47ed6879b72e76369a27e","record/old-bench-log/07/stdcpp":"9df02d89dc8232c700b8cf8bc6f1ece3ca7af84ab5
2e67a660039d6c9168aed4","record/old-bench-log/07/stdcpp-libcxx":"f90849a0b5dc11dc0280ad97886e92e1d91c080403ad7a4ecd638a26fe5e8c5e","record/old-bench-log/07/tcl":"7f6e347bb507f9c00ff664d3e627c0a9cf842b416eeb2af9f3b6cccd041c58e4","record/old-bench-log/08-new-memmem/rust-after-01":"646c7d50aea9c560a35eb60116f301f4fb8d4b03fd5814d8b24adffd070332e3","record/old-bench-log/08-new-memmem/rust-after-02":"14e7fb6c6faa85a8f90617528cef79ae382aeba07c2e5c253c68445902b060ba","record/old-bench-log/08-new-memmem/rust-before-01":"7e3b58de0f502c1a1bf6d27e0e85c654b1189716f7374cec4ed4dd365b13101f","record/old-bench-log/08-new-memmem/rust-before-02":"ab6d09529eeeca7ff0da945d59701dbbcfdec5e05581bb9bf154779d12a35e53","record/old-bench-log/09-new-baseline/pcre2":"28df8e2762f267d1ea628906a6e4bbc21f99e6a445bd322c86d0ca483b21b5b3","record/old-bench-log/09-new-baseline/re2":"421437193cc3f159c178479f98bde8dbe27883ec7757b1ddd8d745862f5899ff","record/old-bench-log/09-new-baseline/rust":"6f932a769171b6cdb717c9d01e44a70762ef660c4045b9d2bb3797a9bdf65405","record/old-bench-log/09-new-baseline/rust-bytes":"9c5acd5c1eeac9acfe76d03588041f9b6d65b4351085c3510888ceeb83e8a7b5","record/old-bench-log/10-last-frontier/rust-after-literal.log":"02baef9b3b49acbbff43e81f48ea5a9287e30ff4fc298a3f3b48991d8374aabf","record/old-bench-log/10-last-frontier/rust-before-literal.log":"e5a3bcc2b9e93cf3cb27bc9e6305b3bc03215751bbeef2a70fb25577d6b42874","record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log":"29834c7a5396ac61acedd07c0b7ca60716865ec3e70f35fbaa7826a2309a79d9","record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log":"4e7468a3e8629814bd4af91e2a8eb42d0899d352b5dff3058b801aa637046be2","record/old-bench-log/11-regex-1.7.3/rust":"d7cc18a62070ea7a999e1ba2458f26cf94595f1af276c2b3e96cee638eccf3f0","record/old-bench-log/11-regex-1.7.3/rust-bytes":"64c7458020139bd7a03d1cb0927b741e6972377b686626563acb86fbc66414ca","record/old-bench-log/12-regex-1.8.1/rust":"a538c42e77e20956e81fb5a4e2e1e7d3fdf60da019d7
e3df52d93f57367a3fbd","record/old-bench-log/12-regex-1.8.1/rust-bytes":"fbb00fdf8f039ce312f5346a67dddaa5e129280a93a90d7aaf6b5a9a71d2f212","record/old-bench-log/13-regex-1.9.0/rust":"0ef62700ba3fc24887af74b7942490c90b4cd2814b8fda200f7376e43391bfce","record/old-bench-log/13-regex-1.9.0/rust-bytes":"676d501d4667f0a945c88ebb56839176dd3a5a6b45af7708b1e870bf26d12603","record/old-bench-log/README.md":"d359f536fb4b8c1af9af3465a027c3522f62c3871aad44645a955b650d7deec0","record/old-bench-log/old/01-before":"c2ea2750fca8ac1742003fe2106e9422d49e92967d3fe0267f24b7ec830b07e3","record/old-bench-log/old/02-new-syntax-crate":"27fd8b3d35cf08d434035ff7d9f2e9e3c94a167e45ba655567c73ae96830f1d8","record/old-bench-log/old/03-new-syntax-crate":"d942a2b95c3a2d8f85f3f17934f258bdc84baa33e91986e8a6810ca8d6e9cc50","record/old-bench-log/old/04-fixed-benchmark":"0da29ef39ac07ece411c151ab479a76944946aba992547b15d90ec2d5484e85c","record/old-bench-log/old/05-thread-caching":"e364d87131e43187d6757426839789d1b6b47b3f3af21280daa9193d5ab19f64","record/old-bench-log/old/06-major-dynamic":"3bc2b8fd2714ae9f19b2e4f4219654982522daf01b5d3055b4aec0458afeaf13","record/old-bench-log/old/06-major-macro":"d5617ed23e71d5298ed4d629eee257e401c352fd1c91a2048dfeb1677527d4e7","record/old-bench-log/old/07-prefix-improvements":"9277d1392c85a38db215a9b69e3b0cd4a9901f8f1c72c706ca262e5f099b8819","record/old-bench-log/old/08-case-fixes":"f97cd3675cf5c967e4ca8841f2368e9eadf538b542bfe3035d31492afc5934bf","record/old-bench-log/old/09-before-compiler-rewrite":"b928686819dbd9aeaa6639b01b63a48428653f2f676a4e15d61cddec421e0389","record/old-bench-log/old/10-compiler-rewrite":"697b295ee377a5cb287d403593bfb8c078270b4e19e8d61d0b95b06ee7c903ab","record/old-bench-log/old/11-compiler-rewrite":"3f0ba494a0d82e7419285a9686474dc7763d4da0dd3faaa3bec3f624bbede481","record/old-bench-log/old/12-executor":"962e182f9a1cfddb8c0cd2d8c4681febef1430082c9a38e5373c9117b853e65e","record/old-bench-log/old/12-executor-bytes":"e01a1e878b44c80724e9bf09bb11210ee
b8f01518ac7f0e3e7f2ee241281e500","record/old-bench-log/old/13-cache-byte-range-suffixes":"1d67d58a39f9177a79c26f3c6c2a1caaf51f085ce137711ab9ba74071c14680c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/builders.rs":"67b4f07ba91794c1e076d092a55f9377301ef5fa8f0dcef13699842583c14865","src/bytes.rs":"6a576b7cec3bc120b9a444f9d12378ac3f4c4f6443e44e4befce6c9c356d8d78","src/error.rs":"4ac8361e900627a87a2ac78e5d475be17c455fe6850d1515bf5b874837c4ae25","src/find_byte.rs":"e17cd3b765467685946707840b92ea4e37d3c11081fbf316174a15858cd4bd99","src/lib.rs":"93cb2a92745dd656b00746b45bdec726cf0cdb0016384ba25ca816b870ca83c8","src/pattern.rs":"5f37755a7c16902d861377645f57a20314961f86298d4b35ae6e1058ca4e9801","src/regex/bytes.rs":"a32de892ae0eb5a68c54a83b0540b1d595e8263b4efe9f521e4ac7cd0f2efa6d","src/regex/mod.rs":"c220b6dd7a5e1945f8e743d1dcd796c5f782c91b0c34eb9915c588174a517fe8","src/regex/string.rs":"4f7fea1a54c099fe79769e1e97ab9d4e4c5a569c48dbcab6db2b35a597953a04","src/regexset/bytes.rs":"6290dd81f47fb6cdbaa358be2340398a2a640320373d334e4c977bf30b5a9220","src/regexset/mod.rs":"c220b6dd7a5e1945f8e743d1dcd796c5f782c91b0c34eb9915c588174a517fe8","src/regexset/string.rs":"977bc167c48c7c99187599c5071ca197e61a56359d32a26b9dbc1b58a5ef1c4d","test":"c0122c20a2c9b7ba6e9a8aaeb2b7d9910315ef31063539949f28d9501ef3193c","testdata/README.md":"c0514501526560d7f6171eb6d982ad61b4527760cb38a4bfbe8e28036ff37b95","testdata/anchored.toml":"7a1b5cd81deed2099796a451bf764a3f9bd21f0d60c0fa46accd3a35666866f2","testdata/bytes.toml":"1d84179165fd25f3b94bd2bfbeb43fc8a162041f7bf98b717e0f85cef7fb652b","testdata/crazy.toml":"a146e2d2e23f1a57168979d9b1fc193c2ba38dca66294b61140d6d2a2958ec86","testdata/crlf.toml":"d19cf22756434d145dd20946c00af01c102a556a252070405c3c8294129d9ece","testdata/earliest.toml":"d561e643623ee1889b5b049fdcf3c7cb71b0c746d7eb822ddbd09d0acda2620b","testdata/empty.toml":"738dbe92fbd8971385a1cf3affb0e956e5b692c858b9b48439d718f10801c08e","testdata/expensive.toml":"
5ce2f60209c99cdd2cdcb9d3069d1d5ca13d5e08a85e913efe57267b2f5f0e9d","testdata/flags.toml":"9a7e001808195c84f2a7d3e18bc0a82c7386e60f03a616e99af00c3f7f2c3fd4","testdata/fowler/basic.toml":"a82c7e233451cd7cfe0c3d817f3a1ab44478bb81ae62432efdd515fa8370275e","testdata/fowler/dat/README":"e53d6c37b5931cb26dc9ae4c40358eea63f7a469c4db6ca816c072a8ced6a61a","testdata/fowler/dat/basic.dat":"b1126dda59075c08f574987090273c9977790115f1e1941d0708c0b82b256905","testdata/fowler/dat/nullsubexpr.dat":"e5cd4145dffa8bc66f2d39079950b2bb7bae21a521514b83b557b92f4a871a9e","testdata/fowler/dat/repetition.dat":"2b8b2b191229a804fba49e6b888d8194bf488f7744057b550da9d95a2aa6617a","testdata/fowler/nullsubexpr.toml":"cd812e7e8fa0469253b34f0db93b5883c9d8b9740fc4f7825a38e7df880a4eed","testdata/fowler/repetition.toml":"8c09164f064b3db81309c53483863bdcec493781644de162416e9f485e772615","testdata/iter.toml":"6875460302974a5b3073a7304a865c45aba9653c54afea2c4d26e1ea248a81f7","testdata/leftmost-all.toml":"903bfbeff888b7664296f4d5aa367ce53d1dafe249ab0a3359223ae94d596396","testdata/line-terminator.toml":"3255c305687eaa8b39939855ce8e0379bdc0e70d7b0cd818856f65fb44a48c0f","testdata/misc.toml":"32c9591655c6fb118dfefcb4de49a04820a63cb960533dfc2538cdaabf4f4047","testdata/multiline.toml":"eb07cf5427e6ddbcf61f4cc64c2d74ff41b5ef75ef857959651b20196f3cd157","testdata/no-unicode.toml":"d209da04506900fd5f69e48170cddaad0702355ac6176c3a75ab3ff96974457c","testdata/overlapping.toml":"5d96497a7233566d40b05ba22047e483fa8662e45515a9be86da45cf6c28703a","testdata/regex-lite.toml":"fecca7cc8c9cea2e1f84f846a89fd9b3ca7011c83698211a2eeda8924deb900c","testdata/regression.toml":"6af7b4ac5eb257af52bcf9647f0f7ecb02e87ce5e57306825f2b5ac800151758","testdata/set.toml":"dfd265dc1aee80026e881616840df0236ae9abf12467d7ec0e141a52c236128c","testdata/substring.toml":"48122d9f3477ed81f95e3ad42c06e9bb25f849b66994601a75ceae0693b81866","testdata/unicode.toml":"7e4b013039b0cdd85fa73f32d15d096182fe901643d4e40c0910087a736cd46d","testdata/utf8.toml":"2eabce05
82bcacb2073e08bbe7ca413f096d14d06e917b107949691e24f84b20","testdata/word-boundary.toml":"51bc1c498ab825420340a2dd3e6623de4054937ba6d5020ff8cd14b1c1e45271","tests/fuzz/mod.rs":"7b01a803e1c0b5a45c062d493723553f263c57e269eade1475eb789694635d5c","tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff":"be4f42497ac9358eb020bf17cd8fdb9743691824e01d744504613ea2bfb2f663","tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9":"19df9a1e1b5a3c0f31cc038b9f2991b161d8577b4a0c8b2fc391cdfecdb6dd85","tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04":"2fde1668e9e3e60943c28d97c01c90dd3d3882f48475f060ccaf961c228069e8","tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9":"c9e00f7a31453708560900aa51e358dd5551df494439860594be97bb1fb933ba","tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e":"4433011f7af46e855e843635cf24a49713bd5705f67176ed928f04d24eda1857","tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c":"95782a847fc64e9cccdf76e9540b0d16ce80db5d05157a88b958b763f9b8479b","tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085":"8ddff12288f6f20cc9d65db76bd8187834f64f844aad48a340d082555ad5cb56","tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0":"eea6919a75fde163634b890e2253a0918cf0ba092357fa617f368bbfa131ba30","tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a":"a806f73b900046977267acceb83b105bac7ee21ede2edc2927afe1e1f0149f00","tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6":"9540cf58241cde3bc0db8364e0ccff67ff1ff9721c85b0d2ca27354c0cbf2650","tests/lib.rs":"33a37711d8677994f87b19be40b24a323d41de18fb161c850b4ccea9fe4c4156","tests/misc.rs":"5ac5858325451e1d70f308ca0bcead5a354d095a7473800c11065231c319c456","tests/regression.rs":"3490aac99fdbf3f0949ba1f338d5184a84b505ebd96d0b6d6145c610587aa60b","tests/regression_fuzz.rs":"57e0bcba0fdfa7797865e35ae547cd7fe1c6132b80a7bfdfb06eb053a568b00d","tests/replace.rs
":"78ff9bf7f78783ad83a78041bb7ee0705c7efc85b4d12301581d0ce5b2a59325","tests/searcher.rs":"04152e5c86431deec0c196d2564a11bc4ec36f14c77e8c16a2f9d1cbc9fc574e","tests/suite_bytes.rs":"7697b04e5b181aa78b3654bd2dbe1c792d9626197573ed8c649f1da8b481817d","tests/suite_bytes_set.rs":"d970168fab57a9edc60ff26a2bb7d0cc714d4298e4ee9eadba9da44a6569f2bb","tests/suite_string.rs":"1be0cf8922171f8323f99e8ecedbbf1846d339620d0dc2fd490901cbbbd2622e","tests/suite_string_set.rs":"22743107206d913521f9adb728482aed3a9625ff7b15a83df057bbf1f7050e03"},"package":"12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29"}
\ No newline at end of file diff --git a/third_party/rust/regex/CHANGELOG.md b/third_party/rust/regex/CHANGELOG.md new file mode 100644 index 0000000000..a5f2180105 --- /dev/null +++ b/third_party/rust/regex/CHANGELOG.md @@ -0,0 +1,1437 @@ +1.9.4 (2023-08-26) +================== +This is a patch release that fixes a bug where `RegexSet::is_match(..)` could +incorrectly return false (even when `RegexSet::matches(..).matched_any()` +returns true). + +Bug fixes: + +* [BUG #1070](https://github.com/rust-lang/regex/issues/1070): +Fix a bug where a prefilter was incorrectly configured for a `RegexSet`. + + +1.9.3 (2023-08-05) +================== +This is a patch release that fixes a bug where some searches could result in +incorrect match offsets being reported. It is difficult to characterize the +types of regexes susceptible to this bug. They generally involve patterns +that contain no prefix or suffix literals, but have an inner literal along with +a regex prefix that can conditionally match. + +Bug fixes: + +* [BUG #1060](https://github.com/rust-lang/regex/issues/1060): +Fix a bug with the reverse inner literal optimization reporting incorrect match +offsets. + + +1.9.2 (2023-08-05) +================== +This is a patch release that fixes another memory usage regression. This +particular regression occurred only when using a `RegexSet`. In some cases, +much more heap memory (by one or two orders of magnitude) was allocated than in +versions prior to 1.9.0. + +Bug fixes: + +* [BUG #1059](https://github.com/rust-lang/regex/issues/1059): +Fix a memory usage regression when using a `RegexSet`. + + +1.9.1 (2023-07-07) +================== +This is a patch release which fixes a memory usage regression. In the regex +1.9 release, one of the internal engines used a more aggressive allocation +strategy than what was done previously. This patch release reverts to the +prior on-demand strategy. 
+ +Bug fixes: + +* [BUG #1027](https://github.com/rust-lang/regex/issues/1027): +Change the allocation strategy for the backtracker to be less aggressive. + + +1.9.0 (2023-07-05) +================== +This release marks the end of a [years long rewrite of the regex crate +internals](https://github.com/rust-lang/regex/issues/656). Since this is +such a big release, please report any issues or regressions you find. We would +also love to hear about improvements as well. + +In addition to many internal improvements that should hopefully result in +"my regex searches are faster," there have also been a few API additions: + +* A new `Captures::extract` method for quickly accessing the substrings +that match each capture group in a regex. +* A new inline flag, `R`, which enables CRLF mode. This makes `.` match any +Unicode scalar value except for `\r` and `\n`, and also makes `(?m:^)` and +`(?m:$)` match after and before both `\r` and `\n`, respectively, but never +between a `\r` and `\n`. +* `RegexBuilder::line_terminator` was added to further customize the line +terminator used by `(?m:^)` and `(?m:$)` to be any arbitrary byte. +* The `std` Cargo feature is now actually optional. That is, the `regex` crate +can be used without the standard library. +* Because `regex 1.9` may make binary size and compile times even worse, a +new experimental crate called `regex-lite` has been published. It prioritizes +binary size and compile times over functionality (like Unicode) and +performance. It shares no code with the `regex` crate. + +New features: + +* [FEATURE #244](https://github.com/rust-lang/regex/issues/244): +One can opt into CRLF mode via the `R` flag. +e.g., `(?mR:$)` matches just before `\r\n`. +* [FEATURE #259](https://github.com/rust-lang/regex/issues/259): +Multi-pattern searches with offsets can be done with `regex-automata 0.3`. +* [FEATURE #476](https://github.com/rust-lang/regex/issues/476): +`std` is now an optional feature. 
`regex` may be used with only `alloc`. +* [FEATURE #644](https://github.com/rust-lang/regex/issues/644): +`RegexBuilder::line_terminator` configures how `(?m:^)` and `(?m:$)` behave. +* [FEATURE #675](https://github.com/rust-lang/regex/issues/675): +Anchored search APIs are now available in `regex-automata 0.3`. +* [FEATURE #824](https://github.com/rust-lang/regex/issues/824): +Add new `Captures::extract` method for easier capture group access. +* [FEATURE #961](https://github.com/rust-lang/regex/issues/961): +Add `regex-lite` crate with smaller binary sizes and faster compile times. +* [FEATURE #1022](https://github.com/rust-lang/regex/pull/1022): +Add `TryFrom` implementations for the `Regex` type. + +Performance improvements: + +* [PERF #68](https://github.com/rust-lang/regex/issues/68): +Added a one-pass DFA engine for faster capture group matching. +* [PERF #510](https://github.com/rust-lang/regex/issues/510): +Inner literals are now used to accelerate searches, e.g., `\w+@\w+` will scan +for `@`. +* [PERF #787](https://github.com/rust-lang/regex/issues/787), +[PERF #891](https://github.com/rust-lang/regex/issues/891): +Makes literal optimizations apply to regexes of the form `\b(foo|bar|quux)\b`. + +(There are many more performance improvements as well, but not all of them have +specific issues devoted to them.) + +Bug fixes: + +* [BUG #429](https://github.com/rust-lang/regex/issues/429): +Fix matching bugs related to `\B` and inconsistencies across internal engines. +* [BUG #517](https://github.com/rust-lang/regex/issues/517): +Fix matching bug with capture groups. +* [BUG #579](https://github.com/rust-lang/regex/issues/579): +Fix matching bug with word boundaries. +* [BUG #779](https://github.com/rust-lang/regex/issues/779): +Fix bug where some regexes like `(re)+` were not equivalent to `(re)(re)*`. +* [BUG #850](https://github.com/rust-lang/regex/issues/850): +Fix matching bug inconsistency between NFA and DFA engines. 
+* [BUG #921](https://github.com/rust-lang/regex/issues/921): +Fix matching bug where literal extraction got confused by `$`. +* [BUG #976](https://github.com/rust-lang/regex/issues/976): +Add documentation to replacement routines about dealing with fallibility. +* [BUG #1002](https://github.com/rust-lang/regex/issues/1002): +Use corpus rejection in fuzz testing. + + +1.8.4 (2023-06-05) +================== +This is a patch release that fixes a bug where `(?-u:\B)` was allowed in +Unicode regexes, despite the fact that the current matching engines can report +match offsets between the code units of a single UTF-8 encoded codepoint. That +in turn means that match offsets that split a codepoint could be reported, +which in turn results in panicking when one uses them to slice a `&str`. + +This bug occurred in the transition to `regex 1.8` because the underlying +syntactical error that prevented this regex from compiling was intentionally +removed. That's because `(?-u:\B)` will be permitted in Unicode regexes in +`regex 1.9`, but the matching engines will guarantee to never report match +offsets that split a codepoint. When the underlying syntactical error was +removed, no code was added to ensure that `(?-u:\B)` didn't compile in the +`regex 1.8` transition release. This release, `regex 1.8.4`, adds that code +such that `Regex::new(r"(?-u:\B)")` returns to the `regex <1.8` behavior of +not compiling. (A `bytes::Regex` can still of course compile it.) + +Bug fixes: + +* [BUG #1006](https://github.com/rust-lang/regex/issues/1006): +Fix a bug where `(?-u:\B)` was allowed in Unicode regexes, and in turn could +lead to match offsets that split a codepoint in `&str`. + + +1.8.3 (2023-05-25) +================== +This is a patch release that fixes a bug where the regex would report a +match at every position even when it shouldn't. This could occur in a very +small subset of regexes, usually an alternation of simple literals that +have particular properties. 
(See the issue linked below for a more precise +description.) + +Bug fixes: + +* [BUG #999](https://github.com/rust-lang/regex/issues/999): +Fix a bug where a match at every position is erroneously reported. + + +1.8.2 (2023-05-22) +================== +This is a patch release that fixes a bug where regex compilation could panic +in debug mode for regexes with large counted repetitions. For example, +`a{2147483516}{2147483416}{5}` resulted in an integer overflow that wrapped +in release mode but panicked in debug mode. Despite the unintended wrapping +arithmetic in release mode, it didn't cause any other logical bugs since the +errant code was for new analysis that wasn't used yet. + +Bug fixes: + +* [BUG #995](https://github.com/rust-lang/regex/issues/995): +Fix a bug where regex compilation with large counted repetitions could panic. + + +1.8.1 (2023-04-21) +================== +This is a patch release that fixes a bug where a regex match could be reported +where none was found. Specifically, the bug occurs when a pattern contains some +literal prefixes that could be extracted _and_ an optional word boundary in the +prefix. + +Bug fixes: + +* [BUG #981](https://github.com/rust-lang/regex/issues/981): +Fix a bug where a word boundary could interact with prefix literal +optimizations and lead to a false positive match. + + +1.8.0 (2023-04-20) +================== +This is a sizeable release that will be soon followed by another sizeable +release. Both of them combined will close over 40 existing issues and PRs. + +This first release, despite its size, essentially represents preparatory work +for the second release, which will be even bigger. Namely, this release: + +* Increases the MSRV to Rust 1.60.0, which was released about 1 year ago. +* Upgrades its dependency on `aho-corasick` to the recently released 1.0 +version. +* Upgrades its dependency on `regex-syntax` to the simultaneously released +`0.7` version. 
The changes to `regex-syntax` principally revolve around a +rewrite of its literal extraction code and a number of simplifications and +optimizations to its high-level intermediate representation (HIR). + +The second release, which will follow ~shortly after the release above, will +contain a soup-to-nuts rewrite of every regex engine. This will be done by +bringing [`regex-automata`](https://github.com/BurntSushi/regex-automata) into +this repository, and then changing the `regex` crate to be nothing but an API +shim layer on top of `regex-automata`'s API. + +These tandem releases are the culmination of about 3 +years of on-and-off work that [began in earnest in March +2020](https://github.com/rust-lang/regex/issues/656). + +Because of the scale of changes involved in these releases, I would love to +hear about your experience. Especially if you notice undocumented changes in +behavior or performance changes (positive *or* negative). + +Most changes in the first release are listed below. For more details, please +see the commit log, which reflects a linear and decently documented history +of all changes. + +New features: + +* [FEATURE #501](https://github.com/rust-lang/regex/issues/501): +Permit many more characters to be escaped, even if they have no significance. +More specifically, any ASCII character except for `[0-9A-Za-z<>]` can now be +escaped. Also, a new routine, `is_escapeable_character`, has been added to +`regex-syntax` to query whether a character is escapeable or not. +* [FEATURE #547](https://github.com/rust-lang/regex/issues/547): +Add `Regex::captures_at`. This fills a hole in the API, but doesn't otherwise +introduce any new expressive power. +* [FEATURE #595](https://github.com/rust-lang/regex/issues/595): +Capture group names are now Unicode-aware. They can now begin with either a `_` +or any "alphabetic" codepoint. 
After the first codepoint, subsequent codepoints +can be any sequence of alpha-numeric codepoints, along with `_`, `.`, `[` and +`]`. Note that replacement syntax has not changed. +* [FEATURE #810](https://github.com/rust-lang/regex/issues/810): +Add `Match::is_empty` and `Match::len` APIs. +* [FEATURE #905](https://github.com/rust-lang/regex/issues/905): +Add an `impl Default for RegexSet`, with the default being the empty set. +* [FEATURE #908](https://github.com/rust-lang/regex/issues/908): +A new method, `Regex::static_captures_len`, has been added which returns the +number of capture groups in the pattern if and only if every possible match +always contains the same number of matching groups. +* [FEATURE #955](https://github.com/rust-lang/regex/issues/955): +Named captures can now be written as `(?<name>re)` in addition to +`(?P<name>re)`. +* FEATURE: `regex-syntax` now supports empty character classes. +* FEATURE: `regex-syntax` now has an optional `std` feature. (This will come +to `regex` in the second release.) +* FEATURE: The `Hir` type in `regex-syntax` has had a number of simplifications +made to it. +* FEATURE: `regex-syntax` has support for a new `R` flag for enabling CRLF +mode. This will be supported in `regex` proper in the second release. +* FEATURE: `regex-syntax` now has proper support for "regex that never +matches" via `Hir::fail()`. +* FEATURE: The `hir::literal` module of `regex-syntax` has been completely +re-worked. It now has more documentation, examples and advice. +* FEATURE: The `allow_invalid_utf8` option in `regex-syntax` has been renamed +to `utf8`, and the meaning of the boolean has been flipped. + +Performance improvements: + +* PERF: The upgrade to `aho-corasick 1.0` may improve performance in some +cases. It's difficult to characterize exactly which patterns this might impact, +but if there are a small number of longish (>= 4 bytes) prefix literals, then +it might be faster than before. 
+ +Bug fixes: + +* [BUG #514](https://github.com/rust-lang/regex/issues/514): +Improve `Debug` impl for `Match` so that it doesn't show the entire haystack. +* BUGS [#516](https://github.com/rust-lang/regex/issues/516), +[#731](https://github.com/rust-lang/regex/issues/731): +Fix a number of issues with printing `Hir` values as regex patterns. +* [BUG #610](https://github.com/rust-lang/regex/issues/610): +Add explicit example of `foo|bar` in the regex syntax docs. +* [BUG #625](https://github.com/rust-lang/regex/issues/625): +Clarify that `SetMatches::len` does not (regrettably) refer to the number of +matches in the set. +* [BUG #660](https://github.com/rust-lang/regex/issues/660): +Clarify "verbose mode" in regex syntax documentation. +* BUG [#738](https://github.com/rust-lang/regex/issues/738), +[#950](https://github.com/rust-lang/regex/issues/950): +Fix `CaptureLocations::get` so that it never panics. +* [BUG #747](https://github.com/rust-lang/regex/issues/747): +Clarify documentation for `Regex::shortest_match`. +* [BUG #835](https://github.com/rust-lang/regex/issues/835): +Fix `\p{Sc}` so that it is equivalent to `\p{Currency_Symbol}`. +* [BUG #846](https://github.com/rust-lang/regex/issues/846): +Add more clarifying documentation to the `CompiledTooBig` error variant. +* [BUG #854](https://github.com/rust-lang/regex/issues/854): +Clarify that `regex::Regex` searches as if the haystack is a sequence of +Unicode scalar values. +* [BUG #884](https://github.com/rust-lang/regex/issues/884): +Replace `__Nonexhaustive` variants with `#[non_exhaustive]` attribute. +* [BUG #893](https://github.com/rust-lang/regex/pull/893): +Optimize case folding since it can get quite slow in some pathological cases. +* [BUG #895](https://github.com/rust-lang/regex/issues/895): +Reject `(?-u:\W)` in `regex::Regex` APIs. +* [BUG #942](https://github.com/rust-lang/regex/issues/942): +Add a missing `void` keyword to indicate "no parameters" in C API. 
+* [BUG #965](https://github.com/rust-lang/regex/issues/965): +Fix `\p{Lc}` so that it is equivalent to `\p{Cased_Letter}`. +* [BUG #975](https://github.com/rust-lang/regex/issues/975): +Clarify documentation for `\pX` syntax. + + +1.7.3 (2023-03-24) +================== +This is a small release that fixes a bug in `Regex::shortest_match_at` that +could cause it to panic, even when the offset given is valid. + +Bug fixes: + +* [BUG #969](https://github.com/rust-lang/regex/issues/969): + Fix a bug in how the reverse DFA was called for `Regex::shortest_match_at`. + + +1.7.2 (2023-03-21) +================== +This is a small release that fixes a failing test on FreeBSD. + +Bug fixes: + +* [BUG #967](https://github.com/rust-lang/regex/issues/967): + Fix "no stack overflow" test which can fail due to the small stack size. + + +1.7.1 (2023-01-09) +================== +This release was done principally to try and fix the doc.rs rendering for the +regex crate. + +Performance improvements: + +* [PERF #930](https://github.com/rust-lang/regex/pull/930): + Optimize `replacen`. This also applies to `replace`, but not `replace_all`. + +Bug fixes: + +* [BUG #945](https://github.com/rust-lang/regex/issues/945): + Maybe fix rustdoc rendering by just bumping a new release? + + +1.7.0 (2022-11-05) +================== +This release principally includes an upgrade to Unicode 15. + +New features: + +* [FEATURE #832](https://github.com/rust-lang/regex/issues/916): + Upgrade to Unicode 15. + + +1.6.0 (2022-07-05) +================== +This release principally includes an upgrade to Unicode 14. + +New features: + +* [FEATURE #832](https://github.com/rust-lang/regex/pull/832): + Clarify that `Captures::len` includes all groups, not just matching groups. +* [FEATURE #857](https://github.com/rust-lang/regex/pull/857): + Add an `ExactSizeIterator` impl for `SubCaptureMatches`. +* [FEATURE #861](https://github.com/rust-lang/regex/pull/861): + Improve `RegexSet` documentation examples. 
+* [FEATURE #877](https://github.com/rust-lang/regex/issues/877): + Upgrade to Unicode 14. + +Bug fixes: + +* [BUG #792](https://github.com/rust-lang/regex/issues/792): + Fix error message rendering bug. + + +1.5.6 (2022-05-20) +================== +This release includes a few bug fixes, including a bug that produced incorrect +matches when a non-greedy `?` operator was used. + +* [BUG #680](https://github.com/rust-lang/regex/issues/680): + Fixes a bug where `[[:alnum:][:^ascii:]]` dropped `[:alnum:]` from the class. +* [BUG #859](https://github.com/rust-lang/regex/issues/859): + Fixes a bug where `Hir::is_match_empty` returned `false` for `\b`. +* [BUG #862](https://github.com/rust-lang/regex/issues/862): + Fixes a bug where 'ab??' matches 'ab' instead of 'a' in 'ab'. + + +1.5.5 (2022-03-08) +================== +This release fixes a security bug in the regex compiler. This bug permits a +vector for a denial-of-service attack in cases where the regex being compiled +is untrusted. There are no known problems where the regex is itself trusted, +including in cases of untrusted haystacks. + +* [SECURITY #GHSA-m5pq-gvj9-9vr8](https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8): + Fixes a bug in the regex compiler where empty sub-expressions subverted the + existing mitigations in place to enforce a size limit on compiled regexes. + The Rust Security Response WG published an advisory about this: + https://groups.google.com/g/rustlang-security-announcements/c/NcNNL1Jq7Yw + + +1.5.4 (2021-05-06) +================== +This release fixes another compilation failure when building regex. This time, +the fix is for when the `pattern` feature is enabled, which only works on +nightly Rust. CI has been updated to test this case. + +* [BUG #772](https://github.com/rust-lang/regex/pull/772): + Fix build when `pattern` feature is enabled. 
+ + +1.5.3 (2021-05-01) +================== +This release fixes a bug when building regex with only the `unicode-perl` +feature. It turns out that while CI was building this configuration, it wasn't +actually failing the overall build on a failed compilation. + +* [BUG #769](https://github.com/rust-lang/regex/issues/769): + Fix build in `regex-syntax` when only the `unicode-perl` feature is enabled. + + +1.5.2 (2021-05-01) +================== +This release fixes a performance bug when Unicode word boundaries are used. +Namely, for certain regexes on certain inputs, it's possible for the lazy DFA +to stop searching (causing a fallback to a slower engine) when it doesn't +actually need to. + +[PR #768](https://github.com/rust-lang/regex/pull/768) fixes the bug, which was +originally reported in +[ripgrep#1860](https://github.com/BurntSushi/ripgrep/issues/1860). + + +1.5.1 (2021-04-30) +================== +This is a patch release that fixes a compilation error when the `perf-literal` +feature is not enabled. + + +1.5.0 (2021-04-30) +================== +This release primarily updates to Rust 2018 (finally) and bumps the MSRV to +Rust 1.41 (from Rust 1.28). Rust 1.41 was chosen because it's still reasonably +old, and is what's in Debian stable at the time of writing. + +This release also drops this crate's own bespoke substring search algorithms +in favor of a new +[`memmem` implementation provided by the `memchr` crate](https://docs.rs/memchr/2.4.0/memchr/memmem/index.html). +This will change the performance profile of some regexes, sometimes getting a +little worse, and hopefully more frequently, getting a lot better. Please +report any serious performance regressions if you find them. + + +1.4.6 (2021-04-22) +================== +This is a small patch release that fixes the compiler's size check on how much +heap memory a regex uses. Previously, the compiler did not account for the +heap usage of Unicode character classes. Now it does. 
It's possible that this +may make some regexes fail to compile that previously did compile. If that +happens, please file an issue. + +* [BUG OSS-fuzz#33579](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579): + Some regexes can use more heap memory than one would expect. + + +1.4.5 (2021-03-14) +================== +This is a small patch release that fixes a regression in the size of a `Regex` +in the 1.4.4 release. Prior to 1.4.4, a `Regex` was 552 bytes. In the 1.4.4 +release, it was 856 bytes due to internal changes. In this release, a `Regex` +is now 16 bytes. In general, the size of a `Regex` was never something that was +on my radar, but this increased size in the 1.4.4 release seems to have crossed +a threshold and resulted in stack overflows in some programs. + +* [BUG #750](https://github.com/rust-lang/regex/pull/750): + Fixes stack overflows seemingly caused by a large `Regex` size by decreasing + its size. + + +1.4.4 (2021-03-11) +================== +This is a small patch release that contains some bug fixes. Notably, it also +drops the `thread_local` (and `lazy_static`, via transitivity) dependencies. + +Bug fixes: + +* [BUG #362](https://github.com/rust-lang/regex/pull/362): + Memory leaks caused by an internal caching strategy should now be fixed. +* [BUG #576](https://github.com/rust-lang/regex/pull/576): + All regex types now implement `UnwindSafe` and `RefUnwindSafe`. +* [BUG #728](https://github.com/rust-lang/regex/pull/749): + Add missing `Replacer` impls for `Vec<u8>`, `String`, `Cow`, etc. + + +1.4.3 (2021-01-08) +================== +This is a small patch release that adds some missing standard trait +implementations for some types in the public API. + +Bug fixes: + +* [BUG #734](https://github.com/rust-lang/regex/pull/734): + Add `FusedIterator` and `ExactSizeIterator` impls to iterator types. +* [BUG #735](https://github.com/rust-lang/regex/pull/735): + Add missing `Debug` impls to public API types. 
+ + +1.4.2 (2020-11-01) +================== +This is a small bug fix release that bans `\P{any}`. We previously banned empty +classes like `[^\w\W]`, but missed the `\P{any}` case. In the future, we hope +to permit empty classes. + +* [BUG #722](https://github.com/rust-lang/regex/issues/722): + Ban `\P{any}` to avoid a panic in the regex compiler. Found by OSS-Fuzz. + + +1.4.1 (2020-10-13) +================== +This is a small bug fix release that makes `\p{cf}` work. Previously, it would +report "property not found" even though `cf` is a valid abbreviation for the +`Format` general category. + +* [BUG #719](https://github.com/rust-lang/regex/issues/719): + Fixes bug that prevented `\p{cf}` from working. + + +1.4.0 (2020-10-11) +================== +This release has a few minor documentation fixes as well as some very minor +API additions. The MSRV remains at Rust 1.28 for now, but this is intended to +increase to at least Rust 1.41.1 soon. + +This release also adds support for OSS-Fuzz. Kudos to +[@DavidKorczynski](https://github.com/DavidKorczynski) +for doing the heavy lifting for that! + +New features: + +* [FEATURE #649](https://github.com/rust-lang/regex/issues/649): + Support `[`, `]` and `.` in capture group names. +* [FEATURE #687](https://github.com/rust-lang/regex/issues/687): + Add `is_empty` predicate to `RegexSet`. +* [FEATURE #689](https://github.com/rust-lang/regex/issues/689): + Implement `Clone` for `SubCaptureMatches`. +* [FEATURE #715](https://github.com/rust-lang/regex/issues/715): + Add `empty` constructor to `RegexSet` for convenience. + +Bug fixes: + +* [BUG #694](https://github.com/rust-lang/regex/issues/694): + Fix doc example for `Replacer::replace_append`. +* [BUG #698](https://github.com/rust-lang/regex/issues/698): + Clarify docs for `s` flag when using a `bytes::Regex`. +* [BUG #711](https://github.com/rust-lang/regex/issues/711): + Clarify `is_match` docs to indicate that it can match anywhere in string. 
+ + +1.3.9 (2020-05-28) +================== +This release fixes a MSRV (Minimum Supported Rust Version) regression in the +1.3.8 release. Namely, while 1.3.8 compiles on Rust 1.28, it actually does not +compile on other Rust versions, such as Rust 1.39. + +Bug fixes: + +* [BUG #685](https://github.com/rust-lang/regex/issues/685): + Remove use of `doc_comment` crate, which cannot be used before Rust 1.43. + + +1.3.8 (2020-05-28) +================== +This release contains a couple of important bug fixes driven +by better support for empty-subexpressions in regexes. For +example, regexes like `b|` are now allowed. Major thanks to +[@sliquister](https://github.com/sliquister) for implementing support for this +in [#677](https://github.com/rust-lang/regex/pull/677). + +Bug fixes: + +* [BUG #523](https://github.com/rust-lang/regex/pull/523): + Add note to documentation that spaces can be escaped in `x` mode. +* [BUG #524](https://github.com/rust-lang/regex/issues/524): + Add support for empty sub-expressions, including empty alternations. +* [BUG #659](https://github.com/rust-lang/regex/issues/659): + Fix match bug caused by an empty sub-expression miscompilation. + + +1.3.7 (2020-04-17) +================== +This release contains a small bug fix that fixes how `regex` forwards crate +features to `regex-syntax`. In particular, this will reduce recompilations in +some cases. + +Bug fixes: + +* [BUG #665](https://github.com/rust-lang/regex/pull/665): + Fix feature forwarding to `regex-syntax`. + + +1.3.6 (2020-03-24) +================== +This release contains a sizable (~30%) performance improvement when compiling +some kinds of large regular expressions. + +Performance improvements: + +* [PERF #657](https://github.com/rust-lang/regex/pull/657): + Improve performance of compiling large regular expressions. + + +1.3.5 (2020-03-12) +================== +This release updates this crate to Unicode 13. 
+ +New features: + +* [FEATURE #653](https://github.com/rust-lang/regex/pull/653): + Update `regex-syntax` to Unicode 13. + + +1.3.4 (2020-01-30) +================== +This is a small bug fix release that fixes a bug related to the scoping of +flags in a regex. Namely, before this fix, a regex like `((?i)a)b` would +match `aB` despite the fact that `b` should not be matched case insensitively. + +Bug fixes: + +* [BUG #640](https://github.com/rust-lang/regex/issues/640): + Fix bug related to the scoping of flags in a regex. + + +1.3.3 (2020-01-09) +================== +This is a small maintenance release that upgrades the dependency on +`thread_local` from `0.3` to `1.0`. The minimum supported Rust version remains +at Rust 1.28. + + +1.3.2 (2020-01-09) +================== +This is a small maintenance release with some house cleaning and bug fixes. + +New features: + +* [FEATURE #631](https://github.com/rust-lang/regex/issues/631): + Add a `Match::range` method and a `From<Match> for Range` impl. + +Bug fixes: + +* [BUG #521](https://github.com/rust-lang/regex/issues/521): + Corrects `/-/.splitn("a", 2)` to return `["a"]` instead of `["a", ""]`. +* [BUG #594](https://github.com/rust-lang/regex/pull/594): + Improve error reporting when writing `\p\`. +* [BUG #627](https://github.com/rust-lang/regex/issues/627): + Corrects `/-/.split("a-")` to return `["a", ""]` instead of `["a"]`. +* [BUG #633](https://github.com/rust-lang/regex/pull/633): + Squash deprecation warnings for the `std::error::Error::description` method. + + +1.3.1 (2019-09-04) +================== +This is a maintenance release with no changes in order to try to work-around +a [docs.rs/Cargo issue](https://github.com/rust-lang/docs.rs/issues/400). + + +1.3.0 (2019-09-03) +================== +This release adds a plethora of new crate features that permit users of regex +to shrink its size considerably, in exchange for giving up either functionality +(such as Unicode support) or runtime performance. 
When all such features are +disabled, the dependency tree for `regex` shrinks to exactly 1 crate +(`regex-syntax`). More information about the new crate features can be +[found in the docs](https://docs.rs/regex/*/#crate-features). + +Note that while this is a new minor version release, the minimum supported +Rust version for this crate remains at `1.28.0`. + +New features: + +* [FEATURE #474](https://github.com/rust-lang/regex/issues/474): + The `use_std` feature has been deprecated in favor of the `std` feature. + The `use_std` feature will be removed in regex 2. Until then, `use_std` will + remain as an alias for the `std` feature. +* [FEATURE #583](https://github.com/rust-lang/regex/issues/583): + Add a substantial number of crate features shrinking `regex`. + + +1.2.1 (2019-08-03) +================== +This release does a bit of house cleaning. Namely: + +* This repository is now using rustfmt. +* License headers have been removed from all files, in following suit with the + Rust project. +* Teddy has been removed from the `regex` crate, and is now part of the + `aho-corasick` crate. + [See `aho-corasick`'s new `packed` sub-module for details](https://docs.rs/aho-corasick/0.7.6/aho_corasick/packed/index.html). +* The `utf8-ranges` crate has been deprecated, with its functionality moving + into the + [`utf8` sub-module of `regex-syntax`](https://docs.rs/regex-syntax/0.6.11/regex_syntax/utf8/index.html). +* The `ucd-util` dependency has been dropped, in favor of implementing what + little we need inside of `regex-syntax` itself. + +In general, this is part of an ongoing (long term) effort to make optimizations +in the regex engine easier to reason about. The current code is too convoluted +and thus it is very easy to introduce new bugs. This simplification effort is +the primary motivation behind re-working the `aho-corasick` crate to not only +bundle algorithms like Teddy, but to also provide regex-like match semantics +automatically. 
+ +Moving forward, the plan is to join up with the `bstr` and `regex-automata` +crates, with the former providing more sophisticated substring search +algorithms (thereby deleting existing code in `regex`) and the latter providing +ahead-of-time compiled DFAs for cases where they are inexpensive to compute. + + +1.2.0 (2019-07-20) +================== +This release updates regex's minimum supported Rust version to 1.28, which was +released almost 1 year ago. This release also updates regex's Unicode data +tables to 12.1.0. + + +1.1.9 (2019-07-06) +================== +This release contains a bug fix that caused regex's tests to fail, due to a +dependency on an unreleased behavior in regex-syntax. + +* [BUG #593](https://github.com/rust-lang/regex/issues/593): + Move an integration-style test on error messages into regex-syntax. + + +1.1.8 (2019-07-04) +================== +This release contains a few small internal refactorings. One of which fixes +an instance of undefined behavior in a part of the SIMD code. + +Bug fixes: + +* [BUG #545](https://github.com/rust-lang/regex/issues/545): + Improves error messages when a repetition operator is used without a number. +* [BUG #588](https://github.com/rust-lang/regex/issues/588): + Removes use of a repr(Rust) union used for type punning in the Teddy matcher. +* [BUG #591](https://github.com/rust-lang/regex/issues/591): + Update docs for running benchmarks and improve failure modes. + + +1.1.7 (2019-06-09) +================== +This release fixes up a few warnings as a result of recent deprecations. + + +1.1.6 (2019-04-16) +================== +This release fixes a regression introduced by a bug fix (for +[BUG #557](https://github.com/rust-lang/regex/issues/557)) which could cause +the regex engine to enter an infinite loop. This bug was originally +[reported against ripgrep](https://github.com/BurntSushi/ripgrep/issues/1247). 
+ + +1.1.5 (2019-04-01) +================== +This release fixes a bug in regex's dependency specification where it requires +a newer version of regex-syntax, but this wasn't communicated correctly in the +Cargo.toml. This would have been caught by a minimal version check, but this +check was disabled because the `rand` crate itself advertises incorrect +dependency specifications. + +Bug fixes: + +* [BUG #570](https://github.com/rust-lang/regex/pull/570): + Fix regex-syntax minimal version. + + +1.1.4 (2019-03-31) +================== +This release fixes a backwards compatibility regression where Regex was no +longer UnwindSafe. This was caused by the upgrade to aho-corasick 0.7, whose +AhoCorasick type was itself not UnwindSafe. This has been fixed in aho-corasick +0.7.4, which we now require. + +Bug fixes: + +* [BUG #568](https://github.com/rust-lang/regex/pull/568): + Fix an API regression where Regex was no longer UnwindSafe. + + +1.1.3 (2019-03-30) +================== +This release fixes a few bugs and adds a performance improvement when a regex +is a simple alternation of literals. + +Performance improvements: + +* [OPT #566](https://github.com/rust-lang/regex/pull/566): + Upgrades `aho-corasick` to 0.7 and uses it for `foo|bar|...|quux` regexes. + +Bug fixes: + +* [BUG #527](https://github.com/rust-lang/regex/issues/527): + Fix a bug where the parser would panic on patterns like `((?x))`. +* [BUG #555](https://github.com/rust-lang/regex/issues/555): + Fix a bug where the parser would panic on patterns like `(?m){1,1}`. +* [BUG #557](https://github.com/rust-lang/regex/issues/557): + Fix a bug where captures could lead to an incorrect match. + + +1.1.2 (2019-02-27) +================== +This release fixes a bug found in the fix introduced in 1.1.1. + +Bug fixes: + +* [BUG edf45e6f](https://github.com/rust-lang/regex/commit/edf45e6f): + Fix bug introduced in reverse suffix literal matcher in the 1.1.1 release. 
+ + +1.1.1 (2019-02-27) +================== +This is a small release with one fix for a bug caused by literal optimizations. + +Bug fixes: + +* [BUG 661bf53d](https://github.com/rust-lang/regex/commit/661bf53d): + Fixes a bug in the reverse suffix literal optimization. This was originally + reported + [against ripgrep](https://github.com/BurntSushi/ripgrep/issues/1203). + + +1.1.0 (2018-11-30) +================== +This is a small release with a couple small enhancements. This release also +increases the minimal supported Rust version (MSRV) to 1.24.1 (from 1.20.0). In +accordance with this crate's MSRV policy, this release bumps the minor version +number. + +Performance improvements: + +* [OPT #511](https://github.com/rust-lang/regex/pull/511), + [OPT #540](https://github.com/rust-lang/regex/pull/540): + Improve lazy DFA construction for large regex sets. + +New features: + +* [FEATURE #538](https://github.com/rust-lang/regex/pull/538): + Add Emoji and "break" Unicode properties. See [UNICODE.md](UNICODE.md). + +Bug fixes: + +* [BUG #530](https://github.com/rust-lang/regex/pull/530): + Add Unicode license (for data tables). +* Various typo/doc fixups. + + +1.0.6 (2018-11-06) +================== +This is a small release. + +Performance improvements: + +* [OPT #513](https://github.com/rust-lang/regex/pull/513): + Improve performance of compiling large Unicode classes by 8-10%. + +Bug fixes: + +* [BUG #533](https://github.com/rust-lang/regex/issues/533): + Fix definition of `[[:blank:]]` class that regressed in `regex-syntax 0.5`. + + +1.0.5 (2018-09-06) +================== +This is a small release with an API enhancement. + +New features: + +* [FEATURE #509](https://github.com/rust-lang/regex/pull/509): + Generalize impls of the `Replacer` trait. + + +1.0.4 (2018-08-25) +================== +This is a small release that bumps the quickcheck dependency. + + +1.0.3 (2018-08-24) +================== +This is a small bug fix release. 
+ +Bug fixes: + +* [BUG #504](https://github.com/rust-lang/regex/pull/504): + Fix for Cargo's "minimal version" support. +* [BUG 1e39165f](https://github.com/rust-lang/regex/commit/1e39165f): + Fix doc examples for byte regexes. + + +1.0.2 (2018-07-18) +================== +This release exposes some new lower level APIs on `Regex` that permit +amortizing allocation and controlling the location at which a search is +performed in a more granular way. Most users of the regex crate will not +need or want to use these APIs. + +New features: + +* [FEATURE #493](https://github.com/rust-lang/regex/pull/493): + Add a few lower level APIs for amortizing allocation and more fine grained + searching. + +Bug fixes: + +* [BUG 3981d2ad](https://github.com/rust-lang/regex/commit/3981d2ad): + Correct outdated documentation on `RegexBuilder::dot_matches_new_line`. +* [BUG 7ebe4ae0](https://github.com/rust-lang/regex/commit/7ebe4ae0): + Correct outdated documentation on `Parser::allow_invalid_utf8` in the + `regex-syntax` crate. +* [BUG 24c7770b](https://github.com/rust-lang/regex/commit/24c7770b): + Fix a bug in the HIR printer where it wouldn't correctly escape meta + characters in character classes. + + +1.0.1 (2018-06-19) +================== +This release upgrades regex's Unicode tables to Unicode 11, and enables SIMD +optimizations automatically on Rust stable (1.27 or newer). + +New features: + +* [FEATURE #486](https://github.com/rust-lang/regex/pull/486): + Implement `size_hint` on `RegexSet` match iterators. +* [FEATURE #488](https://github.com/rust-lang/regex/pull/488): + Update Unicode tables for Unicode 11. +* [FEATURE #490](https://github.com/rust-lang/regex/pull/490): + SIMD optimizations are now enabled automatically in Rust stable, for versions + 1.27 and up. No compilation flags or features need to be set. CPU support + SIMD is detected automatically at runtime. 
+ +Bug fixes: + +* [BUG #482](https://github.com/rust-lang/regex/pull/482): + Present a better compilation error when the `use_std` feature isn't used. + + +1.0.0 (2018-05-01) +================== +This release marks the 1.0 release of regex. + +While this release includes some breaking changes, most users of older versions +of the regex library should be able to migrate to 1.0 by simply bumping the +version number. The important changes are as follows: + +* We adopt Rust 1.20 as the new minimum supported version of Rust for regex. + We also tentatively adopt a policy that permits bumping the minimum supported + version of Rust in minor version releases of regex, but no patch releases. + That is, with respect to semver, we do not strictly consider bumping the + minimum version of Rust to be a breaking change, but adopt a conservative + stance as a compromise. +* Octal syntax in regular expressions has been disabled by default. This + permits better error messages that inform users that backreferences aren't + available. Octal syntax can be re-enabled via the corresponding option on + `RegexBuilder`. +* `(?-u:\B)` is no longer allowed in Unicode regexes since it can match at + invalid UTF-8 code unit boundaries. `(?-u:\b)` is still allowed in Unicode + regexes. +* The `From<regex_syntax::Error>` impl has been removed. This formally removes + the public dependency on `regex-syntax`. +* A new feature, `use_std`, has been added and enabled by default. Disabling + the feature will result in a compilation error. In the future, this may + permit us to support `no_std` environments (w/ `alloc`) in a backwards + compatible way. + +For more information and discussion, please see +[1.0 release tracking issue](https://github.com/rust-lang/regex/issues/457). + + +0.2.11 (2018-05-01) +=================== +This release primarily contains bug fixes. Some of them resolve bugs where +the parser could panic. 
+ +New features: + +* [FEATURE #459](https://github.com/rust-lang/regex/pull/459): + Include C++'s standard regex library and Boost's regex library in the + benchmark harness. We now include D/libphobos, C++/std, C++/boost, Oniguruma, + PCRE1, PCRE2, RE2 and Tcl in the harness. + +Bug fixes: + +* [BUG #445](https://github.com/rust-lang/regex/issues/445): + Clarify order of indices returned by RegexSet match iterator. +* [BUG #461](https://github.com/rust-lang/regex/issues/461): + Improve error messages for invalid regexes like `[\d-a]`. +* [BUG #464](https://github.com/rust-lang/regex/issues/464): + Fix a bug in the error message pretty printer that could cause a panic when + a regex contained a literal `\n` character. +* [BUG #465](https://github.com/rust-lang/regex/issues/465): + Fix a panic in the parser that was caused by applying a repetition operator + to `(?flags)`. +* [BUG #466](https://github.com/rust-lang/regex/issues/466): + Fix a bug where `\pC` was not recognized as an alias for `\p{Other}`. +* [BUG #470](https://github.com/rust-lang/regex/pull/470): + Fix a bug where literal searches did more work than necessary for anchored + regexes. + + +0.2.10 (2018-03-16) +=================== +This release primarily updates the regex crate to changes made in `std::arch` +on nightly Rust. + +New features: + +* [FEATURE #458](https://github.com/rust-lang/regex/pull/458): + The `Hir` type in `regex-syntax` now has a printer. + + +0.2.9 (2018-03-12) +================== +This release introduces a new nightly only feature, `unstable`, which enables +SIMD optimizations for certain types of regexes. No additional compile time +options are necessary, and the regex crate will automatically choose the +best CPU features at run time. As a result, the `simd` (nightly only) crate +dependency has been dropped. 
+ +New features: + +* [FEATURE #456](https://github.com/rust-lang/regex/pull/456): + The regex crate now includes AVX2 optimizations in addition to the extant + SSSE3 optimization. + +Bug fixes: + +* [BUG #455](https://github.com/rust-lang/regex/pull/455): + Fix a bug where `(?x)[ / - ]` failed to parse. + + +0.2.8 (2018-03-12) +================== +Bug fixes: + +* [BUG #454](https://github.com/rust-lang/regex/pull/454): + Fix a bug in the nest limit checker being too aggressive. + + +0.2.7 (2018-03-07) +================== +This release includes a ground-up rewrite of the regex-syntax crate, which has +been in development for over a year. + +New features: + +* Error messages for invalid regexes have been greatly improved. You get these + automatically; you don't need to do anything. In addition to better + formatting, error messages will now explicitly call out the use of look + around. When regex 1.0 is released, this will happen for backreferences as + well. +* Full support for intersection, difference and symmetric difference of + character classes. These can be used via the `&&`, `--` and `~~` binary + operators within classes. +* A Unicode Level 1 conformant implementation of `\p{..}` character classes. + Things like `\p{scx:Hira}`, `\p{age:3.2}` or `\p{Changes_When_Casefolded}` + now work. All property name and value aliases are supported, and properties + are selected via loose matching. e.g., `\p{Greek}` is the same as + `\p{G r E e K}`. +* A new `UNICODE.md` document has been added to this repository that + exhaustively documents support for UTS#18. +* Empty sub-expressions are now permitted in most places. That is, `()+` is + now a valid regex. +* Almost everything in regex-syntax now uses constant stack space, even when + performing analysis that requires structural induction. This reduces the risk + of a user provided regular expression causing a stack overflow. 
+* [FEATURE #174](https://github.com/rust-lang/regex/issues/174): + The `Ast` type in `regex-syntax` now contains span information. +* [FEATURE #424](https://github.com/rust-lang/regex/issues/424): + Support `\u`, `\u{...}`, `\U` and `\U{...}` syntax for specifying code points + in a regular expression. +* [FEATURE #449](https://github.com/rust-lang/regex/pull/449): + Add a `Replace::by_ref` adapter for use of a replacer without consuming it. + +Bug fixes: + +* [BUG #446](https://github.com/rust-lang/regex/issues/446): + We re-enable the Boyer-Moore literal matcher. + + +0.2.6 (2018-02-08) +================== +Bug fixes: + +* [BUG #446](https://github.com/rust-lang/regex/issues/446): + Fixes a bug in the new Boyer-Moore searcher that results in a match failure. + We fix this bug by temporarily disabling Boyer-Moore. + + +0.2.5 (2017-12-30) +================== +Bug fixes: + +* [BUG #437](https://github.com/rust-lang/regex/issues/437): + Fixes a bug in the new Boyer-Moore searcher that results in a panic. + + +0.2.4 (2017-12-30) +================== +New features: + +* [FEATURE #348](https://github.com/rust-lang/regex/pull/348): + Improve performance for capture searches on anchored regex. + (Contributed by @ethanpailes. Nice work!) +* [FEATURE #419](https://github.com/rust-lang/regex/pull/419): + Expand literal searching to include Tuned Boyer-Moore in some cases. + (Contributed by @ethanpailes. Nice work!) + +Bug fixes: + +* [BUG](https://github.com/rust-lang/regex/pull/436): + The regex compiler plugin has been removed. +* [BUG](https://github.com/rust-lang/regex/pull/436): + `simd` has been bumped to `0.2.1`, which fixes a Rust nightly build error. +* [BUG](https://github.com/rust-lang/regex/pull/436): + Bring the benchmark harness up to date. + + +0.2.3 (2017-11-30) +================== +New features: + +* [FEATURE #374](https://github.com/rust-lang/regex/pull/374): + Add `impl From<Match> for &str`. 
+* [FEATURE #380](https://github.com/rust-lang/regex/pull/380): + Derive `Clone` and `PartialEq` on `Error`. +* [FEATURE #400](https://github.com/rust-lang/regex/pull/400): + Update to Unicode 10. + +Bug fixes: + +* [BUG #375](https://github.com/rust-lang/regex/issues/375): + Fix a bug that prevented the bounded backtracker from terminating. +* [BUG #393](https://github.com/rust-lang/regex/issues/393), + [BUG #394](https://github.com/rust-lang/regex/issues/394): + Fix bug with `replace` methods for empty matches. + + +0.2.2 (2017-05-21) +================== +New features: + +* [FEATURE #341](https://github.com/rust-lang/regex/issues/341): + Support nested character classes and intersection operation. + For example, `[\p{Greek}&&\pL]` matches greek letters and + `[[0-9]&&[^4]]` matches every decimal digit except `4`. + (Much thanks to @robinst, who contributed this awesome feature.) + +Bug fixes: + +* [BUG #321](https://github.com/rust-lang/regex/issues/321): + Fix bug in literal extraction and UTF-8 decoding. +* [BUG #326](https://github.com/rust-lang/regex/issues/326): + Add documentation tip about the `(?x)` flag. +* [BUG #333](https://github.com/rust-lang/regex/issues/333): + Show additional replacement example using curly braces. +* [BUG #334](https://github.com/rust-lang/regex/issues/334): + Fix bug when resolving captures after a match. +* [BUG #338](https://github.com/rust-lang/regex/issues/338): + Add example that uses `Captures::get` to API documentation. +* [BUG #353](https://github.com/rust-lang/regex/issues/353): + Fix RegexSet bug that caused match failure in some cases. +* [BUG #354](https://github.com/rust-lang/regex/pull/354): + Fix panic in parser when `(?x)` is used. +* [BUG #358](https://github.com/rust-lang/regex/issues/358): + Fix literal optimization bug with RegexSet. +* [BUG #359](https://github.com/rust-lang/regex/issues/359): + Fix example code in README. 
+* [BUG #365](https://github.com/rust-lang/regex/pull/365): + Fix bug in `rure_captures_len` in the C binding. +* [BUG #367](https://github.com/rust-lang/regex/issues/367): + Fix byte class bug that caused a panic. + + +0.2.1 +===== +One major bug with `replace_all` has been fixed along with a couple of other +touchups. + +* [BUG #312](https://github.com/rust-lang/regex/issues/312): + Fix documentation for `NoExpand` to reference correct lifetime parameter. +* [BUG #314](https://github.com/rust-lang/regex/issues/314): + Fix a bug with `replace_all` when replacing a match with the empty string. +* [BUG #316](https://github.com/rust-lang/regex/issues/316): + Note a missing breaking change from the `0.2.0` CHANGELOG entry. + (`RegexBuilder::compile` was renamed to `RegexBuilder::build`.) +* [BUG #324](https://github.com/rust-lang/regex/issues/324): + Compiling `regex` should only require one version of `memchr` crate. + + +0.2.0 +===== +This is a new major release of the regex crate, and is an implementation of the +[regex 1.0 RFC](https://github.com/rust-lang/rfcs/blob/master/text/1620-regex-1.0.md). +We are releasing a `0.2` first, and if there are no major problems, we will +release a `1.0` shortly. For `0.2`, the minimum *supported* Rust version is +1.12. + +There are a number of **breaking changes** in `0.2`. They are split into two +types. The first type correspond to breaking changes in regular expression +syntax. The second type correspond to breaking changes in the API. + +Breaking changes for regex syntax: + +* POSIX character classes now require double bracketing. Previously, the regex + `[:upper:]` would parse as the `upper` POSIX character class. Now it parses + as the character class containing the characters `:upper:`. The fix to this + change is to use `[[:upper:]]` instead. Note that variants like + `[[:upper:][:blank:]]` continue to work. +* The character `[` must always be escaped inside a character class. 
+* The characters `&`, `-` and `~` must be escaped if any one of them are + repeated consecutively. For example, `[&]`, `[\&]`, `[\&\&]`, `[&-&]` are all + equivalent while `[&&]` is illegal. (The motivation for this and the prior + change is to provide a backwards compatible path for adding character class + set notation.) +* A `bytes::Regex` now has Unicode mode enabled by default (like the main + `Regex` type). This means regexes compiled with `bytes::Regex::new` that + don't have the Unicode flag set should add `(?-u)` to recover the original + behavior. + +Breaking changes for the regex API: + +* `find` and `find_iter` now **return `Match` values instead of + `(usize, usize)`.** `Match` values have `start` and `end` methods, which + return the match offsets. `Match` values also have an `as_str` method, + which returns the text of the match itself. +* The `Captures` type now only provides a single iterator over all capturing + matches, which should replace uses of `iter` and `iter_pos`. Uses of + `iter_named` should use the `capture_names` method on `Regex`. +* The `at` method on the `Captures` type has been renamed to `get`, and it + now returns a `Match`. Similarly, the `name` method on `Captures` now returns + a `Match`. +* The `replace` methods now return `Cow` values. The `Cow::Borrowed` variant + is returned when no replacements are made. +* The `Replacer` trait has been completely overhauled. This should only + impact clients that implement this trait explicitly. Standard uses of + the `replace` methods should continue to work unchanged. If you implement + the `Replacer` trait, please consult the new documentation. +* The `quote` free function has been renamed to `escape`. +* The `Regex::with_size_limit` method has been removed. It is replaced by + `RegexBuilder::size_limit`. +* The `RegexBuilder` type has switched from owned `self` method receivers to + `&mut self` method receivers. 
Most uses will continue to work unchanged, but + some code may require naming an intermediate variable to hold the builder. +* The `compile` method on `RegexBuilder` has been renamed to `build`. +* The free `is_match` function has been removed. It is replaced by compiling + a `Regex` and calling its `is_match` method. +* The `PartialEq` and `Eq` impls on `Regex` have been dropped. If you relied + on these impls, the fix is to define a wrapper type around `Regex`, impl + `Deref` on it and provide the necessary impls. +* The `is_empty` method on `Captures` has been removed. This always returns + `false`, so its use is superfluous. +* The `Syntax` variant of the `Error` type now contains a string instead of + a `regex_syntax::Error`. If you were examining syntax errors more closely, + you'll need to explicitly use the `regex_syntax` crate to re-parse the regex. +* The `InvalidSet` variant of the `Error` type has been removed since it is + no longer used. +* Most of the iterator types have been renamed to match conventions. If you + were using these iterator types explicitly, please consult the documentation + for its new name. For example, `RegexSplits` has been renamed to `Split`. + +A number of bugs have been fixed: + +* [BUG #151](https://github.com/rust-lang/regex/issues/151): + The `Replacer` trait has been changed to permit the caller to control + allocation. +* [BUG #165](https://github.com/rust-lang/regex/issues/165): + Remove the free `is_match` function. +* [BUG #166](https://github.com/rust-lang/regex/issues/166): + Expose more knobs (available in `0.1`) and remove `with_size_limit`. +* [BUG #168](https://github.com/rust-lang/regex/issues/168): + Iterators produced by `Captures` now have the correct lifetime parameters. +* [BUG #175](https://github.com/rust-lang/regex/issues/175): + Fix a corner case in the parsing of POSIX character classes. +* [BUG #178](https://github.com/rust-lang/regex/issues/178): + Drop the `PartialEq` and `Eq` impls on `Regex`. 
+* [BUG #179](https://github.com/rust-lang/regex/issues/179): + Remove `is_empty` from `Captures` since it always returns false. +* [BUG #276](https://github.com/rust-lang/regex/issues/276): + Position of named capture can now be retrieved from a `Captures`. +* [BUG #296](https://github.com/rust-lang/regex/issues/296): + Remove winapi/kernel32-sys dependency on UNIX. +* [BUG #307](https://github.com/rust-lang/regex/issues/307): + Fix error on emscripten. + + +0.1.80 +====== +* [PR #292](https://github.com/rust-lang/regex/pull/292): + Fixes bug #291, which was introduced by PR #290. + +0.1.79 +====== +* Require regex-syntax 0.3.8. + +0.1.78 +====== +* [PR #290](https://github.com/rust-lang/regex/pull/290): + Fixes bug #289, which caused some regexes with a certain combination + of literals to match incorrectly. + +0.1.77 +====== +* [PR #281](https://github.com/rust-lang/regex/pull/281): + Fixes bug #280 by disabling all literal optimizations when a pattern + is partially anchored. + +0.1.76 +====== +* Tweak criteria for using the Teddy literal matcher. + +0.1.75 +====== +* [PR #275](https://github.com/rust-lang/regex/pull/275): + Improves match verification performance in the Teddy SIMD searcher. +* [PR #278](https://github.com/rust-lang/regex/pull/278): + Replaces slow substring loop in the Teddy SIMD searcher with Aho-Corasick. +* Implemented DoubleEndedIterator on regex set match iterators. + +0.1.74 +====== +* Release regex-syntax 0.3.5 with a minor bug fix. +* Fix bug #272. +* Fix bug #277. +* [PR #270](https://github.com/rust-lang/regex/pull/270): + Fixes bugs #264, #268 and an unreported bug where the DFA cache size could be + drastically underestimated in some cases (leading to high unexpected memory + usage). + +0.1.73 +====== +* Release `regex-syntax 0.3.4`. +* Bump `regex-syntax` dependency version for `regex` to `0.3.4`. + +0.1.72 +====== +* [PR #262](https://github.com/rust-lang/regex/pull/262): + Fixes a number of small bugs caught by fuzz testing (AFL). 
+ +0.1.71 +====== +* [PR #236](https://github.com/rust-lang/regex/pull/236): + Fix a bug in how suffix literals were extracted, which could lead + to invalid match behavior in some cases. + +0.1.70 +====== +* [PR #231](https://github.com/rust-lang/regex/pull/231): + Add SIMD accelerated multiple pattern search. +* [PR #228](https://github.com/rust-lang/regex/pull/228): + Reintroduce the reverse suffix literal optimization. +* [PR #226](https://github.com/rust-lang/regex/pull/226): + Implements NFA state compression in the lazy DFA. +* [PR #223](https://github.com/rust-lang/regex/pull/223): + A fully anchored RegexSet can now short-circuit. + +0.1.69 +====== +* [PR #216](https://github.com/rust-lang/regex/pull/216): + Tweak the threshold for running backtracking. +* [PR #217](https://github.com/rust-lang/regex/pull/217): + Add upper limit (from the DFA) to capture search (for the NFA). +* [PR #218](https://github.com/rust-lang/regex/pull/218): + Add rure, a C API. + +0.1.68 +====== +* [PR #210](https://github.com/rust-lang/regex/pull/210): + Fixed a performance bug in `bytes::Regex::replace` where `extend` was used + instead of `extend_from_slice`. +* [PR #211](https://github.com/rust-lang/regex/pull/211): + Fixed a bug in the handling of word boundaries in the DFA. +* [PR #213](https://github.com/rust-lang/regex/pull/213): + Added RE2 and Tcl to the benchmark harness. Also added a CLI utility for + running regexes using any of the following regex engines: PCRE1, PCRE2, + Oniguruma, RE2, Tcl and of course Rust's own regexes. + +0.1.67 +====== +* [PR #201](https://github.com/rust-lang/regex/pull/201): + Fix undefined behavior in the `regex!` compiler plugin macro. +* [PR #205](https://github.com/rust-lang/regex/pull/205): + More improvements to DFA performance. Competitive with RE2. See PR for + benchmarks. +* [PR #209](https://github.com/rust-lang/regex/pull/209): + Release 0.1.66 was semver incompatible since it required a newer version + of Rust than previous releases. 
This PR fixes that. (And `0.1.66` was + yanked.) + +0.1.66 +====== +* Speculative support for Unicode word boundaries was added to the DFA. This + should remove the last common case that disqualified use of the DFA. +* An optimization that scanned for suffix literals and then matched the regular + expression in reverse was removed because it had worst case quadratic time + complexity. It was replaced with a more limited optimization where, given any + regex of the form `re$`, it will be matched in reverse from the end of the + haystack. +* [PR #202](https://github.com/rust-lang/regex/pull/202): + The inner loop of the DFA was heavily optimized to improve cache locality + and reduce the overall number of instructions run on each iteration. This + represents the first use of `unsafe` in `regex` (to elide bounds checks). +* [PR #200](https://github.com/rust-lang/regex/pull/200): + Use of the `mempool` crate (which used thread local storage) was replaced + with a faster version of a similar API in @Amanieu's `thread_local` crate. + It should reduce contention when using a regex from multiple threads + simultaneously. +* PCRE2 JIT benchmarks were added. A benchmark comparison can be found + [here](https://gist.github.com/anonymous/14683c01993e91689f7206a18675901b). + (Includes a comparison with PCRE1's JIT and Oniguruma.) +* A bug where word boundaries weren't being matched correctly in the DFA was + fixed. This only affected use of `bytes::Regex`. +* [#160](https://github.com/rust-lang/regex/issues/160): + `Captures` now has a `Debug` impl. 
diff --git a/third_party/rust/regex/Cargo.toml b/third_party/rust/regex/Cargo.toml new file mode 100644 index 0000000000..f4812c7f28 --- /dev/null +++ b/third_party/rust/regex/Cargo.toml @@ -0,0 +1,190 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.60.0" +name = "regex" +version = "1.9.4" +authors = [ + "The Rust Project Developers", + "Andrew Gallant <jamslam@gmail.com>", +] +exclude = [ + "/scripts/*", + "/.github/*", +] +autotests = false +description = """ +An implementation of regular expressions for Rust. This implementation uses +finite automata and guarantees linear time matching on all inputs. 
+""" +homepage = "https://github.com/rust-lang/regex" +documentation = "https://docs.rs/regex" +readme = "README.md" +categories = ["text-processing"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/regex" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + +[profile.bench] +debug = 2 + +[profile.dev] +opt-level = 3 +debug = 2 + +[profile.release] +debug = 2 + +[profile.test] +opt-level = 3 +debug = 2 + +[[test]] +name = "integration" +path = "tests/lib.rs" + +[dependencies.aho-corasick] +version = "1.0.0" +optional = true + +[dependencies.memchr] +version = "2.5.0" +optional = true + +[dependencies.regex-automata] +version = "0.3.7" +features = [ + "alloc", + "syntax", + "meta", + "nfa-pikevm", +] +default-features = false + +[dependencies.regex-syntax] +version = "0.7.5" +default-features = false + +[dev-dependencies.anyhow] +version = "1.0.69" + +[dev-dependencies.doc-comment] +version = "0.3" + +[dev-dependencies.env_logger] +version = "0.9.3" +features = [ + "atty", + "humantime", + "termcolor", +] +default-features = false + +[dev-dependencies.once_cell] +version = "1.17.1" + +[dev-dependencies.quickcheck] +version = "1.0.3" +default-features = false + +[dev-dependencies.regex-test] +version = "0.1.0" + +[features] +default = [ + "std", + "perf", + "unicode", + "regex-syntax/default", +] +logging = [ + "aho-corasick?/logging", + "regex-automata/logging", +] +pattern = [] +perf = [ + "perf-cache", + "perf-dfa", + "perf-onepass", + "perf-backtrack", + "perf-inline", + "perf-literal", +] +perf-backtrack = ["regex-automata/nfa-backtrack"] +perf-cache = [] +perf-dfa = ["regex-automata/hybrid"] +perf-dfa-full = [ + "regex-automata/dfa-build", + "regex-automata/dfa-search", +] +perf-inline = ["regex-automata/perf-inline"] +perf-literal = [ + "dep:aho-corasick", + "dep:memchr", + "regex-automata/perf-literal", +] +perf-onepass = ["regex-automata/dfa-onepass"] +std = [ + "aho-corasick?/std", + 
"memchr?/std", + "regex-automata/std", + "regex-syntax/std", +] +unicode = [ + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + "unicode-perl", + "unicode-script", + "unicode-segment", + "regex-automata/unicode", + "regex-syntax/unicode", +] +unicode-age = [ + "regex-automata/unicode-age", + "regex-syntax/unicode-age", +] +unicode-bool = [ + "regex-automata/unicode-bool", + "regex-syntax/unicode-bool", +] +unicode-case = [ + "regex-automata/unicode-case", + "regex-syntax/unicode-case", +] +unicode-gencat = [ + "regex-automata/unicode-gencat", + "regex-syntax/unicode-gencat", +] +unicode-perl = [ + "regex-automata/unicode-perl", + "regex-automata/unicode-word-boundary", + "regex-syntax/unicode-perl", +] +unicode-script = [ + "regex-automata/unicode-script", + "regex-syntax/unicode-script", +] +unicode-segment = [ + "regex-automata/unicode-segment", + "regex-syntax/unicode-segment", +] +unstable = ["pattern"] +use_std = ["std"] diff --git a/third_party/rust/regex/Cross.toml b/third_party/rust/regex/Cross.toml new file mode 100644 index 0000000000..5415e7a451 --- /dev/null +++ b/third_party/rust/regex/Cross.toml @@ -0,0 +1,7 @@ +[build.env] +passthrough = [ + "RUST_BACKTRACE", + "RUST_LOG", + "REGEX_TEST", + "REGEX_TEST_VERBOSE", +] diff --git a/third_party/rust/regex/HACKING.md b/third_party/rust/regex/HACKING.md new file mode 100644 index 0000000000..34af5b517c --- /dev/null +++ b/third_party/rust/regex/HACKING.md @@ -0,0 +1,341 @@ +Your friendly guide to hacking and navigating the regex library. + +This guide assumes familiarity with Rust and Cargo, and at least a perusal of +the user facing documentation for this crate. 
+ +If you're looking for background on the implementation in this library, then +you can do no better than Russ Cox's article series on implementing regular +expressions using finite automata: https://swtch.com/~rsc/regexp/ + + +## Architecture overview + +As you probably already know, this library executes regular expressions using +finite automata. In particular, a design goal is to make searching linear +with respect to both the regular expression and the text being searched. +Meeting that design goal on its own is not so hard and can be done with an +implementation of the Pike VM (similar to Thompson's construction, but supports +capturing groups), as described in: https://swtch.com/~rsc/regexp/regexp2.html +--- This library contains such an implementation in src/pikevm.rs. + +Making it fast is harder. One of the key problems with the Pike VM is that it +can be in more than one state at any point in time, and must shuffle capture +positions between them. The Pike VM also spends a lot of time following the +same epsilon transitions over and over again. We can employ one trick to +speed up the Pike VM: extract one or more literal prefixes from the regular +expression and execute specialized code to quickly find matches of those +prefixes in the search text. The Pike VM can then be avoided for most of the +search, and instead only executed when a prefix is found. The code to find +prefixes is in the regex-syntax crate (in this repository). The code to search +for literals is in src/literals.rs. When more than one literal prefix is found, +we fall back to an Aho-Corasick DFA using the aho-corasick crate. For one +literal, we use a variant of the Boyer-Moore algorithm. Both Aho-Corasick and +Boyer-Moore use `memchr` when appropriate. The Boyer-Moore variant in this +library also uses elementary frequency analysis to choose the right byte to run +`memchr` with. + +Of course, detecting prefix literals can only take us so far. 
Not all regular +expressions have literal prefixes. To remedy this, we try another approach +to executing the Pike VM: backtracking, whose implementation can be found in +src/backtrack.rs. One reason why backtracking can be faster is that it avoids +excessive shuffling of capture groups. Of course, backtracking is susceptible +to exponential runtimes, so we keep track of every state we've visited to make +sure we never visit it again. This guarantees linear time execution, but we +pay for it with the memory required to track visited states. Because of the +memory requirement, we only use this engine on small search strings *and* small +regular expressions. + +Lastly, the real workhorse of this library is the "lazy" DFA in src/dfa.rs. +It is distinct from the Pike VM in that the DFA is explicitly represented in +memory and is only ever in one state at a time. It is said to be "lazy" because +the DFA is computed as text is searched, where each byte in the search text +results in at most one new DFA state. It is made fast by caching states. DFAs +are susceptible to exponential state blow up (where the worst case is computing +a new state for every input byte, regardless of what's in the state cache). To +avoid using a lot of memory, the lazy DFA uses a bounded cache. Once the cache +is full, it is wiped and state computation starts over again. If the cache is +wiped too frequently, then the DFA gives up and searching falls back to one of +the aforementioned algorithms. + +All of the above matching engines expose precisely the same matching semantics. +This is indeed tested. (See the section below about testing.) + +The following sub-sections describe the rest of the library and how each of the +matching engines are actually used. + +### Parsing + +Regular expressions are parsed using the regex-syntax crate, which is +maintained in this repository. 
The regex-syntax crate defines an abstract +syntax and provides very detailed error messages when a parse error is +encountered. Parsing is done in a separate crate so that others may benefit +from its existence, and because it is relatively divorced from the rest of the +regex library. + +The regex-syntax crate also provides sophisticated support for extracting +prefix and suffix literals from regular expressions. + +### Compilation + +The compiler is in src/compile.rs. The input to the compiler is some abstract +syntax for a regular expression and the output is a sequence of opcodes that +matching engines use to execute a search. (One can think of matching engines as +mini virtual machines.) The sequence of opcodes is a particular encoding of a +non-deterministic finite automaton. In particular, the opcodes explicitly rely +on epsilon transitions. + +Consider a simple regular expression like `a|b`. Its compiled form looks like +this: + + 000 Save(0) + 001 Split(2, 3) + 002 'a' (goto: 4) + 003 'b' + 004 Save(1) + 005 Match + +The first column is the instruction pointer and the second column is the +instruction. Save instructions indicate that the current position in the input +should be stored in a captured location. Split instructions represent a binary +branch in the program (i.e., epsilon transitions). The instructions `'a'` and +`'b'` indicate that the literal bytes `'a'` or `'b'` should match. + +In older versions of this library, the compilation looked like this: + + 000 Save(0) + 001 Split(2, 3) + 002 'a' + 003 Jump(5) + 004 'b' + 005 Save(1) + 006 Match + +In particular, empty instructions that merely served to move execution from one +point in the program to another were removed. Instead, every instruction has a +`goto` pointer embedded into it. This resulted in a small performance boost for +the Pike VM, because it was one fewer epsilon transition that it had to follow. + +There exist more instructions and they are defined and documented in +src/prog.rs. 
+ +Compilation has several knobs and a few unfortunately complicated invariants. +Namely, the output of compilation can be one of two types of programs: a +program that executes on Unicode scalar values or a program that executes +on raw bytes. In the former case, the matching engine is responsible for +performing UTF-8 decoding and executing instructions using Unicode codepoints. +In the latter case, the program handles UTF-8 decoding implicitly, so that the +matching engine can execute on raw bytes. All matching engines can execute +either Unicode or byte based programs except for the lazy DFA, which requires +byte based programs. In general, both representations were kept because (1) the +lazy DFA requires byte based programs so that states can be encoded in a memory +efficient manner and (2) the Pike VM benefits greatly from inlining Unicode +character classes into fewer instructions as it results in fewer epsilon +transitions. + +N.B. UTF-8 decoding is built into the compiled program by making use of the +utf8-ranges crate. The compiler in this library factors out common suffixes to +reduce the size of huge character classes (e.g., `\pL`). + +A regrettable consequence of this split in instruction sets is we generally +need to compile two programs; one for NFA execution and one for the lazy DFA. + +In fact, it is worse than that: the lazy DFA is not capable of finding the +starting location of a match in a single scan, and must instead execute a +backwards search after finding the end location. To execute a backwards search, +we must have compiled the regular expression *in reverse*. + +This means that every compilation of a regular expression generally results in +three distinct programs. It would be possible to lazily compile the Unicode +program, since it is never needed if (1) the regular expression uses no word +boundary assertions and (2) the caller never asks for sub-capture locations. 
+ +### Execution + +At the time of writing, there are four matching engines in this library: + +1. The Pike VM (supports captures). +2. Bounded backtracking (supports captures). +3. Literal substring or multi-substring search. +4. Lazy DFA (no support for Unicode word boundary assertions). + +Only the first two matching engines are capable of executing every regular +expression program. They also happen to be the slowest, which means we need +some logic that (1) knows various facts about the regular expression and (2) +knows what the caller wants. Using this information, we can determine which +engine (or engines) to use. + +The logic for choosing which engine to execute is in src/exec.rs and is +documented on the Exec type. Exec values contain regular expression Programs +(defined in src/prog.rs), which contain all the necessary tidbits for actually +executing a regular expression on search text. + +For the most part, the execution logic is straight-forward and follows the +limitations of each engine described above pretty faithfully. The hairiest +part of src/exec.rs by far is the execution of the lazy DFA, since it requires +a forwards and backwards search, and then falls back to either the Pike VM or +backtracking if the caller requested capture locations. + +The Exec type also contains mutable scratch space for each type of matching +engine. This scratch space is used during search (for example, for the lazy +DFA, it contains compiled states that are reused on subsequent searches). + +### Programs + +A regular expression program is essentially a sequence of opcodes produced by +the compiler plus various facts about the regular expression (such as whether +it is anchored, its capture names, etc.). + +### The regex! macro + +The `regex!` macro no longer exists. It was developed in a bygone era as a +compiler plugin during the infancy of the regex crate. Back then, the only +matching engine in the crate was the Pike VM. 
The `regex!` macro was, itself, +also a Pike VM. The only advantages it offered over the dynamic Pike VM that +was built at runtime were the following: + + 1. Syntax checking was done at compile time. Your Rust program wouldn't + compile if your regex didn't compile. + 2. Reduction of overhead that was proportional to the size of the regex. + For the most part, this overhead consisted of heap allocation, which + was nearly eliminated in the compiler plugin. + +The main takeaway here is that the compiler plugin was a marginally faster +version of a slow regex engine. As the regex crate evolved, it grew other regex +engines (DFA, bounded backtracker) and sophisticated literal optimizations. +The regex macro didn't keep pace, and it therefore became (dramatically) slower +than the dynamic engines. The only reason left to use it was for the compile +time guarantee that your regex is correct. Fortunately, Clippy (the Rust lint +tool) has a lint that checks your regular expression validity, which mostly +replaces that use case. + +Additionally, the regex compiler plugin stopped receiving maintenance. Nobody +complained. At that point, it seemed prudent to just remove it. + +Will a compiler plugin be brought back? The future is murky, but there is +definitely an opportunity there to build something that is faster than the +dynamic engines in some cases. But it will be challenging! As of now, there +are no plans to work on this. + + +## Testing + +A key aspect of any mature regex library is its test suite. A subset of the +tests in this library come from Glenn Fowler's AT&T test suite (its online +presence seems gone at the time of writing). The source of the test suite is +located in src/testdata. The scripts/regex-match-tests.py takes the test suite +in src/testdata and generates tests/matches.rs. + +There are also many other manually crafted tests and regression tests in +tests/tests.rs. Some of these tests were taken from RE2. 
+ +The biggest source of complexity in the tests is related to answering this +question: how can we reuse the tests to check all of our matching engines? One +approach would have been to encode every test into some kind of format (like +the AT&T test suite) and code generate tests for each matching engine. The +approach we use in this library is to create a Cargo.toml entry point for each +matching engine we want to test. The entry points are: + +* `tests/test_default.rs` - tests `Regex::new` +* `tests/test_default_bytes.rs` - tests `bytes::Regex::new` +* `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA + algorithm on every regex. +* `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA + algorithm on every regex and use *arbitrary* byte based programs. +* `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA + algorithm on every regex and use *UTF-8* byte based programs. +* `tests/test_backtrack.rs` - tests `Regex::new`, forced to use + backtracking on every regex. +* `tests/test_backtrack_bytes.rs` - tests `Regex::new`, forced to use + backtracking on every regex and use *arbitrary* byte based programs. +* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use + backtracking on every regex and use *UTF-8* byte based programs. +* `tests/test_crates_regex.rs` - tests to make sure that all of the + backends behave in the same way against a number of quickcheck + generated random inputs. These tests need to be enabled through + the `RUST_REGEX_RANDOM_TEST` environment variable (see + below). + +The lazy DFA and pure literal engines are absent from this list because +they cannot be used on every regular expression. Instead, we rely on +`tests/test_dynamic.rs` to test the lazy DFA and literal engines when possible. + +Since the tests are repeated several times, and because `cargo test` runs all +entry points, it can take a while to compile everything. 
To reduce compile +times slightly, try using `cargo test --test default`, which will only use the +`tests/test_default.rs` entry point. + +The random testing takes quite a while, so it is not enabled by default. +In order to run the random testing you can set the +`RUST_REGEX_RANDOM_TEST` environment variable to anything before +invoking `cargo test`. Note that this variable is inspected at compile +time, so if the tests don't seem to be running, you may need to run +`cargo clean`. + +## Benchmarking + +The benchmarking in this crate is made up of many micro-benchmarks. Currently, +there are two primary sets of benchmarks: the benchmarks that were adopted +at this library's inception (in `bench/src/misc.rs`) and a newer set of +benchmarks meant to test various optimizations. Specifically, the latter set +contain some analysis and are in `bench/src/sherlock.rs`. Also, the latter +set are all executed on the same lengthy input whereas the former benchmarks +are executed on strings of varying length. + +There is also a smattering of benchmarks for parsing and compilation. + +Benchmarks are in a separate crate so that its dependencies can be managed +separately from the main regex crate. + +Benchmarking follows a similarly wonky setup as tests. There are multiple entry +points: + +* `bench_rust.rs` - benchmarks `Regex::new` +* `bench_rust_bytes.rs` - benchmarks `bytes::Regex::new` +* `bench_pcre.rs` - benchmarks PCRE +* `bench_onig.rs` - benchmarks Oniguruma + +The PCRE and Oniguruma benchmarks exist as a comparison point to a mature +regular expression library. In general, this regex library compares favorably +(there are even a few benchmarks that PCRE simply runs too slowly on or +outright can't execute at all). I would love to add other regular expression +library benchmarks (especially RE2). 
+ +If you're hacking on one of the matching engines and just want to see +benchmarks, then all you need to run is: + + $ (cd bench && ./run rust) + +If you want to compare your results with older benchmarks, then try: + + $ (cd bench && ./run rust | tee old) + $ ... make it faster + $ (cd bench && ./run rust | tee new) + $ cargo benchcmp old new --improvements + +The `cargo-benchcmp` utility is available here: +https://github.com/BurntSushi/cargo-benchcmp + +The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See +`./bench/bench --help`. + +## Dev Docs + +When digging your teeth into the codebase for the first time, the +crate documentation can be a great resource. By default `rustdoc` +will strip out all documentation of private crate members in an +effort to help consumers of the crate focus on the *interface* +without having to concern themselves with the *implementation*. +Normally this is a great thing, but if you want to start hacking +on regex internals it is not what you want. Many of the private members +of this crate are well documented with rustdoc style comments, and +it would be a shame to miss out on the opportunity that presents. +You can generate the private docs with: + +``` +$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments +``` + +Then just point your browser at `target/doc/regex/index.html`. + +See https://github.com/rust-lang/rust/issues/15347 for more info +about generating developer docs for internal use. diff --git a/third_party/rust/regex/LICENSE-APACHE b/third_party/rust/regex/LICENSE-APACHE new file mode 100644 index 0000000000..16fe87b06e --- /dev/null +++ b/third_party/rust/regex/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/third_party/rust/regex/LICENSE-MIT b/third_party/rust/regex/LICENSE-MIT new file mode 100644 index 0000000000..39d4bdb5ac --- /dev/null +++ b/third_party/rust/regex/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/regex/README.md b/third_party/rust/regex/README.md new file mode 100644 index 0000000000..a9d6fcd373 --- /dev/null +++ b/third_party/rust/regex/README.md @@ -0,0 +1,246 @@ +regex +===== +This crate provides routines for searching strings for matches of a [regular +expression] (aka "regex"). The regex syntax supported by this crate is similar +to other regex engines, but it lacks several features that are not known how to +implement efficiently. This includes, but is not limited to, look-around and +backreferences. In exchange, all regex searches in this crate have worst case +`O(m * n)` time complexity, where `m` is proportional to the size of the regex +and `n` is proportional to the size of the string being searched. + +[regular expression]: https://en.wikipedia.org/wiki/Regular_expression + +[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) +[![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex) + +### Documentation + +[Module documentation with examples](https://docs.rs/regex). +The module documentation also includes a comprehensive description of the +syntax supported. + +Documentation with examples for the various matching functions and iterators +can be found on the +[`Regex` type](https://docs.rs/regex/*/regex/struct.Regex.html). + +### Usage + +To bring this crate into your repository, either add `regex` to your +`Cargo.toml`, or run `cargo add regex`. 
+ +Here's a simple example that matches a date in YYYY-MM-DD format and prints the +year, month and day: + +```rust +use regex::Regex; + +fn main() { + let re = Regex::new(r"(?x) +(?P<year>\d{4}) # the year +- +(?P<month>\d{2}) # the month +- +(?P<day>\d{2}) # the day +").unwrap(); + + let caps = re.captures("2010-03-14").unwrap(); + assert_eq!("2010", &caps["year"]); + assert_eq!("03", &caps["month"]); + assert_eq!("14", &caps["day"]); +} +``` + +If you have lots of dates in text that you'd like to iterate over, then it's +easy to adapt the above example with an iterator: + +```rust +use regex::Regex; + +fn main() { + let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); + let hay = "On 2010-03-14, foo happened. On 2014-10-14, bar happened."; + + let mut dates = vec![]; + for (_, [year, month, day]) in re.captures_iter(hay).map(|c| c.extract()) { + dates.push((year, month, day)); + } + assert_eq!(dates, vec![ + ("2010", "03", "14"), + ("2014", "10", "14"), + ]); +} +``` + +### Usage: Avoid compiling the same regex in a loop + +It is an anti-pattern to compile the same regular expression in a loop since +compilation is typically expensive. (It takes anywhere from a few microseconds +to a few **milliseconds** depending on the size of the regex.) Not only is +compilation itself expensive, but this also prevents optimizations that reuse +allocations internally to the matching engines. + +In Rust, it can sometimes be a pain to pass regular expressions around if +they're used from inside a helper function. Instead, we recommend using the +[`once_cell`](https://crates.io/crates/once_cell) crate to ensure that +regular expressions are compiled exactly once. 
For example: + +```rust +use { + once_cell::sync::Lazy, + regex::Regex, +}; + +fn some_helper_function(haystack: &str) -> bool { + static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"...").unwrap()); + RE.is_match(haystack) +} + +fn main() { + assert!(some_helper_function("abc")); + assert!(!some_helper_function("ac")); +} +``` + +Specifically, in this example, the regex will be compiled when it is used for +the first time. On subsequent uses, it will reuse the previous compilation. + +### Usage: match regular expressions on `&[u8]` + +The main API of this crate (`regex::Regex`) requires the caller to pass a +`&str` for searching. In Rust, an `&str` is required to be valid UTF-8, which +means the main API can't be used for searching arbitrary bytes. + +To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API is +identical to the main API, except that it takes an `&[u8]` to search on instead +of an `&str`. The `&[u8]` APIs also permit disabling Unicode mode in the regex +even when the pattern would match invalid UTF-8. For example, `(?-u:.)` is +not allowed in `regex::Regex` but is allowed in `regex::bytes::Regex` since +`(?-u:.)` matches any byte except for `\n`. Conversely, `.` will match the +UTF-8 encoding of any Unicode scalar value except for `\n`. + +This example shows how to find all null-terminated strings in a slice of bytes: + +```rust +use regex::bytes::Regex; + +let re = Regex::new(r"(?-u)(?<cstr>[^\x00]+)\x00").unwrap(); +let text = b"foo\xFFbar\x00baz\x00"; + +// Extract all of the strings without the null terminator from each match. +// The unwrap is OK here since a match requires the `cstr` capture to match. +let cstrs: Vec<&[u8]> = + re.captures_iter(text) + .map(|c| c.name("cstr").unwrap().as_bytes()) + .collect(); +assert_eq!(vec![&b"foo\xFFbar"[..], &b"baz"[..]], cstrs); +``` + +Notice here that the `[^\x00]+` will match any *byte* except for `NUL`, +including bytes like `\xFF` which are not valid UTF-8. 
When using the main API, +`[^\x00]+` would instead match any valid UTF-8 sequence except for `NUL`. + +### Usage: match multiple regular expressions simultaneously + +This demonstrates how to use a `RegexSet` to match multiple (possibly +overlapping) regular expressions in a single scan of the search text: + +```rust +use regex::RegexSet; + +let set = RegexSet::new(&[ + r"\w+", + r"\d+", + r"\pL+", + r"foo", + r"bar", + r"barfoo", + r"foobar", +]).unwrap(); + +// Iterate over and collect all of the matches. +let matches: Vec<_> = set.matches("foobar").into_iter().collect(); +assert_eq!(matches, vec![0, 2, 3, 4, 6]); + +// You can also test whether a particular regex matched: +let matches = set.matches("foobar"); +assert!(!matches.matched(5)); +assert!(matches.matched(6)); +``` + + +### Usage: regex internals as a library + +The [`regex-automata` directory](./regex-automata/) contains a crate that +exposes all of the internal matching engines used by the `regex` crate. The +idea is that the `regex` crate exposes a simple API for 99% of use cases, but +`regex-automata` exposes oodles of customizable behaviors. + +[Documentation for `regex-automata`.](https://docs.rs/regex-automata) + + +### Usage: a regular expression parser + +This repository contains a crate that provides a well tested regular expression +parser, abstract syntax and a high-level intermediate representation for +convenient analysis. It provides no facilities for compilation or execution. +This may be useful if you're implementing your own regex engine or otherwise +need to do analysis on the syntax of a regular expression. It is otherwise not +recommended for general use. + +[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax) + + +### Crate features + +This crate comes with several features that permit tweaking the trade off +between binary size, compilation time and runtime performance. 
Users of this +crate can selectively disable Unicode tables, or choose from a variety of +optimizations performed by this crate to disable. + +When all of these features are disabled, runtime match performance may be much +worse, but if you're matching on short strings, or if high performance isn't +necessary, then such a configuration is perfectly serviceable. To disable +all such features, use the following `Cargo.toml` dependency configuration: + +```toml +[dependencies.regex] +version = "1.3" +default-features = false +# Unless you have a specific reason not to, it's good sense to enable standard +# library support. It enables several optimizations and avoids spin locks. It +# also shouldn't meaningfully impact compile times or binary size. +features = ["std"] +``` + +This will reduce the dependency tree of `regex` down to two crates: +`regex-syntax` and `regex-automata`. + +The full set of features one can disable are +[in the "Crate features" section of the documentation](https://docs.rs/regex/1.*/#crate-features). + + +### Minimum Rust version policy + +This crate's minimum supported `rustc` version is `1.60.0`. + +The policy is that the minimum Rust version required to use this crate can be +increased in minor version updates. For example, if regex 1.0 requires Rust +1.20.0, then regex 1.0.z for all values of `z` will also require Rust 1.20.0 or +newer. However, regex 1.y for `y > 0` may require a newer minimum version of +Rust. + + +### License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + https://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + https://opensource.org/licenses/MIT) + +at your option. + +The data in `regex-syntax/src/unicode_tables/` is licensed under the Unicode +License Agreement +([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)). 
diff --git a/third_party/rust/regex/UNICODE.md b/third_party/rust/regex/UNICODE.md new file mode 100644 index 0000000000..60db0aad1f --- /dev/null +++ b/third_party/rust/regex/UNICODE.md @@ -0,0 +1,258 @@ +# Unicode conformance + +This document describes the regex crate's conformance to Unicode's +[UTS#18](https://unicode.org/reports/tr18/) +report, which lays out 3 levels of support: Basic, Extended and Tailored. + +Full support for Level 1 ("Basic Unicode Support") is provided with two +exceptions: + +1. Line boundaries are not Unicode aware. Namely, only the `\n` + (`END OF LINE`) character is recognized as a line boundary by default. + One can opt into `\r\n|\r|\n` being a line boundary via CRLF mode. +2. The compatibility properties specified by + [RL1.2a](https://unicode.org/reports/tr18/#RL1.2a) + are ASCII-only definitions. + +Little to no support is provided for either Level 2 or Level 3. For the most +part, this is because the features are either complex/hard to implement, or at +the very least, very difficult to implement without sacrificing performance. +For example, tackling canonical equivalence such that matching worked as one +would expect regardless of normalization form would be a significant +undertaking. This is at least partially a result of the fact that this regex +engine is based on finite automata, which admits less flexibility normally +associated with backtracking implementations. + + +## RL1.1 Hex Notation + +[UTS#18 RL1.1](https://unicode.org/reports/tr18/#Hex_notation) + +Hex Notation refers to the ability to specify a Unicode code point in a regular +expression via its hexadecimal code point representation. This is useful in +environments that have poor Unicode font rendering or if you need to express a +code point that is not normally displayable. 
All forms of hexadecimal notation +are supported + + \x7F hex character code (exactly two digits) + \x{10FFFF} any hex character code corresponding to a Unicode code point + \u007F hex character code (exactly four digits) + \u{7F} any hex character code corresponding to a Unicode code point + \U0000007F hex character code (exactly eight digits) + \U{7F} any hex character code corresponding to a Unicode code point + +Briefly, the `\x{...}`, `\u{...}` and `\U{...}` are all exactly equivalent ways +of expressing hexadecimal code points. Any number of digits can be written +within the brackets. In contrast, `\xNN`, `\uNNNN`, `\UNNNNNNNN` are all +fixed-width variants of the same idea. + +Note that when Unicode mode is disabled, any non-ASCII Unicode codepoint is +banned. Additionally, the `\xNN` syntax represents arbitrary bytes when Unicode +mode is disabled. That is, the regex `\xFF` matches the Unicode codepoint +U+00FF (encoded as `\xC3\xBF` in UTF-8) while the regex `(?-u)\xFF` matches +the literal byte `\xFF`. + + +## RL1.2 Properties + +[UTS#18 RL1.2](https://unicode.org/reports/tr18/#Categories) + +Full support for Unicode property syntax is provided. Unicode properties +provide a convenient way to construct character classes of groups of code +points specified by Unicode. The regex crate does not provide exhaustive +support, but covers a useful subset. In particular: + +* [General categories](https://unicode.org/reports/tr18/#General_Category_Property) +* [Scripts and Script Extensions](https://unicode.org/reports/tr18/#Script_Property) +* [Age](https://unicode.org/reports/tr18/#Age) +* A smattering of boolean properties, including all of those specified by + [RL1.2](https://unicode.org/reports/tr18/#RL1.2) explicitly. + +In all cases, property name and value abbreviations are supported, and all +names/values are matched loosely without regard for case, whitespace or +underscores. 
Property name aliases can be found in Unicode's +[`PropertyAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt) +file, while property value aliases can be found in Unicode's +[`PropertyValueAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt) +file. + +The syntax supported is also consistent with the UTS#18 recommendation: + +* `\p{Greek}` selects the `Greek` script. Equivalent expressions follow: + `\p{sc:Greek}`, `\p{Script:Greek}`, `\p{Sc=Greek}`, `\p{script=Greek}`, + `\P{sc!=Greek}`. Similarly for `General_Category` (or `gc` for short) and + `Script_Extensions` (or `scx` for short). +* `\p{age:3.2}` selects all code points in Unicode 3.2. +* `\p{Alphabetic}` selects the "alphabetic" property and can be abbreviated + via `\p{alpha}` (for example). +* Single letter variants for properties with single letter abbreviations. + For example, `\p{Letter}` can be equivalently written as `\pL`. + +The following is a list of all properties supported by the regex crate (starred +properties correspond to properties required by RL1.2): + +* `General_Category` \* (including `Any`, `ASCII` and `Assigned`) +* `Script` \* +* `Script_Extensions` \* +* `Age` +* `ASCII_Hex_Digit` +* `Alphabetic` \* +* `Bidi_Control` +* `Case_Ignorable` +* `Cased` +* `Changes_When_Casefolded` +* `Changes_When_Casemapped` +* `Changes_When_Lowercased` +* `Changes_When_Titlecased` +* `Changes_When_Uppercased` +* `Dash` +* `Default_Ignorable_Code_Point` \* +* `Deprecated` +* `Diacritic` +* `Emoji` +* `Emoji_Presentation` +* `Emoji_Modifier` +* `Emoji_Modifier_Base` +* `Emoji_Component` +* `Extended_Pictographic` +* `Extender` +* `Grapheme_Base` +* `Grapheme_Cluster_Break` +* `Grapheme_Extend` +* `Hex_Digit` +* `IDS_Binary_Operator` +* `IDS_Trinary_Operator` +* `ID_Continue` +* `ID_Start` +* `Join_Control` +* `Logical_Order_Exception` +* `Lowercase` \* +* `Math` +* `Noncharacter_Code_Point` \* +* `Pattern_Syntax` +* `Pattern_White_Space` +* 
`Prepended_Concatenation_Mark` +* `Quotation_Mark` +* `Radical` +* `Regional_Indicator` +* `Sentence_Break` +* `Sentence_Terminal` +* `Soft_Dotted` +* `Terminal_Punctuation` +* `Unified_Ideograph` +* `Uppercase` \* +* `Variation_Selector` +* `White_Space` \* +* `Word_Break` +* `XID_Continue` +* `XID_Start` + + +## RL1.2a Compatibility Properties + +[UTS#18 RL1.2a](https://unicode.org/reports/tr18/#RL1.2a) + +The regex crate only provides ASCII definitions of the +[compatibility properties documented in UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties) +(sans the `\X` class, for matching grapheme clusters, which isn't provided +at all). This is because it seems to be consistent with most other regular +expression engines, and in particular, because these are often referred to as +"ASCII" or "POSIX" character classes. + +Note that the `\w`, `\s` and `\d` character classes **are** Unicode aware. +Their traditional ASCII definition can be used by disabling Unicode. That is, +`[[:word:]]` and `(?-u)\w` are equivalent. + + +## RL1.3 Subtraction and Intersection + +[UTS#18 RL1.3](https://unicode.org/reports/tr18/#Subtraction_and_Intersection) + +The regex crate provides full support for nested character classes, along with +union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`) +operations on arbitrary character classes. + +For example, to match all non-ASCII letters, you could use either +`[\p{Letter}--\p{Ascii}]` (difference) or `[\p{Letter}&&[^\p{Ascii}]]` +(intersecting the negation). + + +## RL1.4 Simple Word Boundaries + +[UTS#18 RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries) + +The regex crate provides basic Unicode aware word boundary assertions. A word +boundary assertion can be written as `\b`, or `\B` as its negation. A word +boundary assertion corresponds to a zero-width match, where its adjacent +characters correspond to word and non-word, or non-word and word characters. 
+ +Conformance in this case chooses to define word character in the same way that +the `\w` character class is defined: a code point that is a member of one of +the following classes: + +* `\p{Alphabetic}` +* `\p{Join_Control}` +* `\p{gc:Mark}` +* `\p{gc:Decimal_Number}` +* `\p{gc:Connector_Punctuation}` + +In particular, this differs slightly from the +[prescription given in RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries) +but is permissible according to +[UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties). +Namely, it is convenient and simpler to have `\w` and `\b` be in sync with +one another. + +Finally, Unicode word boundaries can be disabled, which will cause ASCII word +boundaries to be used instead. That is, `\b` is a Unicode word boundary while +`(?-u)\b` is an ASCII-only word boundary. This can occasionally be beneficial +if performance is important, since the implementation of Unicode word +boundaries is currently sub-optimal on non-ASCII text. + + +## RL1.5 Simple Loose Matches + +[UTS#18 RL1.5](https://unicode.org/reports/tr18/#Simple_Loose_Matches) + +The regex crate provides full support for case insensitive matching in +accordance with RL1.5. That is, it uses the "simple" case folding mapping. The +"simple" mapping was chosen because of a key convenient property: every +"simple" mapping is a mapping from exactly one code point to exactly one other +code point. This makes case insensitive matching of character classes, for +example, straight-forward to implement. + +When case insensitive mode is enabled (e.g., `(?i)[a]` is equivalent to `a|A`), +then all character classes are case folded as well. + + +## RL1.6 Line Boundaries + +[UTS#18 RL1.6](https://unicode.org/reports/tr18/#Line_Boundaries) + +The regex crate only provides support for recognizing the `\n` (`END OF LINE`) +character as a line boundary by default. One can also opt into treating +`\r\n|\r|\n` as a line boundary via CRLF mode. 
This choice was made mostly for +implementation convenience, and to avoid performance cliffs that Unicode word +boundaries are subject to. + + +## RL1.7 Code Points + +[UTS#18 RL1.7](https://unicode.org/reports/tr18/#Supplementary_Characters) + +The regex crate provides full support for Unicode code point matching. Namely, +the fundamental atom of any match is always a single code point. + +Given Rust's strong ties to UTF-8, the following guarantees are also provided: + +* All matches are reported on valid UTF-8 code unit boundaries. That is, any + match range returned by the public regex API is guaranteed to successfully + slice the string that was searched. +* By consequence of the above, it is impossible to match surrogate code points. + No support for UTF-16 is provided, so this is never necessary. + +Note that when Unicode mode is disabled, the fundamental atom of matching is +no longer a code point but a single byte. When Unicode mode is disabled, many +Unicode features are disabled as well. For example, `(?-u)\pL` is not a valid +regex but `\pL(?-u)\xFF` (matches any Unicode `Letter` followed by the literal +byte `\xFF`) is. diff --git a/third_party/rust/regex/record/README.md b/third_party/rust/regex/record/README.md new file mode 100644 index 0000000000..432b06ab9a --- /dev/null +++ b/third_party/rust/regex/record/README.md @@ -0,0 +1,4 @@ +This directory contains various recordings of results. These are committed to +the repository so that they can be compared over time. (At the time of writing, +there is no tooling for facilitating this comparison. It has to be done +manually.) 
diff --git a/third_party/rust/regex/record/compile-test/2023-04-19_1.7.3.csv b/third_party/rust/regex/record/compile-test/2023-04-19_1.7.3.csv new file mode 100644 index 0000000000..af62da10ac --- /dev/null +++ b/third_party/rust/regex/record/compile-test/2023-04-19_1.7.3.csv @@ -0,0 +1,11 @@ +name,crate,revision,profile,duration,size,relative-size +regex__dev__std_perf_unicode,regex,9582040009,dev,1.824209152s,3434992,3113064 +regex__dev__std,regex,9582040009,dev,1.206314935s,1362392,1040464 +regex__dev__std_perf,regex,9582040009,dev,1.543583435s,2726384,2404456 +regex__dev__std_unicode,regex,9582040009,dev,1.490095643s,2066904,1744976 +regex__dev__std_unicode-case_unicode-perl,regex,9582040009,dev,1.292011694s,1812952,1491024 +regex__release__std_perf_unicode,regex,9582040009,release,2.398133563s,1616216,1294368 +regex__release__std,regex,9582040009,release,1.413680252s,694592,372744 +regex__release__std_perf,regex,9582040009,release,2.341496191s,1124696,802848 +regex__release__std_unicode,regex,9582040009,release,1.671407822s,1190208,868360 +regex__release__std_unicode-case_unicode-perl,regex,9582040009,release,1.441712198s,932160,610312 diff --git a/third_party/rust/regex/record/compile-test/2023-04-20_master.csv b/third_party/rust/regex/record/compile-test/2023-04-20_master.csv new file mode 100644 index 0000000000..4c3e916740 --- /dev/null +++ b/third_party/rust/regex/record/compile-test/2023-04-20_master.csv @@ -0,0 +1,11 @@ +name,crate,revision,profile,duration,size,relative-size +regex__dev__std_perf_unicode,regex,f1f99af2bc,dev,1.834267609s,3799536,3477608 +regex__dev__std,regex,f1f99af2bc,dev,1.263958602s,1427928,1106000 +regex__dev__std_perf,regex,f1f99af2bc,dev,1.631302845s,3234288,2912360 +regex__dev__std_unicode,regex,f1f99af2bc,dev,1.550536696s,1997272,1675344 +regex__dev__std_unicode-case_unicode-perl,regex,f1f99af2bc,dev,1.341622852s,1739224,1417296 +regex__release__std_perf_unicode,regex,f1f99af2bc,release,2.475080323s,1755480,1433632 
+regex__release__std,regex,f1f99af2bc,release,1.45990031s,731456,409608 +regex__release__std_perf,regex,f1f99af2bc,release,2.421787211s,1259864,938016 +regex__release__std_unicode,regex,f1f99af2bc,release,1.693972619s,1227072,905224 +regex__release__std_unicode-case_unicode-perl,regex,f1f99af2bc,release,1.528003306s,969024,647176 diff --git a/third_party/rust/regex/record/compile-test/2023-07-05.csv b/third_party/rust/regex/record/compile-test/2023-07-05.csv new file mode 100644 index 0000000000..6ec81f5588 --- /dev/null +++ b/third_party/rust/regex/record/compile-test/2023-07-05.csv @@ -0,0 +1,37 @@ +name,crate,revision,profile,duration,size,relative-size +regex__dev__std_perf_unicode,regex,53786ce797,dev,2.414172223s,4143600,3764328 +regex__dev__std_perf_unicode_perf-dfa-full,regex,53786ce797,dev,2.900927164s,4815368,4436096 +regex__dev__std,regex,53786ce797,dev,1.662626059s,2062808,1683536 +regex__dev__std_perf,regex,53786ce797,dev,2.136755026s,3574256,3194984 +regex__dev__std_unicode,regex,53786ce797,dev,1.943953132s,2623960,2244688 +regex__dev__std_unicode-case_unicode-perl,regex,53786ce797,dev,1.753222606s,2374104,1994832 +regex-lite__dev__std_string,regex,53786ce797,dev,498.158769ms,727504,348232 +regex-automata__dev__std_syntax_perf_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.900832296s,4872712,4493440 +regex-automata__dev__std_syntax_nfa-pikevm,regex-automata,53786ce797,dev,1.413429089s,1501648,1122376 +regex-automata__dev__std_syntax_nfa-backtrack,regex-automata,53786ce797,dev,1.412429191s,1505744,1126472 +regex-automata__dev__std_syntax_hybrid,regex-automata,53786ce797,dev,1.678331978s,1632720,1253448 +regex-automata__dev__std_syntax_dfa-onepass,regex-automata,53786ce797,dev,1.594526299s,1526224,1146952 +regex-automata__dev__std_syntax_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.992024402s,3500504,3121232 
+regex-automata__dev__std_syntax_perf_unicode_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,dev,2.378489598s,4119024,3739752 +regex-automata__dev__std_syntax_perf_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.695475914s,4299272,3920000 +regex-automata__dev__std_syntax_perf_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,dev,2.120929251s,3549680,3170408 +regex-automata__dev__std_unicode_meta,regex-automata,53786ce797,dev,1.89728585s,2492888,2113616 +regex-automata__dev__std_meta,regex-automata,53786ce797,dev,1.604628942s,1927640,1548368 +regex__release__std_perf_unicode,regex,53786ce797,release,3.333636908s,2025816,1650720 +regex__release__std_perf_unicode_perf-dfa-full,regex,53786ce797,release,3.805434309s,2210160,1835064 +regex__release__std,regex,53786ce797,release,1.789749444s,932160,557064 +regex__release__std_perf,regex,53786ce797,release,2.734249431s,1505624,1130528 +regex__release__std_unicode,regex,53786ce797,release,2.04945845s,1431872,1056776 +regex__release__std_unicode-case_unicode-perl,regex,53786ce797,release,1.893829903s,1173824,798728 +regex-lite__release__std_string,regex,53786ce797,release,648.517079ms,473400,98304 +regex-automata__release__std_syntax_perf_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.893237683s,2242928,1867832 +regex-automata__release__std_syntax_nfa-pikevm,regex-automata,53786ce797,release,1.556952008s,780600,405504 +regex-automata__release__std_syntax_nfa-backtrack,regex-automata,53786ce797,release,1.576471926s,768312,393216 +regex-automata__release__std_syntax_hybrid,regex-automata,53786ce797,release,1.819539266s,813368,438272 +regex-automata__release__std_syntax_dfa-onepass,regex-automata,53786ce797,release,1.672511482s,776504,401408 +regex-automata__release__std_syntax_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.227157436s,1767744,1392648 
+regex-automata__release__std_syntax_perf_unicode_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,release,3.340235296s,2005336,1630240 +regex-automata__release__std_syntax_perf_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.640335773s,1718640,1343544 +regex-automata__release__std_syntax_perf_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,release,2.876306297s,1489240,1114144 +regex-automata__release__std_unicode_meta,regex-automata,53786ce797,release,1.945654415s,1362240,987144 +regex-automata__release__std_meta,regex-automata,53786ce797,release,1.740500411s,862528,487432 diff --git a/third_party/rust/regex/record/compile-test/README.md b/third_party/rust/regex/record/compile-test/README.md new file mode 100644 index 0000000000..7291d5d376 --- /dev/null +++ b/third_party/rust/regex/record/compile-test/README.md @@ -0,0 +1,27 @@ +This directory contains the results of compilation tests. Specifically, +the results are from testing both the from scratch compilation time and +relative binary size increases of various features for both the `regex` and +`regex-automata` crates. + +Here's an example of how to run these tests for just the `regex` crate. You'll +need the `regex-cli` command installed, which can be found in the `regex-cli` +directory in the root of this repository. + +This must be run in the root of a checkout of this repository. + +``` +$ mkdir /tmp/regex-compile-test +$ regex-cli compile-test ./ /tmp/regex-compile-test | tee record/compile-test/2023-04-19_1.7.3.csv +``` + +You can then look at the results using a tool like [`xsv`][xsv]: + +``` +$ xsv table record/compile-test/2023-04-19_1.7.3.csv +``` + +Note that the relative binary size is computed by building a "baseline" hello +world program, and then subtracting that from the size of a binary that uses +the regex crate. 
+ +[xsv]: https://github.com/BurntSushi/xsv diff --git a/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/dynamic b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/dynamic new file mode 100644 index 0000000000..9ef21737d0 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/dynamic @@ -0,0 +1,73 @@ + Running target/release/dynamic-e87a67d7ea67f0eb + +running 67 tests +test bench::anchored_literal_long_match ... bench: 75 ns/iter (+/- 3) = 5200 MB/s +test bench::anchored_literal_long_non_match ... bench: 61 ns/iter (+/- 2) = 6393 MB/s +test bench::anchored_literal_short_match ... bench: 75 ns/iter (+/- 3) = 346 MB/s +test bench::anchored_literal_short_non_match ... bench: 61 ns/iter (+/- 1) = 426 MB/s +test bench::easy0_1K ... bench: 196 ns/iter (+/- 8) = 5224 MB/s +test bench::easy0_1MB ... bench: 255,138 ns/iter (+/- 4,820) = 4109 MB/s +test bench::easy0_32 ... bench: 71 ns/iter (+/- 2) = 450 MB/s +test bench::easy0_32K ... bench: 5,392 ns/iter (+/- 108) = 6077 MB/s +test bench::easy1_1K ... bench: 241 ns/iter (+/- 37) = 4248 MB/s +test bench::easy1_1MB ... bench: 334,872 ns/iter (+/- 3,433) = 3131 MB/s +test bench::easy1_32 ... bench: 65 ns/iter (+/- 2) = 492 MB/s +test bench::easy1_32K ... bench: 6,139 ns/iter (+/- 703) = 5337 MB/s +test bench::hard_1K ... bench: 4,654 ns/iter (+/- 63) = 220 MB/s +test bench::hard_1MB ... bench: 4,719,487 ns/iter (+/- 71,818) = 222 MB/s +test bench::hard_32 ... bench: 199 ns/iter (+/- 8) = 160 MB/s +test bench::hard_32K ... bench: 147,389 ns/iter (+/- 4,391) = 222 MB/s +test bench::literal ... bench: 20 ns/iter (+/- 4) = 2550 MB/s +test bench::match_class ... bench: 85 ns/iter (+/- 4) = 952 MB/s +test bench::match_class_in_range ... bench: 32 ns/iter (+/- 3) = 2531 MB/s +test bench::match_class_unicode ... bench: 783 ns/iter (+/- 13) = 205 MB/s +test bench::medium_1K ... bench: 1,334 ns/iter (+/- 154) = 767 MB/s +test bench::medium_1MB ... 
bench: 2,044,757 ns/iter (+/- 72,936) = 512 MB/s +test bench::medium_32 ... bench: 99 ns/iter (+/- 18) = 323 MB/s +test bench::medium_32K ... bench: 59,603 ns/iter (+/- 13,750) = 549 MB/s +test bench::no_exponential ... bench: 553 ns/iter (+/- 150) = 180 MB/s +test bench::not_literal ... bench: 293 ns/iter (+/- 59) = 174 MB/s +test bench::one_pass_long_prefix ... bench: 177 ns/iter (+/- 35) = 146 MB/s +test bench::one_pass_long_prefix_not ... bench: 175 ns/iter (+/- 47) = 148 MB/s +test bench::one_pass_short ... bench: 134 ns/iter (+/- 34) = 126 MB/s +test bench::one_pass_short_not ... bench: 136 ns/iter (+/- 39) = 125 MB/s +test bench::replace_all ... bench: 153 ns/iter (+/- 17) +test bench_dynamic_compile::compile_huge ... bench: 165,209 ns/iter (+/- 4,396) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,795,770 ns/iter (+/- 2,674,909) +test bench_dynamic_compile::compile_simple ... bench: 6,883 ns/iter (+/- 391) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,281 ns/iter (+/- 751) +test bench_dynamic_compile::compile_small ... bench: 9,091 ns/iter (+/- 1,125) +test bench_dynamic_compile::compile_small_bytes ... bench: 182,815 ns/iter (+/- 3,814) +test bench_dynamic_parse::parse_huge ... bench: 1,233 ns/iter (+/- 123) +test bench_dynamic_parse::parse_simple ... bench: 2,015 ns/iter (+/- 108) +test bench_dynamic_parse::parse_small ... bench: 2,500 ns/iter (+/- 76) +test bench_sherlock::before_holmes ... bench: 2,741,811 ns/iter (+/- 58,389) = 216 MB/s +test bench_sherlock::everything_greedy ... bench: 7,807,696 ns/iter (+/- 328,585) = 76 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 5,424,922 ns/iter (+/- 78,937) = 109 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 266,557 ns/iter (+/- 3,832) = 2231 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 1,327,967 ns/iter (+/- 12,773) = 448 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... 
bench: 2,690,485 ns/iter (+/- 17,393) = 221 MB/s +test bench_sherlock::name_alt1 ... bench: 77,206 ns/iter (+/- 951) = 7705 MB/s +test bench_sherlock::name_alt2 ... bench: 303,775 ns/iter (+/- 5,030) = 1958 MB/s +test bench_sherlock::name_alt3 ... bench: 1,385,153 ns/iter (+/- 15,871) = 429 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 1,473,833 ns/iter (+/- 9,825) = 403 MB/s +test bench_sherlock::name_alt4 ... bench: 300,912 ns/iter (+/- 3,896) = 1977 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 1,421,519 ns/iter (+/- 16,246) = 418 MB/s +test bench_sherlock::name_holmes ... bench: 52,027 ns/iter (+/- 785) = 11435 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,241,204 ns/iter (+/- 16,862) = 479 MB/s +test bench_sherlock::name_sherlock ... bench: 34,378 ns/iter (+/- 677) = 17305 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 34,463 ns/iter (+/- 580) = 17262 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,281,540 ns/iter (+/- 11,054) = 464 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,281,293 ns/iter (+/- 13,129) = 464 MB/s +test bench_sherlock::name_whitespace ... bench: 60,463 ns/iter (+/- 815) = 9839 MB/s +test bench_sherlock::no_match_common ... bench: 568,357 ns/iter (+/- 11,237) = 1046 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,656 ns/iter (+/- 340) = 25149 MB/s +test bench_sherlock::quotes ... bench: 977,907 ns/iter (+/- 13,926) = 608 MB/s +test bench_sherlock::the_lower ... bench: 794,285 ns/iter (+/- 8,513) = 749 MB/s +test bench_sherlock::the_nocase ... bench: 1,837,240 ns/iter (+/- 22,738) = 323 MB/s +test bench_sherlock::the_upper ... bench: 54,083 ns/iter (+/- 1,153) = 11000 MB/s +test bench_sherlock::the_whitespace ... bench: 1,986,579 ns/iter (+/- 9,292) = 299 MB/s +test bench_sherlock::word_ending_n ... bench: 55,205,101 ns/iter (+/- 93,542) = 10 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa new file mode 100644 index 0000000000..50d3a136c3 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa @@ -0,0 +1,85 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) +src/dfa.rs:73:1: 94:2 warning: function is never used: `can_exec`, #[warn(dead_code)] on by default +src/dfa.rs:73 pub fn can_exec(insts: &Insts) -> bool { +src/dfa.rs:74 use inst::EmptyLook::*; +src/dfa.rs:75 // If for some reason we manage to allocate a regex program with more +src/dfa.rs:76 // than 2^32-1 instructions, then we can't execute the DFA because we +src/dfa.rs:77 // use 32 bit pointers. +src/dfa.rs:78 if insts.len() > ::std::u32::MAX as usize { + ... +src/exec.rs:12:11: 12:15 warning: unused import, #[warn(unused_imports)] on by default +src/exec.rs:12 use dfa::{self, Dfa, DfaResult}; + ^~~~ + Running target/release/dynamic-e87a67d7ea67f0eb + +running 67 tests +test bench::anchored_literal_long_match ... bench: 169 ns/iter (+/- 1) = 2307 MB/s +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 0) = 4588 MB/s +test bench::anchored_literal_short_match ... bench: 158 ns/iter (+/- 3) = 164 MB/s +test bench::anchored_literal_short_non_match ... bench: 84 ns/iter (+/- 2) = 309 MB/s +test bench::easy0_1K ... bench: 318 ns/iter (+/- 2) = 3220 MB/s +test bench::easy0_1MB ... bench: 257,205 ns/iter (+/- 2,448) = 4076 MB/s +test bench::easy0_32 ... bench: 82 ns/iter (+/- 1) = 390 MB/s +test bench::easy0_32K ... bench: 8,666 ns/iter (+/- 104) = 3781 MB/s +test bench::easy1_1K ... bench: 293 ns/iter (+/- 2) = 3494 MB/s +test bench::easy1_1MB ... bench: 329,774 ns/iter (+/- 6,296) = 3179 MB/s +test bench::easy1_32 ... bench: 77 ns/iter (+/- 0) = 415 MB/s +test bench::easy1_32K ... 
bench: 8,856 ns/iter (+/- 93) = 3700 MB/s +test bench::hard_1K ... bench: 31,888 ns/iter (+/- 83) = 32 MB/s +test bench::hard_1MB ... bench: 58,435,108 ns/iter (+/- 64,537) = 17 MB/s +test bench::hard_32 ... bench: 1,048 ns/iter (+/- 12) = 30 MB/s +test bench::hard_32K ... bench: 1,033,930 ns/iter (+/- 4,224) = 31 MB/s +test bench::literal ... bench: 20 ns/iter (+/- 0) = 2550 MB/s +test bench::match_class ... bench: 84 ns/iter (+/- 0) = 964 MB/s +test bench::match_class_in_range ... bench: 33 ns/iter (+/- 0) = 2454 MB/s +test bench::match_class_unicode ... bench: 2,218 ns/iter (+/- 8) = 72 MB/s +test bench::medium_1K ... bench: 1,368 ns/iter (+/- 9) = 748 MB/s +test bench::medium_1MB ... bench: 2,034,481 ns/iter (+/- 3,608) = 515 MB/s +test bench::medium_32 ... bench: 141 ns/iter (+/- 0) = 226 MB/s +test bench::medium_32K ... bench: 59,949 ns/iter (+/- 421) = 546 MB/s +test bench::no_exponential ... bench: 336,653 ns/iter (+/- 1,757) +test bench::not_literal ... bench: 1,247 ns/iter (+/- 5) = 40 MB/s +test bench::one_pass_long_prefix ... bench: 264 ns/iter (+/- 2) = 98 MB/s +test bench::one_pass_long_prefix_not ... bench: 267 ns/iter (+/- 1) = 97 MB/s +test bench::one_pass_short ... bench: 768 ns/iter (+/- 5) = 22 MB/s +test bench::one_pass_short_not ... bench: 797 ns/iter (+/- 20) = 21 MB/s +test bench::replace_all ... bench: 149 ns/iter (+/- 0) +test bench_dynamic_compile::compile_huge ... bench: 161,349 ns/iter (+/- 1,462) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,050,519 ns/iter (+/- 105,846) +test bench_dynamic_compile::compile_simple ... bench: 6,664 ns/iter (+/- 390) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,035 ns/iter (+/- 370) +test bench_dynamic_compile::compile_small ... bench: 8,914 ns/iter (+/- 347) +test bench_dynamic_compile::compile_small_bytes ... bench: 186,970 ns/iter (+/- 2,134) +test bench_dynamic_parse::parse_huge ... bench: 1,238 ns/iter (+/- 11) +test bench_dynamic_parse::parse_simple ... 
bench: 2,005 ns/iter (+/- 19) +test bench_dynamic_parse::parse_small ... bench: 2,494 ns/iter (+/- 11) +test bench_sherlock::before_holmes ... bench: 42,005,594 ns/iter (+/- 57,752) = 14 MB/s +test bench_sherlock::everything_greedy ... bench: 38,431,063 ns/iter (+/- 28,840) = 15 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 32,003,966 ns/iter (+/- 50,270) = 18 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 1,457,068 ns/iter (+/- 3,202) = 408 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 136,035,549 ns/iter (+/- 75,381) = 4 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 33,024,291 ns/iter (+/- 67,902) = 18 MB/s +test bench_sherlock::name_alt1 ... bench: 157,989 ns/iter (+/- 917) = 3765 MB/s +test bench_sherlock::name_alt2 ... bench: 545,254 ns/iter (+/- 1,908) = 1091 MB/s +test bench_sherlock::name_alt3 ... bench: 2,245,964 ns/iter (+/- 2,478) = 264 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 4,792,290 ns/iter (+/- 31,760) = 124 MB/s +test bench_sherlock::name_alt4 ... bench: 584,204 ns/iter (+/- 2,084) = 1018 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 2,318,020 ns/iter (+/- 8,493) = 256 MB/s +test bench_sherlock::name_holmes ... bench: 51,880 ns/iter (+/- 299) = 11467 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,414,500 ns/iter (+/- 2,497) = 420 MB/s +test bench_sherlock::name_sherlock ... bench: 34,294 ns/iter (+/- 349) = 17348 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 34,531 ns/iter (+/- 199) = 17228 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,692,651 ns/iter (+/- 8,846) = 351 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,657,413 ns/iter (+/- 5,534) = 358 MB/s +test bench_sherlock::name_whitespace ... bench: 131,372 ns/iter (+/- 605) = 4528 MB/s +test bench_sherlock::no_match_common ... bench: 567,065 ns/iter (+/- 2,763) = 1049 MB/s +test bench_sherlock::no_match_uncommon ... 
bench: 23,782 ns/iter (+/- 85) = 25016 MB/s +test bench_sherlock::quotes ... bench: 11,251,366 ns/iter (+/- 24,960) = 52 MB/s +test bench_sherlock::the_lower ... bench: 789,781 ns/iter (+/- 2,072) = 753 MB/s +test bench_sherlock::the_nocase ... bench: 1,807,509 ns/iter (+/- 4,685) = 329 MB/s +test bench_sherlock::the_upper ... bench: 53,542 ns/iter (+/- 198) = 11111 MB/s +test bench_sherlock::the_whitespace ... bench: 5,410,444 ns/iter (+/- 14,766) = 109 MB/s +test bench_sherlock::word_ending_n ... bench: 56,017,874 ns/iter (+/- 60,047) = 10 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/native b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/native new file mode 100644 index 0000000000..61fc08da41 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/native @@ -0,0 +1,65 @@ + Compiling regex_macros v0.1.28 (file:///home/andrew/data/projects/rust/regex/regex_macros) + Running regex_macros/target/release/native-f2ffefeeda527264 + +running 58 tests +test bench::anchored_literal_long_match ... bench: 189 ns/iter (+/- 16) = 2063 MB/s +test bench::anchored_literal_long_non_match ... bench: 47 ns/iter (+/- 1) = 8297 MB/s +test bench::anchored_literal_short_match ... bench: 177 ns/iter (+/- 5) = 146 MB/s +test bench::anchored_literal_short_non_match ... bench: 46 ns/iter (+/- 1) = 565 MB/s +test bench::easy0_1K ... bench: 26,578 ns/iter (+/- 1,140) = 38 MB/s +test bench::easy0_1MB ... bench: 27,229,730 ns/iter (+/- 261,126) = 38 MB/s +test bench::easy0_32 ... bench: 867 ns/iter (+/- 45) = 36 MB/s +test bench::easy0_32K ... bench: 847,113 ns/iter (+/- 276,910) = 38 MB/s +test bench::easy1_1K ... bench: 23,525 ns/iter (+/- 278) = 43 MB/s +test bench::easy1_1MB ... bench: 24,075,047 ns/iter (+/- 40,396) = 43 MB/s +test bench::easy1_32 ... bench: 767 ns/iter (+/- 14) = 41 MB/s +test bench::easy1_32K ... 
bench: 752,730 ns/iter (+/- 9,284) = 43 MB/s +test bench::hard_1K ... bench: 44,053 ns/iter (+/- 513) = 23 MB/s +test bench::hard_1MB ... bench: 44,982,170 ns/iter (+/- 76,683) = 23 MB/s +test bench::hard_32 ... bench: 1,418 ns/iter (+/- 26) = 22 MB/s +test bench::hard_32K ... bench: 1,407,013 ns/iter (+/- 13,426) = 23 MB/s +test bench::literal ... bench: 1,202 ns/iter (+/- 16) = 42 MB/s +test bench::match_class ... bench: 2,057 ns/iter (+/- 29) = 39 MB/s +test bench::match_class_in_range ... bench: 2,060 ns/iter (+/- 34) = 39 MB/s +test bench::match_class_unicode ... bench: 12,945 ns/iter (+/- 156) = 12 MB/s +test bench::medium_1K ... bench: 27,874 ns/iter (+/- 315) = 36 MB/s +test bench::medium_1MB ... bench: 28,614,500 ns/iter (+/- 544,256) = 36 MB/s +test bench::medium_32 ... bench: 896 ns/iter (+/- 85) = 35 MB/s +test bench::medium_32K ... bench: 892,349 ns/iter (+/- 35,511) = 36 MB/s +test bench::no_exponential ... bench: 319,270 ns/iter (+/- 19,837) +test bench::not_literal ... bench: 1,477 ns/iter (+/- 104) = 34 MB/s +test bench::one_pass_long_prefix ... bench: 653 ns/iter (+/- 10) = 39 MB/s +test bench::one_pass_long_prefix_not ... bench: 651 ns/iter (+/- 6) = 39 MB/s +test bench::one_pass_short ... bench: 1,016 ns/iter (+/- 24) = 16 MB/s +test bench::one_pass_short_not ... bench: 1,588 ns/iter (+/- 28) = 10 MB/s +test bench::replace_all ... bench: 1,078 ns/iter (+/- 55) +test bench_sherlock::before_holmes ... bench: 54,264,124 ns/iter (+/- 564,692) = 10 MB/s +test bench_sherlock::everything_greedy ... bench: 22,724,158 ns/iter (+/- 44,361) = 26 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 22,168,804 ns/iter (+/- 66,296) = 26 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 24,791,824 ns/iter (+/- 37,522) = 23 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 885,999,793 ns/iter (+/- 39,704,278) +test bench_sherlock::line_boundary_sherlock_holmes ... 
bench: 25,113,805 ns/iter (+/- 672,050) = 23 MB/s +test bench_sherlock::name_alt1 ... bench: 23,382,716 ns/iter (+/- 3,696,517) = 25 MB/s +test bench_sherlock::name_alt2 ... bench: 23,585,220 ns/iter (+/- 3,724,922) = 25 MB/s +test bench_sherlock::name_alt3 ... bench: 80,283,635 ns/iter (+/- 3,165,029) = 7 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 77,357,394 ns/iter (+/- 268,133) = 7 MB/s +test bench_sherlock::name_alt4 ... bench: 22,736,520 ns/iter (+/- 43,231) = 26 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 26,921,524 ns/iter (+/- 140,162) = 22 MB/s +test bench_sherlock::name_holmes ... bench: 15,145,735 ns/iter (+/- 65,980) = 39 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 16,285,042 ns/iter (+/- 71,956) = 36 MB/s +test bench_sherlock::name_sherlock ... bench: 16,189,653 ns/iter (+/- 99,929) = 36 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 14,975,742 ns/iter (+/- 118,052) = 39 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 16,904,928 ns/iter (+/- 201,104) = 35 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 16,335,907 ns/iter (+/- 118,725) = 36 MB/s +test bench_sherlock::name_whitespace ... bench: 14,837,905 ns/iter (+/- 52,201) = 40 MB/s +test bench_sherlock::no_match_common ... bench: 16,036,625 ns/iter (+/- 108,268) = 37 MB/s +test bench_sherlock::no_match_uncommon ... bench: 15,278,356 ns/iter (+/- 81,123) = 38 MB/s +test bench_sherlock::quotes ... bench: 21,580,801 ns/iter (+/- 198,772) = 27 MB/s +test bench_sherlock::the_lower ... bench: 16,059,120 ns/iter (+/- 160,640) = 37 MB/s +test bench_sherlock::the_nocase ... bench: 17,376,836 ns/iter (+/- 103,371) = 34 MB/s +test bench_sherlock::the_upper ... bench: 15,259,087 ns/iter (+/- 93,807) = 38 MB/s +test bench_sherlock::the_whitespace ... bench: 18,835,951 ns/iter (+/- 160,674) = 31 MB/s +test bench_sherlock::word_ending_n ... bench: 59,832,390 ns/iter (+/- 4,478,911) = 9 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 58 measured + diff --git a/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/nfa b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/nfa new file mode 100644 index 0000000000..994137b55c --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/nfa @@ -0,0 +1,74 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) + Running target/release/dynamic_nfa-1e40ce11bcb7c666 + +running 67 tests +test bench::anchored_literal_long_match ... bench: 306 ns/iter (+/- 6) = 1274 MB/s +test bench::anchored_literal_long_non_match ... bench: 95 ns/iter (+/- 1) = 4105 MB/s +test bench::anchored_literal_short_match ... bench: 315 ns/iter (+/- 2) = 82 MB/s +test bench::anchored_literal_short_non_match ... bench: 96 ns/iter (+/- 2) = 270 MB/s +test bench::easy0_1K ... bench: 206 ns/iter (+/- 1) = 4970 MB/s +test bench::easy0_1MB ... bench: 255,834 ns/iter (+/- 1,273) = 4098 MB/s +test bench::easy0_32 ... bench: 72 ns/iter (+/- 2) = 444 MB/s +test bench::easy0_32K ... bench: 5,315 ns/iter (+/- 25) = 6165 MB/s +test bench::easy1_1K ... bench: 274 ns/iter (+/- 0) = 3737 MB/s +test bench::easy1_1MB ... bench: 337,047 ns/iter (+/- 1,972) = 3111 MB/s +test bench::easy1_32 ... bench: 76 ns/iter (+/- 2) = 421 MB/s +test bench::easy1_32K ... bench: 6,111 ns/iter (+/- 39) = 5362 MB/s +test bench::hard_1K ... bench: 59,596 ns/iter (+/- 264) = 17 MB/s +test bench::hard_1MB ... bench: 58,947,188 ns/iter (+/- 205,874) = 17 MB/s +test bench::hard_32 ... bench: 1,978 ns/iter (+/- 22) = 16 MB/s +test bench::hard_32K ... bench: 1,846,347 ns/iter (+/- 14,253) = 17 MB/s +test bench::literal ... bench: 172 ns/iter (+/- 1) = 296 MB/s +test bench::match_class ... bench: 240 ns/iter (+/- 1) = 337 MB/s +test bench::match_class_in_range ... bench: 190 ns/iter (+/- 2) = 426 MB/s +test bench::match_class_unicode ... bench: 4,145 ns/iter (+/- 24) = 38 MB/s +test bench::medium_1K ... 
bench: 1,195 ns/iter (+/- 8) = 856 MB/s +test bench::medium_1MB ... bench: 2,028,649 ns/iter (+/- 11,235) = 516 MB/s +test bench::medium_32 ... bench: 84 ns/iter (+/- 0) = 380 MB/s +test bench::medium_32K ... bench: 56,134 ns/iter (+/- 369) = 583 MB/s +test bench::no_exponential ... bench: 536 ns/iter (+/- 4) = 186 MB/s +test bench::not_literal ... bench: 2,428 ns/iter (+/- 31) = 21 MB/s +test bench::one_pass_long_prefix ... bench: 756 ns/iter (+/- 2) = 34 MB/s +test bench::one_pass_long_prefix_not ... bench: 756 ns/iter (+/- 12) = 34 MB/s +test bench::one_pass_short ... bench: 1,813 ns/iter (+/- 5) = 9 MB/s +test bench::one_pass_short_not ... bench: 2,588 ns/iter (+/- 8) = 6 MB/s +test bench::replace_all ... bench: 905 ns/iter (+/- 7) +test bench_dynamic_compile::compile_huge ... bench: 161,517 ns/iter (+/- 1,287) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,395,715 ns/iter (+/- 98,986) +test bench_dynamic_compile::compile_simple ... bench: 6,623 ns/iter (+/- 296) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,047 ns/iter (+/- 232) +test bench_dynamic_compile::compile_small ... bench: 8,948 ns/iter (+/- 526) +test bench_dynamic_compile::compile_small_bytes ... bench: 186,796 ns/iter (+/- 817) +test bench_dynamic_parse::parse_huge ... bench: 1,238 ns/iter (+/- 6) +test bench_dynamic_parse::parse_simple ... bench: 1,977 ns/iter (+/- 12) +test bench_dynamic_parse::parse_small ... bench: 2,502 ns/iter (+/- 18) +test bench_sherlock::before_holmes ... bench: 45,045,123 ns/iter (+/- 261,188) = 13 MB/s +test bench_sherlock::everything_greedy ... bench: 38,685,654 ns/iter (+/- 107,136) = 15 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 36,407,787 ns/iter (+/- 160,253) = 16 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 1,417,371 ns/iter (+/- 6,533) = 419 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 139,298,695 ns/iter (+/- 154,012) = 4 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... 
bench: 32,734,005 ns/iter (+/- 98,729) = 18 MB/s +test bench_sherlock::name_alt1 ... bench: 153,016 ns/iter (+/- 739) = 3888 MB/s +test bench_sherlock::name_alt2 ... bench: 534,038 ns/iter (+/- 1,909) = 1114 MB/s +test bench_sherlock::name_alt3 ... bench: 2,220,778 ns/iter (+/- 6,374) = 267 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 4,744,134 ns/iter (+/- 11,703) = 125 MB/s +test bench_sherlock::name_alt4 ... bench: 569,971 ns/iter (+/- 2,256) = 1043 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 2,324,966 ns/iter (+/- 3,082) = 255 MB/s +test bench_sherlock::name_holmes ... bench: 268,146 ns/iter (+/- 1,238) = 2218 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,409,583 ns/iter (+/- 2,808) = 422 MB/s +test bench_sherlock::name_sherlock ... bench: 95,280 ns/iter (+/- 316) = 6244 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 116,097 ns/iter (+/- 461) = 5124 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,691,210 ns/iter (+/- 3,712) = 351 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,651,722 ns/iter (+/- 7,070) = 360 MB/s +test bench_sherlock::name_whitespace ... bench: 130,960 ns/iter (+/- 923) = 4542 MB/s +test bench_sherlock::no_match_common ... bench: 568,008 ns/iter (+/- 1,723) = 1047 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,669 ns/iter (+/- 84) = 25135 MB/s +test bench_sherlock::quotes ... bench: 11,055,260 ns/iter (+/- 24,883) = 53 MB/s +test bench_sherlock::the_lower ... bench: 2,934,498 ns/iter (+/- 4,553) = 202 MB/s +test bench_sherlock::the_nocase ... bench: 4,268,193 ns/iter (+/- 8,164) = 139 MB/s +test bench_sherlock::the_upper ... bench: 272,832 ns/iter (+/- 1,436) = 2180 MB/s +test bench_sherlock::the_whitespace ... bench: 5,409,934 ns/iter (+/- 7,678) = 109 MB/s +test bench_sherlock::word_ending_n ... bench: 55,252,656 ns/iter (+/- 68,442) = 10 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/pcre b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/pcre new file mode 100644 index 0000000000..22a66e6a8b --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/01-lazy-dfa/pcre @@ -0,0 +1,60 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) + Running target/release/pcre-781840b9a3e9c199 + +running 53 tests +test anchored_literal_long_match ... bench: 90 ns/iter (+/- 7) = 4333 MB/s +test anchored_literal_long_non_match ... bench: 60 ns/iter (+/- 2) = 6500 MB/s +test anchored_literal_short_match ... bench: 87 ns/iter (+/- 6) = 298 MB/s +test anchored_literal_short_non_match ... bench: 58 ns/iter (+/- 4) = 448 MB/s +test easy0_1K ... bench: 258 ns/iter (+/- 14) = 3968 MB/s +test easy0_1MB ... bench: 226,139 ns/iter (+/- 1,637) = 4636 MB/s +test easy0_32 ... bench: 60 ns/iter (+/- 7) = 533 MB/s +test easy0_32K ... bench: 7,028 ns/iter (+/- 120) = 4662 MB/s +test easy1_1K ... bench: 794 ns/iter (+/- 20) = 1289 MB/s +test easy1_1MB ... bench: 751,438 ns/iter (+/- 11,372) = 1395 MB/s +test easy1_32 ... bench: 71 ns/iter (+/- 3) = 450 MB/s +test easy1_32K ... bench: 23,042 ns/iter (+/- 1,453) = 1422 MB/s +test hard_1K ... bench: 30,841 ns/iter (+/- 1,287) = 33 MB/s +test hard_1MB ... bench: 35,239,100 ns/iter (+/- 632,179) = 29 MB/s +test hard_32 ... bench: 86 ns/iter (+/- 11) = 372 MB/s +test hard_32K ... bench: 993,011 ns/iter (+/- 63,648) = 32 MB/s +test literal ... bench: 130 ns/iter (+/- 11) = 392 MB/s +test match_class ... bench: 183 ns/iter (+/- 33) = 442 MB/s +test match_class_in_range ... bench: 175 ns/iter (+/- 18) = 462 MB/s +test match_class_unicode ... bench: 513 ns/iter (+/- 8) = 313 MB/s +test medium_1K ... bench: 278 ns/iter (+/- 6) = 3683 MB/s +test medium_1MB ... bench: 240,699 ns/iter (+/- 17,344) = 4356 MB/s +test medium_32 ... bench: 61 ns/iter (+/- 13) = 524 MB/s +test medium_32K ... 
bench: 7,369 ns/iter (+/- 105) = 4446 MB/s +test not_literal ... bench: 274 ns/iter (+/- 17) = 186 MB/s +test one_pass_long_prefix ... bench: 87 ns/iter (+/- 19) = 298 MB/s +test one_pass_long_prefix_not ... bench: 86 ns/iter (+/- 13) = 302 MB/s +test one_pass_short ... bench: 117 ns/iter (+/- 44) = 145 MB/s +test one_pass_short_not ... bench: 122 ns/iter (+/- 6) = 139 MB/s +test sherlock::before_holmes ... bench: 14,450,308 ns/iter (+/- 617,786) = 41 MB/s +test sherlock::holmes_cochar_watson ... bench: 546,919 ns/iter (+/- 4,880) = 1087 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 194,524 ns/iter (+/- 6,230) = 3058 MB/s +test sherlock::name_alt1 ... bench: 457,899 ns/iter (+/- 7,781) = 1299 MB/s +test sherlock::name_alt2 ... bench: 496,659 ns/iter (+/- 6,529) = 1197 MB/s +test sherlock::name_alt3 ... bench: 983,620 ns/iter (+/- 45,359) = 604 MB/s +test sherlock::name_alt3_nocase ... bench: 3,500,367 ns/iter (+/- 79,807) = 169 MB/s +test sherlock::name_alt4 ... bench: 972,128 ns/iter (+/- 22,195) = 611 MB/s +test sherlock::name_alt4_nocase ... bench: 1,877,017 ns/iter (+/- 39,079) = 316 MB/s +test sherlock::name_holmes ... bench: 398,258 ns/iter (+/- 4,338) = 1493 MB/s +test sherlock::name_holmes_nocase ... bench: 492,292 ns/iter (+/- 4,667) = 1208 MB/s +test sherlock::name_sherlock ... bench: 268,891 ns/iter (+/- 18,063) = 2212 MB/s +test sherlock::name_sherlock_holmes ... bench: 197,067 ns/iter (+/- 8,027) = 3018 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,112,501 ns/iter (+/- 44,457) = 534 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,332,423 ns/iter (+/- 39,227) = 446 MB/s +test sherlock::name_whitespace ... bench: 267,257 ns/iter (+/- 964) = 2226 MB/s +test sherlock::no_match_common ... bench: 595,211 ns/iter (+/- 3,739) = 999 MB/s +test sherlock::no_match_uncommon ... bench: 584,057 ns/iter (+/- 6,825) = 1018 MB/s +test sherlock::quotes ... 
bench: 1,208,235 ns/iter (+/- 37,629) = 492 MB/s +test sherlock::the_lower ... bench: 1,210,851 ns/iter (+/- 35,900) = 491 MB/s +test sherlock::the_nocase ... bench: 1,286,611 ns/iter (+/- 35,689) = 462 MB/s +test sherlock::the_upper ... bench: 776,113 ns/iter (+/- 6,236) = 766 MB/s +test sherlock::the_whitespace ... bench: 1,368,468 ns/iter (+/- 135,282) = 434 MB/s +test sherlock::word_ending_n ... bench: 12,018,618 ns/iter (+/- 266,497) = 49 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 53 measured + diff --git a/third_party/rust/regex/record/old-bench-log/02-set/dynamic b/third_party/rust/regex/record/old-bench-log/02-set/dynamic new file mode 100644 index 0000000000..69c9f71754 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/02-set/dynamic @@ -0,0 +1,78 @@ + Compiling regex v0.1.52 (file:///home/andrew/data/projects/rust/regex) + Running target/release/dynamic-a76738dddf3bdc6b + +running 71 tests +test misc::anchored_literal_long_match ... bench: 74 ns/iter (+/- 8) = 5270 MB/s +test misc::anchored_literal_long_non_match ... bench: 58 ns/iter (+/- 0) = 6724 MB/s +test misc::anchored_literal_short_match ... bench: 73 ns/iter (+/- 0) = 356 MB/s +test misc::anchored_literal_short_non_match ... bench: 58 ns/iter (+/- 0) = 448 MB/s +test misc::easy0_1K ... bench: 214 ns/iter (+/- 2) = 4785 MB/s +test misc::easy0_1MB ... bench: 247,056 ns/iter (+/- 1,777) = 4244 MB/s +test misc::easy0_32 ... bench: 64 ns/iter (+/- 0) = 500 MB/s +test misc::easy0_32K ... bench: 5,281 ns/iter (+/- 29) = 6204 MB/s +test misc::easy1_1K ... bench: 278 ns/iter (+/- 5) = 3683 MB/s +test misc::easy1_1MB ... bench: 320,041 ns/iter (+/- 4,243) = 3276 MB/s +test misc::easy1_32 ... bench: 65 ns/iter (+/- 0) = 492 MB/s +test misc::easy1_32K ... bench: 5,885 ns/iter (+/- 83) = 5568 MB/s +test misc::hard_1K ... bench: 4,685 ns/iter (+/- 20) = 218 MB/s +test misc::hard_1MB ... bench: 4,745,020 ns/iter (+/- 19,440) = 220 MB/s +test misc::hard_32 ... 
bench: 197 ns/iter (+/- 1) = 162 MB/s +test misc::hard_32K ... bench: 147,409 ns/iter (+/- 656) = 222 MB/s +test misc::literal ... bench: 20 ns/iter (+/- 1) = 2550 MB/s +test misc::match_class ... bench: 86 ns/iter (+/- 3) = 941 MB/s +test misc::match_class_in_range ... bench: 32 ns/iter (+/- 2) = 2531 MB/s +test misc::match_class_unicode ... bench: 801 ns/iter (+/- 36) = 200 MB/s +test misc::medium_1K ... bench: 1,213 ns/iter (+/- 237) = 844 MB/s +test misc::medium_1MB ... bench: 1,991,418 ns/iter (+/- 239,612) = 526 MB/s +test misc::medium_32 ... bench: 100 ns/iter (+/- 8) = 320 MB/s +test misc::medium_32K ... bench: 57,080 ns/iter (+/- 709) = 574 MB/s +test misc::no_exponential ... bench: 522 ns/iter (+/- 17) = 191 MB/s +test misc::not_literal ... bench: 290 ns/iter (+/- 6) = 175 MB/s +test misc::one_pass_long_prefix ... bench: 176 ns/iter (+/- 15) = 147 MB/s +test misc::one_pass_long_prefix_not ... bench: 183 ns/iter (+/- 28) = 142 MB/s +test misc::one_pass_short ... bench: 136 ns/iter (+/- 8) = 125 MB/s +test misc::one_pass_short_not ... bench: 135 ns/iter (+/- 14) = 125 MB/s +test misc::replace_all ... bench: 149 ns/iter (+/- 34) +test rust_compile::compile_huge ... bench: 158,759 ns/iter (+/- 4,546) +test rust_compile::compile_huge_bytes ... bench: 17,538,290 ns/iter (+/- 1,735,383) +test rust_compile::compile_simple ... bench: 5,935 ns/iter (+/- 429) +test rust_compile::compile_simple_bytes ... bench: 6,682 ns/iter (+/- 293) +test rust_compile::compile_small ... bench: 7,664 ns/iter (+/- 473) +test rust_compile::compile_small_bytes ... bench: 175,272 ns/iter (+/- 4,492) +test rust_parse::parse_huge ... bench: 1,199 ns/iter (+/- 38) +test rust_parse::parse_simple ... bench: 1,849 ns/iter (+/- 28) +test rust_parse::parse_small ... bench: 2,470 ns/iter (+/- 35) +test sherlock::before_holmes ... bench: 2,750,028 ns/iter (+/- 21,847) = 216 MB/s +test sherlock::everything_greedy ... 
bench: 7,896,337 ns/iter (+/- 68,883) = 75 MB/s +test sherlock::everything_greedy_nl ... bench: 5,498,247 ns/iter (+/- 65,952) = 108 MB/s +test sherlock::holmes_cochar_watson ... bench: 260,499 ns/iter (+/- 4,984) = 2283 MB/s +test sherlock::holmes_coword_watson ... bench: 1,331,443 ns/iter (+/- 34,716) = 446 MB/s +test sherlock::letters ... bench: 60,985,848 ns/iter (+/- 592,838) = 9 MB/s +test sherlock::letters_lower ... bench: 59,041,695 ns/iter (+/- 186,034) = 10 MB/s +test sherlock::letters_upper ... bench: 4,714,214 ns/iter (+/- 35,672) = 126 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,730,524 ns/iter (+/- 69,565) = 217 MB/s +test sherlock::name_alt1 ... bench: 41,866 ns/iter (+/- 682) = 14210 MB/s +test sherlock::name_alt2 ... bench: 194,322 ns/iter (+/- 6,628) = 3061 MB/s +test sherlock::name_alt3 ... bench: 1,252,965 ns/iter (+/- 18,828) = 474 MB/s +test sherlock::name_alt3_nocase ... bench: 1,476,169 ns/iter (+/- 14,557) = 403 MB/s +test sherlock::name_alt4 ... bench: 298,639 ns/iter (+/- 3,905) = 1992 MB/s +test sherlock::name_alt4_nocase ... bench: 1,426,191 ns/iter (+/- 23,584) = 417 MB/s +test sherlock::name_holmes ... bench: 49,719 ns/iter (+/- 811) = 11965 MB/s +test sherlock::name_holmes_nocase ... bench: 1,191,400 ns/iter (+/- 19,175) = 499 MB/s +test sherlock::name_sherlock ... bench: 34,091 ns/iter (+/- 877) = 17451 MB/s +test sherlock::name_sherlock_holmes ... bench: 33,785 ns/iter (+/- 1,207) = 17609 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,235,442 ns/iter (+/- 18,023) = 481 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,236,252 ns/iter (+/- 26,934) = 481 MB/s +test sherlock::name_whitespace ... bench: 60,200 ns/iter (+/- 1,873) = 9882 MB/s +test sherlock::no_match_common ... bench: 559,886 ns/iter (+/- 20,306) = 1062 MB/s +test sherlock::no_match_uncommon ... bench: 23,631 ns/iter (+/- 497) = 25175 MB/s +test sherlock::quotes ... 
bench: 967,379 ns/iter (+/- 12,856) = 614 MB/s +test sherlock::the_lower ... bench: 766,950 ns/iter (+/- 21,944) = 775 MB/s +test sherlock::the_nocase ... bench: 1,706,539 ns/iter (+/- 26,003) = 348 MB/s +test sherlock::the_upper ... bench: 52,529 ns/iter (+/- 1,208) = 11325 MB/s +test sherlock::the_whitespace ... bench: 2,012,952 ns/iter (+/- 26,968) = 295 MB/s +test sherlock::word_ending_n ... bench: 55,578,841 ns/iter (+/- 537,463) = 10 MB/s +test sherlock::words ... bench: 19,103,327 ns/iter (+/- 102,828) = 31 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 71 measured + diff --git a/third_party/rust/regex/record/old-bench-log/03-bytes/onig b/third_party/rust/regex/record/old-bench-log/03-bytes/onig new file mode 100644 index 0000000000..aaf666b431 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/03-bytes/onig @@ -0,0 +1,68 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/onig-e3bc363aa56fb408 + +running 61 tests +test misc::anchored_literal_long_match ... bench: 70 ns/iter (+/- 1) = 5571 MB/s +test misc::anchored_literal_long_non_match ... bench: 424 ns/iter (+/- 4) = 919 MB/s +test misc::anchored_literal_short_match ... bench: 70 ns/iter (+/- 1) = 371 MB/s +test misc::anchored_literal_short_non_match ... bench: 38 ns/iter (+/- 0) = 684 MB/s +test misc::easy0_1K ... bench: 176 ns/iter (+/- 2) = 5818 MB/s +test misc::easy0_1MB ... bench: 163,547 ns/iter (+/- 1,451) = 6411 MB/s +test misc::easy0_32 ... bench: 20 ns/iter (+/- 1) = 1600 MB/s +test misc::easy0_32K ... bench: 5,056 ns/iter (+/- 64) = 6481 MB/s +test misc::easy1_1K ... bench: 4,103 ns/iter (+/- 11) = 249 MB/s +test misc::easy1_1MB ... bench: 4,198,406 ns/iter (+/- 62,171) = 249 MB/s +test misc::easy1_32 ... bench: 139 ns/iter (+/- 1) = 230 MB/s +test misc::easy1_32K ... bench: 131,083 ns/iter (+/- 1,310) = 249 MB/s +test misc::hard_1K ... bench: 163 ns/iter (+/- 3) = 6282 MB/s +test misc::hard_1MB ... 
bench: 163,910 ns/iter (+/- 2,368) = 6397 MB/s +test misc::hard_32 ... bench: 20 ns/iter (+/- 1) = 1600 MB/s +test misc::hard_32K ... bench: 5,002 ns/iter (+/- 306) = 6550 MB/s +test misc::literal ... bench: 226 ns/iter (+/- 0) = 225 MB/s +test misc::match_class ... bench: 337 ns/iter (+/- 2) = 240 MB/s +test misc::match_class_in_range ... bench: 337 ns/iter (+/- 1) = 240 MB/s +test misc::match_class_unicode ... bench: 2,004 ns/iter (+/- 26) = 80 MB/s +test misc::medium_1K ... bench: 191 ns/iter (+/- 2) = 5361 MB/s +test misc::medium_1MB ... bench: 164,027 ns/iter (+/- 2,494) = 6392 MB/s +test misc::medium_32 ... bench: 22 ns/iter (+/- 1) = 1454 MB/s +test misc::medium_32K ... bench: 4,962 ns/iter (+/- 60) = 6603 MB/s +test misc::not_literal ... bench: 359 ns/iter (+/- 5) = 142 MB/s +test misc::one_pass_long_prefix ... bench: 94 ns/iter (+/- 3) = 276 MB/s +test misc::one_pass_long_prefix_not ... bench: 101 ns/iter (+/- 1) = 257 MB/s +test misc::one_pass_short ... bench: 332 ns/iter (+/- 6) = 51 MB/s +test misc::one_pass_short_not ... bench: 318 ns/iter (+/- 4) = 53 MB/s +test sherlock::before_holmes ... bench: 70,859,542 ns/iter (+/- 594,306) = 8 MB/s +test sherlock::everything_greedy ... bench: 5,129,894 ns/iter (+/- 33,792) = 115 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,388,047 ns/iter (+/- 19,666) = 249 MB/s +test sherlock::ing_suffix ... bench: 28,413,935 ns/iter (+/- 800,513) = 20 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,636,327 ns/iter (+/- 66,410) = 225 MB/s +test sherlock::letters ... bench: 26,471,724 ns/iter (+/- 872,994) = 22 MB/s +test sherlock::letters_lower ... bench: 26,124,489 ns/iter (+/- 556,750) = 22 MB/s +test sherlock::letters_upper ... bench: 11,268,144 ns/iter (+/- 338,510) = 52 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 195,797 ns/iter (+/- 1,621) = 3038 MB/s +test sherlock::name_alt1 ... bench: 2,100,763 ns/iter (+/- 16,823) = 283 MB/s +test sherlock::name_alt2 ... 
bench: 2,212,816 ns/iter (+/- 17,997) = 268 MB/s +test sherlock::name_alt3 ... bench: 3,031,567 ns/iter (+/- 35,631) = 196 MB/s +test sherlock::name_alt3_nocase ... bench: 39,737,911 ns/iter (+/- 166,863) = 14 MB/s +test sherlock::name_alt4 ... bench: 2,230,681 ns/iter (+/- 18,856) = 266 MB/s +test sherlock::name_alt4_nocase ... bench: 8,294,698 ns/iter (+/- 36,887) = 71 MB/s +test sherlock::name_holmes ... bench: 402,600 ns/iter (+/- 6,232) = 1477 MB/s +test sherlock::name_holmes_nocase ... bench: 4,074,155 ns/iter (+/- 23,317) = 146 MB/s +test sherlock::name_sherlock ... bench: 270,225 ns/iter (+/- 2,815) = 2201 MB/s +test sherlock::name_sherlock_holmes ... bench: 196,502 ns/iter (+/- 2,168) = 3027 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 4,397,347 ns/iter (+/- 28,567) = 135 MB/s +test sherlock::name_sherlock_nocase ... bench: 4,400,574 ns/iter (+/- 25,127) = 135 MB/s +test sherlock::name_whitespace ... bench: 274,462 ns/iter (+/- 3,180) = 2167 MB/s +test sherlock::no_match_common ... bench: 596,601 ns/iter (+/- 9,285) = 997 MB/s +test sherlock::no_match_uncommon ... bench: 586,258 ns/iter (+/- 7,702) = 1014 MB/s +test sherlock::quotes ... bench: 4,069,570 ns/iter (+/- 20,372) = 146 MB/s +test sherlock::repeated_class_negation ... bench: 44,936,445 ns/iter (+/- 103,467) = 13 MB/s +test sherlock::the_lower ... bench: 1,300,513 ns/iter (+/- 12,884) = 457 MB/s +test sherlock::the_nocase ... bench: 5,141,237 ns/iter (+/- 25,487) = 115 MB/s +test sherlock::the_upper ... bench: 821,454 ns/iter (+/- 13,420) = 724 MB/s +test sherlock::the_whitespace ... bench: 2,009,530 ns/iter (+/- 14,082) = 296 MB/s +test sherlock::word_ending_n ... bench: 27,847,316 ns/iter (+/- 47,618) = 21 MB/s +test sherlock::words ... bench: 21,105,627 ns/iter (+/- 33,436) = 28 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 61 measured + diff --git a/third_party/rust/regex/record/old-bench-log/03-bytes/pcre b/third_party/rust/regex/record/old-bench-log/03-bytes/pcre new file mode 100644 index 0000000000..236613ae0f --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/03-bytes/pcre @@ -0,0 +1,66 @@ + Running benches/target/release/pcre-855c18fb35cdf072 + +running 60 tests +test misc::anchored_literal_long_match ... bench: 88 ns/iter (+/- 12) = 4431 MB/s +test misc::anchored_literal_long_non_match ... bench: 58 ns/iter (+/- 1) = 6724 MB/s +test misc::anchored_literal_short_match ... bench: 88 ns/iter (+/- 1) = 295 MB/s +test misc::anchored_literal_short_non_match ... bench: 60 ns/iter (+/- 3) = 433 MB/s +test misc::easy0_1K ... bench: 266 ns/iter (+/- 1) = 3849 MB/s +test misc::easy0_1MB ... bench: 227,366 ns/iter (+/- 794) = 4611 MB/s +test misc::easy0_32 ... bench: 62 ns/iter (+/- 2) = 516 MB/s +test misc::easy0_32K ... bench: 7,061 ns/iter (+/- 109) = 4640 MB/s +test misc::easy1_1K ... bench: 805 ns/iter (+/- 10) = 1272 MB/s +test misc::easy1_1MB ... bench: 751,948 ns/iter (+/- 6,995) = 1394 MB/s +test misc::easy1_32 ... bench: 71 ns/iter (+/- 1) = 450 MB/s +test misc::easy1_32K ... bench: 23,635 ns/iter (+/- 213) = 1386 MB/s +test misc::hard_1K ... bench: 31,008 ns/iter (+/- 299) = 33 MB/s +test misc::hard_1MB ... bench: 35,078,241 ns/iter (+/- 94,197) = 29 MB/s +test misc::hard_32 ... bench: 313 ns/iter (+/- 1) = 102 MB/s +test misc::hard_32K ... bench: 995,958 ns/iter (+/- 10,945) = 32 MB/s +test misc::literal ... bench: 130 ns/iter (+/- 1) = 392 MB/s +test misc::match_class ... bench: 176 ns/iter (+/- 2) = 460 MB/s +test misc::match_class_in_range ... bench: 178 ns/iter (+/- 1) = 455 MB/s +test misc::match_class_unicode ... bench: 511 ns/iter (+/- 6) = 315 MB/s +test misc::medium_1K ... bench: 275 ns/iter (+/- 4) = 3723 MB/s +test misc::medium_1MB ... bench: 239,603 ns/iter (+/- 1,808) = 4376 MB/s +test misc::medium_32 ... 
bench: 62 ns/iter (+/- 1) = 516 MB/s +test misc::medium_32K ... bench: 7,385 ns/iter (+/- 43) = 4437 MB/s +test misc::not_literal ... bench: 274 ns/iter (+/- 3) = 186 MB/s +test misc::one_pass_long_prefix ... bench: 87 ns/iter (+/- 1) = 298 MB/s +test misc::one_pass_long_prefix_not ... bench: 88 ns/iter (+/- 0) = 295 MB/s +test misc::one_pass_short ... bench: 115 ns/iter (+/- 0) = 147 MB/s +test misc::one_pass_short_not ... bench: 118 ns/iter (+/- 0) = 144 MB/s +test sherlock::before_holmes ... bench: 14,338,348 ns/iter (+/- 23,734) = 41 MB/s +test sherlock::holmes_cochar_watson ... bench: 547,196 ns/iter (+/- 4,100) = 1087 MB/s +test sherlock::ing_suffix ... bench: 6,012,620 ns/iter (+/- 51,777) = 98 MB/s +test sherlock::ing_suffix_limited_space ... bench: 6,374,577 ns/iter (+/- 46,486) = 93 MB/s +test sherlock::letters ... bench: 28,575,184 ns/iter (+/- 65,051) = 20 MB/s +test sherlock::letters_lower ... bench: 25,819,606 ns/iter (+/- 180,823) = 23 MB/s +test sherlock::letters_upper ... bench: 3,227,381 ns/iter (+/- 11,443) = 184 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 193,512 ns/iter (+/- 1,316) = 3074 MB/s +test sherlock::name_alt1 ... bench: 454,510 ns/iter (+/- 2,721) = 1308 MB/s +test sherlock::name_alt2 ... bench: 499,453 ns/iter (+/- 4,692) = 1191 MB/s +test sherlock::name_alt3 ... bench: 1,085,732 ns/iter (+/- 6,841) = 547 MB/s +test sherlock::name_alt3_nocase ... bench: 3,194,995 ns/iter (+/- 12,655) = 186 MB/s +test sherlock::name_alt4 ... bench: 944,353 ns/iter (+/- 12,661) = 629 MB/s +test sherlock::name_alt4_nocase ... bench: 1,646,368 ns/iter (+/- 12,376) = 361 MB/s +test sherlock::name_holmes ... bench: 395,019 ns/iter (+/- 3,929) = 1506 MB/s +test sherlock::name_holmes_nocase ... bench: 493,327 ns/iter (+/- 7,213) = 1205 MB/s +test sherlock::name_sherlock ... bench: 266,400 ns/iter (+/- 1,591) = 2233 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 196,357 ns/iter (+/- 1,770) = 3029 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,259,747 ns/iter (+/- 4,939) = 472 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,128,970 ns/iter (+/- 6,730) = 526 MB/s +test sherlock::name_whitespace ... bench: 267,323 ns/iter (+/- 1,296) = 2225 MB/s +test sherlock::no_match_common ... bench: 595,372 ns/iter (+/- 5,690) = 999 MB/s +test sherlock::no_match_uncommon ... bench: 585,406 ns/iter (+/- 5,719) = 1016 MB/s +test sherlock::quotes ... bench: 1,223,528 ns/iter (+/- 6,579) = 486 MB/s +test sherlock::repeated_class_negation ... bench: 6,440,584 ns/iter (+/- 20,444) = 92 MB/s +test sherlock::the_lower ... bench: 1,220,999 ns/iter (+/- 7,595) = 487 MB/s +test sherlock::the_nocase ... bench: 1,263,078 ns/iter (+/- 15,321) = 471 MB/s +test sherlock::the_upper ... bench: 781,141 ns/iter (+/- 15,408) = 761 MB/s +test sherlock::the_whitespace ... bench: 1,383,414 ns/iter (+/- 548,289) = 430 MB/s +test sherlock::word_ending_n ... bench: 12,709,045 ns/iter (+/- 51,420) = 46 MB/s +test sherlock::words ... bench: 10,798,918 ns/iter (+/- 40,027) = 55 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 60 measured + diff --git a/third_party/rust/regex/record/old-bench-log/03-bytes/rust b/third_party/rust/regex/record/old-bench-log/03-bytes/rust new file mode 100644 index 0000000000..6dec097c5c --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/03-bytes/rust @@ -0,0 +1,83 @@ + Compiling regex-syntax v0.2.5 (file:///home/andrew/data/projects/rust/regex/benches) + Compiling regex v0.1.55 (file:///home/andrew/data/projects/rust/regex/benches) + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/rust-50db306d093e5666 + +running 74 tests +test misc::anchored_literal_long_match ... bench: 75 ns/iter (+/- 5) = 5200 MB/s +test misc::anchored_literal_long_non_match ... 
bench: 56 ns/iter (+/- 0) = 6964 MB/s +test misc::anchored_literal_short_match ... bench: 79 ns/iter (+/- 0) = 329 MB/s +test misc::anchored_literal_short_non_match ... bench: 56 ns/iter (+/- 1) = 464 MB/s +test misc::easy0_1K ... bench: 138 ns/iter (+/- 0) = 7420 MB/s +test misc::easy0_1MB ... bench: 247,159 ns/iter (+/- 724) = 4242 MB/s +test misc::easy0_32 ... bench: 71 ns/iter (+/- 0) = 450 MB/s +test misc::easy0_32K ... bench: 5,474 ns/iter (+/- 34) = 5986 MB/s +test misc::easy1_1K ... bench: 273 ns/iter (+/- 1) = 3750 MB/s +test misc::easy1_1MB ... bench: 317,946 ns/iter (+/- 2,512) = 3297 MB/s +test misc::easy1_32 ... bench: 67 ns/iter (+/- 0) = 477 MB/s +test misc::easy1_32K ... bench: 5,882 ns/iter (+/- 32) = 5570 MB/s +test misc::hard_1K ... bench: 4,713 ns/iter (+/- 13) = 217 MB/s +test misc::hard_1MB ... bench: 4,732,901 ns/iter (+/- 6,948) = 221 MB/s +test misc::hard_32 ... bench: 201 ns/iter (+/- 0) = 159 MB/s +test misc::hard_32K ... bench: 147,994 ns/iter (+/- 900) = 221 MB/s +test misc::literal ... bench: 19 ns/iter (+/- 0) = 2684 MB/s +test misc::match_class ... bench: 85 ns/iter (+/- 0) = 952 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 1) = 2700 MB/s +test misc::match_class_unicode ... bench: 806 ns/iter (+/- 2) = 199 MB/s +test misc::medium_1K ... bench: 1,384 ns/iter (+/- 10) = 739 MB/s +test misc::medium_1MB ... bench: 1,974,381 ns/iter (+/- 7,383) = 531 MB/s +test misc::medium_32 ... bench: 130 ns/iter (+/- 0) = 246 MB/s +test misc::medium_32K ... bench: 52,783 ns/iter (+/- 465) = 620 MB/s +test misc::no_exponential ... bench: 536 ns/iter (+/- 13) = 186 MB/s +test misc::not_literal ... bench: 293 ns/iter (+/- 1) = 174 MB/s +test misc::one_pass_long_prefix ... bench: 179 ns/iter (+/- 1) = 145 MB/s +test misc::one_pass_long_prefix_not ... bench: 180 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_short ... bench: 139 ns/iter (+/- 1) = 122 MB/s +test misc::one_pass_short_not ... 
bench: 142 ns/iter (+/- 1) = 119 MB/s +test misc::replace_all ... bench: 171 ns/iter (+/- 1) +test rust_compile::compile_huge ... bench: 126,158 ns/iter (+/- 1,790) +test rust_compile::compile_huge_bytes ... bench: 18,088,719 ns/iter (+/- 518,980) +test rust_compile::compile_simple ... bench: 6,141 ns/iter (+/- 394) +test rust_compile::compile_simple_bytes ... bench: 6,669 ns/iter (+/- 306) +test rust_compile::compile_small ... bench: 7,431 ns/iter (+/- 275) +test rust_compile::compile_small_bytes ... bench: 191,002 ns/iter (+/- 1,297) +test rust_parse::parse_huge ... bench: 1,204 ns/iter (+/- 9) +test rust_parse::parse_simple ... bench: 1,905 ns/iter (+/- 16) +test rust_parse::parse_small ... bench: 2,454 ns/iter (+/- 24) +test sherlock::before_holmes ... bench: 2,748,082 ns/iter (+/- 11,406) = 216 MB/s +test sherlock::everything_greedy ... bench: 7,833,414 ns/iter (+/- 42,538) = 75 MB/s +test sherlock::everything_greedy_nl ... bench: 5,426,141 ns/iter (+/- 31,378) = 109 MB/s +test sherlock::holmes_cochar_watson ... bench: 262,322 ns/iter (+/- 5,243) = 2267 MB/s +test sherlock::holmes_coword_watson ... bench: 1,324,677 ns/iter (+/- 21,666) = 449 MB/s +test sherlock::ing_suffix ... bench: 3,179,928 ns/iter (+/- 40,246) = 187 MB/s +test sherlock::ing_suffix_limited_space ... bench: 3,525,004 ns/iter (+/- 37,262) = 168 MB/s +test sherlock::letters ... bench: 60,268,445 ns/iter (+/- 1,958,610) = 9 MB/s +test sherlock::letters_lower ... bench: 57,743,679 ns/iter (+/- 84,675) = 10 MB/s +test sherlock::letters_upper ... bench: 4,549,709 ns/iter (+/- 9,312) = 130 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,690,794 ns/iter (+/- 2,796) = 221 MB/s +test sherlock::name_alt1 ... bench: 42,476 ns/iter (+/- 346) = 14006 MB/s +test sherlock::name_alt2 ... bench: 199,058 ns/iter (+/- 1,498) = 2988 MB/s +test sherlock::name_alt3 ... bench: 1,248,439 ns/iter (+/- 3,051) = 476 MB/s +test sherlock::name_alt3_nocase ... 
bench: 1,463,628 ns/iter (+/- 2,799) = 406 MB/s +test sherlock::name_alt4 ... bench: 296,390 ns/iter (+/- 798) = 2007 MB/s +test sherlock::name_alt4_nocase ... bench: 1,415,770 ns/iter (+/- 3,400) = 420 MB/s +test sherlock::name_holmes ... bench: 49,713 ns/iter (+/- 317) = 11967 MB/s +test sherlock::name_holmes_nocase ... bench: 1,181,147 ns/iter (+/- 2,842) = 503 MB/s +test sherlock::name_sherlock ... bench: 34,263 ns/iter (+/- 136) = 17363 MB/s +test sherlock::name_sherlock_holmes ... bench: 34,179 ns/iter (+/- 188) = 17406 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,236,384 ns/iter (+/- 5,012) = 481 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,232,613 ns/iter (+/- 5,009) = 482 MB/s +test sherlock::name_whitespace ... bench: 60,024 ns/iter (+/- 187) = 9911 MB/s +test sherlock::no_match_common ... bench: 558,607 ns/iter (+/- 2,595) = 1065 MB/s +test sherlock::no_match_uncommon ... bench: 24,049 ns/iter (+/- 54) = 24738 MB/s +test sherlock::quotes ... bench: 966,792 ns/iter (+/- 2,982) = 615 MB/s +test sherlock::repeated_class_negation ... bench: 84,186,484 ns/iter (+/- 66,800) = 7 MB/s +test sherlock::the_lower ... bench: 773,759 ns/iter (+/- 2,759) = 768 MB/s +test sherlock::the_nocase ... bench: 1,705,648 ns/iter (+/- 4,604) = 348 MB/s +test sherlock::the_upper ... bench: 52,729 ns/iter (+/- 209) = 11282 MB/s +test sherlock::the_whitespace ... bench: 1,981,215 ns/iter (+/- 8,080) = 300 MB/s +test sherlock::word_ending_n ... bench: 53,482,650 ns/iter (+/- 73,844) = 11 MB/s +test sherlock::words ... bench: 18,961,987 ns/iter (+/- 27,794) = 31 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 74 measured + diff --git a/third_party/rust/regex/record/old-bench-log/03-bytes/rust-bytes b/third_party/rust/regex/record/old-bench-log/03-bytes/rust-bytes new file mode 100644 index 0000000000..735d259348 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/03-bytes/rust-bytes @@ -0,0 +1,66 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/rust_bytes-9f3b188bc741e04b + +running 59 tests +test misc::anchored_literal_long_match ... bench: 75 ns/iter (+/- 6) = 5200 MB/s +test misc::anchored_literal_long_non_match ... bench: 55 ns/iter (+/- 0) = 7090 MB/s +test misc::anchored_literal_short_match ... bench: 75 ns/iter (+/- 0) = 346 MB/s +test misc::anchored_literal_short_non_match ... bench: 55 ns/iter (+/- 0) = 472 MB/s +test misc::easy0_1K ... bench: 245 ns/iter (+/- 0) = 4179 MB/s +test misc::easy0_1MB ... bench: 251,614 ns/iter (+/- 1,143) = 4167 MB/s +test misc::easy0_32 ... bench: 62 ns/iter (+/- 1) = 516 MB/s +test misc::easy0_32K ... bench: 5,281 ns/iter (+/- 66) = 6204 MB/s +test misc::easy1_1K ... bench: 266 ns/iter (+/- 1) = 3849 MB/s +test misc::easy1_1MB ... bench: 325,060 ns/iter (+/- 2,011) = 3225 MB/s +test misc::easy1_32 ... bench: 73 ns/iter (+/- 0) = 438 MB/s +test misc::easy1_32K ... bench: 5,609 ns/iter (+/- 41) = 5842 MB/s +test misc::hard_1K ... bench: 4,678 ns/iter (+/- 38) = 218 MB/s +test misc::hard_1MB ... bench: 4,736,631 ns/iter (+/- 26,227) = 221 MB/s +test misc::hard_32 ... bench: 199 ns/iter (+/- 0) = 160 MB/s +test misc::hard_32K ... bench: 148,282 ns/iter (+/- 1,353) = 220 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 0) = 2833 MB/s +test misc::match_class ... bench: 83 ns/iter (+/- 0) = 975 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 0) = 2700 MB/s +test misc::medium_1K ... bench: 1,147 ns/iter (+/- 10) = 892 MB/s +test misc::medium_1MB ... 
bench: 1,953,230 ns/iter (+/- 10,530) = 536 MB/s +test misc::medium_32 ... bench: 99 ns/iter (+/- 0) = 323 MB/s +test misc::medium_32K ... bench: 54,705 ns/iter (+/- 349) = 598 MB/s +test misc::no_exponential ... bench: 534 ns/iter (+/- 4) = 187 MB/s +test misc::not_literal ... bench: 292 ns/iter (+/- 3) = 174 MB/s +test misc::one_pass_long_prefix ... bench: 179 ns/iter (+/- 1) = 145 MB/s +test misc::one_pass_long_prefix_not ... bench: 180 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_short ... bench: 139 ns/iter (+/- 0) = 122 MB/s +test misc::one_pass_short_not ... bench: 139 ns/iter (+/- 0) = 122 MB/s +test sherlock::before_holmes ... bench: 2,778,686 ns/iter (+/- 8,735) = 214 MB/s +test sherlock::everything_greedy ... bench: 7,884,691 ns/iter (+/- 37,268) = 75 MB/s +test sherlock::everything_greedy_nl ... bench: 5,406,627 ns/iter (+/- 24,707) = 110 MB/s +test sherlock::holmes_cochar_watson ... bench: 262,175 ns/iter (+/- 1,995) = 2269 MB/s +test sherlock::holmes_coword_watson ... bench: 1,299,904 ns/iter (+/- 5,090) = 457 MB/s +test sherlock::ing_suffix ... bench: 3,202,899 ns/iter (+/- 20,810) = 185 MB/s +test sherlock::ing_suffix_limited_space ... bench: 3,367,381 ns/iter (+/- 14,143) = 176 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,725,593 ns/iter (+/- 10,736) = 218 MB/s +test sherlock::name_alt1 ... bench: 42,161 ns/iter (+/- 355) = 14110 MB/s +test sherlock::name_alt2 ... bench: 195,390 ns/iter (+/- 1,112) = 3044 MB/s +test sherlock::name_alt3 ... bench: 1,248,432 ns/iter (+/- 3,244) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 3,371,906 ns/iter (+/- 42,421) = 176 MB/s +test sherlock::name_alt4 ... bench: 296,423 ns/iter (+/- 1,812) = 2007 MB/s +test sherlock::name_alt4_nocase ... bench: 1,753,178 ns/iter (+/- 23,269) = 339 MB/s +test sherlock::name_holmes ... bench: 49,554 ns/iter (+/- 261) = 12005 MB/s +test sherlock::name_holmes_nocase ... bench: 1,347,682 ns/iter (+/- 5,678) = 441 MB/s +test sherlock::name_sherlock ... 
bench: 33,937 ns/iter (+/- 208) = 17530 MB/s +test sherlock::name_sherlock_holmes ... bench: 33,870 ns/iter (+/- 225) = 17565 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,212,233 ns/iter (+/- 5,452) = 490 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,190,590 ns/iter (+/- 3,248) = 499 MB/s +test sherlock::name_whitespace ... bench: 59,434 ns/iter (+/- 253) = 10009 MB/s +test sherlock::no_match_common ... bench: 565,962 ns/iter (+/- 4,601) = 1051 MB/s +test sherlock::no_match_uncommon ... bench: 23,729 ns/iter (+/- 218) = 25071 MB/s +test sherlock::quotes ... bench: 966,904 ns/iter (+/- 7,115) = 615 MB/s +test sherlock::repeated_class_negation ... bench: 121,271,073 ns/iter (+/- 242,789) = 4 MB/s +test sherlock::the_lower ... bench: 778,850 ns/iter (+/- 6,781) = 763 MB/s +test sherlock::the_nocase ... bench: 2,876,190 ns/iter (+/- 8,611) = 206 MB/s +test sherlock::the_upper ... bench: 52,617 ns/iter (+/- 315) = 11306 MB/s +test sherlock::the_whitespace ... bench: 1,982,270 ns/iter (+/- 11,079) = 300 MB/s +test sherlock::word_ending_n ... bench: 76,442,330 ns/iter (+/- 236,690) = 7 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 59 measured + diff --git a/third_party/rust/regex/record/old-bench-log/04/onig b/third_party/rust/regex/record/old-bench-log/04/onig new file mode 100644 index 0000000000..81b40984fe --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/04/onig @@ -0,0 +1,78 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 71 tests +test misc::anchored_literal_long_match ... bench: 66 ns/iter (+/- 1) = 5909 MB/s +test misc::anchored_literal_long_non_match ... bench: 414 ns/iter (+/- 2) = 942 MB/s +test misc::anchored_literal_short_match ... bench: 66 ns/iter (+/- 1) = 393 MB/s +test misc::anchored_literal_short_non_match ... bench: 36 ns/iter (+/- 0) = 722 MB/s +test misc::easy0_1K ... 
bench: 217 ns/iter (+/- 2) = 4843 MB/s +test misc::easy0_1MB ... bench: 130,657 ns/iter (+/- 365) = 8025 MB/s +test misc::easy0_32 ... bench: 84 ns/iter (+/- 1) = 702 MB/s +test misc::easy0_32K ... bench: 4,092 ns/iter (+/- 25) = 8014 MB/s +test misc::easy1_1K ... bench: 3,682 ns/iter (+/- 25) = 283 MB/s +test misc::easy1_1MB ... bench: 3,613,381 ns/iter (+/- 5,960) = 290 MB/s +test misc::easy1_32 ... bench: 237 ns/iter (+/- 2) = 219 MB/s +test misc::easy1_32K ... bench: 113,040 ns/iter (+/- 303) = 290 MB/s +test misc::hard_1K ... bench: 184,299 ns/iter (+/- 2,508) = 5 MB/s +test misc::hard_1MB ... bench: 198,378,531 ns/iter (+/- 150,404) = 5 MB/s +test misc::hard_32 ... bench: 5,765 ns/iter (+/- 26) = 10 MB/s +test misc::hard_32K ... bench: 6,177,362 ns/iter (+/- 21,959) = 5 MB/s +test misc::literal ... bench: 219 ns/iter (+/- 1) = 232 MB/s +test misc::long_needle1 ... bench: 6,978,321 ns/iter (+/- 120,792) = 14 MB/s +test misc::long_needle2 ... bench: 6,981,122 ns/iter (+/- 120,371) = 14 MB/s +test misc::match_class ... bench: 329 ns/iter (+/- 5) = 246 MB/s +test misc::match_class_in_range ... bench: 332 ns/iter (+/- 1) = 243 MB/s +test misc::match_class_unicode ... bench: 1,980 ns/iter (+/- 23) = 81 MB/s +test misc::medium_1K ... bench: 232 ns/iter (+/- 0) = 4534 MB/s +test misc::medium_1MB ... bench: 130,702 ns/iter (+/- 997) = 8022 MB/s +test misc::medium_32 ... bench: 95 ns/iter (+/- 1) = 631 MB/s +test misc::medium_32K ... bench: 4,103 ns/iter (+/- 13) = 7993 MB/s +test misc::not_literal ... bench: 353 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_long_prefix ... bench: 89 ns/iter (+/- 1) = 292 MB/s +test misc::one_pass_long_prefix_not ... bench: 97 ns/iter (+/- 0) = 268 MB/s +test misc::one_pass_short ... bench: 329 ns/iter (+/- 4) = 51 MB/s +test misc::one_pass_short_not ... bench: 324 ns/iter (+/- 4) = 52 MB/s +test misc::reallyhard2_1K ... bench: 563,552 ns/iter (+/- 2,559) = 1 MB/s +test misc::reallyhard_1K ... 
bench: 184,200 ns/iter (+/- 553) = 5 MB/s +test misc::reallyhard_1MB ... bench: 198,336,145 ns/iter (+/- 149,796) = 5 MB/s +test misc::reallyhard_32 ... bench: 5,766 ns/iter (+/- 16) = 10 MB/s +test misc::reallyhard_32K ... bench: 6,174,904 ns/iter (+/- 5,491) = 5 MB/s +test sherlock::before_holmes ... bench: 70,476,093 ns/iter (+/- 271,168) = 8 MB/s +test sherlock::everything_greedy ... bench: 5,175,140 ns/iter (+/- 19,413) = 114 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,379,427 ns/iter (+/- 5,816) = 250 MB/s +test sherlock::ing_suffix ... bench: 28,275,131 ns/iter (+/- 49,569) = 21 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,648,838 ns/iter (+/- 9,247) = 224 MB/s +test sherlock::letters ... bench: 25,940,039 ns/iter (+/- 57,724) = 22 MB/s +test sherlock::letters_lower ... bench: 25,680,050 ns/iter (+/- 48,209) = 23 MB/s +test sherlock::letters_upper ... bench: 11,122,063 ns/iter (+/- 28,302) = 53 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 195,903 ns/iter (+/- 1,162) = 3036 MB/s +test sherlock::name_alt1 ... bench: 2,100,175 ns/iter (+/- 4,251) = 283 MB/s +test sherlock::name_alt2 ... bench: 2,210,122 ns/iter (+/- 7,514) = 269 MB/s +test sherlock::name_alt3 ... bench: 3,025,653 ns/iter (+/- 9,375) = 196 MB/s +test sherlock::name_alt3_nocase ... bench: 39,475,102 ns/iter (+/- 51,488) = 15 MB/s +test sherlock::name_alt4 ... bench: 2,225,952 ns/iter (+/- 7,340) = 267 MB/s +test sherlock::name_alt4_nocase ... bench: 8,227,413 ns/iter (+/- 18,088) = 72 MB/s +test sherlock::name_alt5 ... bench: 2,300,803 ns/iter (+/- 6,325) = 258 MB/s +test sherlock::name_alt5_nocase ... bench: 11,488,783 ns/iter (+/- 28,880) = 51 MB/s +test sherlock::name_holmes ... bench: 400,760 ns/iter (+/- 907) = 1484 MB/s +test sherlock::name_holmes_nocase ... bench: 4,044,850 ns/iter (+/- 11,665) = 147 MB/s +test sherlock::name_sherlock ... bench: 269,021 ns/iter (+/- 791) = 2211 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 196,161 ns/iter (+/- 899) = 3032 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 4,363,621 ns/iter (+/- 5,339) = 136 MB/s +test sherlock::name_sherlock_nocase ... bench: 4,389,375 ns/iter (+/- 11,077) = 135 MB/s +test sherlock::name_whitespace ... bench: 273,691 ns/iter (+/- 957) = 2173 MB/s +test sherlock::no_match_common ... bench: 588,744 ns/iter (+/- 1,732) = 1010 MB/s +test sherlock::no_match_really_common ... bench: 673,335 ns/iter (+/- 1,407) = 883 MB/s +test sherlock::no_match_uncommon ... bench: 578,009 ns/iter (+/- 5,111) = 1029 MB/s +test sherlock::quotes ... bench: 4,066,005 ns/iter (+/- 10,116) = 146 MB/s +test sherlock::repeated_class_negation ... bench: 43,374,733 ns/iter (+/- 48,409) = 13 MB/s +test sherlock::the_lower ... bench: 1,275,300 ns/iter (+/- 5,351) = 466 MB/s +test sherlock::the_nocase ... bench: 5,100,832 ns/iter (+/- 11,024) = 116 MB/s +test sherlock::the_upper ... bench: 816,606 ns/iter (+/- 3,370) = 728 MB/s +test sherlock::the_whitespace ... bench: 2,079,544 ns/iter (+/- 4,585) = 286 MB/s +test sherlock::word_ending_n ... bench: 27,699,175 ns/iter (+/- 58,998) = 21 MB/s +test sherlock::words ... bench: 19,460,356 ns/iter (+/- 29,406) = 30 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 71 measured + diff --git a/third_party/rust/regex/record/old-bench-log/04/pcre1-jit b/third_party/rust/regex/record/old-bench-log/04/pcre1-jit new file mode 100644 index 0000000000..2118d1f59d --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/04/pcre1-jit @@ -0,0 +1,77 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 70 tests +test misc::anchored_literal_long_match ... bench: 32 ns/iter (+/- 0) = 12187 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 1) = 14444 MB/s +test misc::anchored_literal_short_match ... 
bench: 31 ns/iter (+/- 0) = 838 MB/s +test misc::anchored_literal_short_non_match ... bench: 27 ns/iter (+/- 2) = 962 MB/s +test misc::easy0_1K ... bench: 247 ns/iter (+/- 1) = 4255 MB/s +test misc::easy0_1MB ... bench: 193,485 ns/iter (+/- 906) = 5419 MB/s +test misc::easy0_32 ... bench: 55 ns/iter (+/- 1) = 1072 MB/s +test misc::easy0_32K ... bench: 6,057 ns/iter (+/- 19) = 5414 MB/s +test misc::easy1_1K ... bench: 604 ns/iter (+/- 3) = 1728 MB/s +test misc::easy1_1MB ... bench: 553,893 ns/iter (+/- 1,299) = 1893 MB/s +test misc::easy1_32 ... bench: 81 ns/iter (+/- 1) = 641 MB/s +test misc::easy1_32K ... bench: 17,335 ns/iter (+/- 33) = 1891 MB/s +test misc::hard_1K ... bench: 56,956 ns/iter (+/- 148) = 18 MB/s +test misc::hard_1MB ... bench: 63,576,485 ns/iter (+/- 93,278) = 16 MB/s +test misc::hard_32 ... bench: 1,744 ns/iter (+/- 10) = 33 MB/s +test misc::hard_32K ... bench: 1,931,799 ns/iter (+/- 7,752) = 16 MB/s +test misc::literal ... bench: 73 ns/iter (+/- 1) = 698 MB/s +test misc::long_needle1 ... bench: 532,256 ns/iter (+/- 4,633) = 187 MB/s +test misc::long_needle2 ... bench: 532,131 ns/iter (+/- 3,771) = 187 MB/s +test misc::match_class ... bench: 120 ns/iter (+/- 0) = 675 MB/s +test misc::match_class_in_range ... bench: 119 ns/iter (+/- 0) = 680 MB/s +test misc::match_class_unicode ... bench: 456 ns/iter (+/- 2) = 353 MB/s +test misc::medium_1K ... bench: 260 ns/iter (+/- 1) = 4046 MB/s +test misc::medium_1MB ... bench: 206,175 ns/iter (+/- 983) = 5085 MB/s +test misc::medium_32 ... bench: 58 ns/iter (+/- 0) = 1034 MB/s +test misc::medium_32K ... bench: 6,443 ns/iter (+/- 26) = 5090 MB/s +test misc::not_literal ... bench: 216 ns/iter (+/- 0) = 236 MB/s +test misc::one_pass_long_prefix ... bench: 31 ns/iter (+/- 0) = 838 MB/s +test misc::one_pass_long_prefix_not ... bench: 31 ns/iter (+/- 0) = 838 MB/s +test misc::one_pass_short ... bench: 59 ns/iter (+/- 0) = 288 MB/s +test misc::one_pass_short_not ... 
bench: 63 ns/iter (+/- 2) = 269 MB/s +test misc::reallyhard2_1K ... bench: 96,070 ns/iter (+/- 238) = 10 MB/s +test misc::reallyhard_1K ... bench: 60,783 ns/iter (+/- 170) = 17 MB/s +test misc::reallyhard_1MB ... bench: 60,899,076 ns/iter (+/- 483,661) = 17 MB/s +test misc::reallyhard_32 ... bench: 1,822 ns/iter (+/- 58) = 32 MB/s +test misc::reallyhard_32K ... bench: 1,809,770 ns/iter (+/- 45,348) = 18 MB/s +test sherlock::before_holmes ... bench: 14,513,309 ns/iter (+/- 146,332) = 40 MB/s +test sherlock::holmes_cochar_watson ... bench: 543,738 ns/iter (+/- 4,549) = 1094 MB/s +test sherlock::ing_suffix ... bench: 5,561,653 ns/iter (+/- 44,720) = 106 MB/s +test sherlock::ing_suffix_limited_space ... bench: 6,116,112 ns/iter (+/- 91,799) = 97 MB/s +test sherlock::letters ... bench: 15,633,185 ns/iter (+/- 313,036) = 38 MB/s +test sherlock::letters_lower ... bench: 15,228,423 ns/iter (+/- 290,879) = 39 MB/s +test sherlock::letters_upper ... bench: 3,279,472 ns/iter (+/- 48,073) = 181 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 192,935 ns/iter (+/- 1,405) = 3083 MB/s +test sherlock::name_alt1 ... bench: 452,708 ns/iter (+/- 4,728) = 1314 MB/s +test sherlock::name_alt2 ... bench: 477,092 ns/iter (+/- 6,192) = 1246 MB/s +test sherlock::name_alt3 ... bench: 959,514 ns/iter (+/- 25,214) = 620 MB/s +test sherlock::name_alt3_nocase ... bench: 3,478,546 ns/iter (+/- 52,300) = 171 MB/s +test sherlock::name_alt4 ... bench: 947,187 ns/iter (+/- 9,985) = 628 MB/s +test sherlock::name_alt4_nocase ... bench: 1,852,289 ns/iter (+/- 30,616) = 321 MB/s +test sherlock::name_alt5 ... bench: 655,616 ns/iter (+/- 9,327) = 907 MB/s +test sherlock::name_alt5_nocase ... bench: 1,957,627 ns/iter (+/- 47,271) = 303 MB/s +test sherlock::name_holmes ... bench: 383,813 ns/iter (+/- 1,185) = 1550 MB/s +test sherlock::name_holmes_nocase ... bench: 478,335 ns/iter (+/- 4,851) = 1243 MB/s +test sherlock::name_sherlock ... 
bench: 263,611 ns/iter (+/- 875) = 2256 MB/s +test sherlock::name_sherlock_holmes ... bench: 193,687 ns/iter (+/- 1,070) = 3071 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,258,447 ns/iter (+/- 32,369) = 472 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,330,069 ns/iter (+/- 36,657) = 447 MB/s +test sherlock::name_whitespace ... bench: 264,340 ns/iter (+/- 2,723) = 2250 MB/s +test sherlock::no_match_common ... bench: 589,309 ns/iter (+/- 5,038) = 1009 MB/s +test sherlock::no_match_really_common ... bench: 683,909 ns/iter (+/- 4,987) = 869 MB/s +test sherlock::no_match_uncommon ... bench: 578,309 ns/iter (+/- 2,831) = 1028 MB/s +test sherlock::quotes ... bench: 1,184,492 ns/iter (+/- 27,247) = 502 MB/s +test sherlock::repeated_class_negation ... bench: 7,208,342 ns/iter (+/- 17,978) = 82 MB/s +test sherlock::the_lower ... bench: 1,001,754 ns/iter (+/- 6,215) = 593 MB/s +test sherlock::the_nocase ... bench: 1,043,260 ns/iter (+/- 10,217) = 570 MB/s +test sherlock::the_upper ... bench: 753,058 ns/iter (+/- 1,640) = 790 MB/s +test sherlock::the_whitespace ... bench: 1,195,227 ns/iter (+/- 9,524) = 497 MB/s +test sherlock::word_ending_n ... bench: 11,767,448 ns/iter (+/- 15,460) = 50 MB/s +test sherlock::words ... bench: 7,551,361 ns/iter (+/- 25,566) = 78 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 70 measured + diff --git a/third_party/rust/regex/record/old-bench-log/04/pcre2-jit b/third_party/rust/regex/record/old-bench-log/04/pcre2-jit new file mode 100644 index 0000000000..9a110b5ee3 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/04/pcre2-jit @@ -0,0 +1,77 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 70 tests +test misc::anchored_literal_long_match ... bench: 22 ns/iter (+/- 0) = 17727 MB/s +test misc::anchored_literal_long_non_match ... 
bench: 14 ns/iter (+/- 0) = 27857 MB/s +test misc::anchored_literal_short_match ... bench: 21 ns/iter (+/- 0) = 1238 MB/s +test misc::anchored_literal_short_non_match ... bench: 14 ns/iter (+/- 1) = 1857 MB/s +test misc::easy0_1K ... bench: 235 ns/iter (+/- 2) = 4472 MB/s +test misc::easy0_1MB ... bench: 193,652 ns/iter (+/- 524) = 5414 MB/s +test misc::easy0_32 ... bench: 43 ns/iter (+/- 0) = 1372 MB/s +test misc::easy0_32K ... bench: 6,024 ns/iter (+/- 12) = 5444 MB/s +test misc::easy1_1K ... bench: 235 ns/iter (+/- 4) = 4442 MB/s +test misc::easy1_1MB ... bench: 193,685 ns/iter (+/- 617) = 5413 MB/s +test misc::easy1_32 ... bench: 45 ns/iter (+/- 0) = 1155 MB/s +test misc::easy1_32K ... bench: 6,018 ns/iter (+/- 9) = 5448 MB/s +test misc::hard_1K ... bench: 1,880 ns/iter (+/- 7) = 559 MB/s +test misc::hard_1MB ... bench: 1,283,101 ns/iter (+/- 4,420) = 817 MB/s +test misc::hard_32 ... bench: 119 ns/iter (+/- 2) = 495 MB/s +test misc::hard_32K ... bench: 39,919 ns/iter (+/- 95) = 821 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 1) = 2833 MB/s +test misc::long_needle1 ... bench: 513,050 ns/iter (+/- 2,267) = 194 MB/s +test misc::long_needle2 ... bench: 518,009 ns/iter (+/- 3,066) = 193 MB/s +test misc::match_class ... bench: 106 ns/iter (+/- 1) = 764 MB/s +test misc::match_class_in_range ... bench: 24 ns/iter (+/- 1) = 3375 MB/s +test misc::match_class_unicode ... bench: 370 ns/iter (+/- 2) = 435 MB/s +test misc::medium_1K ... bench: 237 ns/iter (+/- 0) = 4438 MB/s +test misc::medium_1MB ... bench: 193,478 ns/iter (+/- 540) = 5419 MB/s +test misc::medium_32 ... bench: 46 ns/iter (+/- 0) = 1304 MB/s +test misc::medium_32K ... bench: 6,024 ns/iter (+/- 15) = 5444 MB/s +test misc::not_literal ... bench: 274 ns/iter (+/- 1) = 186 MB/s +test misc::one_pass_long_prefix ... bench: 19 ns/iter (+/- 1) = 1368 MB/s +test misc::one_pass_long_prefix_not ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::one_pass_short ... 
bench: 47 ns/iter (+/- 0) = 361 MB/s +test misc::one_pass_short_not ... bench: 50 ns/iter (+/- 2) = 340 MB/s +test misc::reallyhard2_1K ... bench: 4,959 ns/iter (+/- 34) = 209 MB/s +test misc::reallyhard_1K ... bench: 2,145 ns/iter (+/- 17) = 489 MB/s +test misc::reallyhard_1MB ... bench: 1,292,683 ns/iter (+/- 3,342) = 811 MB/s +test misc::reallyhard_32 ... bench: 124 ns/iter (+/- 4) = 475 MB/s +test misc::reallyhard_32K ... bench: 47,263 ns/iter (+/- 173) = 693 MB/s +test sherlock::before_holmes ... bench: 4,706,445 ns/iter (+/- 23,483) = 126 MB/s +test sherlock::holmes_cochar_watson ... bench: 488,613 ns/iter (+/- 2,921) = 1217 MB/s +test sherlock::ing_suffix ... bench: 1,886,092 ns/iter (+/- 9,951) = 315 MB/s +test sherlock::ing_suffix_limited_space ... bench: 5,091,401 ns/iter (+/- 21,315) = 116 MB/s +test sherlock::letters ... bench: 10,082,811 ns/iter (+/- 41,989) = 59 MB/s +test sherlock::letters_lower ... bench: 9,640,481 ns/iter (+/- 46,499) = 61 MB/s +test sherlock::letters_upper ... bench: 1,772,105 ns/iter (+/- 8,833) = 335 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 192,075 ns/iter (+/- 1,043) = 3097 MB/s +test sherlock::name_alt1 ... bench: 447,382 ns/iter (+/- 2,142) = 1329 MB/s +test sherlock::name_alt2 ... bench: 447,421 ns/iter (+/- 2,077) = 1329 MB/s +test sherlock::name_alt3 ... bench: 963,775 ns/iter (+/- 1,684) = 617 MB/s +test sherlock::name_alt3_nocase ... bench: 3,152,920 ns/iter (+/- 5,757) = 188 MB/s +test sherlock::name_alt4 ... bench: 80,204 ns/iter (+/- 379) = 7417 MB/s +test sherlock::name_alt4_nocase ... bench: 1,665,405 ns/iter (+/- 7,134) = 357 MB/s +test sherlock::name_alt5 ... bench: 649,701 ns/iter (+/- 1,722) = 915 MB/s +test sherlock::name_alt5_nocase ... bench: 1,773,323 ns/iter (+/- 9,648) = 335 MB/s +test sherlock::name_holmes ... bench: 377,003 ns/iter (+/- 3,390) = 1578 MB/s +test sherlock::name_holmes_nocase ... bench: 472,947 ns/iter (+/- 1,011) = 1257 MB/s +test sherlock::name_sherlock ... 
bench: 262,237 ns/iter (+/- 1,268) = 2268 MB/s +test sherlock::name_sherlock_holmes ... bench: 192,306 ns/iter (+/- 520) = 3093 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,318,573 ns/iter (+/- 1,462) = 451 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,110,629 ns/iter (+/- 13,163) = 535 MB/s +test sherlock::name_whitespace ... bench: 262,889 ns/iter (+/- 637) = 2263 MB/s +test sherlock::no_match_common ... bench: 388,869 ns/iter (+/- 1,512) = 1529 MB/s +test sherlock::no_match_really_common ... bench: 422,058 ns/iter (+/- 1,788) = 1409 MB/s +test sherlock::no_match_uncommon ... bench: 30,594 ns/iter (+/- 166) = 19446 MB/s +test sherlock::quotes ... bench: 569,628 ns/iter (+/- 2,052) = 1044 MB/s +test sherlock::repeated_class_negation ... bench: 6,410,128 ns/iter (+/- 19,866) = 92 MB/s +test sherlock::the_lower ... bench: 648,366 ns/iter (+/- 5,142) = 917 MB/s +test sherlock::the_nocase ... bench: 694,035 ns/iter (+/- 4,844) = 857 MB/s +test sherlock::the_upper ... bench: 54,007 ns/iter (+/- 486) = 11015 MB/s +test sherlock::the_whitespace ... bench: 850,430 ns/iter (+/- 9,641) = 699 MB/s +test sherlock::word_ending_n ... bench: 5,768,961 ns/iter (+/- 20,924) = 103 MB/s +test sherlock::words ... bench: 5,866,550 ns/iter (+/- 34,451) = 101 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 70 measured + diff --git a/third_party/rust/regex/record/old-bench-log/04/re2 b/third_party/rust/regex/record/old-bench-log/04/re2 new file mode 100644 index 0000000000..31a6e6d488 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/04/re2 @@ -0,0 +1,79 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 72 tests +test misc::anchored_literal_long_match ... bench: 119 ns/iter (+/- 2) = 3277 MB/s +test misc::anchored_literal_long_non_match ... bench: 45 ns/iter (+/- 0) = 8666 MB/s +test misc::anchored_literal_short_match ... 
bench: 120 ns/iter (+/- 1) = 216 MB/s +test misc::anchored_literal_short_non_match ... bench: 45 ns/iter (+/- 0) = 577 MB/s +test misc::easy0_1K ... bench: 187 ns/iter (+/- 0) = 5620 MB/s +test misc::easy0_1MB ... bench: 39,573 ns/iter (+/- 600) = 26497 MB/s +test misc::easy0_32 ... bench: 165 ns/iter (+/- 1) = 357 MB/s +test misc::easy0_32K ... bench: 971 ns/iter (+/- 20) = 33774 MB/s +test misc::easy1_1K ... bench: 175 ns/iter (+/- 1) = 5965 MB/s +test misc::easy1_1MB ... bench: 39,451 ns/iter (+/- 183) = 26579 MB/s +test misc::easy1_32 ... bench: 153 ns/iter (+/- 1) = 339 MB/s +test misc::easy1_32K ... bench: 942 ns/iter (+/- 24) = 34806 MB/s +test misc::hard_1K ... bench: 2,362 ns/iter (+/- 11) = 444 MB/s +test misc::hard_1MB ... bench: 2,386,627 ns/iter (+/- 12,925) = 439 MB/s +test misc::hard_32 ... bench: 228 ns/iter (+/- 1) = 258 MB/s +test misc::hard_32K ... bench: 74,482 ns/iter (+/- 190) = 440 MB/s +test misc::literal ... bench: 120 ns/iter (+/- 0) = 425 MB/s +test misc::long_needle1 ... bench: 184,777 ns/iter (+/- 1,644) = 541 MB/s +test misc::long_needle2 ... bench: 184,685 ns/iter (+/- 289) = 541 MB/s +test misc::match_class ... bench: 267 ns/iter (+/- 1) = 303 MB/s +test misc::match_class_in_range ... bench: 267 ns/iter (+/- 1) = 303 MB/s +test misc::match_class_unicode ... bench: 491 ns/iter (+/- 3) = 327 MB/s +test misc::medium_1K ... bench: 2,065 ns/iter (+/- 4) = 509 MB/s +test misc::medium_1MB ... bench: 1,938,951 ns/iter (+/- 11,278) = 540 MB/s +test misc::medium_32 ... bench: 302 ns/iter (+/- 149) = 198 MB/s +test misc::medium_32K ... bench: 60,766 ns/iter (+/- 1,018) = 539 MB/s +test misc::not_literal ... bench: 203 ns/iter (+/- 2) = 251 MB/s +test misc::one_pass_long_prefix ... bench: 119 ns/iter (+/- 1) = 218 MB/s +test misc::one_pass_long_prefix_not ... bench: 161 ns/iter (+/- 0) = 161 MB/s +test misc::one_pass_short ... bench: 143 ns/iter (+/- 0) = 118 MB/s +test misc::one_pass_short_not ... 
bench: 145 ns/iter (+/- 1) = 117 MB/s +test misc::reallyhard2_1K ... bench: 2,030 ns/iter (+/- 22) = 512 MB/s +test misc::reallyhard_1K ... bench: 2,362 ns/iter (+/- 18) = 444 MB/s +test misc::reallyhard_1MB ... bench: 2,386,760 ns/iter (+/- 22,075) = 439 MB/s +test misc::reallyhard_32 ... bench: 230 ns/iter (+/- 2) = 256 MB/s +test misc::reallyhard_32K ... bench: 74,506 ns/iter (+/- 740) = 440 MB/s +test sherlock::before_holmes ... bench: 1,446,270 ns/iter (+/- 5,771) = 411 MB/s +test sherlock::everything_greedy ... bench: 9,111,570 ns/iter (+/- 54,091) = 65 MB/s +test sherlock::everything_greedy_nl ... bench: 2,489,649 ns/iter (+/- 23,310) = 238 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,176,642 ns/iter (+/- 2,181) = 505 MB/s +test sherlock::holmes_coword_watson ... bench: 1,389,000 ns/iter (+/- 258,245) = 428 MB/s +test sherlock::ing_suffix ... bench: 3,050,918 ns/iter (+/- 16,854) = 195 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,954,264 ns/iter (+/- 4,656) = 304 MB/s +test sherlock::letters ... bench: 111,162,180 ns/iter (+/- 108,719) = 5 MB/s +test sherlock::letters_lower ... bench: 106,751,460 ns/iter (+/- 414,985) = 5 MB/s +test sherlock::letters_upper ... bench: 4,705,474 ns/iter (+/- 10,913) = 126 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,539,425 ns/iter (+/- 5,440) = 234 MB/s +test sherlock::name_alt1 ... bench: 77,719 ns/iter (+/- 275) = 7654 MB/s +test sherlock::name_alt2 ... bench: 1,319,600 ns/iter (+/- 2,771) = 450 MB/s +test sherlock::name_alt3 ... bench: 1,433,629 ns/iter (+/- 2,943) = 414 MB/s +test sherlock::name_alt3_nocase ... bench: 2,748,137 ns/iter (+/- 4,343) = 216 MB/s +test sherlock::name_alt4 ... bench: 1,354,024 ns/iter (+/- 2,312) = 439 MB/s +test sherlock::name_alt4_nocase ... bench: 2,018,381 ns/iter (+/- 2,442) = 294 MB/s +test sherlock::name_alt5 ... bench: 1,348,150 ns/iter (+/- 3,870) = 441 MB/s +test sherlock::name_alt5_nocase ... 
bench: 2,114,276 ns/iter (+/- 3,365) = 281 MB/s +test sherlock::name_holmes ... bench: 168,436 ns/iter (+/- 1,503) = 3532 MB/s +test sherlock::name_holmes_nocase ... bench: 1,645,658 ns/iter (+/- 3,816) = 361 MB/s +test sherlock::name_sherlock ... bench: 59,010 ns/iter (+/- 380) = 10081 MB/s +test sherlock::name_sherlock_holmes ... bench: 60,467 ns/iter (+/- 179) = 9838 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,539,137 ns/iter (+/- 5,506) = 386 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,535,058 ns/iter (+/- 3,352) = 387 MB/s +test sherlock::name_whitespace ... bench: 62,700 ns/iter (+/- 440) = 9488 MB/s +test sherlock::no_match_common ... bench: 439,560 ns/iter (+/- 1,545) = 1353 MB/s +test sherlock::no_match_really_common ... bench: 439,333 ns/iter (+/- 1,020) = 1354 MB/s +test sherlock::no_match_uncommon ... bench: 23,882 ns/iter (+/- 134) = 24911 MB/s +test sherlock::quotes ... bench: 1,396,564 ns/iter (+/- 2,785) = 425 MB/s +test sherlock::the_lower ... bench: 2,478,251 ns/iter (+/- 5,859) = 240 MB/s +test sherlock::the_nocase ... bench: 3,708,713 ns/iter (+/- 6,919) = 160 MB/s +test sherlock::the_upper ... bench: 232,490 ns/iter (+/- 4,478) = 2558 MB/s +test sherlock::the_whitespace ... bench: 2,286,399 ns/iter (+/- 5,006) = 260 MB/s +test sherlock::word_ending_n ... bench: 3,295,919 ns/iter (+/- 27,810) = 180 MB/s +test sherlock::words ... bench: 30,375,810 ns/iter (+/- 37,415) = 19 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 72 measured + diff --git a/third_party/rust/regex/record/old-bench-log/04/rust b/third_party/rust/regex/record/old-bench-log/04/rust new file mode 100644 index 0000000000..01e6f440d2 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/04/rust @@ -0,0 +1,81 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 74 tests +test misc::anchored_literal_long_match ... 
bench: 24 ns/iter (+/- 1) = 16250 MB/s +test misc::anchored_literal_long_non_match ... bench: 21 ns/iter (+/- 0) = 18571 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 1) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 21 ns/iter (+/- 0) = 1238 MB/s +test misc::easy0_1K ... bench: 18 ns/iter (+/- 6) = 58388 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 4) = 49933476 MB/s +test misc::easy0_32 ... bench: 17 ns/iter (+/- 0) = 3470 MB/s +test misc::easy0_32K ... bench: 18 ns/iter (+/- 9) = 1821944 MB/s +test misc::easy1_1K ... bench: 52 ns/iter (+/- 0) = 20076 MB/s +test misc::easy1_1MB ... bench: 55 ns/iter (+/- 0) = 19065381 MB/s +test misc::easy1_32 ... bench: 50 ns/iter (+/- 0) = 1040 MB/s +test misc::easy1_32K ... bench: 50 ns/iter (+/- 0) = 655760 MB/s +test misc::hard_1K ... bench: 66 ns/iter (+/- 0) = 15924 MB/s +test misc::hard_1MB ... bench: 70 ns/iter (+/- 1) = 14980042 MB/s +test misc::hard_32 ... bench: 62 ns/iter (+/- 1) = 951 MB/s +test misc::hard_32K ... bench: 62 ns/iter (+/- 1) = 528951 MB/s +test misc::literal ... bench: 17 ns/iter (+/- 0) = 3000 MB/s +test misc::long_needle1 ... bench: 2,359 ns/iter (+/- 37) = 42391 MB/s +test misc::long_needle2 ... bench: 634,783 ns/iter (+/- 4,313) = 157 MB/s +test misc::match_class ... bench: 82 ns/iter (+/- 1) = 987 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 15) = 2700 MB/s +test misc::match_class_unicode ... bench: 317 ns/iter (+/- 2) = 507 MB/s +test misc::medium_1K ... bench: 18 ns/iter (+/- 0) = 58444 MB/s +test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s +test misc::medium_32 ... bench: 18 ns/iter (+/- 0) = 3333 MB/s +test misc::medium_32K ... bench: 18 ns/iter (+/- 0) = 1822000 MB/s +test misc::not_literal ... bench: 115 ns/iter (+/- 0) = 443 MB/s +test misc::one_pass_long_prefix ... bench: 69 ns/iter (+/- 1) = 376 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 68 ns/iter (+/- 0) = 382 MB/s +test misc::one_pass_short ... bench: 50 ns/iter (+/- 0) = 340 MB/s +test misc::one_pass_short_not ... bench: 52 ns/iter (+/- 0) = 326 MB/s +test misc::reallyhard2_1K ... bench: 1,939 ns/iter (+/- 12) = 536 MB/s +test misc::reallyhard_1K ... bench: 1,964 ns/iter (+/- 7) = 535 MB/s +test misc::reallyhard_1MB ... bench: 1,934,967 ns/iter (+/- 4,626) = 541 MB/s +test misc::reallyhard_32 ... bench: 130 ns/iter (+/- 0) = 453 MB/s +test misc::reallyhard_32K ... bench: 60,581 ns/iter (+/- 176) = 541 MB/s +test misc::replace_all ... bench: 142 ns/iter (+/- 1) +test sherlock::before_holmes ... bench: 1,127,747 ns/iter (+/- 2,052) = 527 MB/s +test sherlock::everything_greedy ... bench: 2,598,664 ns/iter (+/- 6,137) = 228 MB/s +test sherlock::everything_greedy_nl ... bench: 1,202,183 ns/iter (+/- 1,965) = 494 MB/s +test sherlock::holmes_cochar_watson ... bench: 220,378 ns/iter (+/- 1,229) = 2699 MB/s +test sherlock::holmes_coword_watson ... bench: 631,731 ns/iter (+/- 2,071) = 941 MB/s +test sherlock::ing_suffix ... bench: 1,344,980 ns/iter (+/- 1,799) = 442 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,286,425 ns/iter (+/- 2,965) = 462 MB/s +test sherlock::letters ... bench: 24,356,951 ns/iter (+/- 47,224) = 24 MB/s +test sherlock::letters_lower ... bench: 23,816,732 ns/iter (+/- 44,203) = 24 MB/s +test sherlock::letters_upper ... bench: 2,051,873 ns/iter (+/- 8,712) = 289 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 1,102,534 ns/iter (+/- 6,071) = 539 MB/s +test sherlock::name_alt1 ... bench: 36,474 ns/iter (+/- 308) = 16311 MB/s +test sherlock::name_alt2 ... bench: 185,668 ns/iter (+/- 1,023) = 3204 MB/s +test sherlock::name_alt3 ... bench: 1,152,554 ns/iter (+/- 1,991) = 516 MB/s +test sherlock::name_alt3_nocase ... bench: 1,254,885 ns/iter (+/- 5,387) = 474 MB/s +test sherlock::name_alt4 ... bench: 228,721 ns/iter (+/- 854) = 2601 MB/s +test sherlock::name_alt4_nocase ... 
bench: 1,223,457 ns/iter (+/- 2,307) = 486 MB/s +test sherlock::name_alt5 ... bench: 317,372 ns/iter (+/- 951) = 1874 MB/s +test sherlock::name_alt5_nocase ... bench: 1,224,434 ns/iter (+/- 3,886) = 485 MB/s +test sherlock::name_holmes ... bench: 42,905 ns/iter (+/- 217) = 13866 MB/s +test sherlock::name_holmes_nocase ... bench: 1,080,290 ns/iter (+/- 5,686) = 550 MB/s +test sherlock::name_sherlock ... bench: 70,041 ns/iter (+/- 444) = 8494 MB/s +test sherlock::name_sherlock_holmes ... bench: 36,092 ns/iter (+/- 189) = 16483 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,156,696 ns/iter (+/- 3,922) = 514 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,156,088 ns/iter (+/- 2,453) = 514 MB/s +test sherlock::name_whitespace ... bench: 79,560 ns/iter (+/- 426) = 7477 MB/s +test sherlock::no_match_common ... bench: 25,940 ns/iter (+/- 119) = 22934 MB/s +test sherlock::no_match_really_common ... bench: 364,911 ns/iter (+/- 1,302) = 1630 MB/s +test sherlock::no_match_uncommon ... bench: 25,851 ns/iter (+/- 112) = 23013 MB/s +test sherlock::quotes ... bench: 561,575 ns/iter (+/- 2,083) = 1059 MB/s +test sherlock::repeated_class_negation ... bench: 88,961,089 ns/iter (+/- 132,661) = 6 MB/s +test sherlock::the_lower ... bench: 609,891 ns/iter (+/- 1,451) = 975 MB/s +test sherlock::the_nocase ... bench: 1,622,541 ns/iter (+/- 6,851) = 366 MB/s +test sherlock::the_upper ... bench: 48,810 ns/iter (+/- 245) = 12188 MB/s +test sherlock::the_whitespace ... bench: 1,192,755 ns/iter (+/- 4,168) = 498 MB/s +test sherlock::word_ending_n ... bench: 1,991,440 ns/iter (+/- 7,313) = 298 MB/s +test sherlock::words ... bench: 9,688,357 ns/iter (+/- 17,267) = 61 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 74 measured + diff --git a/third_party/rust/regex/record/old-bench-log/04/tcl b/third_party/rust/regex/record/old-bench-log/04/tcl new file mode 100644 index 0000000000..934bf6e048 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/04/tcl @@ -0,0 +1,72 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 65 tests +test misc::anchored_literal_long_match ... bench: 925 ns/iter (+/- 16) = 421 MB/s +test misc::anchored_literal_long_non_match ... bench: 144 ns/iter (+/- 2) = 2708 MB/s +test misc::anchored_literal_short_match ... bench: 920 ns/iter (+/- 11) = 28 MB/s +test misc::anchored_literal_short_non_match ... bench: 144 ns/iter (+/- 1) = 180 MB/s +test misc::easy0_1K ... bench: 14,228 ns/iter (+/- 204) = 73 MB/s +test misc::easy0_1MB ... bench: 3,728,677 ns/iter (+/- 4,564) = 281 MB/s +test misc::easy0_32 ... bench: 10,023 ns/iter (+/- 156) = 5 MB/s +test misc::easy0_32K ... bench: 125,851 ns/iter (+/- 287) = 260 MB/s +test misc::easy1_1K ... bench: 8,797 ns/iter (+/- 90) = 118 MB/s +test misc::easy1_1MB ... bench: 3,722,675 ns/iter (+/- 4,912) = 281 MB/s +test misc::easy1_32 ... bench: 5,189 ns/iter (+/- 77) = 10 MB/s +test misc::easy1_32K ... bench: 121,106 ns/iter (+/- 694) = 270 MB/s +test misc::hard_1K ... bench: 17,111 ns/iter (+/- 251) = 61 MB/s +test misc::hard_1MB ... bench: 3,743,313 ns/iter (+/- 7,634) = 280 MB/s +test misc::hard_32 ... bench: 13,489 ns/iter (+/- 220) = 4 MB/s +test misc::hard_32K ... bench: 129,358 ns/iter (+/- 257) = 253 MB/s +test misc::literal ... bench: 629 ns/iter (+/- 5) = 81 MB/s +test misc::long_needle1 ... bench: 21,495,182 ns/iter (+/- 41,993) = 4 MB/s +test misc::long_needle2 ... bench: 21,501,034 ns/iter (+/- 34,033) = 4 MB/s +test misc::match_class ... bench: 732 ns/iter (+/- 3) = 110 MB/s +test misc::match_class_in_range ... 
bench: 736 ns/iter (+/- 6) = 110 MB/s +test misc::medium_1K ... bench: 14,433 ns/iter (+/- 49) = 72 MB/s +test misc::medium_1MB ... bench: 3,729,861 ns/iter (+/- 4,198) = 281 MB/s +test misc::medium_32 ... bench: 10,756 ns/iter (+/- 75) = 5 MB/s +test misc::medium_32K ... bench: 126,593 ns/iter (+/- 169) = 259 MB/s +test misc::not_literal ... bench: 2,350 ns/iter (+/- 13) = 21 MB/s +test misc::one_pass_long_prefix ... bench: 9,183 ns/iter (+/- 198) = 2 MB/s +test misc::one_pass_long_prefix_not ... bench: 8,470 ns/iter (+/- 110) = 3 MB/s +test misc::one_pass_short ... bench: 956 ns/iter (+/- 4) = 17 MB/s +test misc::one_pass_short_not ... bench: 1,042 ns/iter (+/- 13) = 16 MB/s +test misc::reallyhard2_1K ... bench: 129,563 ns/iter (+/- 336) = 8 MB/s +test misc::reallyhard_1K ... bench: 16,656 ns/iter (+/- 152) = 63 MB/s +test misc::reallyhard_1MB ... bench: 3,744,123 ns/iter (+/- 4,556) = 280 MB/s +test misc::reallyhard_32 ... bench: 12,910 ns/iter (+/- 112) = 4 MB/s +test misc::reallyhard_32K ... bench: 129,293 ns/iter (+/- 301) = 253 MB/s +test sherlock::before_holmes ... bench: 3,593,560 ns/iter (+/- 8,574) = 165 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,906,271 ns/iter (+/- 5,153) = 204 MB/s +test sherlock::ing_suffix ... bench: 7,016,213 ns/iter (+/- 30,321) = 84 MB/s +test sherlock::ing_suffix_limited_space ... bench: 24,592,817 ns/iter (+/- 78,720) = 24 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,457,984 ns/iter (+/- 3,932) = 242 MB/s +test sherlock::name_alt1 ... bench: 2,569,156 ns/iter (+/- 5,789) = 231 MB/s +test sherlock::name_alt2 ... bench: 3,686,183 ns/iter (+/- 13,550) = 161 MB/s +test sherlock::name_alt3 ... bench: 6,715,311 ns/iter (+/- 15,208) = 88 MB/s +test sherlock::name_alt3_nocase ... bench: 9,702,060 ns/iter (+/- 32,628) = 61 MB/s +test sherlock::name_alt4 ... bench: 3,834,029 ns/iter (+/- 3,955) = 155 MB/s +test sherlock::name_alt4_nocase ... 
bench: 4,762,730 ns/iter (+/- 751,201) = 124 MB/s +test sherlock::name_alt5 ... bench: 4,582,303 ns/iter (+/- 8,073) = 129 MB/s +test sherlock::name_alt5_nocase ... bench: 5,583,652 ns/iter (+/- 14,573) = 106 MB/s +test sherlock::name_holmes ... bench: 2,968,764 ns/iter (+/- 6,198) = 200 MB/s +test sherlock::name_holmes_nocase ... bench: 3,066,080 ns/iter (+/- 8,986) = 194 MB/s +test sherlock::name_sherlock ... bench: 2,372,708 ns/iter (+/- 3,272) = 250 MB/s +test sherlock::name_sherlock_holmes ... bench: 2,607,914 ns/iter (+/- 3,361) = 228 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 2,641,260 ns/iter (+/- 9,409) = 225 MB/s +test sherlock::name_sherlock_nocase ... bench: 2,420,591 ns/iter (+/- 11,053) = 245 MB/s +test sherlock::name_whitespace ... bench: 2,592,553 ns/iter (+/- 3,476) = 229 MB/s +test sherlock::no_match_common ... bench: 2,114,367 ns/iter (+/- 1,665) = 281 MB/s +test sherlock::no_match_really_common ... bench: 2,114,835 ns/iter (+/- 2,491) = 281 MB/s +test sherlock::no_match_uncommon ... bench: 2,105,274 ns/iter (+/- 1,657) = 282 MB/s +test sherlock::quotes ... bench: 10,978,890 ns/iter (+/- 30,645) = 54 MB/s +test sherlock::repeated_class_negation ... bench: 69,836,043 ns/iter (+/- 117,415) = 8 MB/s +test sherlock::the_lower ... bench: 9,343,518 ns/iter (+/- 29,387) = 63 MB/s +test sherlock::the_nocase ... bench: 9,690,676 ns/iter (+/- 42,585) = 61 MB/s +test sherlock::the_upper ... bench: 2,780,398 ns/iter (+/- 6,949) = 213 MB/s +test sherlock::the_whitespace ... bench: 11,562,612 ns/iter (+/- 78,789) = 51 MB/s +test sherlock::words ... bench: 64,139,234 ns/iter (+/- 491,422) = 9 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 65 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/onig b/third_party/rust/regex/record/old-bench-log/05/onig new file mode 100644 index 0000000000..373b14951e --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/onig @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 158 ns/iter (+/- 4) = 2468 MB/s +test misc::anchored_literal_long_non_match ... bench: 495 ns/iter (+/- 7) = 787 MB/s +test misc::anchored_literal_short_match ... bench: 160 ns/iter (+/- 3) = 162 MB/s +test misc::anchored_literal_short_non_match ... bench: 44 ns/iter (+/- 4) = 590 MB/s +test misc::easy0_1K ... bench: 315 ns/iter (+/- 15) = 3336 MB/s +test misc::easy0_1MB ... bench: 136,864 ns/iter (+/- 5,984) = 7661 MB/s +test misc::easy0_32 ... bench: 163 ns/iter (+/- 11) = 361 MB/s +test misc::easy0_32K ... bench: 4,562 ns/iter (+/- 255) = 7188 MB/s +test misc::easy1_1K ... bench: 3,947 ns/iter (+/- 199) = 264 MB/s +test misc::easy1_1MB ... bench: 3,920,564 ns/iter (+/- 122,902) = 267 MB/s +test misc::easy1_32 ... bench: 321 ns/iter (+/- 20) = 161 MB/s +test misc::easy1_32K ... bench: 121,449 ns/iter (+/- 4,899) = 269 MB/s +test misc::hard_1K ... bench: 125,960 ns/iter (+/- 7,255) = 8 MB/s +test misc::hard_1MB ... bench: 134,129,947 ns/iter (+/- 4,797,942) = 7 MB/s +test misc::hard_32 ... bench: 4,044 ns/iter (+/- 227) = 14 MB/s +test misc::hard_32K ... bench: 4,183,228 ns/iter (+/- 127,808) = 7 MB/s +test misc::literal ... bench: 331 ns/iter (+/- 21) = 154 MB/s +test misc::long_needle1 ... bench: 5,715,563 ns/iter (+/- 250,535) = 17 MB/s +test misc::long_needle2 ... bench: 5,779,968 ns/iter (+/- 195,784) = 17 MB/s +test misc::match_class ... bench: 431 ns/iter (+/- 5) = 187 MB/s +test misc::match_class_in_range ... bench: 427 ns/iter (+/- 27) = 189 MB/s +test misc::match_class_unicode ... bench: 1,946 ns/iter (+/- 88) = 82 MB/s +test misc::medium_1K ... 
bench: 325 ns/iter (+/- 23) = 3236 MB/s +test misc::medium_1MB ... bench: 138,022 ns/iter (+/- 5,142) = 7597 MB/s +test misc::medium_32 ... bench: 182 ns/iter (+/- 7) = 329 MB/s +test misc::medium_32K ... bench: 4,511 ns/iter (+/- 190) = 7270 MB/s +test misc::not_literal ... bench: 436 ns/iter (+/- 25) = 116 MB/s +test misc::one_pass_long_prefix ... bench: 168 ns/iter (+/- 6) = 154 MB/s +test misc::one_pass_long_prefix_not ... bench: 176 ns/iter (+/- 7) = 147 MB/s +test misc::one_pass_short ... bench: 325 ns/iter (+/- 16) = 52 MB/s +test misc::one_pass_short_not ... bench: 322 ns/iter (+/- 21) = 52 MB/s +test misc::reallyhard2_1K ... bench: 289,956 ns/iter (+/- 16,350) = 3 MB/s +test misc::reallyhard_1K ... bench: 126,089 ns/iter (+/- 5,350) = 8 MB/s +test misc::reallyhard_1MB ... bench: 133,197,312 ns/iter (+/- 3,057,491) = 7 MB/s +test misc::reallyhard_32 ... bench: 4,060 ns/iter (+/- 11) = 14 MB/s +test misc::reallyhard_32K ... bench: 4,215,469 ns/iter (+/- 200,526) = 7 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 27,622 ns/iter (+/- 778) = 289 MB/s +test regexdna::find_new_lines ... bench: 30,503,604 ns/iter (+/- 1,120,697) = 166 MB/s +test regexdna::subst1 ... bench: 23,276,552 ns/iter (+/- 1,019,308) = 218 MB/s +test regexdna::subst10 ... bench: 23,199,415 ns/iter (+/- 790,938) = 219 MB/s +test regexdna::subst11 ... bench: 23,138,469 ns/iter (+/- 884,700) = 219 MB/s +test regexdna::subst2 ... bench: 23,076,376 ns/iter (+/- 644,391) = 220 MB/s +test regexdna::subst3 ... bench: 23,115,770 ns/iter (+/- 737,666) = 219 MB/s +test regexdna::subst4 ... bench: 23,093,288 ns/iter (+/- 1,003,519) = 220 MB/s +test regexdna::subst5 ... bench: 23,618,534 ns/iter (+/- 773,260) = 215 MB/s +test regexdna::subst6 ... bench: 23,301,581 ns/iter (+/- 679,681) = 218 MB/s +test regexdna::subst7 ... bench: 23,371,339 ns/iter (+/- 714,433) = 217 MB/s +test regexdna::subst8 ... bench: 23,187,513 ns/iter (+/- 863,031) = 219 MB/s +test regexdna::subst9 ... 
bench: 23,143,027 ns/iter (+/- 890,422) = 219 MB/s +test regexdna::variant1 ... bench: 104,906,982 ns/iter (+/- 3,391,942) = 48 MB/s +test regexdna::variant2 ... bench: 118,326,728 ns/iter (+/- 3,378,748) = 42 MB/s +test regexdna::variant3 ... bench: 109,348,596 ns/iter (+/- 3,647,056) = 46 MB/s +test regexdna::variant4 ... bench: 104,574,675 ns/iter (+/- 3,236,753) = 48 MB/s +test regexdna::variant5 ... bench: 102,968,132 ns/iter (+/- 2,792,754) = 49 MB/s +test regexdna::variant6 ... bench: 103,783,112 ns/iter (+/- 2,851,581) = 48 MB/s +test regexdna::variant7 ... bench: 103,939,805 ns/iter (+/- 3,118,277) = 48 MB/s +test regexdna::variant8 ... bench: 109,722,594 ns/iter (+/- 3,739,958) = 46 MB/s +test regexdna::variant9 ... bench: 128,702,724 ns/iter (+/- 3,739,103) = 39 MB/s +test sherlock::before_after_holmes ... bench: 39,219,739 ns/iter (+/- 1,622,425) = 15 MB/s +test sherlock::before_holmes ... bench: 37,454,934 ns/iter (+/- 1,055,140) = 15 MB/s +test sherlock::everything_greedy ... bench: 7,341,629 ns/iter (+/- 241,072) = 81 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,298,534 ns/iter (+/- 94,224) = 258 MB/s +test sherlock::ing_suffix ... bench: 18,533,670 ns/iter (+/- 505,855) = 32 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,419,034 ns/iter (+/- 124,616) = 245 MB/s +test sherlock::letters ... bench: 61,910,045 ns/iter (+/- 2,122,755) = 9 MB/s +test sherlock::letters_lower ... bench: 60,831,022 ns/iter (+/- 2,559,720) = 9 MB/s +test sherlock::letters_upper ... bench: 10,747,265 ns/iter (+/- 761,147) = 55 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 215,376 ns/iter (+/- 13,622) = 2762 MB/s +test sherlock::name_alt1 ... bench: 2,282,320 ns/iter (+/- 154,104) = 260 MB/s +test sherlock::name_alt2 ... bench: 2,206,087 ns/iter (+/- 158,376) = 269 MB/s +test sherlock::name_alt3 ... bench: 2,771,932 ns/iter (+/- 181,216) = 214 MB/s +test sherlock::name_alt3_nocase ... 
bench: 19,198,056 ns/iter (+/- 816,668) = 30 MB/s +test sherlock::name_alt4 ... bench: 2,254,798 ns/iter (+/- 135,379) = 263 MB/s +test sherlock::name_alt4_nocase ... bench: 5,734,254 ns/iter (+/- 411,596) = 103 MB/s +test sherlock::name_alt5 ... bench: 2,276,779 ns/iter (+/- 172,557) = 261 MB/s +test sherlock::name_alt5_nocase ... bench: 7,314,318 ns/iter (+/- 377,963) = 81 MB/s +test sherlock::name_holmes ... bench: 477,888 ns/iter (+/- 37,472) = 1244 MB/s +test sherlock::name_holmes_nocase ... bench: 3,487,005 ns/iter (+/- 278,896) = 170 MB/s +test sherlock::name_sherlock ... bench: 295,313 ns/iter (+/- 16,739) = 2014 MB/s +test sherlock::name_sherlock_holmes ... bench: 216,522 ns/iter (+/- 15,594) = 2747 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 3,480,703 ns/iter (+/- 272,332) = 170 MB/s +test sherlock::name_sherlock_nocase ... bench: 3,511,444 ns/iter (+/- 283,013) = 169 MB/s +test sherlock::name_whitespace ... bench: 304,043 ns/iter (+/- 19,186) = 1956 MB/s +test sherlock::no_match_common ... bench: 632,615 ns/iter (+/- 44,676) = 940 MB/s +test sherlock::no_match_really_common ... bench: 727,565 ns/iter (+/- 54,169) = 817 MB/s +test sherlock::no_match_uncommon ... bench: 624,061 ns/iter (+/- 37,791) = 953 MB/s +test sherlock::quotes ... bench: 3,776,688 ns/iter (+/- 186,393) = 157 MB/s +test sherlock::repeated_class_negation ... bench: 34,354,179 ns/iter (+/- 1,534,267) = 17 MB/s +test sherlock::the_lower ... bench: 1,965,787 ns/iter (+/- 137,099) = 302 MB/s +test sherlock::the_nocase ... bench: 4,853,843 ns/iter (+/- 259,890) = 122 MB/s +test sherlock::the_upper ... bench: 949,071 ns/iter (+/- 66,016) = 626 MB/s +test sherlock::the_whitespace ... bench: 2,173,683 ns/iter (+/- 142,384) = 273 MB/s +test sherlock::word_ending_n ... bench: 19,711,057 ns/iter (+/- 942,152) = 30 MB/s +test sherlock::words ... bench: 21,979,387 ns/iter (+/- 1,250,588) = 27 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 94 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/onig-vs-rust b/third_party/rust/regex/record/old-bench-log/05/onig-vs-rust new file mode 100644 index 0000000000..4625e8fe89 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/onig-vs-rust @@ -0,0 +1,95 @@ + name onig ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 158 (2468 MB/s) 24 (16250 MB/s) -134 -84.81% + misc::anchored_literal_long_non_match 495 (787 MB/s) 27 (14444 MB/s) -468 -94.55% + misc::anchored_literal_short_match 160 (162 MB/s) 22 (1181 MB/s) -138 -86.25% + misc::anchored_literal_short_non_match 44 (590 MB/s) 24 (1083 MB/s) -20 -45.45% + misc::easy0_1K 315 (3336 MB/s) 16 (65687 MB/s) -299 -94.92% + misc::easy0_1MB 136,864 (7661 MB/s) 20 (52430150 MB/s) -136,844 -99.99% + misc::easy0_32 163 (361 MB/s) 16 (3687 MB/s) -147 -90.18% + misc::easy0_32K 4,562 (7188 MB/s) 16 (2049687 MB/s) -4,546 -99.65% + misc::easy1_1K 3,947 (264 MB/s) 48 (21750 MB/s) -3,899 -98.78% + misc::easy1_1MB 3,920,564 (267 MB/s) 48 (21845750 MB/s) -3,920,516 -100.00% + misc::easy1_32 321 (161 MB/s) 46 (1130 MB/s) -275 -85.67% + misc::easy1_32K 121,449 (269 MB/s) 47 (697617 MB/s) -121,402 -99.96% + misc::hard_1K 125,960 (8 MB/s) 58 (18120 MB/s) -125,902 -99.95% + misc::hard_1MB 134,129,947 (7 MB/s) 61 (17190213 MB/s) -134,129,886 -100.00% + misc::hard_32 4,044 (14 MB/s) 58 (1017 MB/s) -3,986 -98.57% + misc::hard_32K 4,183,228 (7 MB/s) 56 (585625 MB/s) -4,183,172 -100.00% + misc::literal 331 (154 MB/s) 16 (3187 MB/s) -315 -95.17% + misc::long_needle1 5,715,563 (17 MB/s) 2,226 (44924 MB/s) -5,713,337 -99.96% + misc::long_needle2 5,779,968 (17 MB/s) 576,997 (173 MB/s) -5,202,971 -90.02% + misc::match_class 431 (187 MB/s) 65 (1246 MB/s) -366 -84.92% + misc::match_class_in_range 427 (189 MB/s) 27 (3000 MB/s) -400 -93.68% + misc::match_class_unicode 1,946 (82 MB/s) 283 (568 MB/s) -1,663 -85.46% + misc::medium_1K 325 (3236 MB/s) 16 (65750 
MB/s) -309 -95.08% + misc::medium_1MB 138,022 (7597 MB/s) 21 (49933523 MB/s) -138,001 -99.98% + misc::medium_32 182 (329 MB/s) 17 (3529 MB/s) -165 -90.66% + misc::medium_32K 4,511 (7270 MB/s) 17 (1929176 MB/s) -4,494 -99.62% + misc::not_literal 436 (116 MB/s) 105 (485 MB/s) -331 -75.92% + misc::one_pass_long_prefix 168 (154 MB/s) 68 (382 MB/s) -100 -59.52% + misc::one_pass_long_prefix_not 176 (147 MB/s) 58 (448 MB/s) -118 -67.05% + misc::one_pass_short 325 (52 MB/s) 45 (377 MB/s) -280 -86.15% + misc::one_pass_short_not 322 (52 MB/s) 50 (340 MB/s) -272 -84.47% + misc::reallyhard2_1K 289,956 (3 MB/s) 83 (12530 MB/s) -289,873 -99.97% + misc::reallyhard_1K 126,089 (8 MB/s) 1,822 (576 MB/s) -124,267 -98.55% + misc::reallyhard_1MB 133,197,312 (7 MB/s) 1,768,327 (592 MB/s) -131,428,985 -98.67% + misc::reallyhard_32 4,060 (14 MB/s) 121 (487 MB/s) -3,939 -97.02% + misc::reallyhard_32K 4,215,469 (7 MB/s) 56,375 (581 MB/s) -4,159,094 -98.66% + misc::reverse_suffix_no_quadratic 27,622 (289 MB/s) 5,803 (1378 MB/s) -21,819 -78.99% + regexdna::find_new_lines 30,503,604 (166 MB/s) 14,818,233 (343 MB/s) -15,685,371 -51.42% + regexdna::subst1 23,276,552 (218 MB/s) 896,790 (5668 MB/s) -22,379,762 -96.15% + regexdna::subst10 23,199,415 (219 MB/s) 957,325 (5310 MB/s) -22,242,090 -95.87% + regexdna::subst11 23,138,469 (219 MB/s) 917,248 (5542 MB/s) -22,221,221 -96.04% + regexdna::subst2 23,076,376 (220 MB/s) 892,129 (5698 MB/s) -22,184,247 -96.13% + regexdna::subst3 23,115,770 (219 MB/s) 929,250 (5470 MB/s) -22,186,520 -95.98% + regexdna::subst4 23,093,288 (220 MB/s) 872,581 (5825 MB/s) -22,220,707 -96.22% + regexdna::subst5 23,618,534 (215 MB/s) 875,804 (5804 MB/s) -22,742,730 -96.29% + regexdna::subst6 23,301,581 (218 MB/s) 884,639 (5746 MB/s) -22,416,942 -96.20% + regexdna::subst7 23,371,339 (217 MB/s) 872,791 (5824 MB/s) -22,498,548 -96.27% + regexdna::subst8 23,187,513 (219 MB/s) 873,833 (5817 MB/s) -22,313,680 -96.23% + regexdna::subst9 23,143,027 (219 MB/s) 886,744 (5732 MB/s) 
-22,256,283 -96.17% + regexdna::variant1 104,906,982 (48 MB/s) 3,699,267 (1374 MB/s) -101,207,715 -96.47% + regexdna::variant2 118,326,728 (42 MB/s) 6,760,952 (751 MB/s) -111,565,776 -94.29% + regexdna::variant3 109,348,596 (46 MB/s) 8,030,646 (633 MB/s) -101,317,950 -92.66% + regexdna::variant4 104,574,675 (48 MB/s) 8,077,290 (629 MB/s) -96,497,385 -92.28% + regexdna::variant5 102,968,132 (49 MB/s) 6,787,242 (748 MB/s) -96,180,890 -93.41% + regexdna::variant6 103,783,112 (48 MB/s) 6,577,777 (772 MB/s) -97,205,335 -93.66% + regexdna::variant7 103,939,805 (48 MB/s) 6,705,580 (758 MB/s) -97,234,225 -93.55% + regexdna::variant8 109,722,594 (46 MB/s) 6,818,785 (745 MB/s) -102,903,809 -93.79% + regexdna::variant9 128,702,724 (39 MB/s) 6,821,453 (745 MB/s) -121,881,271 -94.70% + sherlock::before_after_holmes 39,219,739 (15 MB/s) 1,029,866 (577 MB/s) -38,189,873 -97.37% + sherlock::before_holmes 37,454,934 (15 MB/s) 76,633 (7763 MB/s) -37,378,301 -99.80% + sherlock::everything_greedy 7,341,629 (81 MB/s) 2,375,079 (250 MB/s) -4,966,550 -67.65% + sherlock::holmes_cochar_watson 2,298,534 (258 MB/s) 144,725 (4110 MB/s) -2,153,809 -93.70% + sherlock::ing_suffix 18,533,670 (32 MB/s) 436,202 (1363 MB/s) -18,097,468 -97.65% + sherlock::ing_suffix_limited_space 2,419,034 (245 MB/s) 1,182,943 (502 MB/s) -1,236,091 -51.10% + sherlock::letters 61,910,045 (9 MB/s) 24,390,452 (24 MB/s) -37,519,593 -60.60% + sherlock::letters_lower 60,831,022 (9 MB/s) 23,784,108 (25 MB/s) -37,046,914 -60.90% + sherlock::letters_upper 10,747,265 (55 MB/s) 1,993,838 (298 MB/s) -8,753,427 -81.45% + sherlock::line_boundary_sherlock_holmes 215,376 (2762 MB/s) 999,414 (595 MB/s) 784,038 364.03% + sherlock::name_alt1 2,282,320 (260 MB/s) 34,298 (17345 MB/s) -2,248,022 -98.50% + sherlock::name_alt2 2,206,087 (269 MB/s) 124,226 (4789 MB/s) -2,081,861 -94.37% + sherlock::name_alt3 2,771,932 (214 MB/s) 137,742 (4319 MB/s) -2,634,190 -95.03% + sherlock::name_alt3_nocase 19,198,056 (30 MB/s) 1,293,763 (459 MB/s) 
-17,904,293 -93.26% + sherlock::name_alt4 2,254,798 (263 MB/s) 164,900 (3607 MB/s) -2,089,898 -92.69% + sherlock::name_alt4_nocase 5,734,254 (103 MB/s) 235,023 (2531 MB/s) -5,499,231 -95.90% + sherlock::name_alt5 2,276,779 (261 MB/s) 127,928 (4650 MB/s) -2,148,851 -94.38% + sherlock::name_alt5_nocase 7,314,318 (81 MB/s) 659,591 (901 MB/s) -6,654,727 -90.98% + sherlock::name_holmes 477,888 (1244 MB/s) 40,902 (14545 MB/s) -436,986 -91.44% + sherlock::name_holmes_nocase 3,487,005 (170 MB/s) 198,658 (2994 MB/s) -3,288,347 -94.30% + sherlock::name_sherlock 295,313 (2014 MB/s) 68,924 (8631 MB/s) -226,389 -76.66% + sherlock::name_sherlock_holmes 216,522 (2747 MB/s) 31,640 (18803 MB/s) -184,882 -85.39% + sherlock::name_sherlock_holmes_nocase 3,480,703 (170 MB/s) 173,522 (3428 MB/s) -3,307,181 -95.01% + sherlock::name_sherlock_nocase 3,511,444 (169 MB/s) 170,888 (3481 MB/s) -3,340,556 -95.13% + sherlock::name_whitespace 304,043 (1956 MB/s) 84,314 (7056 MB/s) -219,729 -72.27% + sherlock::no_match_common 632,615 (940 MB/s) 20,727 (28703 MB/s) -611,888 -96.72% + sherlock::no_match_really_common 727,565 (817 MB/s) 381,476 (1559 MB/s) -346,089 -47.57% + sherlock::no_match_uncommon 624,061 (953 MB/s) 20,786 (28621 MB/s) -603,275 -96.67% + sherlock::quotes 3,776,688 (157 MB/s) 531,487 (1119 MB/s) -3,245,201 -85.93% + sherlock::repeated_class_negation 34,354,179 (17 MB/s) 85,881,944 (6 MB/s) 51,527,765 149.99% + sherlock::the_lower 1,965,787 (302 MB/s) 654,110 (909 MB/s) -1,311,677 -66.73% + sherlock::the_nocase 4,853,843 (122 MB/s) 474,456 (1253 MB/s) -4,379,387 -90.23% + sherlock::the_upper 949,071 (626 MB/s) 43,746 (13599 MB/s) -905,325 -95.39% + sherlock::the_whitespace 2,173,683 (273 MB/s) 1,181,974 (503 MB/s) -991,709 -45.62% + sherlock::word_ending_n 19,711,057 (30 MB/s) 1,925,578 (308 MB/s) -17,785,479 -90.23% + sherlock::words 21,979,387 (27 MB/s) 9,697,201 (61 MB/s) -12,282,186 -55.88% diff --git a/third_party/rust/regex/record/old-bench-log/05/pcre1 
b/third_party/rust/regex/record/old-bench-log/05/pcre1 new file mode 100644 index 0000000000..51af3613f5 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 30 ns/iter (+/- 0) = 13000 MB/s +test misc::anchored_literal_long_non_match ... bench: 24 ns/iter (+/- 1) = 16250 MB/s +test misc::anchored_literal_short_match ... bench: 29 ns/iter (+/- 1) = 896 MB/s +test misc::anchored_literal_short_non_match ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::easy0_1K ... bench: 260 ns/iter (+/- 15) = 4042 MB/s +test misc::easy0_1MB ... bench: 202,849 ns/iter (+/- 7,973) = 5169 MB/s +test misc::easy0_32 ... bench: 47 ns/iter (+/- 3) = 1255 MB/s +test misc::easy0_32K ... bench: 6,378 ns/iter (+/- 236) = 5141 MB/s +test misc::easy1_1K ... bench: 248 ns/iter (+/- 15) = 4209 MB/s +test misc::easy1_1MB ... bench: 203,105 ns/iter (+/- 7,590) = 5162 MB/s +test misc::easy1_32 ... bench: 51 ns/iter (+/- 1) = 1019 MB/s +test misc::easy1_32K ... bench: 6,508 ns/iter (+/- 160) = 5038 MB/s +test misc::hard_1K ... bench: 1,324 ns/iter (+/- 46) = 793 MB/s +test misc::hard_1MB ... bench: 1,134,691 ns/iter (+/- 41,296) = 924 MB/s +test misc::hard_32 ... bench: 113 ns/iter (+/- 13) = 522 MB/s +test misc::hard_32K ... bench: 42,269 ns/iter (+/- 2,298) = 775 MB/s +test misc::literal ... bench: 28 ns/iter (+/- 0) = 1821 MB/s +test misc::long_needle1 ... bench: 547,122 ns/iter (+/- 34,029) = 182 MB/s +test misc::long_needle2 ... bench: 546,018 ns/iter (+/- 24,721) = 183 MB/s +test misc::match_class ... bench: 97 ns/iter (+/- 5) = 835 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 1) = 2700 MB/s +test misc::match_class_unicode ... bench: 343 ns/iter (+/- 2) = 469 MB/s +test misc::medium_1K ... bench: 253 ns/iter (+/- 15) = 4158 MB/s +test misc::medium_1MB ... bench: 202,025 ns/iter (+/- 11,252) = 5190 MB/s +test misc::medium_32 ... 
bench: 51 ns/iter (+/- 2) = 1176 MB/s +test misc::medium_32K ... bench: 6,406 ns/iter (+/- 318) = 5119 MB/s +test misc::not_literal ... bench: 169 ns/iter (+/- 6) = 301 MB/s +test misc::one_pass_long_prefix ... bench: 28 ns/iter (+/- 1) = 928 MB/s +test misc::one_pass_long_prefix_not ... bench: 28 ns/iter (+/- 0) = 928 MB/s +test misc::one_pass_short ... bench: 54 ns/iter (+/- 0) = 314 MB/s +test misc::one_pass_short_not ... bench: 55 ns/iter (+/- 3) = 309 MB/s +test misc::reallyhard2_1K ... bench: 4,664 ns/iter (+/- 123) = 222 MB/s +test misc::reallyhard_1K ... bench: 1,595 ns/iter (+/- 34) = 658 MB/s +test misc::reallyhard_1MB ... bench: 1,377,542 ns/iter (+/- 2,203) = 761 MB/s +test misc::reallyhard_32 ... bench: 106 ns/iter (+/- 2) = 556 MB/s +test misc::reallyhard_32K ... bench: 43,256 ns/iter (+/- 1,230) = 758 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,607 ns/iter (+/- 68) = 1736 MB/s +test regexdna::find_new_lines ... bench: 2,840,298 ns/iter (+/- 128,040) = 1789 MB/s +test regexdna::subst1 ... bench: 1,284,283 ns/iter (+/- 39,986) = 3958 MB/s +test regexdna::subst10 ... bench: 1,269,531 ns/iter (+/- 63,116) = 4004 MB/s +test regexdna::subst11 ... bench: 1,286,171 ns/iter (+/- 49,256) = 3952 MB/s +test regexdna::subst2 ... bench: 1,303,022 ns/iter (+/- 1,553) = 3901 MB/s +test regexdna::subst3 ... bench: 1,295,961 ns/iter (+/- 57,880) = 3922 MB/s +test regexdna::subst4 ... bench: 1,313,706 ns/iter (+/- 2,115) = 3869 MB/s +test regexdna::subst5 ... bench: 1,286,339 ns/iter (+/- 2,093) = 3951 MB/s +test regexdna::subst6 ... bench: 1,385,644 ns/iter (+/- 3,387) = 3668 MB/s +test regexdna::subst7 ... bench: 1,286,743 ns/iter (+/- 2,339) = 3950 MB/s +test regexdna::subst8 ... bench: 1,306,406 ns/iter (+/- 1,686) = 3891 MB/s +test regexdna::subst9 ... bench: 1,280,365 ns/iter (+/- 52,649) = 3970 MB/s +test regexdna::variant1 ... bench: 15,271,875 ns/iter (+/- 510,399) = 332 MB/s +test regexdna::variant2 ... 
bench: 16,704,090 ns/iter (+/- 446,145) = 304 MB/s +test regexdna::variant3 ... bench: 20,745,546 ns/iter (+/- 500,573) = 245 MB/s +test regexdna::variant4 ... bench: 19,285,154 ns/iter (+/- 543,793) = 263 MB/s +test regexdna::variant5 ... bench: 17,234,130 ns/iter (+/- 291,232) = 294 MB/s +test regexdna::variant6 ... bench: 17,462,350 ns/iter (+/- 510,036) = 291 MB/s +test regexdna::variant7 ... bench: 19,671,680 ns/iter (+/- 562,610) = 258 MB/s +test regexdna::variant8 ... bench: 24,515,319 ns/iter (+/- 725,298) = 207 MB/s +test regexdna::variant9 ... bench: 22,623,755 ns/iter (+/- 637,538) = 224 MB/s +test sherlock::before_after_holmes ... bench: 4,510,830 ns/iter (+/- 170,864) = 131 MB/s +test sherlock::before_holmes ... bench: 4,706,836 ns/iter (+/- 186,202) = 126 MB/s +test sherlock::holmes_cochar_watson ... bench: 523,122 ns/iter (+/- 988) = 1137 MB/s +test sherlock::ing_suffix ... bench: 2,030,438 ns/iter (+/- 9,228) = 293 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,996,956 ns/iter (+/- 197,705) = 119 MB/s +test sherlock::letters ... bench: 13,529,105 ns/iter (+/- 496,645) = 43 MB/s +test sherlock::letters_lower ... bench: 13,681,607 ns/iter (+/- 448,932) = 43 MB/s +test sherlock::letters_upper ... bench: 1,904,757 ns/iter (+/- 94,484) = 312 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 207,695 ns/iter (+/- 8,892) = 2864 MB/s +test sherlock::name_alt1 ... bench: 486,857 ns/iter (+/- 21,004) = 1221 MB/s +test sherlock::name_alt2 ... bench: 483,926 ns/iter (+/- 26,860) = 1229 MB/s +test sherlock::name_alt3 ... bench: 978,827 ns/iter (+/- 43,851) = 607 MB/s +test sherlock::name_alt3_nocase ... bench: 2,986,143 ns/iter (+/- 78,155) = 199 MB/s +test sherlock::name_alt4 ... bench: 78,104 ns/iter (+/- 4,056) = 7617 MB/s +test sherlock::name_alt4_nocase ... bench: 1,638,351 ns/iter (+/- 62,542) = 363 MB/s +test sherlock::name_alt5 ... bench: 685,723 ns/iter (+/- 26,092) = 867 MB/s +test sherlock::name_alt5_nocase ... 
bench: 1,817,760 ns/iter (+/- 80,781) = 327 MB/s +test sherlock::name_holmes ... bench: 411,102 ns/iter (+/- 1,887) = 1447 MB/s +test sherlock::name_holmes_nocase ... bench: 516,003 ns/iter (+/- 2,295) = 1152 MB/s +test sherlock::name_sherlock ... bench: 284,300 ns/iter (+/- 1,117) = 2092 MB/s +test sherlock::name_sherlock_holmes ... bench: 209,139 ns/iter (+/- 380) = 2844 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,118,324 ns/iter (+/- 1,654) = 531 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,126,992 ns/iter (+/- 1,180) = 527 MB/s +test sherlock::name_whitespace ... bench: 284,672 ns/iter (+/- 510) = 2089 MB/s +test sherlock::no_match_common ... bench: 439,955 ns/iter (+/- 939) = 1352 MB/s +test sherlock::no_match_really_common ... bench: 439,266 ns/iter (+/- 3,751) = 1354 MB/s +test sherlock::no_match_uncommon ... bench: 28,872 ns/iter (+/- 31) = 20605 MB/s +test sherlock::quotes ... bench: 522,877 ns/iter (+/- 32,723) = 1137 MB/s +test sherlock::repeated_class_negation ... bench: 5,997,745 ns/iter (+/- 209,544) = 99 MB/s +test sherlock::the_lower ... bench: 747,234 ns/iter (+/- 43,110) = 796 MB/s +test sherlock::the_nocase ... bench: 802,320 ns/iter (+/- 27,715) = 741 MB/s +test sherlock::the_upper ... bench: 58,163 ns/iter (+/- 2,202) = 10228 MB/s +test sherlock::the_whitespace ... bench: 920,781 ns/iter (+/- 30,609) = 646 MB/s +test sherlock::word_ending_n ... bench: 5,703,864 ns/iter (+/- 191,007) = 104 MB/s +test sherlock::words ... bench: 6,786,318 ns/iter (+/- 168,049) = 87 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/pcre1-vs-rust b/third_party/rust/regex/record/old-bench-log/05/pcre1-vs-rust new file mode 100644 index 0000000000..1d8c0d632d --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/pcre1-vs-rust @@ -0,0 +1,94 @@ + name pcre1 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 30 (13000 MB/s) 24 (16250 MB/s) -6 -20.00% + misc::anchored_literal_long_non_match 24 (16250 MB/s) 27 (14444 MB/s) 3 12.50% + misc::anchored_literal_short_match 29 (896 MB/s) 22 (1181 MB/s) -7 -24.14% + misc::anchored_literal_short_non_match 24 (1083 MB/s) 24 (1083 MB/s) 0 0.00% + misc::easy0_1K 260 (4042 MB/s) 16 (65687 MB/s) -244 -93.85% + misc::easy0_1MB 202,849 (5169 MB/s) 20 (52430150 MB/s) -202,829 -99.99% + misc::easy0_32 47 (1255 MB/s) 16 (3687 MB/s) -31 -65.96% + misc::easy0_32K 6,378 (5141 MB/s) 16 (2049687 MB/s) -6,362 -99.75% + misc::easy1_1K 248 (4209 MB/s) 48 (21750 MB/s) -200 -80.65% + misc::easy1_1MB 203,105 (5162 MB/s) 48 (21845750 MB/s) -203,057 -99.98% + misc::easy1_32 51 (1019 MB/s) 46 (1130 MB/s) -5 -9.80% + misc::easy1_32K 6,508 (5038 MB/s) 47 (697617 MB/s) -6,461 -99.28% + misc::hard_1K 1,324 (793 MB/s) 58 (18120 MB/s) -1,266 -95.62% + misc::hard_1MB 1,134,691 (924 MB/s) 61 (17190213 MB/s) -1,134,630 -99.99% + misc::hard_32 113 (522 MB/s) 58 (1017 MB/s) -55 -48.67% + misc::hard_32K 42,269 (775 MB/s) 56 (585625 MB/s) -42,213 -99.87% + misc::literal 28 (1821 MB/s) 16 (3187 MB/s) -12 -42.86% + misc::long_needle1 547,122 (182 MB/s) 2,226 (44924 MB/s) -544,896 -99.59% + misc::long_needle2 546,018 (183 MB/s) 576,997 (173 MB/s) 30,979 5.67% + misc::match_class 97 (835 MB/s) 65 (1246 MB/s) -32 -32.99% + misc::match_class_in_range 30 (2700 MB/s) 27 (3000 MB/s) -3 -10.00% + misc::match_class_unicode 343 (469 MB/s) 283 (568 MB/s) -60 -17.49% + misc::medium_1K 253 (4158 MB/s) 16 (65750 MB/s) -237 -93.68% + misc::medium_1MB 202,025 (5190 MB/s) 
21 (49933523 MB/s) -202,004 -99.99% + misc::medium_32 51 (1176 MB/s) 17 (3529 MB/s) -34 -66.67% + misc::medium_32K 6,406 (5119 MB/s) 17 (1929176 MB/s) -6,389 -99.73% + misc::not_literal 169 (301 MB/s) 105 (485 MB/s) -64 -37.87% + misc::one_pass_long_prefix 28 (928 MB/s) 68 (382 MB/s) 40 142.86% + misc::one_pass_long_prefix_not 28 (928 MB/s) 58 (448 MB/s) 30 107.14% + misc::one_pass_short 54 (314 MB/s) 45 (377 MB/s) -9 -16.67% + misc::one_pass_short_not 55 (309 MB/s) 50 (340 MB/s) -5 -9.09% + misc::reallyhard2_1K 4,664 (222 MB/s) 83 (12530 MB/s) -4,581 -98.22% + misc::reallyhard_1K 1,595 (658 MB/s) 1,822 (576 MB/s) 227 14.23% + misc::reallyhard_1MB 1,377,542 (761 MB/s) 1,768,327 (592 MB/s) 390,785 28.37% + misc::reallyhard_32 106 (556 MB/s) 121 (487 MB/s) 15 14.15% + misc::reallyhard_32K 43,256 (758 MB/s) 56,375 (581 MB/s) 13,119 30.33% + misc::reverse_suffix_no_quadratic 4,607 (1736 MB/s) 5,803 (1378 MB/s) 1,196 25.96% + regexdna::find_new_lines 2,840,298 (1789 MB/s) 14,818,233 (343 MB/s) 11,977,935 421.71% + regexdna::subst1 1,284,283 (3958 MB/s) 896,790 (5668 MB/s) -387,493 -30.17% + regexdna::subst10 1,269,531 (4004 MB/s) 957,325 (5310 MB/s) -312,206 -24.59% + regexdna::subst11 1,286,171 (3952 MB/s) 917,248 (5542 MB/s) -368,923 -28.68% + regexdna::subst2 1,303,022 (3901 MB/s) 892,129 (5698 MB/s) -410,893 -31.53% + regexdna::subst3 1,295,961 (3922 MB/s) 929,250 (5470 MB/s) -366,711 -28.30% + regexdna::subst4 1,313,706 (3869 MB/s) 872,581 (5825 MB/s) -441,125 -33.58% + regexdna::subst5 1,286,339 (3951 MB/s) 875,804 (5804 MB/s) -410,535 -31.91% + regexdna::subst6 1,385,644 (3668 MB/s) 884,639 (5746 MB/s) -501,005 -36.16% + regexdna::subst7 1,286,743 (3950 MB/s) 872,791 (5824 MB/s) -413,952 -32.17% + regexdna::subst8 1,306,406 (3891 MB/s) 873,833 (5817 MB/s) -432,573 -33.11% + regexdna::subst9 1,280,365 (3970 MB/s) 886,744 (5732 MB/s) -393,621 -30.74% + regexdna::variant1 15,271,875 (332 MB/s) 3,699,267 (1374 MB/s) -11,572,608 -75.78% + regexdna::variant2 16,704,090 
(304 MB/s) 6,760,952 (751 MB/s) -9,943,138 -59.53% + regexdna::variant3 20,745,546 (245 MB/s) 8,030,646 (633 MB/s) -12,714,900 -61.29% + regexdna::variant4 19,285,154 (263 MB/s) 8,077,290 (629 MB/s) -11,207,864 -58.12% + regexdna::variant5 17,234,130 (294 MB/s) 6,787,242 (748 MB/s) -10,446,888 -60.62% + regexdna::variant6 17,462,350 (291 MB/s) 6,577,777 (772 MB/s) -10,884,573 -62.33% + regexdna::variant7 19,671,680 (258 MB/s) 6,705,580 (758 MB/s) -12,966,100 -65.91% + regexdna::variant8 24,515,319 (207 MB/s) 6,818,785 (745 MB/s) -17,696,534 -72.19% + regexdna::variant9 22,623,755 (224 MB/s) 6,821,453 (745 MB/s) -15,802,302 -69.85% + sherlock::before_after_holmes 4,510,830 (131 MB/s) 1,029,866 (577 MB/s) -3,480,964 -77.17% + sherlock::before_holmes 4,706,836 (126 MB/s) 76,633 (7763 MB/s) -4,630,203 -98.37% + sherlock::holmes_cochar_watson 523,122 (1137 MB/s) 144,725 (4110 MB/s) -378,397 -72.33% + sherlock::ing_suffix 2,030,438 (293 MB/s) 436,202 (1363 MB/s) -1,594,236 -78.52% + sherlock::ing_suffix_limited_space 4,996,956 (119 MB/s) 1,182,943 (502 MB/s) -3,814,013 -76.33% + sherlock::letters 13,529,105 (43 MB/s) 24,390,452 (24 MB/s) 10,861,347 80.28% + sherlock::letters_lower 13,681,607 (43 MB/s) 23,784,108 (25 MB/s) 10,102,501 73.84% + sherlock::letters_upper 1,904,757 (312 MB/s) 1,993,838 (298 MB/s) 89,081 4.68% + sherlock::line_boundary_sherlock_holmes 207,695 (2864 MB/s) 999,414 (595 MB/s) 791,719 381.19% + sherlock::name_alt1 486,857 (1221 MB/s) 34,298 (17345 MB/s) -452,559 -92.96% + sherlock::name_alt2 483,926 (1229 MB/s) 124,226 (4789 MB/s) -359,700 -74.33% + sherlock::name_alt3 978,827 (607 MB/s) 137,742 (4319 MB/s) -841,085 -85.93% + sherlock::name_alt3_nocase 2,986,143 (199 MB/s) 1,293,763 (459 MB/s) -1,692,380 -56.67% + sherlock::name_alt4 78,104 (7617 MB/s) 164,900 (3607 MB/s) 86,796 111.13% + sherlock::name_alt4_nocase 1,638,351 (363 MB/s) 235,023 (2531 MB/s) -1,403,328 -85.65% + sherlock::name_alt5 685,723 (867 MB/s) 127,928 (4650 MB/s) -557,795 
-81.34% + sherlock::name_alt5_nocase 1,817,760 (327 MB/s) 659,591 (901 MB/s) -1,158,169 -63.71% + sherlock::name_holmes 411,102 (1447 MB/s) 40,902 (14545 MB/s) -370,200 -90.05% + sherlock::name_holmes_nocase 516,003 (1152 MB/s) 198,658 (2994 MB/s) -317,345 -61.50% + sherlock::name_sherlock 284,300 (2092 MB/s) 68,924 (8631 MB/s) -215,376 -75.76% + sherlock::name_sherlock_holmes 209,139 (2844 MB/s) 31,640 (18803 MB/s) -177,499 -84.87% + sherlock::name_sherlock_holmes_nocase 1,118,324 (531 MB/s) 173,522 (3428 MB/s) -944,802 -84.48% + sherlock::name_sherlock_nocase 1,126,992 (527 MB/s) 170,888 (3481 MB/s) -956,104 -84.84% + sherlock::name_whitespace 284,672 (2089 MB/s) 84,314 (7056 MB/s) -200,358 -70.38% + sherlock::no_match_common 439,955 (1352 MB/s) 20,727 (28703 MB/s) -419,228 -95.29% + sherlock::no_match_really_common 439,266 (1354 MB/s) 381,476 (1559 MB/s) -57,790 -13.16% + sherlock::no_match_uncommon 28,872 (20605 MB/s) 20,786 (28621 MB/s) -8,086 -28.01% + sherlock::quotes 522,877 (1137 MB/s) 531,487 (1119 MB/s) 8,610 1.65% + sherlock::repeated_class_negation 5,997,745 (99 MB/s) 85,881,944 (6 MB/s) 79,884,199 1331.90% + sherlock::the_lower 747,234 (796 MB/s) 654,110 (909 MB/s) -93,124 -12.46% + sherlock::the_nocase 802,320 (741 MB/s) 474,456 (1253 MB/s) -327,864 -40.86% + sherlock::the_upper 58,163 (10228 MB/s) 43,746 (13599 MB/s) -14,417 -24.79% + sherlock::the_whitespace 920,781 (646 MB/s) 1,181,974 (503 MB/s) 261,193 28.37% + sherlock::word_ending_n 5,703,864 (104 MB/s) 1,925,578 (308 MB/s) -3,778,286 -66.24% + sherlock::words 6,786,318 (87 MB/s) 9,697,201 (61 MB/s) 2,910,883 42.89% diff --git a/third_party/rust/regex/record/old-bench-log/05/pcre2 b/third_party/rust/regex/record/old-bench-log/05/pcre2 new file mode 100644 index 0000000000..76b3242cb4 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... 
bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_long_non_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_short_match ... bench: 19 ns/iter (+/- 1) = 1368 MB/s +test misc::anchored_literal_short_non_match ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::easy0_1K ... bench: 241 ns/iter (+/- 9) = 4360 MB/s +test misc::easy0_1MB ... bench: 207,103 ns/iter (+/- 8,557) = 5063 MB/s +test misc::easy0_32 ... bench: 39 ns/iter (+/- 0) = 1512 MB/s +test misc::easy0_32K ... bench: 6,522 ns/iter (+/- 20) = 5028 MB/s +test misc::easy1_1K ... bench: 247 ns/iter (+/- 3) = 4226 MB/s +test misc::easy1_1MB ... bench: 206,893 ns/iter (+/- 9,489) = 5068 MB/s +test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s +test misc::easy1_32K ... bench: 6,516 ns/iter (+/- 301) = 5031 MB/s +test misc::hard_1K ... bench: 1,566 ns/iter (+/- 79) = 671 MB/s +test misc::hard_1MB ... bench: 1,119,234 ns/iter (+/- 38,605) = 936 MB/s +test misc::hard_32 ... bench: 95 ns/iter (+/- 4) = 621 MB/s +test misc::hard_32K ... bench: 34,411 ns/iter (+/- 1,542) = 953 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 0) = 2833 MB/s +test misc::long_needle1 ... bench: 550,340 ns/iter (+/- 30,668) = 181 MB/s +test misc::long_needle2 ... bench: 553,056 ns/iter (+/- 25,618) = 180 MB/s +test misc::match_class ... bench: 82 ns/iter (+/- 1) = 987 MB/s +test misc::match_class_in_range ... bench: 20 ns/iter (+/- 1) = 4050 MB/s +test misc::match_class_unicode ... bench: 351 ns/iter (+/- 14) = 458 MB/s +test misc::medium_1K ... bench: 242 ns/iter (+/- 13) = 4347 MB/s +test misc::medium_1MB ... bench: 207,290 ns/iter (+/- 1,458) = 5058 MB/s +test misc::medium_32 ... bench: 41 ns/iter (+/- 0) = 1463 MB/s +test misc::medium_32K ... bench: 6,529 ns/iter (+/- 293) = 5023 MB/s +test misc::not_literal ... bench: 161 ns/iter (+/- 7) = 316 MB/s +test misc::one_pass_long_prefix ... bench: 17 ns/iter (+/- 1) = 1529 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::one_pass_short ... bench: 45 ns/iter (+/- 2) = 377 MB/s +test misc::one_pass_short_not ... bench: 49 ns/iter (+/- 2) = 346 MB/s +test misc::reallyhard2_1K ... bench: 4,487 ns/iter (+/- 190) = 231 MB/s +test misc::reallyhard_1K ... bench: 1,260 ns/iter (+/- 46) = 834 MB/s +test misc::reallyhard_1MB ... bench: 1,361,796 ns/iter (+/- 46,490) = 770 MB/s +test misc::reallyhard_32 ... bench: 93 ns/iter (+/- 8) = 634 MB/s +test misc::reallyhard_32K ... bench: 42,503 ns/iter (+/- 1,721) = 771 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,559 ns/iter (+/- 171) = 1754 MB/s +test regexdna::find_new_lines ... bench: 1,887,325 ns/iter (+/- 74,026) = 2693 MB/s +test regexdna::subst1 ... bench: 963,089 ns/iter (+/- 3,478) = 5278 MB/s +test regexdna::subst10 ... bench: 968,221 ns/iter (+/- 5,406) = 5250 MB/s +test regexdna::subst11 ... bench: 961,661 ns/iter (+/- 45,597) = 5286 MB/s +test regexdna::subst2 ... bench: 956,430 ns/iter (+/- 32,654) = 5314 MB/s +test regexdna::subst3 ... bench: 961,204 ns/iter (+/- 48,799) = 5288 MB/s +test regexdna::subst4 ... bench: 961,897 ns/iter (+/- 50,762) = 5284 MB/s +test regexdna::subst5 ... bench: 953,412 ns/iter (+/- 69,554) = 5331 MB/s +test regexdna::subst6 ... bench: 962,362 ns/iter (+/- 42,136) = 5282 MB/s +test regexdna::subst7 ... bench: 961,694 ns/iter (+/- 100,348) = 5285 MB/s +test regexdna::subst8 ... bench: 963,230 ns/iter (+/- 10,882) = 5277 MB/s +test regexdna::subst9 ... bench: 960,246 ns/iter (+/- 27,407) = 5293 MB/s +test regexdna::variant1 ... bench: 15,553,281 ns/iter (+/- 566,810) = 326 MB/s +test regexdna::variant2 ... bench: 16,563,452 ns/iter (+/- 546,097) = 306 MB/s +test regexdna::variant3 ... bench: 20,405,916 ns/iter (+/- 809,236) = 249 MB/s +test regexdna::variant4 ... bench: 19,489,291 ns/iter (+/- 710,721) = 260 MB/s +test regexdna::variant5 ... bench: 17,406,769 ns/iter (+/- 656,024) = 292 MB/s +test regexdna::variant6 ... 
bench: 17,412,027 ns/iter (+/- 730,347) = 291 MB/s +test regexdna::variant7 ... bench: 19,509,193 ns/iter (+/- 783,850) = 260 MB/s +test regexdna::variant8 ... bench: 24,295,734 ns/iter (+/- 816,832) = 209 MB/s +test regexdna::variant9 ... bench: 22,541,558 ns/iter (+/- 783,104) = 225 MB/s +test sherlock::before_after_holmes ... bench: 4,583,804 ns/iter (+/- 124,057) = 129 MB/s +test sherlock::before_holmes ... bench: 4,640,546 ns/iter (+/- 241,311) = 128 MB/s +test sherlock::holmes_cochar_watson ... bench: 509,088 ns/iter (+/- 25,069) = 1168 MB/s +test sherlock::ing_suffix ... bench: 1,865,631 ns/iter (+/- 68,625) = 318 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,922,883 ns/iter (+/- 232,825) = 120 MB/s +test sherlock::letters ... bench: 9,848,144 ns/iter (+/- 206,915) = 60 MB/s +test sherlock::letters_lower ... bench: 9,723,642 ns/iter (+/- 370,000) = 61 MB/s +test sherlock::letters_upper ... bench: 1,762,773 ns/iter (+/- 86,671) = 337 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 206,367 ns/iter (+/- 8,874) = 2882 MB/s +test sherlock::name_alt1 ... bench: 485,953 ns/iter (+/- 15,036) = 1224 MB/s +test sherlock::name_alt2 ... bench: 483,813 ns/iter (+/- 17,822) = 1229 MB/s +test sherlock::name_alt3 ... bench: 903,013 ns/iter (+/- 38,445) = 658 MB/s +test sherlock::name_alt3_nocase ... bench: 2,993,633 ns/iter (+/- 131,218) = 198 MB/s +test sherlock::name_alt4 ... bench: 78,831 ns/iter (+/- 2,012) = 7546 MB/s +test sherlock::name_alt4_nocase ... bench: 1,647,202 ns/iter (+/- 5,838) = 361 MB/s +test sherlock::name_alt5 ... bench: 678,798 ns/iter (+/- 1,146) = 876 MB/s +test sherlock::name_alt5_nocase ... bench: 1,792,461 ns/iter (+/- 3,532) = 331 MB/s +test sherlock::name_holmes ... bench: 406,138 ns/iter (+/- 1,157) = 1464 MB/s +test sherlock::name_holmes_nocase ... bench: 517,884 ns/iter (+/- 8,548) = 1148 MB/s +test sherlock::name_sherlock ... bench: 282,357 ns/iter (+/- 13,583) = 2107 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 207,894 ns/iter (+/- 1,847) = 2861 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,122,583 ns/iter (+/- 52,189) = 529 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,092,252 ns/iter (+/- 43,130) = 544 MB/s +test sherlock::name_whitespace ... bench: 280,360 ns/iter (+/- 12,136) = 2122 MB/s +test sherlock::no_match_common ... bench: 436,303 ns/iter (+/- 19,423) = 1363 MB/s +test sherlock::no_match_really_common ... bench: 417,686 ns/iter (+/- 15,258) = 1424 MB/s +test sherlock::no_match_uncommon ... bench: 28,504 ns/iter (+/- 1,032) = 20871 MB/s +test sherlock::quotes ... bench: 541,513 ns/iter (+/- 21,121) = 1098 MB/s +test sherlock::repeated_class_negation ... bench: 5,489,721 ns/iter (+/- 185,165) = 108 MB/s +test sherlock::the_lower ... bench: 680,710 ns/iter (+/- 29,403) = 873 MB/s +test sherlock::the_nocase ... bench: 737,040 ns/iter (+/- 4,391) = 807 MB/s +test sherlock::the_upper ... bench: 50,026 ns/iter (+/- 205) = 11892 MB/s +test sherlock::the_whitespace ... bench: 885,922 ns/iter (+/- 9,145) = 671 MB/s +test sherlock::word_ending_n ... bench: 5,424,773 ns/iter (+/- 154,353) = 109 MB/s +test sherlock::words ... bench: 5,753,231 ns/iter (+/- 177,890) = 103 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/pcre2-vs-rust b/third_party/rust/regex/record/old-bench-log/05/pcre2-vs-rust new file mode 100644 index 0000000000..3d89e19b32 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/pcre2-vs-rust @@ -0,0 +1,94 @@ + name pcre2 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 20 (19500 MB/s) 24 (16250 MB/s) 4 20.00% + misc::anchored_literal_long_non_match 15 (26000 MB/s) 27 (14444 MB/s) 12 80.00% + misc::anchored_literal_short_match 19 (1368 MB/s) 22 (1181 MB/s) 3 15.79% + misc::anchored_literal_short_non_match 13 (2000 MB/s) 24 (1083 MB/s) 11 84.62% + misc::easy0_1K 241 (4360 MB/s) 16 (65687 MB/s) -225 -93.36% + misc::easy0_1MB 207,103 (5063 MB/s) 20 (52430150 MB/s) -207,083 -99.99% + misc::easy0_32 39 (1512 MB/s) 16 (3687 MB/s) -23 -58.97% + misc::easy0_32K 6,522 (5028 MB/s) 16 (2049687 MB/s) -6,506 -99.75% + misc::easy1_1K 247 (4226 MB/s) 48 (21750 MB/s) -199 -80.57% + misc::easy1_1MB 206,893 (5068 MB/s) 48 (21845750 MB/s) -206,845 -99.98% + misc::easy1_32 41 (1268 MB/s) 46 (1130 MB/s) 5 12.20% + misc::easy1_32K 6,516 (5031 MB/s) 47 (697617 MB/s) -6,469 -99.28% + misc::hard_1K 1,566 (671 MB/s) 58 (18120 MB/s) -1,508 -96.30% + misc::hard_1MB 1,119,234 (936 MB/s) 61 (17190213 MB/s) -1,119,173 -99.99% + misc::hard_32 95 (621 MB/s) 58 (1017 MB/s) -37 -38.95% + misc::hard_32K 34,411 (953 MB/s) 56 (585625 MB/s) -34,355 -99.84% + misc::literal 18 (2833 MB/s) 16 (3187 MB/s) -2 -11.11% + misc::long_needle1 550,340 (181 MB/s) 2,226 (44924 MB/s) -548,114 -99.60% + misc::long_needle2 553,056 (180 MB/s) 576,997 (173 MB/s) 23,941 4.33% + misc::match_class 82 (987 MB/s) 65 (1246 MB/s) -17 -20.73% + misc::match_class_in_range 20 (4050 MB/s) 27 (3000 MB/s) 7 35.00% + misc::match_class_unicode 351 (458 MB/s) 283 (568 MB/s) -68 -19.37% + misc::medium_1K 242 (4347 MB/s) 16 (65750 MB/s) -226 -93.39% + misc::medium_1MB 207,290 (5058 MB/s) 21 
(49933523 MB/s) -207,269 -99.99% + misc::medium_32 41 (1463 MB/s) 17 (3529 MB/s) -24 -58.54% + misc::medium_32K 6,529 (5023 MB/s) 17 (1929176 MB/s) -6,512 -99.74% + misc::not_literal 161 (316 MB/s) 105 (485 MB/s) -56 -34.78% + misc::one_pass_long_prefix 17 (1529 MB/s) 68 (382 MB/s) 51 300.00% + misc::one_pass_long_prefix_not 18 (1444 MB/s) 58 (448 MB/s) 40 222.22% + misc::one_pass_short 45 (377 MB/s) 45 (377 MB/s) 0 0.00% + misc::one_pass_short_not 49 (346 MB/s) 50 (340 MB/s) 1 2.04% + misc::reallyhard2_1K 4,487 (231 MB/s) 83 (12530 MB/s) -4,404 -98.15% + misc::reallyhard_1K 1,260 (834 MB/s) 1,822 (576 MB/s) 562 44.60% + misc::reallyhard_1MB 1,361,796 (770 MB/s) 1,768,327 (592 MB/s) 406,531 29.85% + misc::reallyhard_32 93 (634 MB/s) 121 (487 MB/s) 28 30.11% + misc::reallyhard_32K 42,503 (771 MB/s) 56,375 (581 MB/s) 13,872 32.64% + misc::reverse_suffix_no_quadratic 4,559 (1754 MB/s) 5,803 (1378 MB/s) 1,244 27.29% + regexdna::find_new_lines 1,887,325 (2693 MB/s) 14,818,233 (343 MB/s) 12,930,908 685.14% + regexdna::subst1 963,089 (5278 MB/s) 896,790 (5668 MB/s) -66,299 -6.88% + regexdna::subst10 968,221 (5250 MB/s) 957,325 (5310 MB/s) -10,896 -1.13% + regexdna::subst11 961,661 (5286 MB/s) 917,248 (5542 MB/s) -44,413 -4.62% + regexdna::subst2 956,430 (5314 MB/s) 892,129 (5698 MB/s) -64,301 -6.72% + regexdna::subst3 961,204 (5288 MB/s) 929,250 (5470 MB/s) -31,954 -3.32% + regexdna::subst4 961,897 (5284 MB/s) 872,581 (5825 MB/s) -89,316 -9.29% + regexdna::subst5 953,412 (5331 MB/s) 875,804 (5804 MB/s) -77,608 -8.14% + regexdna::subst6 962,362 (5282 MB/s) 884,639 (5746 MB/s) -77,723 -8.08% + regexdna::subst7 961,694 (5285 MB/s) 872,791 (5824 MB/s) -88,903 -9.24% + regexdna::subst8 963,230 (5277 MB/s) 873,833 (5817 MB/s) -89,397 -9.28% + regexdna::subst9 960,246 (5293 MB/s) 886,744 (5732 MB/s) -73,502 -7.65% + regexdna::variant1 15,553,281 (326 MB/s) 3,699,267 (1374 MB/s) -11,854,014 -76.22% + regexdna::variant2 16,563,452 (306 MB/s) 6,760,952 (751 MB/s) -9,802,500 -59.18% 
+ regexdna::variant3 20,405,916 (249 MB/s) 8,030,646 (633 MB/s) -12,375,270 -60.65% + regexdna::variant4 19,489,291 (260 MB/s) 8,077,290 (629 MB/s) -11,412,001 -58.56% + regexdna::variant5 17,406,769 (292 MB/s) 6,787,242 (748 MB/s) -10,619,527 -61.01% + regexdna::variant6 17,412,027 (291 MB/s) 6,577,777 (772 MB/s) -10,834,250 -62.22% + regexdna::variant7 19,509,193 (260 MB/s) 6,705,580 (758 MB/s) -12,803,613 -65.63% + regexdna::variant8 24,295,734 (209 MB/s) 6,818,785 (745 MB/s) -17,476,949 -71.93% + regexdna::variant9 22,541,558 (225 MB/s) 6,821,453 (745 MB/s) -15,720,105 -69.74% + sherlock::before_after_holmes 4,583,804 (129 MB/s) 1,029,866 (577 MB/s) -3,553,938 -77.53% + sherlock::before_holmes 4,640,546 (128 MB/s) 76,633 (7763 MB/s) -4,563,913 -98.35% + sherlock::holmes_cochar_watson 509,088 (1168 MB/s) 144,725 (4110 MB/s) -364,363 -71.57% + sherlock::ing_suffix 1,865,631 (318 MB/s) 436,202 (1363 MB/s) -1,429,429 -76.62% + sherlock::ing_suffix_limited_space 4,922,883 (120 MB/s) 1,182,943 (502 MB/s) -3,739,940 -75.97% + sherlock::letters 9,848,144 (60 MB/s) 24,390,452 (24 MB/s) 14,542,308 147.67% + sherlock::letters_lower 9,723,642 (61 MB/s) 23,784,108 (25 MB/s) 14,060,466 144.60% + sherlock::letters_upper 1,762,773 (337 MB/s) 1,993,838 (298 MB/s) 231,065 13.11% + sherlock::line_boundary_sherlock_holmes 206,367 (2882 MB/s) 999,414 (595 MB/s) 793,047 384.29% + sherlock::name_alt1 485,953 (1224 MB/s) 34,298 (17345 MB/s) -451,655 -92.94% + sherlock::name_alt2 483,813 (1229 MB/s) 124,226 (4789 MB/s) -359,587 -74.32% + sherlock::name_alt3 903,013 (658 MB/s) 137,742 (4319 MB/s) -765,271 -84.75% + sherlock::name_alt3_nocase 2,993,633 (198 MB/s) 1,293,763 (459 MB/s) -1,699,870 -56.78% + sherlock::name_alt4 78,831 (7546 MB/s) 164,900 (3607 MB/s) 86,069 109.18% + sherlock::name_alt4_nocase 1,647,202 (361 MB/s) 235,023 (2531 MB/s) -1,412,179 -85.73% + sherlock::name_alt5 678,798 (876 MB/s) 127,928 (4650 MB/s) -550,870 -81.15% + sherlock::name_alt5_nocase 1,792,461 (331 
MB/s) 659,591 (901 MB/s) -1,132,870 -63.20% + sherlock::name_holmes 406,138 (1464 MB/s) 40,902 (14545 MB/s) -365,236 -89.93% + sherlock::name_holmes_nocase 517,884 (1148 MB/s) 198,658 (2994 MB/s) -319,226 -61.64% + sherlock::name_sherlock 282,357 (2107 MB/s) 68,924 (8631 MB/s) -213,433 -75.59% + sherlock::name_sherlock_holmes 207,894 (2861 MB/s) 31,640 (18803 MB/s) -176,254 -84.78% + sherlock::name_sherlock_holmes_nocase 1,122,583 (529 MB/s) 173,522 (3428 MB/s) -949,061 -84.54% + sherlock::name_sherlock_nocase 1,092,252 (544 MB/s) 170,888 (3481 MB/s) -921,364 -84.35% + sherlock::name_whitespace 280,360 (2122 MB/s) 84,314 (7056 MB/s) -196,046 -69.93% + sherlock::no_match_common 436,303 (1363 MB/s) 20,727 (28703 MB/s) -415,576 -95.25% + sherlock::no_match_really_common 417,686 (1424 MB/s) 381,476 (1559 MB/s) -36,210 -8.67% + sherlock::no_match_uncommon 28,504 (20871 MB/s) 20,786 (28621 MB/s) -7,718 -27.08% + sherlock::quotes 541,513 (1098 MB/s) 531,487 (1119 MB/s) -10,026 -1.85% + sherlock::repeated_class_negation 5,489,721 (108 MB/s) 85,881,944 (6 MB/s) 80,392,223 1464.41% + sherlock::the_lower 680,710 (873 MB/s) 654,110 (909 MB/s) -26,600 -3.91% + sherlock::the_nocase 737,040 (807 MB/s) 474,456 (1253 MB/s) -262,584 -35.63% + sherlock::the_upper 50,026 (11892 MB/s) 43,746 (13599 MB/s) -6,280 -12.55% + sherlock::the_whitespace 885,922 (671 MB/s) 1,181,974 (503 MB/s) 296,052 33.42% + sherlock::word_ending_n 5,424,773 (109 MB/s) 1,925,578 (308 MB/s) -3,499,195 -64.50% + sherlock::words 5,753,231 (103 MB/s) 9,697,201 (61 MB/s) 3,943,970 68.55% diff --git a/third_party/rust/regex/record/old-bench-log/05/re2 b/third_party/rust/regex/record/old-bench-log/05/re2 new file mode 100644 index 0000000000..d1f0bea4a3 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 102 ns/iter (+/- 3) = 3823 MB/s +test misc::anchored_literal_long_non_match ... 
bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 95 ns/iter (+/- 8) = 273 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 0) = 1529 MB/s +test misc::easy0_1K ... bench: 149 ns/iter (+/- 10) = 7053 MB/s +test misc::easy0_1MB ... bench: 29,234 ns/iter (+/- 886) = 35869 MB/s +test misc::easy0_32 ... bench: 126 ns/iter (+/- 4) = 468 MB/s +test misc::easy0_32K ... bench: 1,266 ns/iter (+/- 42) = 25904 MB/s +test misc::easy1_1K ... bench: 130 ns/iter (+/- 4) = 8030 MB/s +test misc::easy1_1MB ... bench: 29,218 ns/iter (+/- 791) = 35888 MB/s +test misc::easy1_32 ... bench: 112 ns/iter (+/- 7) = 464 MB/s +test misc::easy1_32K ... bench: 1,251 ns/iter (+/- 45) = 26209 MB/s +test misc::hard_1K ... bench: 2,357 ns/iter (+/- 33) = 445 MB/s +test misc::hard_1MB ... bench: 2,149,909 ns/iter (+/- 151,258) = 487 MB/s +test misc::hard_32 ... bench: 195 ns/iter (+/- 16) = 302 MB/s +test misc::hard_32K ... bench: 105,137 ns/iter (+/- 6,252) = 311 MB/s +test misc::literal ... bench: 89 ns/iter (+/- 3) = 573 MB/s +test misc::long_needle1 ... bench: 170,090 ns/iter (+/- 5,891) = 587 MB/s +test misc::long_needle2 ... bench: 174,341 ns/iter (+/- 7,949) = 573 MB/s +test misc::match_class ... bench: 220 ns/iter (+/- 16) = 368 MB/s +test misc::match_class_in_range ... bench: 215 ns/iter (+/- 16) = 376 MB/s +test misc::match_class_unicode ... bench: 382 ns/iter (+/- 27) = 421 MB/s +test misc::medium_1K ... bench: 1,939 ns/iter (+/- 153) = 542 MB/s +test misc::medium_1MB ... bench: 1,775,335 ns/iter (+/- 91,241) = 590 MB/s +test misc::medium_32 ... bench: 190 ns/iter (+/- 12) = 315 MB/s +test misc::medium_32K ... bench: 83,245 ns/iter (+/- 5,385) = 393 MB/s +test misc::no_exponential ... bench: 269 ns/iter (+/- 22) = 371 MB/s +test misc::not_literal ... bench: 167 ns/iter (+/- 13) = 305 MB/s +test misc::one_pass_long_prefix ... bench: 84 ns/iter (+/- 7) = 309 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 137 ns/iter (+/- 12) = 189 MB/s +test misc::one_pass_short ... bench: 108 ns/iter (+/- 3) = 157 MB/s +test misc::one_pass_short_not ... bench: 105 ns/iter (+/- 6) = 161 MB/s +test misc::reallyhard2_1K ... bench: 1,811 ns/iter (+/- 44) = 574 MB/s +test misc::reallyhard_1K ... bench: 2,324 ns/iter (+/- 223) = 452 MB/s +test misc::reallyhard_1MB ... bench: 2,033,298 ns/iter (+/- 148,939) = 515 MB/s +test misc::reallyhard_32 ... bench: 185 ns/iter (+/- 8) = 318 MB/s +test misc::reallyhard_32K ... bench: 83,263 ns/iter (+/- 4,231) = 393 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 13,501 ns/iter (+/- 1,380) = 592 MB/s +test regexdna::find_new_lines ... bench: 31,464,067 ns/iter (+/- 2,248,457) = 161 MB/s +test regexdna::subst1 ... bench: 5,257,629 ns/iter (+/- 142,910) = 966 MB/s +test regexdna::subst10 ... bench: 5,189,384 ns/iter (+/- 130,525) = 979 MB/s +test regexdna::subst11 ... bench: 5,261,936 ns/iter (+/- 309,355) = 966 MB/s +test regexdna::subst2 ... bench: 5,268,281 ns/iter (+/- 348,592) = 964 MB/s +test regexdna::subst3 ... bench: 5,245,664 ns/iter (+/- 403,198) = 969 MB/s +test regexdna::subst4 ... bench: 5,264,833 ns/iter (+/- 312,063) = 965 MB/s +test regexdna::subst5 ... bench: 5,181,850 ns/iter (+/- 117,306) = 981 MB/s +test regexdna::subst6 ... bench: 5,200,226 ns/iter (+/- 124,723) = 977 MB/s +test regexdna::subst7 ... bench: 5,233,678 ns/iter (+/- 367,749) = 971 MB/s +test regexdna::subst8 ... bench: 5,242,400 ns/iter (+/- 317,859) = 969 MB/s +test regexdna::subst9 ... bench: 5,325,464 ns/iter (+/- 395,485) = 954 MB/s +test regexdna::variant1 ... bench: 24,377,246 ns/iter (+/- 733,355) = 208 MB/s +test regexdna::variant2 ... bench: 26,405,686 ns/iter (+/- 771,755) = 192 MB/s +test regexdna::variant3 ... bench: 25,130,419 ns/iter (+/- 1,245,527) = 202 MB/s +test regexdna::variant4 ... bench: 32,527,780 ns/iter (+/- 5,073,721) = 156 MB/s +test regexdna::variant5 ... 
bench: 31,081,800 ns/iter (+/- 1,256,796) = 163 MB/s +test regexdna::variant6 ... bench: 28,744,478 ns/iter (+/- 1,243,565) = 176 MB/s +test regexdna::variant7 ... bench: 26,693,756 ns/iter (+/- 886,566) = 190 MB/s +test regexdna::variant8 ... bench: 21,478,184 ns/iter (+/- 1,374,415) = 236 MB/s +test regexdna::variant9 ... bench: 18,639,814 ns/iter (+/- 519,136) = 272 MB/s +test sherlock::before_after_holmes ... bench: 1,552,265 ns/iter (+/- 105,467) = 383 MB/s +test sherlock::before_holmes ... bench: 1,360,446 ns/iter (+/- 111,123) = 437 MB/s +test sherlock::everything_greedy ... bench: 6,356,610 ns/iter (+/- 343,163) = 93 MB/s +test sherlock::everything_greedy_nl ... bench: 2,380,946 ns/iter (+/- 36,936) = 249 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,144,439 ns/iter (+/- 25,948) = 519 MB/s +test sherlock::holmes_coword_watson ... bench: 1,503,311 ns/iter (+/- 99,075) = 395 MB/s +test sherlock::ing_suffix ... bench: 3,003,144 ns/iter (+/- 239,408) = 198 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,721,656 ns/iter (+/- 129,795) = 345 MB/s +test sherlock::letters ... bench: 73,833,131 ns/iter (+/- 2,542,107) = 8 MB/s +test sherlock::letters_lower ... bench: 72,250,289 ns/iter (+/- 1,280,826) = 8 MB/s +test sherlock::letters_upper ... bench: 3,397,481 ns/iter (+/- 160,294) = 175 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 3,694,486 ns/iter (+/- 403,679) = 161 MB/s +test sherlock::name_alt1 ... bench: 70,121 ns/iter (+/- 3,926) = 8484 MB/s +test sherlock::name_alt2 ... bench: 1,120,245 ns/iter (+/- 36,040) = 531 MB/s +test sherlock::name_alt3 ... bench: 1,247,630 ns/iter (+/- 127,226) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 2,894,586 ns/iter (+/- 201,023) = 205 MB/s +test sherlock::name_alt4 ... bench: 1,142,872 ns/iter (+/- 82,896) = 520 MB/s +test sherlock::name_alt4_nocase ... bench: 1,785,266 ns/iter (+/- 166,100) = 333 MB/s +test sherlock::name_alt5 ... 
bench: 1,167,553 ns/iter (+/- 91,672) = 509 MB/s +test sherlock::name_alt5_nocase ... bench: 2,023,732 ns/iter (+/- 74,558) = 293 MB/s +test sherlock::name_holmes ... bench: 126,480 ns/iter (+/- 6,959) = 4703 MB/s +test sherlock::name_holmes_nocase ... bench: 1,420,548 ns/iter (+/- 75,407) = 418 MB/s +test sherlock::name_sherlock ... bench: 57,090 ns/iter (+/- 1,392) = 10420 MB/s +test sherlock::name_sherlock_holmes ... bench: 57,965 ns/iter (+/- 2,996) = 10263 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,837,721 ns/iter (+/- 66,965) = 323 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,672,451 ns/iter (+/- 204,590) = 355 MB/s +test sherlock::name_whitespace ... bench: 60,342 ns/iter (+/- 3,290) = 9859 MB/s +test sherlock::no_match_common ... bench: 434,496 ns/iter (+/- 35,617) = 1369 MB/s +test sherlock::no_match_really_common ... bench: 431,778 ns/iter (+/- 11,799) = 1377 MB/s +test sherlock::no_match_uncommon ... bench: 19,313 ns/iter (+/- 1,167) = 30804 MB/s +test sherlock::quotes ... bench: 1,301,485 ns/iter (+/- 92,772) = 457 MB/s +test sherlock::the_lower ... bench: 1,846,403 ns/iter (+/- 39,799) = 322 MB/s +test sherlock::the_nocase ... bench: 2,956,115 ns/iter (+/- 136,011) = 201 MB/s +test sherlock::the_upper ... bench: 165,976 ns/iter (+/- 5,838) = 3584 MB/s +test sherlock::the_whitespace ... bench: 1,816,669 ns/iter (+/- 117,437) = 327 MB/s +test sherlock::word_ending_n ... bench: 2,601,847 ns/iter (+/- 166,024) = 228 MB/s +test sherlock::words ... bench: 21,137,049 ns/iter (+/- 750,253) = 28 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/re2-vs-rust b/third_party/rust/regex/record/old-bench-log/05/re2-vs-rust new file mode 100644 index 0000000000..180e431db4 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/re2-vs-rust @@ -0,0 +1,97 @@ + name re2 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 102 (3823 MB/s) 24 (16250 MB/s) -78 -76.47% + misc::anchored_literal_long_non_match 20 (19500 MB/s) 27 (14444 MB/s) 7 35.00% + misc::anchored_literal_short_match 95 (273 MB/s) 22 (1181 MB/s) -73 -76.84% + misc::anchored_literal_short_non_match 17 (1529 MB/s) 24 (1083 MB/s) 7 41.18% + misc::easy0_1K 149 (7053 MB/s) 16 (65687 MB/s) -133 -89.26% + misc::easy0_1MB 29,234 (35869 MB/s) 20 (52430150 MB/s) -29,214 -99.93% + misc::easy0_32 126 (468 MB/s) 16 (3687 MB/s) -110 -87.30% + misc::easy0_32K 1,266 (25904 MB/s) 16 (2049687 MB/s) -1,250 -98.74% + misc::easy1_1K 130 (8030 MB/s) 48 (21750 MB/s) -82 -63.08% + misc::easy1_1MB 29,218 (35888 MB/s) 48 (21845750 MB/s) -29,170 -99.84% + misc::easy1_32 112 (464 MB/s) 46 (1130 MB/s) -66 -58.93% + misc::easy1_32K 1,251 (26209 MB/s) 47 (697617 MB/s) -1,204 -96.24% + misc::hard_1K 2,357 (445 MB/s) 58 (18120 MB/s) -2,299 -97.54% + misc::hard_1MB 2,149,909 (487 MB/s) 61 (17190213 MB/s) -2,149,848 -100.00% + misc::hard_32 195 (302 MB/s) 58 (1017 MB/s) -137 -70.26% + misc::hard_32K 105,137 (311 MB/s) 56 (585625 MB/s) -105,081 -99.95% + misc::literal 89 (573 MB/s) 16 (3187 MB/s) -73 -82.02% + misc::long_needle1 170,090 (587 MB/s) 2,226 (44924 MB/s) -167,864 -98.69% + misc::long_needle2 174,341 (573 MB/s) 576,997 (173 MB/s) 402,656 230.96% + misc::match_class 220 (368 MB/s) 65 (1246 MB/s) -155 -70.45% + misc::match_class_in_range 215 (376 MB/s) 27 (3000 MB/s) -188 -87.44% + misc::match_class_unicode 382 (421 MB/s) 283 (568 MB/s) -99 -25.92% + misc::medium_1K 1,939 (542 MB/s) 16 (65750 MB/s) -1,923 -99.17% + misc::medium_1MB 1,775,335 
(590 MB/s) 21 (49933523 MB/s) -1,775,314 -100.00% + misc::medium_32 190 (315 MB/s) 17 (3529 MB/s) -173 -91.05% + misc::medium_32K 83,245 (393 MB/s) 17 (1929176 MB/s) -83,228 -99.98% + misc::no_exponential 269 (371 MB/s) 394 (253 MB/s) 125 46.47% + misc::not_literal 167 (305 MB/s) 105 (485 MB/s) -62 -37.13% + misc::one_pass_long_prefix 84 (309 MB/s) 68 (382 MB/s) -16 -19.05% + misc::one_pass_long_prefix_not 137 (189 MB/s) 58 (448 MB/s) -79 -57.66% + misc::one_pass_short 108 (157 MB/s) 45 (377 MB/s) -63 -58.33% + misc::one_pass_short_not 105 (161 MB/s) 50 (340 MB/s) -55 -52.38% + misc::reallyhard2_1K 1,811 (574 MB/s) 83 (12530 MB/s) -1,728 -95.42% + misc::reallyhard_1K 2,324 (452 MB/s) 1,822 (576 MB/s) -502 -21.60% + misc::reallyhard_1MB 2,033,298 (515 MB/s) 1,768,327 (592 MB/s) -264,971 -13.03% + misc::reallyhard_32 185 (318 MB/s) 121 (487 MB/s) -64 -34.59% + misc::reallyhard_32K 83,263 (393 MB/s) 56,375 (581 MB/s) -26,888 -32.29% + misc::reverse_suffix_no_quadratic 13,501 (592 MB/s) 5,803 (1378 MB/s) -7,698 -57.02% + regexdna::find_new_lines 31,464,067 (161 MB/s) 14,818,233 (343 MB/s) -16,645,834 -52.90% + regexdna::subst1 5,257,629 (966 MB/s) 896,790 (5668 MB/s) -4,360,839 -82.94% + regexdna::subst10 5,189,384 (979 MB/s) 957,325 (5310 MB/s) -4,232,059 -81.55% + regexdna::subst11 5,261,936 (966 MB/s) 917,248 (5542 MB/s) -4,344,688 -82.57% + regexdna::subst2 5,268,281 (964 MB/s) 892,129 (5698 MB/s) -4,376,152 -83.07% + regexdna::subst3 5,245,664 (969 MB/s) 929,250 (5470 MB/s) -4,316,414 -82.29% + regexdna::subst4 5,264,833 (965 MB/s) 872,581 (5825 MB/s) -4,392,252 -83.43% + regexdna::subst5 5,181,850 (981 MB/s) 875,804 (5804 MB/s) -4,306,046 -83.10% + regexdna::subst6 5,200,226 (977 MB/s) 884,639 (5746 MB/s) -4,315,587 -82.99% + regexdna::subst7 5,233,678 (971 MB/s) 872,791 (5824 MB/s) -4,360,887 -83.32% + regexdna::subst8 5,242,400 (969 MB/s) 873,833 (5817 MB/s) -4,368,567 -83.33% + regexdna::subst9 5,325,464 (954 MB/s) 886,744 (5732 MB/s) -4,438,720 -83.35% + 
regexdna::variant1 24,377,246 (208 MB/s) 3,699,267 (1374 MB/s) -20,677,979 -84.82% + regexdna::variant2 26,405,686 (192 MB/s) 6,760,952 (751 MB/s) -19,644,734 -74.40% + regexdna::variant3 25,130,419 (202 MB/s) 8,030,646 (633 MB/s) -17,099,773 -68.04% + regexdna::variant4 32,527,780 (156 MB/s) 8,077,290 (629 MB/s) -24,450,490 -75.17% + regexdna::variant5 31,081,800 (163 MB/s) 6,787,242 (748 MB/s) -24,294,558 -78.16% + regexdna::variant6 28,744,478 (176 MB/s) 6,577,777 (772 MB/s) -22,166,701 -77.12% + regexdna::variant7 26,693,756 (190 MB/s) 6,705,580 (758 MB/s) -19,988,176 -74.88% + regexdna::variant8 21,478,184 (236 MB/s) 6,818,785 (745 MB/s) -14,659,399 -68.25% + regexdna::variant9 18,639,814 (272 MB/s) 6,821,453 (745 MB/s) -11,818,361 -63.40% + sherlock::before_after_holmes 1,552,265 (383 MB/s) 1,029,866 (577 MB/s) -522,399 -33.65% + sherlock::before_holmes 1,360,446 (437 MB/s) 76,633 (7763 MB/s) -1,283,813 -94.37% + sherlock::everything_greedy 6,356,610 (93 MB/s) 2,375,079 (250 MB/s) -3,981,531 -62.64% + sherlock::everything_greedy_nl 2,380,946 (249 MB/s) 916,250 (649 MB/s) -1,464,696 -61.52% + sherlock::holmes_cochar_watson 1,144,439 (519 MB/s) 144,725 (4110 MB/s) -999,714 -87.35% + sherlock::holmes_coword_watson 1,503,311 (395 MB/s) 565,247 (1052 MB/s) -938,064 -62.40% + sherlock::ing_suffix 3,003,144 (198 MB/s) 436,202 (1363 MB/s) -2,566,942 -85.48% + sherlock::ing_suffix_limited_space 1,721,656 (345 MB/s) 1,182,943 (502 MB/s) -538,713 -31.29% + sherlock::letters 73,833,131 (8 MB/s) 24,390,452 (24 MB/s) -49,442,679 -66.97% + sherlock::letters_lower 72,250,289 (8 MB/s) 23,784,108 (25 MB/s) -48,466,181 -67.08% + sherlock::letters_upper 3,397,481 (175 MB/s) 1,993,838 (298 MB/s) -1,403,643 -41.31% + sherlock::line_boundary_sherlock_holmes 3,694,486 (161 MB/s) 999,414 (595 MB/s) -2,695,072 -72.95% + sherlock::name_alt1 70,121 (8484 MB/s) 34,298 (17345 MB/s) -35,823 -51.09% + sherlock::name_alt2 1,120,245 (531 MB/s) 124,226 (4789 MB/s) -996,019 -88.91% + 
sherlock::name_alt3 1,247,630 (476 MB/s) 137,742 (4319 MB/s) -1,109,888 -88.96% + sherlock::name_alt3_nocase 2,894,586 (205 MB/s) 1,293,763 (459 MB/s) -1,600,823 -55.30% + sherlock::name_alt4 1,142,872 (520 MB/s) 164,900 (3607 MB/s) -977,972 -85.57% + sherlock::name_alt4_nocase 1,785,266 (333 MB/s) 235,023 (2531 MB/s) -1,550,243 -86.84% + sherlock::name_alt5 1,167,553 (509 MB/s) 127,928 (4650 MB/s) -1,039,625 -89.04% + sherlock::name_alt5_nocase 2,023,732 (293 MB/s) 659,591 (901 MB/s) -1,364,141 -67.41% + sherlock::name_holmes 126,480 (4703 MB/s) 40,902 (14545 MB/s) -85,578 -67.66% + sherlock::name_holmes_nocase 1,420,548 (418 MB/s) 198,658 (2994 MB/s) -1,221,890 -86.02% + sherlock::name_sherlock 57,090 (10420 MB/s) 68,924 (8631 MB/s) 11,834 20.73% + sherlock::name_sherlock_holmes 57,965 (10263 MB/s) 31,640 (18803 MB/s) -26,325 -45.42% + sherlock::name_sherlock_holmes_nocase 1,837,721 (323 MB/s) 173,522 (3428 MB/s) -1,664,199 -90.56% + sherlock::name_sherlock_nocase 1,672,451 (355 MB/s) 170,888 (3481 MB/s) -1,501,563 -89.78% + sherlock::name_whitespace 60,342 (9859 MB/s) 84,314 (7056 MB/s) 23,972 39.73% + sherlock::no_match_common 434,496 (1369 MB/s) 20,727 (28703 MB/s) -413,769 -95.23% + sherlock::no_match_really_common 431,778 (1377 MB/s) 381,476 (1559 MB/s) -50,302 -11.65% + sherlock::no_match_uncommon 19,313 (30804 MB/s) 20,786 (28621 MB/s) 1,473 7.63% + sherlock::quotes 1,301,485 (457 MB/s) 531,487 (1119 MB/s) -769,998 -59.16% + sherlock::the_lower 1,846,403 (322 MB/s) 654,110 (909 MB/s) -1,192,293 -64.57% + sherlock::the_nocase 2,956,115 (201 MB/s) 474,456 (1253 MB/s) -2,481,659 -83.95% + sherlock::the_upper 165,976 (3584 MB/s) 43,746 (13599 MB/s) -122,230 -73.64% + sherlock::the_whitespace 1,816,669 (327 MB/s) 1,181,974 (503 MB/s) -634,695 -34.94% + sherlock::word_ending_n 2,601,847 (228 MB/s) 1,925,578 (308 MB/s) -676,269 -25.99% + sherlock::words 21,137,049 (28 MB/s) 9,697,201 (61 MB/s) -11,439,848 -54.12% diff --git 
a/third_party/rust/regex/record/old-bench-log/05/rust b/third_party/rust/regex/record/old-bench-log/05/rust new file mode 100644 index 0000000000..22848ccf54 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/rust @@ -0,0 +1,103 @@ + +running 98 tests +test misc::anchored_literal_long_match ... bench: 24 ns/iter (+/- 0) = 16250 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 0) = 14444 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::easy0_1K ... bench: 16 ns/iter (+/- 0) = 65687 MB/s +test misc::easy0_1MB ... bench: 20 ns/iter (+/- 0) = 52430150 MB/s +test misc::easy0_32 ... bench: 16 ns/iter (+/- 0) = 3687 MB/s +test misc::easy0_32K ... bench: 16 ns/iter (+/- 0) = 2049687 MB/s +test misc::easy1_1K ... bench: 48 ns/iter (+/- 2) = 21750 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 2) = 21845750 MB/s +test misc::easy1_32 ... bench: 46 ns/iter (+/- 0) = 1130 MB/s +test misc::easy1_32K ... bench: 47 ns/iter (+/- 0) = 697617 MB/s +test misc::hard_1K ... bench: 58 ns/iter (+/- 0) = 18120 MB/s +test misc::hard_1MB ... bench: 61 ns/iter (+/- 0) = 17190213 MB/s +test misc::hard_32 ... bench: 58 ns/iter (+/- 0) = 1017 MB/s +test misc::hard_32K ... bench: 56 ns/iter (+/- 2) = 585625 MB/s +test misc::literal ... bench: 16 ns/iter (+/- 0) = 3187 MB/s +test misc::long_needle1 ... bench: 2,226 ns/iter (+/- 139) = 44924 MB/s +test misc::long_needle2 ... bench: 576,997 ns/iter (+/- 21,660) = 173 MB/s +test misc::match_class ... bench: 65 ns/iter (+/- 3) = 1246 MB/s +test misc::match_class_in_range ... bench: 27 ns/iter (+/- 0) = 3000 MB/s +test misc::match_class_unicode ... bench: 283 ns/iter (+/- 15) = 568 MB/s +test misc::medium_1K ... bench: 16 ns/iter (+/- 0) = 65750 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 1) = 49933523 MB/s +test misc::medium_32 ... 
bench: 17 ns/iter (+/- 0) = 3529 MB/s +test misc::medium_32K ... bench: 17 ns/iter (+/- 0) = 1929176 MB/s +test misc::no_exponential ... bench: 394 ns/iter (+/- 0) = 253 MB/s +test misc::not_literal ... bench: 105 ns/iter (+/- 0) = 485 MB/s +test misc::one_pass_long_prefix ... bench: 68 ns/iter (+/- 0) = 382 MB/s +test misc::one_pass_long_prefix_not ... bench: 58 ns/iter (+/- 3) = 448 MB/s +test misc::one_pass_short ... bench: 45 ns/iter (+/- 2) = 377 MB/s +test misc::one_pass_short_not ... bench: 50 ns/iter (+/- 16) = 340 MB/s +test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 4) = 12530 MB/s +test misc::reallyhard_1K ... bench: 1,822 ns/iter (+/- 72) = 576 MB/s +test misc::reallyhard_1MB ... bench: 1,768,327 ns/iter (+/- 67,421) = 592 MB/s +test misc::reallyhard_32 ... bench: 121 ns/iter (+/- 4) = 487 MB/s +test misc::reallyhard_32K ... bench: 56,375 ns/iter (+/- 1,404) = 581 MB/s +test misc::replace_all ... bench: 142 ns/iter (+/- 0) +test misc::reverse_suffix_no_quadratic ... bench: 5,803 ns/iter (+/- 6) = 1378 MB/s +test regexdna::find_new_lines ... bench: 14,818,233 ns/iter (+/- 430,454) = 343 MB/s +test regexdna::subst1 ... bench: 896,790 ns/iter (+/- 2,273) = 5668 MB/s +test regexdna::subst10 ... bench: 957,325 ns/iter (+/- 7,490) = 5310 MB/s +test regexdna::subst11 ... bench: 917,248 ns/iter (+/- 12,886) = 5542 MB/s +test regexdna::subst2 ... bench: 892,129 ns/iter (+/- 36,230) = 5698 MB/s +test regexdna::subst3 ... bench: 929,250 ns/iter (+/- 38,312) = 5470 MB/s +test regexdna::subst4 ... bench: 872,581 ns/iter (+/- 27,431) = 5825 MB/s +test regexdna::subst5 ... bench: 875,804 ns/iter (+/- 30,611) = 5804 MB/s +test regexdna::subst6 ... bench: 884,639 ns/iter (+/- 44,927) = 5746 MB/s +test regexdna::subst7 ... bench: 872,791 ns/iter (+/- 31,810) = 5824 MB/s +test regexdna::subst8 ... bench: 873,833 ns/iter (+/- 37,335) = 5817 MB/s +test regexdna::subst9 ... bench: 886,744 ns/iter (+/- 42,880) = 5732 MB/s +test regexdna::variant1 ... 
bench: 3,699,267 ns/iter (+/- 134,945) = 1374 MB/s +test regexdna::variant2 ... bench: 6,760,952 ns/iter (+/- 228,082) = 751 MB/s +test regexdna::variant3 ... bench: 8,030,646 ns/iter (+/- 271,204) = 633 MB/s +test regexdna::variant4 ... bench: 8,077,290 ns/iter (+/- 266,264) = 629 MB/s +test regexdna::variant5 ... bench: 6,787,242 ns/iter (+/- 226,071) = 748 MB/s +test regexdna::variant6 ... bench: 6,577,777 ns/iter (+/- 226,332) = 772 MB/s +test regexdna::variant7 ... bench: 6,705,580 ns/iter (+/- 232,953) = 758 MB/s +test regexdna::variant8 ... bench: 6,818,785 ns/iter (+/- 241,075) = 745 MB/s +test regexdna::variant9 ... bench: 6,821,453 ns/iter (+/- 257,044) = 745 MB/s +test sherlock::before_after_holmes ... bench: 1,029,866 ns/iter (+/- 42,662) = 577 MB/s +test sherlock::before_holmes ... bench: 76,633 ns/iter (+/- 1,135) = 7763 MB/s +test sherlock::everything_greedy ... bench: 2,375,079 ns/iter (+/- 102,532) = 250 MB/s +test sherlock::everything_greedy_nl ... bench: 916,250 ns/iter (+/- 37,950) = 649 MB/s +test sherlock::holmes_cochar_watson ... bench: 144,725 ns/iter (+/- 8,793) = 4110 MB/s +test sherlock::holmes_coword_watson ... bench: 565,247 ns/iter (+/- 24,056) = 1052 MB/s +test sherlock::ing_suffix ... bench: 436,202 ns/iter (+/- 19,863) = 1363 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,182,943 ns/iter (+/- 38,658) = 502 MB/s +test sherlock::letters ... bench: 24,390,452 ns/iter (+/- 869,008) = 24 MB/s +test sherlock::letters_lower ... bench: 23,784,108 ns/iter (+/- 796,195) = 25 MB/s +test sherlock::letters_upper ... bench: 1,993,838 ns/iter (+/- 77,697) = 298 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 999,414 ns/iter (+/- 31,202) = 595 MB/s +test sherlock::name_alt1 ... bench: 34,298 ns/iter (+/- 1,091) = 17345 MB/s +test sherlock::name_alt2 ... bench: 124,226 ns/iter (+/- 5,579) = 4789 MB/s +test sherlock::name_alt3 ... bench: 137,742 ns/iter (+/- 6,496) = 4319 MB/s +test sherlock::name_alt3_nocase ... 
bench: 1,293,763 ns/iter (+/- 51,097) = 459 MB/s +test sherlock::name_alt4 ... bench: 164,900 ns/iter (+/- 10,023) = 3607 MB/s +test sherlock::name_alt4_nocase ... bench: 235,023 ns/iter (+/- 14,465) = 2531 MB/s +test sherlock::name_alt5 ... bench: 127,928 ns/iter (+/- 6,882) = 4650 MB/s +test sherlock::name_alt5_nocase ... bench: 659,591 ns/iter (+/- 20,587) = 901 MB/s +test sherlock::name_holmes ... bench: 40,902 ns/iter (+/- 402) = 14545 MB/s +test sherlock::name_holmes_nocase ... bench: 198,658 ns/iter (+/- 3,782) = 2994 MB/s +test sherlock::name_sherlock ... bench: 68,924 ns/iter (+/- 1,456) = 8631 MB/s +test sherlock::name_sherlock_holmes ... bench: 31,640 ns/iter (+/- 383) = 18803 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 173,522 ns/iter (+/- 7,812) = 3428 MB/s +test sherlock::name_sherlock_nocase ... bench: 170,888 ns/iter (+/- 612) = 3481 MB/s +test sherlock::name_whitespace ... bench: 84,314 ns/iter (+/- 508) = 7056 MB/s +test sherlock::no_match_common ... bench: 20,727 ns/iter (+/- 565) = 28703 MB/s +test sherlock::no_match_really_common ... bench: 381,476 ns/iter (+/- 2,338) = 1559 MB/s +test sherlock::no_match_uncommon ... bench: 20,786 ns/iter (+/- 717) = 28621 MB/s +test sherlock::quotes ... bench: 531,487 ns/iter (+/- 5,517) = 1119 MB/s +test sherlock::repeated_class_negation ... bench: 85,881,944 ns/iter (+/- 4,906,514) = 6 MB/s +test sherlock::the_lower ... bench: 654,110 ns/iter (+/- 34,542) = 909 MB/s +test sherlock::the_nocase ... bench: 474,456 ns/iter (+/- 16,549) = 1253 MB/s +test sherlock::the_upper ... bench: 43,746 ns/iter (+/- 579) = 13599 MB/s +test sherlock::the_whitespace ... bench: 1,181,974 ns/iter (+/- 3,005) = 503 MB/s +test sherlock::word_ending_n ... bench: 1,925,578 ns/iter (+/- 3,811) = 308 MB/s +test sherlock::words ... bench: 9,697,201 ns/iter (+/- 156,772) = 61 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 98 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/tcl b/third_party/rust/regex/record/old-bench-log/05/tcl new file mode 100644 index 0000000000..3e1778b719 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/tcl @@ -0,0 +1,94 @@ + +running 89 tests +test misc::anchored_literal_long_match ... bench: 662 ns/iter (+/- 12) = 589 MB/s +test misc::anchored_literal_long_non_match ... bench: 133 ns/iter (+/- 1) = 2932 MB/s +test misc::anchored_literal_short_match ... bench: 616 ns/iter (+/- 18) = 42 MB/s +test misc::anchored_literal_short_non_match ... bench: 122 ns/iter (+/- 1) = 213 MB/s +test misc::easy0_1K ... bench: 11,816 ns/iter (+/- 92) = 88 MB/s +test misc::easy0_1MB ... bench: 3,409,439 ns/iter (+/- 94,972) = 307 MB/s +test misc::easy0_32 ... bench: 8,785 ns/iter (+/- 183) = 6 MB/s +test misc::easy0_32K ... bench: 115,371 ns/iter (+/- 2,279) = 284 MB/s +test misc::easy1_1K ... bench: 7,038 ns/iter (+/- 145) = 148 MB/s +test misc::easy1_1MB ... bench: 3,396,028 ns/iter (+/- 100,173) = 308 MB/s +test misc::easy1_32 ... bench: 3,687 ns/iter (+/- 44) = 14 MB/s +test misc::easy1_32K ... bench: 109,689 ns/iter (+/- 3,757) = 298 MB/s +test misc::hard_1K ... bench: 14,836 ns/iter (+/- 518) = 70 MB/s +test misc::hard_1MB ... bench: 3,376,015 ns/iter (+/- 95,045) = 310 MB/s +test misc::hard_32 ... bench: 11,278 ns/iter (+/- 389) = 5 MB/s +test misc::hard_32K ... bench: 115,400 ns/iter (+/- 4,738) = 284 MB/s +test misc::literal ... bench: 511 ns/iter (+/- 11) = 99 MB/s +test misc::long_needle1 ... bench: 18,076,901 ns/iter (+/- 523,761) = 5 MB/s +test misc::long_needle2 ... bench: 18,497,725 ns/iter (+/- 465,516) = 5 MB/s +test misc::match_class ... bench: 620 ns/iter (+/- 23) = 130 MB/s +test misc::match_class_in_range ... bench: 605 ns/iter (+/- 26) = 133 MB/s +test misc::medium_1K ... bench: 12,355 ns/iter (+/- 390) = 85 MB/s +test misc::medium_1MB ... 
bench: 3,410,978 ns/iter (+/- 112,021) = 307 MB/s +test misc::medium_32 ... bench: 9,086 ns/iter (+/- 287) = 6 MB/s +test misc::medium_32K ... bench: 116,944 ns/iter (+/- 5,654) = 280 MB/s +test misc::no_exponential ... bench: 2,379,518 ns/iter (+/- 92,628) +test misc::not_literal ... bench: 1,979 ns/iter (+/- 116) = 25 MB/s +test misc::one_pass_long_prefix ... bench: 6,932 ns/iter (+/- 464) = 3 MB/s +test misc::one_pass_long_prefix_not ... bench: 6,242 ns/iter (+/- 384) = 4 MB/s +test misc::one_pass_short ... bench: 630 ns/iter (+/- 42) = 26 MB/s +test misc::one_pass_short_not ... bench: 718 ns/iter (+/- 64) = 23 MB/s +test misc::reallyhard2_1K ... bench: 108,421 ns/iter (+/- 6,489) = 9 MB/s +test misc::reallyhard_1K ... bench: 14,330 ns/iter (+/- 814) = 73 MB/s +test misc::reallyhard_1MB ... bench: 3,287,965 ns/iter (+/- 203,546) = 318 MB/s +test misc::reallyhard_32 ... bench: 11,193 ns/iter (+/- 683) = 5 MB/s +test misc::reallyhard_32K ... bench: 112,731 ns/iter (+/- 5,966) = 290 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 26,907 ns/iter (+/- 2,396) = 297 MB/s +test regexdna::find_new_lines ... bench: 48,223,361 ns/iter (+/- 2,855,654) = 105 MB/s +test regexdna::subst1 ... bench: 27,177,359 ns/iter (+/- 1,359,987) = 187 MB/s +test regexdna::subst10 ... bench: 26,722,144 ns/iter (+/- 1,090,216) = 190 MB/s +test regexdna::subst11 ... bench: 27,382,875 ns/iter (+/- 1,656,754) = 185 MB/s +test regexdna::subst2 ... bench: 26,957,766 ns/iter (+/- 1,433,630) = 188 MB/s +test regexdna::subst3 ... bench: 27,195,925 ns/iter (+/- 1,828,460) = 186 MB/s +test regexdna::subst4 ... bench: 26,342,249 ns/iter (+/- 1,949,172) = 192 MB/s +test regexdna::subst5 ... bench: 26,543,675 ns/iter (+/- 2,143,336) = 191 MB/s +test regexdna::subst6 ... bench: 26,185,452 ns/iter (+/- 2,199,220) = 194 MB/s +test regexdna::subst7 ... bench: 26,338,573 ns/iter (+/- 2,124,778) = 193 MB/s +test regexdna::subst8 ... 
bench: 26,468,652 ns/iter (+/- 1,923,567) = 192 MB/s +test regexdna::subst9 ... bench: 26,487,784 ns/iter (+/- 1,250,319) = 191 MB/s +test regexdna::variant1 ... bench: 16,325,983 ns/iter (+/- 491,000) = 311 MB/s +test regexdna::variant2 ... bench: 16,845,952 ns/iter (+/- 470,062) = 301 MB/s +test regexdna::variant3 ... bench: 19,258,030 ns/iter (+/- 525,045) = 263 MB/s +test regexdna::variant4 ... bench: 18,018,713 ns/iter (+/- 1,235,670) = 282 MB/s +test regexdna::variant5 ... bench: 19,583,528 ns/iter (+/- 1,756,762) = 259 MB/s +test regexdna::variant6 ... bench: 17,630,308 ns/iter (+/- 973,191) = 288 MB/s +test regexdna::variant7 ... bench: 17,121,666 ns/iter (+/- 1,274,478) = 296 MB/s +test regexdna::variant8 ... bench: 17,154,863 ns/iter (+/- 425,504) = 296 MB/s +test regexdna::variant9 ... bench: 17,930,482 ns/iter (+/- 587,712) = 283 MB/s +test sherlock::before_after_holmes ... bench: 2,600,503 ns/iter (+/- 383,440) = 228 MB/s +test sherlock::before_holmes ... bench: 3,145,648 ns/iter (+/- 37,316) = 189 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,668,355 ns/iter (+/- 193,724) = 222 MB/s +test sherlock::ing_suffix ... bench: 5,638,296 ns/iter (+/- 69,345) = 105 MB/s +test sherlock::ing_suffix_limited_space ... bench: 22,466,946 ns/iter (+/- 659,956) = 26 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,251,996 ns/iter (+/- 66,639) = 264 MB/s +test sherlock::name_alt1 ... bench: 2,276,056 ns/iter (+/- 64,088) = 261 MB/s +test sherlock::name_alt2 ... bench: 3,196,348 ns/iter (+/- 202,979) = 186 MB/s +test sherlock::name_alt3 ... bench: 5,260,374 ns/iter (+/- 426,028) = 113 MB/s +test sherlock::name_alt3_nocase ... bench: 8,529,394 ns/iter (+/- 558,731) = 69 MB/s +test sherlock::name_alt4 ... bench: 2,787,972 ns/iter (+/- 153,839) = 213 MB/s +test sherlock::name_alt4_nocase ... bench: 3,370,452 ns/iter (+/- 140,385) = 176 MB/s +test sherlock::name_alt5 ... 
bench: 3,795,793 ns/iter (+/- 182,240) = 156 MB/s +test sherlock::name_alt5_nocase ... bench: 4,691,422 ns/iter (+/- 161,515) = 126 MB/s +test sherlock::name_holmes ... bench: 2,513,139 ns/iter (+/- 72,157) = 236 MB/s +test sherlock::name_holmes_nocase ... bench: 2,636,441 ns/iter (+/- 78,402) = 225 MB/s +test sherlock::name_sherlock ... bench: 2,015,753 ns/iter (+/- 104,000) = 295 MB/s +test sherlock::name_sherlock_holmes ... bench: 2,180,684 ns/iter (+/- 162,201) = 272 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 2,306,664 ns/iter (+/- 165,960) = 257 MB/s +test sherlock::name_sherlock_nocase ... bench: 2,065,630 ns/iter (+/- 155,223) = 288 MB/s +test sherlock::name_whitespace ... bench: 2,266,188 ns/iter (+/- 173,380) = 262 MB/s +test sherlock::no_match_common ... bench: 1,881,887 ns/iter (+/- 123,883) = 316 MB/s +test sherlock::no_match_really_common ... bench: 1,804,352 ns/iter (+/- 33,396) = 329 MB/s +test sherlock::no_match_uncommon ... bench: 1,809,300 ns/iter (+/- 123,888) = 328 MB/s +test sherlock::quotes ... bench: 9,682,507 ns/iter (+/- 1,200,909) = 61 MB/s +test sherlock::repeated_class_negation ... bench: 68,600,251 ns/iter (+/- 2,043,582) = 8 MB/s +test sherlock::the_lower ... bench: 6,849,558 ns/iter (+/- 517,709) = 86 MB/s +test sherlock::the_nocase ... bench: 7,354,742 ns/iter (+/- 390,834) = 80 MB/s +test sherlock::the_upper ... bench: 2,442,364 ns/iter (+/- 174,452) = 243 MB/s +test sherlock::the_whitespace ... bench: 9,210,338 ns/iter (+/- 651,675) = 64 MB/s +test sherlock::words ... bench: 47,863,652 ns/iter (+/- 3,536,998) = 12 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 89 measured + diff --git a/third_party/rust/regex/record/old-bench-log/05/tcl-vs-rust b/third_party/rust/regex/record/old-bench-log/05/tcl-vs-rust new file mode 100644 index 0000000000..0faefe97da --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/05/tcl-vs-rust @@ -0,0 +1,90 @@ + name tcl ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 662 (589 MB/s) 24 (16250 MB/s) -638 -96.37% + misc::anchored_literal_long_non_match 133 (2932 MB/s) 27 (14444 MB/s) -106 -79.70% + misc::anchored_literal_short_match 616 (42 MB/s) 22 (1181 MB/s) -594 -96.43% + misc::anchored_literal_short_non_match 122 (213 MB/s) 24 (1083 MB/s) -98 -80.33% + misc::easy0_1K 11,816 (88 MB/s) 16 (65687 MB/s) -11,800 -99.86% + misc::easy0_1MB 3,409,439 (307 MB/s) 20 (52430150 MB/s) -3,409,419 -100.00% + misc::easy0_32 8,785 (6 MB/s) 16 (3687 MB/s) -8,769 -99.82% + misc::easy0_32K 115,371 (284 MB/s) 16 (2049687 MB/s) -115,355 -99.99% + misc::easy1_1K 7,038 (148 MB/s) 48 (21750 MB/s) -6,990 -99.32% + misc::easy1_1MB 3,396,028 (308 MB/s) 48 (21845750 MB/s) -3,395,980 -100.00% + misc::easy1_32 3,687 (14 MB/s) 46 (1130 MB/s) -3,641 -98.75% + misc::easy1_32K 109,689 (298 MB/s) 47 (697617 MB/s) -109,642 -99.96% + misc::hard_1K 14,836 (70 MB/s) 58 (18120 MB/s) -14,778 -99.61% + misc::hard_1MB 3,376,015 (310 MB/s) 61 (17190213 MB/s) -3,375,954 -100.00% + misc::hard_32 11,278 (5 MB/s) 58 (1017 MB/s) -11,220 -99.49% + misc::hard_32K 115,400 (284 MB/s) 56 (585625 MB/s) -115,344 -99.95% + misc::literal 511 (99 MB/s) 16 (3187 MB/s) -495 -96.87% + misc::long_needle1 18,076,901 (5 MB/s) 2,226 (44924 MB/s) -18,074,675 -99.99% + misc::long_needle2 18,497,725 (5 MB/s) 576,997 (173 MB/s) -17,920,728 -96.88% + misc::match_class 620 (130 MB/s) 65 (1246 MB/s) -555 -89.52% + misc::match_class_in_range 605 (133 MB/s) 27 (3000 MB/s) -578 -95.54% + misc::medium_1K 12,355 (85 MB/s) 16 (65750 MB/s) -12,339 -99.87% + misc::medium_1MB 3,410,978 (307 MB/s) 21 
(49933523 MB/s) -3,410,957 -100.00% + misc::medium_32 9,086 (6 MB/s) 17 (3529 MB/s) -9,069 -99.81% + misc::medium_32K 116,944 (280 MB/s) 17 (1929176 MB/s) -116,927 -99.99% + misc::no_exponential 2,379,518 394 (253 MB/s) -2,379,124 -99.98% + misc::not_literal 1,979 (25 MB/s) 105 (485 MB/s) -1,874 -94.69% + misc::one_pass_long_prefix 6,932 (3 MB/s) 68 (382 MB/s) -6,864 -99.02% + misc::one_pass_long_prefix_not 6,242 (4 MB/s) 58 (448 MB/s) -6,184 -99.07% + misc::one_pass_short 630 (26 MB/s) 45 (377 MB/s) -585 -92.86% + misc::one_pass_short_not 718 (23 MB/s) 50 (340 MB/s) -668 -93.04% + misc::reallyhard2_1K 108,421 (9 MB/s) 83 (12530 MB/s) -108,338 -99.92% + misc::reallyhard_1K 14,330 (73 MB/s) 1,822 (576 MB/s) -12,508 -87.29% + misc::reallyhard_1MB 3,287,965 (318 MB/s) 1,768,327 (592 MB/s) -1,519,638 -46.22% + misc::reallyhard_32 11,193 (5 MB/s) 121 (487 MB/s) -11,072 -98.92% + misc::reallyhard_32K 112,731 (290 MB/s) 56,375 (581 MB/s) -56,356 -49.99% + misc::reverse_suffix_no_quadratic 26,907 (297 MB/s) 5,803 (1378 MB/s) -21,104 -78.43% + regexdna::find_new_lines 48,223,361 (105 MB/s) 14,818,233 (343 MB/s) -33,405,128 -69.27% + regexdna::subst1 27,177,359 (187 MB/s) 896,790 (5668 MB/s) -26,280,569 -96.70% + regexdna::subst10 26,722,144 (190 MB/s) 957,325 (5310 MB/s) -25,764,819 -96.42% + regexdna::subst11 27,382,875 (185 MB/s) 917,248 (5542 MB/s) -26,465,627 -96.65% + regexdna::subst2 26,957,766 (188 MB/s) 892,129 (5698 MB/s) -26,065,637 -96.69% + regexdna::subst3 27,195,925 (186 MB/s) 929,250 (5470 MB/s) -26,266,675 -96.58% + regexdna::subst4 26,342,249 (192 MB/s) 872,581 (5825 MB/s) -25,469,668 -96.69% + regexdna::subst5 26,543,675 (191 MB/s) 875,804 (5804 MB/s) -25,667,871 -96.70% + regexdna::subst6 26,185,452 (194 MB/s) 884,639 (5746 MB/s) -25,300,813 -96.62% + regexdna::subst7 26,338,573 (193 MB/s) 872,791 (5824 MB/s) -25,465,782 -96.69% + regexdna::subst8 26,468,652 (192 MB/s) 873,833 (5817 MB/s) -25,594,819 -96.70% + regexdna::subst9 26,487,784 (191 MB/s) 
886,744 (5732 MB/s) -25,601,040 -96.65% + regexdna::variant1 16,325,983 (311 MB/s) 3,699,267 (1374 MB/s) -12,626,716 -77.34% + regexdna::variant2 16,845,952 (301 MB/s) 6,760,952 (751 MB/s) -10,085,000 -59.87% + regexdna::variant3 19,258,030 (263 MB/s) 8,030,646 (633 MB/s) -11,227,384 -58.30% + regexdna::variant4 18,018,713 (282 MB/s) 8,077,290 (629 MB/s) -9,941,423 -55.17% + regexdna::variant5 19,583,528 (259 MB/s) 6,787,242 (748 MB/s) -12,796,286 -65.34% + regexdna::variant6 17,630,308 (288 MB/s) 6,577,777 (772 MB/s) -11,052,531 -62.69% + regexdna::variant7 17,121,666 (296 MB/s) 6,705,580 (758 MB/s) -10,416,086 -60.84% + regexdna::variant8 17,154,863 (296 MB/s) 6,818,785 (745 MB/s) -10,336,078 -60.25% + regexdna::variant9 17,930,482 (283 MB/s) 6,821,453 (745 MB/s) -11,109,029 -61.96% + sherlock::before_after_holmes 2,600,503 (228 MB/s) 1,029,866 (577 MB/s) -1,570,637 -60.40% + sherlock::before_holmes 3,145,648 (189 MB/s) 76,633 (7763 MB/s) -3,069,015 -97.56% + sherlock::holmes_cochar_watson 2,668,355 (222 MB/s) 144,725 (4110 MB/s) -2,523,630 -94.58% + sherlock::ing_suffix 5,638,296 (105 MB/s) 436,202 (1363 MB/s) -5,202,094 -92.26% + sherlock::ing_suffix_limited_space 22,466,946 (26 MB/s) 1,182,943 (502 MB/s) -21,284,003 -94.73% + sherlock::line_boundary_sherlock_holmes 2,251,996 (264 MB/s) 999,414 (595 MB/s) -1,252,582 -55.62% + sherlock::name_alt1 2,276,056 (261 MB/s) 34,298 (17345 MB/s) -2,241,758 -98.49% + sherlock::name_alt2 3,196,348 (186 MB/s) 124,226 (4789 MB/s) -3,072,122 -96.11% + sherlock::name_alt3 5,260,374 (113 MB/s) 137,742 (4319 MB/s) -5,122,632 -97.38% + sherlock::name_alt3_nocase 8,529,394 (69 MB/s) 1,293,763 (459 MB/s) -7,235,631 -84.83% + sherlock::name_alt4 2,787,972 (213 MB/s) 164,900 (3607 MB/s) -2,623,072 -94.09% + sherlock::name_alt4_nocase 3,370,452 (176 MB/s) 235,023 (2531 MB/s) -3,135,429 -93.03% + sherlock::name_alt5 3,795,793 (156 MB/s) 127,928 (4650 MB/s) -3,667,865 -96.63% + sherlock::name_alt5_nocase 4,691,422 (126 MB/s) 659,591 
(901 MB/s) -4,031,831 -85.94% + sherlock::name_holmes 2,513,139 (236 MB/s) 40,902 (14545 MB/s) -2,472,237 -98.37% + sherlock::name_holmes_nocase 2,636,441 (225 MB/s) 198,658 (2994 MB/s) -2,437,783 -92.46% + sherlock::name_sherlock 2,015,753 (295 MB/s) 68,924 (8631 MB/s) -1,946,829 -96.58% + sherlock::name_sherlock_holmes 2,180,684 (272 MB/s) 31,640 (18803 MB/s) -2,149,044 -98.55% + sherlock::name_sherlock_holmes_nocase 2,306,664 (257 MB/s) 173,522 (3428 MB/s) -2,133,142 -92.48% + sherlock::name_sherlock_nocase 2,065,630 (288 MB/s) 170,888 (3481 MB/s) -1,894,742 -91.73% + sherlock::name_whitespace 2,266,188 (262 MB/s) 84,314 (7056 MB/s) -2,181,874 -96.28% + sherlock::no_match_common 1,881,887 (316 MB/s) 20,727 (28703 MB/s) -1,861,160 -98.90% + sherlock::no_match_really_common 1,804,352 (329 MB/s) 381,476 (1559 MB/s) -1,422,876 -78.86% + sherlock::no_match_uncommon 1,809,300 (328 MB/s) 20,786 (28621 MB/s) -1,788,514 -98.85% + sherlock::quotes 9,682,507 (61 MB/s) 531,487 (1119 MB/s) -9,151,020 -94.51% + sherlock::repeated_class_negation 68,600,251 (8 MB/s) 85,881,944 (6 MB/s) 17,281,693 25.19% + sherlock::the_lower 6,849,558 (86 MB/s) 654,110 (909 MB/s) -6,195,448 -90.45% + sherlock::the_nocase 7,354,742 (80 MB/s) 474,456 (1253 MB/s) -6,880,286 -93.55% + sherlock::the_upper 2,442,364 (243 MB/s) 43,746 (13599 MB/s) -2,398,618 -98.21% + sherlock::the_whitespace 9,210,338 (64 MB/s) 1,181,974 (503 MB/s) -8,028,364 -87.17% + sherlock::words 47,863,652 (12 MB/s) 9,697,201 (61 MB/s) -38,166,451 -79.74% diff --git a/third_party/rust/regex/record/old-bench-log/06/dphobos-dmd b/third_party/rust/regex/record/old-bench-log/06/dphobos-dmd new file mode 100644 index 0000000000..bffdd29d6f --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/dphobos-dmd @@ -0,0 +1,98 @@ +running 95 tests +test misc::anchored_literal_long_match ... bench: 356 ns/iter (+/- 4) = 1095 MB/s +test misc::anchored_literal_long_non_match ... 
bench: 280 ns/iter (+/- 18) = 1392 MB/s +test misc::anchored_literal_short_match ... bench: 351 ns/iter (+/- 16) = 74 MB/s +test misc::anchored_literal_short_non_match ... bench: 274 ns/iter (+/- 17) = 94 MB/s +test misc::easy0_1K ... bench: 810 ns/iter (+/- 38) = 1297 MB/s +test misc::easy0_1MB ... bench: 25,296 ns/iter (+/- 3,592) = 41453 MB/s +test misc::easy0_32 ... bench: 745 ns/iter (+/- 60) = 79 MB/s +test misc::easy0_32K ... bench: 1,111 ns/iter (+/- 82) = 29518 MB/s +test misc::easy1_1K ... bench: 730 ns/iter (+/- 20) = 1430 MB/s +test misc::easy1_1MB ... bench: 25,442 ns/iter (+/- 2,076) = 41215 MB/s +test misc::easy1_32 ... bench: 730 ns/iter (+/- 79) = 71 MB/s +test misc::easy1_32K ... bench: 1,104 ns/iter (+/- 93) = 29699 MB/s +test misc::hard_1K ... bench: 18,238 ns/iter (+/- 1,173) = 57 MB/s +test misc::hard_1MB ... bench: 19,302,344 ns/iter (+/- 2,039,538) = 54 MB/s +test misc::hard_32 ... bench: 2,508 ns/iter (+/- 119) = 23 MB/s +test misc::hard_32K ... bench: 666,948 ns/iter (+/- 58,067) = 49 MB/s +test misc::literal ... bench: 196 ns/iter (+/- 17) = 260 MB/s +test misc::long_needle1 ... bench: 82,532 ns/iter (+/- 4,618) = 1211 MB/s +test misc::long_needle2 ... bench: 84,079 ns/iter (+/- 5,930) = 1189 MB/s +test misc::match_class ... bench: 300 ns/iter (+/- 41) = 270 MB/s +test misc::match_class_in_range ... bench: 258 ns/iter (+/- 16) = 313 MB/s +test misc::match_class_unicode ... bench: 1,563 ns/iter (+/- 171) = 103 MB/s +test misc::medium_1K ... bench: 1,541 ns/iter (+/- 127) = 682 MB/s +test misc::medium_1MB ... bench: 617,650 ns/iter (+/- 59,618) = 1697 MB/s +test misc::medium_32 ... bench: 985 ns/iter (+/- 62) = 60 MB/s +test misc::medium_32K ... bench: 19,948 ns/iter (+/- 1,388) = 1644 MB/s +test misc::no_exponential ... bench: 430,777 ns/iter (+/- 52,435) +test misc::not_literal ... bench: 1,202 ns/iter (+/- 60) = 42 MB/s +test misc::one_pass_long_prefix ... bench: 630 ns/iter (+/- 45) = 41 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 617 ns/iter (+/- 60) = 42 MB/s +test misc::one_pass_short ... bench: 1,102 ns/iter (+/- 38) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,481 ns/iter (+/- 44) = 11 MB/s +test misc::reallyhard2_1K ... bench: 40,749 ns/iter (+/- 2,027) = 25 MB/s +test misc::reallyhard_1K ... bench: 18,987 ns/iter (+/- 1,419) = 55 MB/s +test misc::reallyhard_1MB ... bench: 19,923,786 ns/iter (+/- 1,499,750) = 52 MB/s +test misc::reallyhard_32 ... bench: 2,369 ns/iter (+/- 115) = 24 MB/s +test misc::reallyhard_32K ... bench: 627,664 ns/iter (+/- 30,507) = 52 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,933 ns/iter (+/- 700) = 1621 MB/s +test regexdna::find_new_lines ... bench: 28,886,666 ns/iter (+/- 1,969,669) = 175 MB/s +test regexdna::subst1 ... bench: 6,722,884 ns/iter (+/- 431,722) = 756 MB/s +test regexdna::subst10 ... bench: 6,923,833 ns/iter (+/- 677,840) = 734 MB/s +test regexdna::subst11 ... bench: 6,917,738 ns/iter (+/- 306,829) = 734 MB/s +test regexdna::subst2 ... bench: 6,914,593 ns/iter (+/- 625,342) = 735 MB/s +test regexdna::subst3 ... bench: 6,582,793 ns/iter (+/- 297,052) = 772 MB/s +test regexdna::subst4 ... bench: 6,528,804 ns/iter (+/- 463,331) = 778 MB/s +test regexdna::subst5 ... bench: 6,886,457 ns/iter (+/- 1,015,943) = 738 MB/s +test regexdna::subst6 ... bench: 6,789,493 ns/iter (+/- 573,137) = 748 MB/s +test regexdna::subst7 ... bench: 6,533,609 ns/iter (+/- 372,293) = 778 MB/s +test regexdna::subst8 ... bench: 6,536,845 ns/iter (+/- 290,249) = 777 MB/s +test regexdna::subst9 ... bench: 6,509,834 ns/iter (+/- 402,426) = 780 MB/s +test regexdna::variant1 ... bench: 5,746,639 ns/iter (+/- 205,103) = 884 MB/s +test regexdna::variant2 ... bench: 7,661,372 ns/iter (+/- 145,811) = 663 MB/s +test regexdna::variant3 ... bench: 12,801,668 ns/iter (+/- 337,572) = 397 MB/s +test regexdna::variant4 ... bench: 11,109,679 ns/iter (+/- 357,680) = 457 MB/s +test regexdna::variant5 ... 
bench: 11,238,093 ns/iter (+/- 1,571,929) = 452 MB/s +test regexdna::variant6 ... bench: 8,453,224 ns/iter (+/- 185,044) = 601 MB/s +test regexdna::variant7 ... bench: 8,784,446 ns/iter (+/- 153,626) = 578 MB/s +test regexdna::variant8 ... bench: 11,151,797 ns/iter (+/- 366,593) = 455 MB/s +test regexdna::variant9 ... bench: 22,206,248 ns/iter (+/- 1,143,965) = 228 MB/s +test sherlock::before_after_holmes ... bench: 23,458,512 ns/iter (+/- 1,982,069) = 25 MB/s +test sherlock::before_holmes ... bench: 23,040,796 ns/iter (+/- 688,881) = 25 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,035,156 ns/iter (+/- 113,881) = 574 MB/s +test sherlock::holmes_coword_watson ... bench: 118,126,447 ns/iter (+/- 8,394,250) = 5 MB/s +test sherlock::ing_suffix ... bench: 16,122,434 ns/iter (+/- 236,636) = 36 MB/s +test sherlock::ing_suffix_limited_space ... bench: 22,239,435 ns/iter (+/- 364,604) = 26 MB/s +test sherlock::letters ... bench: 92,002,273 ns/iter (+/- 2,056,908) = 6 MB/s +test sherlock::letters_lower ... bench: 90,778,580 ns/iter (+/- 4,179,255) = 6 MB/s +test sherlock::letters_upper ... bench: 3,392,415 ns/iter (+/- 143,338) = 175 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 428,636 ns/iter (+/- 14,993) = 1387 MB/s +test sherlock::name_alt1 ... bench: 432,574 ns/iter (+/- 13,731) = 1375 MB/s +test sherlock::name_alt2 ... bench: 644,165 ns/iter (+/- 15,049) = 923 MB/s +test sherlock::name_alt3 ... bench: 1,176,979 ns/iter (+/- 105,694) = 505 MB/s +test sherlock::name_alt3_nocase ... bench: 2,054,990 ns/iter (+/- 91,909) = 289 MB/s +test sherlock::name_alt4 ... bench: 712,039 ns/iter (+/- 36,911) = 835 MB/s +test sherlock::name_alt4_nocase ... bench: 993,415 ns/iter (+/- 27,355) = 598 MB/s +test sherlock::name_alt5 ... bench: 757,045 ns/iter (+/- 29,126) = 785 MB/s +test sherlock::name_alt5_nocase ... bench: 953,821 ns/iter (+/- 37,252) = 623 MB/s +test sherlock::name_holmes ... 
bench: 186,801 ns/iter (+/- 6,676) = 3184 MB/s +test sherlock::name_holmes_nocase ... bench: 539,857 ns/iter (+/- 40,614) = 1102 MB/s +test sherlock::name_sherlock ... bench: 56,113 ns/iter (+/- 4,566) = 10602 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,558 ns/iter (+/- 6,746) = 8806 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 429,123 ns/iter (+/- 51,647) = 1386 MB/s +test sherlock::name_sherlock_nocase ... bench: 396,070 ns/iter (+/- 33,934) = 1502 MB/s +test sherlock::name_whitespace ... bench: 84,630 ns/iter (+/- 6,285) = 7029 MB/s +test sherlock::no_match_common ... bench: 292,844 ns/iter (+/- 24,013) = 2031 MB/s +test sherlock::no_match_really_common ... bench: 290,986 ns/iter (+/- 10,163) = 2044 MB/s +test sherlock::no_match_uncommon ... bench: 14,041 ns/iter (+/- 599) = 42371 MB/s +test sherlock::quotes ... bench: 6,489,945 ns/iter (+/- 132,983) = 91 MB/s +test sherlock::repeated_class_negation ... bench: 49,479,000 ns/iter (+/- 965,144) = 12 MB/s +test sherlock::the_lower ... bench: 2,268,881 ns/iter (+/- 134,889) = 262 MB/s +test sherlock::the_nocase ... bench: 2,906,824 ns/iter (+/- 72,615) = 204 MB/s +test sherlock::the_upper ... bench: 211,138 ns/iter (+/- 9,935) = 2817 MB/s +test sherlock::the_whitespace ... bench: 3,488,249 ns/iter (+/- 254,294) = 170 MB/s +test sherlock::word_ending_n ... bench: 30,917,395 ns/iter (+/- 2,298,620) = 19 MB/s +test sherlock::words ... bench: 39,830,572 ns/iter (+/- 2,662,348) = 14 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out diff --git a/third_party/rust/regex/record/old-bench-log/06/dphobos-dmd-ct b/third_party/rust/regex/record/old-bench-log/06/dphobos-dmd-ct new file mode 100644 index 0000000000..426fa6ce2a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/dphobos-dmd-ct @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... 
bench: 336 ns/iter (+/- 145) = 1160 MB/s +test misc::anchored_literal_long_non_match ... bench: 246 ns/iter (+/- 27) = 1585 MB/s +test misc::anchored_literal_short_match ... bench: 313 ns/iter (+/- 32) = 83 MB/s +test misc::anchored_literal_short_non_match ... bench: 248 ns/iter (+/- 31) = 104 MB/s +test misc::easy0_1K ... bench: 792 ns/iter (+/- 109) = 1327 MB/s +test misc::easy0_1MB ... bench: 24,706 ns/iter (+/- 812) = 42443 MB/s +test misc::easy0_32 ... bench: 793 ns/iter (+/- 77) = 74 MB/s +test misc::easy0_32K ... bench: 1,179 ns/iter (+/- 55) = 27815 MB/s +test misc::easy1_1K ... bench: 720 ns/iter (+/- 85) = 1450 MB/s +test misc::easy1_1MB ... bench: 24,647 ns/iter (+/- 761) = 42544 MB/s +test misc::easy1_32 ... bench: 717 ns/iter (+/- 28) = 72 MB/s +test misc::easy1_32K ... bench: 1,140 ns/iter (+/- 116) = 28761 MB/s +test misc::hard_1K ... bench: 19,153 ns/iter (+/- 2,063) = 54 MB/s +test misc::hard_1MB ... bench: 19,966,822 ns/iter (+/- 1,979,640) = 52 MB/s +test misc::hard_32 ... bench: 2,617 ns/iter (+/- 354) = 22 MB/s +test misc::hard_32K ... bench: 621,150 ns/iter (+/- 24,244) = 52 MB/s +test misc::literal ... bench: 194 ns/iter (+/- 28) = 262 MB/s +test misc::long_needle1 ... bench: 83,293 ns/iter (+/- 3,287) = 1200 MB/s +test misc::long_needle2 ... bench: 83,214 ns/iter (+/- 3,344) = 1201 MB/s +test misc::match_class ... bench: 301 ns/iter (+/- 38) = 269 MB/s +test misc::match_class_in_range ... bench: 258 ns/iter (+/- 27) = 313 MB/s +test misc::match_class_unicode ... bench: 1,565 ns/iter (+/- 187) = 102 MB/s +test misc::medium_1K ... bench: 1,572 ns/iter (+/- 230) = 669 MB/s +test misc::medium_1MB ... bench: 609,944 ns/iter (+/- 23,088) = 1719 MB/s +test misc::medium_32 ... bench: 980 ns/iter (+/- 112) = 61 MB/s +test misc::medium_32K ... bench: 20,058 ns/iter (+/- 884) = 1635 MB/s +test misc::not_literal ... bench: 1,218 ns/iter (+/- 67) = 41 MB/s +test misc::one_pass_long_prefix ... 
bench: 588 ns/iter (+/- 93) = 44 MB/s +test misc::one_pass_long_prefix_not ... bench: 595 ns/iter (+/- 77) = 43 MB/s +test misc::one_pass_short ... bench: 1,114 ns/iter (+/- 52) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,481 ns/iter (+/- 183) = 11 MB/s +test misc::reallyhard2_1K ... bench: 40,858 ns/iter (+/- 1,860) = 25 MB/s +test misc::reallyhard_1K ... bench: 18,678 ns/iter (+/- 835) = 56 MB/s +test misc::reallyhard_1MB ... bench: 19,824,750 ns/iter (+/- 354,159) = 52 MB/s +test misc::reallyhard_32 ... bench: 2,340 ns/iter (+/- 68) = 25 MB/s +test misc::reallyhard_32K ... bench: 621,351 ns/iter (+/- 21,369) = 52 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,919 ns/iter (+/- 224) = 1626 MB/s +test regexdna::find_new_lines ... bench: 27,265,128 ns/iter (+/- 1,416,486) = 186 MB/s +test regexdna::subst1 ... bench: 6,414,636 ns/iter (+/- 696,943) = 792 MB/s +test regexdna::subst10 ... bench: 6,426,829 ns/iter (+/- 206,773) = 790 MB/s +test regexdna::subst11 ... bench: 6,435,800 ns/iter (+/- 439,175) = 789 MB/s +test regexdna::subst2 ... bench: 6,428,455 ns/iter (+/- 214,961) = 790 MB/s +test regexdna::subst3 ... bench: 6,428,692 ns/iter (+/- 681,910) = 790 MB/s +test regexdna::subst4 ... bench: 6,425,781 ns/iter (+/- 129,718) = 791 MB/s +test regexdna::subst5 ... bench: 6,414,376 ns/iter (+/- 151,827) = 792 MB/s +test regexdna::subst6 ... bench: 6,455,032 ns/iter (+/- 423,915) = 787 MB/s +test regexdna::subst7 ... bench: 6,668,649 ns/iter (+/- 686,734) = 762 MB/s +test regexdna::subst8 ... bench: 6,393,791 ns/iter (+/- 172,533) = 795 MB/s +test regexdna::subst9 ... bench: 6,426,100 ns/iter (+/- 175,951) = 791 MB/s +test regexdna::variant1 ... bench: 5,612,507 ns/iter (+/- 128,406) = 905 MB/s +test regexdna::variant2 ... bench: 7,572,661 ns/iter (+/- 159,047) = 671 MB/s +test regexdna::variant3 ... bench: 12,287,183 ns/iter (+/- 378,305) = 413 MB/s +test regexdna::variant4 ... 
bench: 11,223,976 ns/iter (+/- 1,191,250) = 452 MB/s +test regexdna::variant5 ... bench: 11,016,081 ns/iter (+/- 714,537) = 461 MB/s +test regexdna::variant6 ... bench: 8,198,798 ns/iter (+/- 471,338) = 620 MB/s +test regexdna::variant7 ... bench: 8,895,886 ns/iter (+/- 885,690) = 571 MB/s +test regexdna::variant8 ... bench: 11,000,942 ns/iter (+/- 886,538) = 462 MB/s +test regexdna::variant9 ... bench: 20,761,109 ns/iter (+/- 629,876) = 244 MB/s +test sherlock::before_after_holmes ... bench: 24,417,513 ns/iter (+/- 2,359,425) = 24 MB/s +test sherlock::before_holmes ... bench: 24,435,196 ns/iter (+/- 2,164,187) = 24 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,025,780 ns/iter (+/- 121,876) = 579 MB/s +test sherlock::holmes_coword_watson ... bench: 122,988,753 ns/iter (+/- 7,606,302) = 4 MB/s +test sherlock::ing_suffix ... bench: 16,322,427 ns/iter (+/- 321,746) = 36 MB/s +test sherlock::ing_suffix_limited_space ... bench: 21,993,282 ns/iter (+/- 434,365) = 27 MB/s +test sherlock::letters ... bench: 88,877,258 ns/iter (+/- 504,024) = 6 MB/s +test sherlock::letters_lower ... bench: 87,709,419 ns/iter (+/- 659,859) = 6 MB/s +test sherlock::letters_upper ... bench: 3,299,811 ns/iter (+/- 78,850) = 180 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 431,548 ns/iter (+/- 54,716) = 1378 MB/s +test sherlock::name_alt1 ... bench: 470,052 ns/iter (+/- 49,985) = 1265 MB/s +test sherlock::name_alt2 ... bench: 705,694 ns/iter (+/- 45,724) = 843 MB/s +test sherlock::name_alt3 ... bench: 1,148,456 ns/iter (+/- 51,018) = 518 MB/s +test sherlock::name_alt3_nocase ... bench: 2,026,355 ns/iter (+/- 220,043) = 293 MB/s +test sherlock::name_alt4 ... bench: 699,625 ns/iter (+/- 40,361) = 850 MB/s +test sherlock::name_alt4_nocase ... bench: 979,151 ns/iter (+/- 41,460) = 607 MB/s +test sherlock::name_alt5 ... bench: 751,646 ns/iter (+/- 31,601) = 791 MB/s +test sherlock::name_alt5_nocase ... 
bench: 950,701 ns/iter (+/- 102,078) = 625 MB/s +test sherlock::name_holmes ... bench: 184,935 ns/iter (+/- 6,633) = 3216 MB/s +test sherlock::name_holmes_nocase ... bench: 532,703 ns/iter (+/- 33,919) = 1116 MB/s +test sherlock::name_sherlock ... bench: 55,468 ns/iter (+/- 1,776) = 10725 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,327 ns/iter (+/- 5,464) = 8836 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 417,724 ns/iter (+/- 45,520) = 1424 MB/s +test sherlock::name_sherlock_nocase ... bench: 392,285 ns/iter (+/- 14,778) = 1516 MB/s +test sherlock::name_whitespace ... bench: 77,112 ns/iter (+/- 2,785) = 7715 MB/s +test sherlock::no_match_common ... bench: 291,222 ns/iter (+/- 10,477) = 2042 MB/s +test sherlock::no_match_really_common ... bench: 291,393 ns/iter (+/- 10,834) = 2041 MB/s +test sherlock::no_match_uncommon ... bench: 14,016 ns/iter (+/- 376) = 42446 MB/s +test sherlock::quotes ... bench: 6,557,639 ns/iter (+/- 158,929) = 90 MB/s +test sherlock::repeated_class_negation ... bench: 49,697,910 ns/iter (+/- 773,749) = 11 MB/s +test sherlock::the_lower ... bench: 2,236,055 ns/iter (+/- 72,024) = 266 MB/s +test sherlock::the_nocase ... bench: 2,892,430 ns/iter (+/- 89,222) = 205 MB/s +test sherlock::the_upper ... bench: 207,035 ns/iter (+/- 8,624) = 2873 MB/s +test sherlock::the_whitespace ... bench: 3,435,267 ns/iter (+/- 416,560) = 173 MB/s +test sherlock::word_ending_n ... bench: 31,751,871 ns/iter (+/- 374,472) = 18 MB/s +test sherlock::words ... bench: 38,793,659 ns/iter (+/- 3,022,370) = 15 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/06/dphobos-ldc b/third_party/rust/regex/record/old-bench-log/06/dphobos-ldc new file mode 100644 index 0000000000..29f5595c76 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/dphobos-ldc @@ -0,0 +1,100 @@ + +running 95 tests +test misc::anchored_literal_long_match ... 
bench: 203 ns/iter (+/- 13) = 1921 MB/s +test misc::anchored_literal_long_non_match ... bench: 126 ns/iter (+/- 5) = 3095 MB/s +test misc::anchored_literal_short_match ... bench: 204 ns/iter (+/- 4) = 127 MB/s +test misc::anchored_literal_short_non_match ... bench: 127 ns/iter (+/- 8) = 204 MB/s +test misc::easy0_1K ... bench: 571 ns/iter (+/- 44) = 1840 MB/s +test misc::easy0_1MB ... bench: 25,321 ns/iter (+/- 421) = 41412 MB/s +test misc::easy0_32 ... bench: 553 ns/iter (+/- 9) = 106 MB/s +test misc::easy0_32K ... bench: 971 ns/iter (+/- 29) = 33774 MB/s +test misc::easy1_1K ... bench: 508 ns/iter (+/- 22) = 2055 MB/s +test misc::easy1_1MB ... bench: 24,181 ns/iter (+/- 704) = 43364 MB/s +test misc::easy1_32 ... bench: 494 ns/iter (+/- 14) = 105 MB/s +test misc::easy1_32K ... bench: 892 ns/iter (+/- 82) = 36757 MB/s +test misc::hard_1K ... bench: 15,335 ns/iter (+/- 1,224) = 68 MB/s +test misc::hard_1MB ... bench: 16,105,838 ns/iter (+/- 319,567) = 65 MB/s +test misc::hard_32 ... bench: 1,798 ns/iter (+/- 79) = 32 MB/s +test misc::hard_32K ... bench: 504,123 ns/iter (+/- 44,829) = 65 MB/s +test misc::literal ... bench: 74 ns/iter (+/- 9) = 689 MB/s +test misc::long_needle1 ... bench: 56,853 ns/iter (+/- 3,662) = 1758 MB/s +test misc::long_needle2 ... bench: 57,038 ns/iter (+/- 2,532) = 1753 MB/s +test misc::match_class ... bench: 140 ns/iter (+/- 15) = 578 MB/s +test misc::match_class_in_range ... bench: 126 ns/iter (+/- 17) = 642 MB/s +test misc::match_class_unicode ... bench: 1,407 ns/iter (+/- 122) = 114 MB/s +test misc::medium_1K ... bench: 1,199 ns/iter (+/- 80) = 877 MB/s +test misc::medium_1MB ... bench: 558,323 ns/iter (+/- 20,908) = 1878 MB/s +test misc::medium_32 ... bench: 661 ns/iter (+/- 30) = 90 MB/s +test misc::medium_32K ... bench: 18,148 ns/iter (+/- 1,038) = 1807 MB/s +test misc::no_exponential ... bench: 334,786 ns/iter (+/- 18,234) +test misc::not_literal ... bench: 1,347 ns/iter (+/- 49) = 37 MB/s +test misc::one_pass_long_prefix ... 
bench: 499 ns/iter (+/- 59) = 52 MB/s +test misc::one_pass_long_prefix_not ... bench: 522 ns/iter (+/- 64) = 49 MB/s +test misc::one_pass_short ... bench: 804 ns/iter (+/- 37) = 21 MB/s +test misc::one_pass_short_not ... bench: 1,260 ns/iter (+/- 130) = 13 MB/s +test misc::reallyhard2_1K ... bench: 37,726 ns/iter (+/- 1,284) = 27 MB/s +test misc::reallyhard_1K ... bench: 15,246 ns/iter (+/- 901) = 68 MB/s +test misc::reallyhard_1MB ... bench: 16,187,692 ns/iter (+/- 1,552,760) = 64 MB/s +test misc::reallyhard_32 ... bench: 1,882 ns/iter (+/- 237) = 31 MB/s +test misc::reallyhard_32K ... bench: 541,567 ns/iter (+/- 64,929) = 60 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,576 ns/iter (+/- 185) = 1748 MB/s +test regexdna::find_new_lines ... bench: 14,744,849 ns/iter (+/- 1,141,621) = 344 MB/s +test regexdna::subst1 ... bench: 2,801,370 ns/iter (+/- 105,875) = 1814 MB/s +test regexdna::subst10 ... bench: 3,015,410 ns/iter (+/- 446,982) = 1685 MB/s +test regexdna::subst11 ... bench: 2,923,557 ns/iter (+/- 193,230) = 1738 MB/s +test regexdna::subst2 ... bench: 2,948,002 ns/iter (+/- 306,203) = 1724 MB/s +test regexdna::subst3 ... bench: 2,899,076 ns/iter (+/- 174,958) = 1753 MB/s +test regexdna::subst4 ... bench: 2,908,685 ns/iter (+/- 221,436) = 1747 MB/s +test regexdna::subst5 ... bench: 3,780,044 ns/iter (+/- 150,740) = 1344 MB/s +test regexdna::subst6 ... bench: 2,920,193 ns/iter (+/- 142,191) = 1740 MB/s +test regexdna::subst7 ... bench: 2,918,785 ns/iter (+/- 175,109) = 1741 MB/s +test regexdna::subst8 ... bench: 2,932,075 ns/iter (+/- 152,745) = 1733 MB/s +test regexdna::subst9 ... bench: 2,914,694 ns/iter (+/- 176,327) = 1744 MB/s +test regexdna::variant1 ... bench: 5,172,617 ns/iter (+/- 269,855) = 982 MB/s +test regexdna::variant2 ... bench: 6,770,702 ns/iter (+/- 474,076) = 750 MB/s +test regexdna::variant3 ... bench: 11,124,754 ns/iter (+/- 649,591) = 456 MB/s +test regexdna::variant4 ... 
bench: 9,751,982 ns/iter (+/- 460,679) = 521 MB/s +test regexdna::variant5 ... bench: 9,791,229 ns/iter (+/- 461,486) = 519 MB/s +test regexdna::variant6 ... bench: 7,417,031 ns/iter (+/- 275,225) = 685 MB/s +test regexdna::variant7 ... bench: 7,873,097 ns/iter (+/- 451,115) = 645 MB/s +test regexdna::variant8 ... bench: 9,707,683 ns/iter (+/- 418,865) = 523 MB/s +test regexdna::variant9 ... bench: 18,696,520 ns/iter (+/- 742,018) = 271 MB/s +test sherlock::before_after_holmes ... bench: 22,314,084 ns/iter (+/- 888,249) = 26 MB/s +test sherlock::before_holmes ... bench: 22,501,540 ns/iter (+/- 892,027) = 26 MB/s +test sherlock::holmes_cochar_watson ... bench: 929,372 ns/iter (+/- 46,859) = 640 MB/s +test sherlock::holmes_coword_watson ... bench: 125,548,613 ns/iter (+/- 3,297,687) = 4 MB/s +test sherlock::ing_suffix ... bench: 18,023,803 ns/iter (+/- 1,079,960) = 33 MB/s +test sherlock::ing_suffix_limited_space ... bench: 21,809,497 ns/iter (+/- 1,259,989) = 27 MB/s +test sherlock::letters ... bench: 39,512,315 ns/iter (+/- 3,309,084) = 15 MB/s +test sherlock::letters_lower ... bench: 37,160,354 ns/iter (+/- 3,084,525) = 16 MB/s +test sherlock::letters_upper ... bench: 1,721,867 ns/iter (+/- 66,812) = 345 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 378,307 ns/iter (+/- 30,914) = 1572 MB/s +test sherlock::name_alt1 ... bench: 381,242 ns/iter (+/- 41,954) = 1560 MB/s +test sherlock::name_alt2 ... bench: 503,558 ns/iter (+/- 46,044) = 1181 MB/s +test sherlock::name_alt3 ... bench: 912,340 ns/iter (+/- 79,787) = 652 MB/s +test sherlock::name_alt3_nocase ... bench: 1,515,048 ns/iter (+/- 74,623) = 392 MB/s +test sherlock::name_alt4 ... bench: 580,652 ns/iter (+/- 60,407) = 1024 MB/s +test sherlock::name_alt4_nocase ... bench: 826,866 ns/iter (+/- 58,485) = 719 MB/s +test sherlock::name_alt5 ... bench: 651,281 ns/iter (+/- 64,134) = 913 MB/s +test sherlock::name_alt5_nocase ... 
bench: 808,974 ns/iter (+/- 49,119) = 735 MB/s +test sherlock::name_holmes ... bench: 120,010 ns/iter (+/- 9,458) = 4957 MB/s +test sherlock::name_holmes_nocase ... bench: 441,316 ns/iter (+/- 56,990) = 1348 MB/s +test sherlock::name_sherlock ... bench: 39,935 ns/iter (+/- 4,078) = 14897 MB/s +test sherlock::name_sherlock_holmes ... bench: 49,126 ns/iter (+/- 3,082) = 12110 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 366,865 ns/iter (+/- 18,520) = 1621 MB/s +test sherlock::name_sherlock_nocase ... bench: 349,337 ns/iter (+/- 18,365) = 1703 MB/s +test sherlock::name_whitespace ... bench: 57,076 ns/iter (+/- 6,314) = 10423 MB/s +test sherlock::no_match_common ... bench: 291,022 ns/iter (+/- 30,143) = 2044 MB/s +test sherlock::no_match_really_common ... bench: 286,214 ns/iter (+/- 15,722) = 2078 MB/s +test sherlock::no_match_uncommon ... bench: 13,963 ns/iter (+/- 759) = 42607 MB/s +test sherlock::quotes ... bench: 5,580,378 ns/iter (+/- 295,941) = 106 MB/s +test sherlock::repeated_class_negation ... bench: 52,797,981 ns/iter (+/- 2,731,805) = 11 MB/s +test sherlock::the_lower ... bench: 1,295,105 ns/iter (+/- 62,365) = 459 MB/s +test sherlock::the_nocase ... bench: 1,620,713 ns/iter (+/- 73,503) = 367 MB/s +test sherlock::the_upper ... bench: 112,911 ns/iter (+/- 5,843) = 5269 MB/s +test sherlock::the_whitespace ... bench: 2,441,986 ns/iter (+/- 133,012) = 243 MB/s +test sherlock::word_ending_n ... bench: 26,478,327 ns/iter (+/- 1,361,757) = 22 MB/s +test sherlock::words ... bench: 23,948,872 ns/iter (+/- 2,323,993) = 24 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/06/dphobos-ldc-ct b/third_party/rust/regex/record/old-bench-log/06/dphobos-ldc-ct new file mode 100644 index 0000000000..6aaa5de709 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/dphobos-ldc-ct @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... 
bench: 189 ns/iter (+/- 23) = 2063 MB/s +test misc::anchored_literal_long_non_match ... bench: 128 ns/iter (+/- 14) = 3046 MB/s +test misc::anchored_literal_short_match ... bench: 191 ns/iter (+/- 20) = 136 MB/s +test misc::anchored_literal_short_non_match ... bench: 120 ns/iter (+/- 13) = 216 MB/s +test misc::easy0_1K ... bench: 536 ns/iter (+/- 49) = 1960 MB/s +test misc::easy0_1MB ... bench: 24,516 ns/iter (+/- 2,181) = 42772 MB/s +test misc::easy0_32 ... bench: 551 ns/iter (+/- 36) = 107 MB/s +test misc::easy0_32K ... bench: 961 ns/iter (+/- 105) = 34125 MB/s +test misc::easy1_1K ... bench: 518 ns/iter (+/- 59) = 2015 MB/s +test misc::easy1_1MB ... bench: 25,352 ns/iter (+/- 2,847) = 41361 MB/s +test misc::easy1_32 ... bench: 501 ns/iter (+/- 42) = 103 MB/s +test misc::easy1_32K ... bench: 919 ns/iter (+/- 69) = 35677 MB/s +test misc::hard_1K ... bench: 16,146 ns/iter (+/- 1,124) = 65 MB/s +test misc::hard_1MB ... bench: 16,482,695 ns/iter (+/- 805,077) = 63 MB/s +test misc::hard_32 ... bench: 1,807 ns/iter (+/- 173) = 32 MB/s +test misc::hard_32K ... bench: 516,772 ns/iter (+/- 33,884) = 63 MB/s +test misc::literal ... bench: 77 ns/iter (+/- 9) = 662 MB/s +test misc::long_needle1 ... bench: 56,900 ns/iter (+/- 3,087) = 1757 MB/s +test misc::long_needle2 ... bench: 57,364 ns/iter (+/- 4,166) = 1743 MB/s +test misc::match_class ... bench: 156 ns/iter (+/- 21) = 519 MB/s +test misc::match_class_in_range ... bench: 121 ns/iter (+/- 12) = 669 MB/s +test misc::match_class_unicode ... bench: 1,515 ns/iter (+/- 207) = 106 MB/s +test misc::medium_1K ... bench: 1,186 ns/iter (+/- 120) = 887 MB/s +test misc::medium_1MB ... bench: 559,677 ns/iter (+/- 59,284) = 1873 MB/s +test misc::medium_32 ... bench: 657 ns/iter (+/- 86) = 91 MB/s +test misc::medium_32K ... bench: 18,142 ns/iter (+/- 915) = 1807 MB/s +test misc::not_literal ... bench: 1,319 ns/iter (+/- 128) = 38 MB/s +test misc::one_pass_long_prefix ... 
bench: 509 ns/iter (+/- 56) = 51 MB/s +test misc::one_pass_long_prefix_not ... bench: 517 ns/iter (+/- 38) = 50 MB/s +test misc::one_pass_short ... bench: 783 ns/iter (+/- 83) = 21 MB/s +test misc::one_pass_short_not ... bench: 1,239 ns/iter (+/- 98) = 13 MB/s +test misc::reallyhard2_1K ... bench: 40,580 ns/iter (+/- 3,041) = 25 MB/s +test misc::reallyhard_1K ... bench: 15,162 ns/iter (+/- 652) = 69 MB/s +test misc::reallyhard_1MB ... bench: 16,065,920 ns/iter (+/- 886,245) = 65 MB/s +test misc::reallyhard_32 ... bench: 1,829 ns/iter (+/- 90) = 32 MB/s +test misc::reallyhard_32K ... bench: 520,572 ns/iter (+/- 88,290) = 62 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,423 ns/iter (+/- 493) = 1808 MB/s +test regexdna::find_new_lines ... bench: 14,658,357 ns/iter (+/- 1,784,941) = 346 MB/s +test regexdna::subst1 ... bench: 2,984,959 ns/iter (+/- 422,186) = 1703 MB/s +test regexdna::subst10 ... bench: 2,836,747 ns/iter (+/- 274,300) = 1791 MB/s +test regexdna::subst11 ... bench: 2,809,880 ns/iter (+/- 309,516) = 1809 MB/s +test regexdna::subst2 ... bench: 2,868,765 ns/iter (+/- 435,511) = 1771 MB/s +test regexdna::subst3 ... bench: 2,837,000 ns/iter (+/- 319,135) = 1791 MB/s +test regexdna::subst4 ... bench: 2,856,540 ns/iter (+/- 320,458) = 1779 MB/s +test regexdna::subst5 ... bench: 2,820,953 ns/iter (+/- 340,996) = 1802 MB/s +test regexdna::subst6 ... bench: 3,588,607 ns/iter (+/- 462,158) = 1416 MB/s +test regexdna::subst7 ... bench: 2,896,235 ns/iter (+/- 165,525) = 1755 MB/s +test regexdna::subst8 ... bench: 2,982,961 ns/iter (+/- 315,768) = 1704 MB/s +test regexdna::subst9 ... bench: 3,024,311 ns/iter (+/- 300,274) = 1680 MB/s +test regexdna::variant1 ... bench: 5,234,342 ns/iter (+/- 269,577) = 971 MB/s +test regexdna::variant2 ... bench: 6,463,683 ns/iter (+/- 532,663) = 786 MB/s +test regexdna::variant3 ... bench: 10,720,523 ns/iter (+/- 414,684) = 474 MB/s +test regexdna::variant4 ... 
bench: 9,882,647 ns/iter (+/- 297,904) = 514 MB/s +test regexdna::variant5 ... bench: 9,664,151 ns/iter (+/- 659,587) = 526 MB/s +test regexdna::variant6 ... bench: 7,174,368 ns/iter (+/- 322,025) = 708 MB/s +test regexdna::variant7 ... bench: 7,605,668 ns/iter (+/- 411,605) = 668 MB/s +test regexdna::variant8 ... bench: 9,580,481 ns/iter (+/- 373,332) = 530 MB/s +test regexdna::variant9 ... bench: 18,270,186 ns/iter (+/- 986,510) = 278 MB/s +test sherlock::before_after_holmes ... bench: 21,982,853 ns/iter (+/- 1,032,853) = 27 MB/s +test sherlock::before_holmes ... bench: 21,947,949 ns/iter (+/- 848,014) = 27 MB/s +test sherlock::holmes_cochar_watson ... bench: 909,691 ns/iter (+/- 48,847) = 653 MB/s +test sherlock::holmes_coword_watson ... bench: 124,771,191 ns/iter (+/- 8,084,768) = 4 MB/s +test sherlock::ing_suffix ... bench: 17,864,129 ns/iter (+/- 1,343,114) = 33 MB/s +test sherlock::ing_suffix_limited_space ... bench: 21,009,249 ns/iter (+/- 452,676) = 28 MB/s +test sherlock::letters ... bench: 37,888,421 ns/iter (+/- 2,482,541) = 15 MB/s +test sherlock::letters_lower ... bench: 37,029,883 ns/iter (+/- 481,280) = 16 MB/s +test sherlock::letters_upper ... bench: 1,627,107 ns/iter (+/- 51,063) = 365 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 370,850 ns/iter (+/- 15,384) = 1604 MB/s +test sherlock::name_alt1 ... bench: 371,780 ns/iter (+/- 28,486) = 1600 MB/s +test sherlock::name_alt2 ... bench: 506,859 ns/iter (+/- 17,553) = 1173 MB/s +test sherlock::name_alt3 ... bench: 915,729 ns/iter (+/- 99,429) = 649 MB/s +test sherlock::name_alt3_nocase ... bench: 1,512,050 ns/iter (+/- 186,130) = 393 MB/s +test sherlock::name_alt4 ... bench: 578,710 ns/iter (+/- 18,089) = 1028 MB/s +test sherlock::name_alt4_nocase ... bench: 752,912 ns/iter (+/- 51,342) = 790 MB/s +test sherlock::name_alt5 ... bench: 595,803 ns/iter (+/- 15,053) = 998 MB/s +test sherlock::name_alt5_nocase ... 
bench: 730,149 ns/iter (+/- 40,662) = 814 MB/s +test sherlock::name_holmes ... bench: 115,596 ns/iter (+/- 4,597) = 5146 MB/s +test sherlock::name_holmes_nocase ... bench: 429,765 ns/iter (+/- 16,685) = 1384 MB/s +test sherlock::name_sherlock ... bench: 38,985 ns/iter (+/- 2,195) = 15260 MB/s +test sherlock::name_sherlock_holmes ... bench: 49,610 ns/iter (+/- 2,005) = 11992 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 374,335 ns/iter (+/- 37,062) = 1589 MB/s +test sherlock::name_sherlock_nocase ... bench: 355,559 ns/iter (+/- 40,873) = 1673 MB/s +test sherlock::name_whitespace ... bench: 57,616 ns/iter (+/- 5,124) = 10325 MB/s +test sherlock::no_match_common ... bench: 284,228 ns/iter (+/- 29,087) = 2093 MB/s +test sherlock::no_match_really_common ... bench: 287,263 ns/iter (+/- 22,755) = 2071 MB/s +test sherlock::no_match_uncommon ... bench: 14,030 ns/iter (+/- 526) = 42404 MB/s +test sherlock::quotes ... bench: 5,563,019 ns/iter (+/- 537,611) = 106 MB/s +test sherlock::repeated_class_negation ... bench: 54,831,275 ns/iter (+/- 5,982,214) = 10 MB/s +test sherlock::the_lower ... bench: 1,298,205 ns/iter (+/- 73,265) = 458 MB/s +test sherlock::the_nocase ... bench: 1,572,579 ns/iter (+/- 63,536) = 378 MB/s +test sherlock::the_upper ... bench: 112,795 ns/iter (+/- 4,179) = 5274 MB/s +test sherlock::the_whitespace ... bench: 2,630,026 ns/iter (+/- 227,760) = 226 MB/s +test sherlock::word_ending_n ... bench: 26,975,356 ns/iter (+/- 2,531,982) = 22 MB/s +test sherlock::words ... bench: 23,116,326 ns/iter (+/- 458,721) = 25 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/06/pcre1 b/third_party/rust/regex/record/old-bench-log/06/pcre1 new file mode 100644 index 0000000000..f8a9100e1a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... 
bench: 32 ns/iter (+/- 38) = 12187 MB/s +test misc::anchored_literal_long_non_match ... bench: 23 ns/iter (+/- 1) = 16956 MB/s +test misc::anchored_literal_short_match ... bench: 30 ns/iter (+/- 1) = 866 MB/s +test misc::anchored_literal_short_non_match ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::easy0_1K ... bench: 261 ns/iter (+/- 21) = 4026 MB/s +test misc::easy0_1MB ... bench: 202,218 ns/iter (+/- 16,050) = 5185 MB/s +test misc::easy0_32 ... bench: 49 ns/iter (+/- 3) = 1204 MB/s +test misc::easy0_32K ... bench: 6,305 ns/iter (+/- 448) = 5201 MB/s +test misc::easy1_1K ... bench: 245 ns/iter (+/- 5) = 4261 MB/s +test misc::easy1_1MB ... bench: 198,215 ns/iter (+/- 10,461) = 5290 MB/s +test misc::easy1_32 ... bench: 49 ns/iter (+/- 1) = 1061 MB/s +test misc::easy1_32K ... bench: 6,309 ns/iter (+/- 358) = 5197 MB/s +test misc::hard_1K ... bench: 1,306 ns/iter (+/- 50) = 804 MB/s +test misc::hard_1MB ... bench: 1,219,034 ns/iter (+/- 92,693) = 860 MB/s +test misc::hard_32 ... bench: 95 ns/iter (+/- 7) = 621 MB/s +test misc::hard_32K ... bench: 37,713 ns/iter (+/- 948) = 869 MB/s +test misc::literal ... bench: 29 ns/iter (+/- 1) = 1758 MB/s +test misc::long_needle1 ... bench: 548,012 ns/iter (+/- 26,029) = 182 MB/s +test misc::long_needle2 ... bench: 538,536 ns/iter (+/- 54,612) = 185 MB/s +test misc::match_class ... bench: 94 ns/iter (+/- 3) = 861 MB/s +test misc::match_class_in_range ... bench: 29 ns/iter (+/- 1) = 2793 MB/s +test misc::match_class_unicode ... bench: 370 ns/iter (+/- 19) = 435 MB/s +test misc::medium_1K ... bench: 256 ns/iter (+/- 13) = 4109 MB/s +test misc::medium_1MB ... bench: 207,655 ns/iter (+/- 9,168) = 5049 MB/s +test misc::medium_32 ... bench: 51 ns/iter (+/- 5) = 1176 MB/s +test misc::medium_32K ... bench: 6,144 ns/iter (+/- 327) = 5337 MB/s +test misc::not_literal ... bench: 166 ns/iter (+/- 14) = 307 MB/s +test misc::one_pass_long_prefix ... bench: 27 ns/iter (+/- 2) = 962 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 29 ns/iter (+/- 1) = 896 MB/s +test misc::one_pass_short ... bench: 55 ns/iter (+/- 2) = 309 MB/s +test misc::one_pass_short_not ... bench: 55 ns/iter (+/- 3) = 309 MB/s +test misc::reallyhard2_1K ... bench: 4,404 ns/iter (+/- 346) = 236 MB/s +test misc::reallyhard_1K ... bench: 1,365 ns/iter (+/- 52) = 769 MB/s +test misc::reallyhard_1MB ... bench: 1,118,777 ns/iter (+/- 72,209) = 937 MB/s +test misc::reallyhard_32 ... bench: 112 ns/iter (+/- 4) = 526 MB/s +test misc::reallyhard_32K ... bench: 41,164 ns/iter (+/- 2,351) = 796 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,413 ns/iter (+/- 304) = 1812 MB/s +test regexdna::find_new_lines ... bench: 2,802,109 ns/iter (+/- 129,768) = 1814 MB/s +test regexdna::subst1 ... bench: 1,263,401 ns/iter (+/- 54,374) = 4023 MB/s +test regexdna::subst10 ... bench: 1,254,544 ns/iter (+/- 101,656) = 4051 MB/s +test regexdna::subst11 ... bench: 1,408,321 ns/iter (+/- 121,522) = 3609 MB/s +test regexdna::subst2 ... bench: 1,364,704 ns/iter (+/- 106,508) = 3724 MB/s +test regexdna::subst3 ... bench: 1,258,687 ns/iter (+/- 84,504) = 4038 MB/s +test regexdna::subst4 ... bench: 1,301,822 ns/iter (+/- 62,866) = 3904 MB/s +test regexdna::subst5 ... bench: 1,338,338 ns/iter (+/- 313,996) = 3798 MB/s +test regexdna::subst6 ... bench: 1,349,310 ns/iter (+/- 117,181) = 3767 MB/s +test regexdna::subst7 ... bench: 1,390,090 ns/iter (+/- 210,430) = 3656 MB/s +test regexdna::subst8 ... bench: 1,293,481 ns/iter (+/- 38,532) = 3930 MB/s +test regexdna::subst9 ... bench: 1,245,652 ns/iter (+/- 58,026) = 4080 MB/s +test regexdna::variant1 ... bench: 15,239,324 ns/iter (+/- 414,621) = 333 MB/s +test regexdna::variant2 ... bench: 16,489,922 ns/iter (+/- 825,229) = 308 MB/s +test regexdna::variant3 ... bench: 19,945,871 ns/iter (+/- 665,046) = 254 MB/s +test regexdna::variant4 ... bench: 18,604,011 ns/iter (+/- 712,670) = 273 MB/s +test regexdna::variant5 ... 
bench: 17,084,919 ns/iter (+/- 1,379,879) = 297 MB/s +test regexdna::variant6 ... bench: 16,918,130 ns/iter (+/- 975,620) = 300 MB/s +test regexdna::variant7 ... bench: 19,114,194 ns/iter (+/- 857,330) = 265 MB/s +test regexdna::variant8 ... bench: 23,831,138 ns/iter (+/- 878,576) = 213 MB/s +test regexdna::variant9 ... bench: 21,835,777 ns/iter (+/- 1,339,143) = 232 MB/s +test sherlock::before_after_holmes ... bench: 4,401,834 ns/iter (+/- 218,696) = 135 MB/s +test sherlock::before_holmes ... bench: 4,436,717 ns/iter (+/- 109,324) = 134 MB/s +test sherlock::holmes_cochar_watson ... bench: 497,667 ns/iter (+/- 19,212) = 1195 MB/s +test sherlock::ing_suffix ... bench: 1,852,390 ns/iter (+/- 77,888) = 321 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,775,078 ns/iter (+/- 152,556) = 124 MB/s +test sherlock::letters ... bench: 13,888,750 ns/iter (+/- 668,831) = 42 MB/s +test sherlock::letters_lower ... bench: 13,452,405 ns/iter (+/- 453,184) = 44 MB/s +test sherlock::letters_upper ... bench: 1,870,502 ns/iter (+/- 57,825) = 318 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 198,649 ns/iter (+/- 9,080) = 2994 MB/s +test sherlock::name_alt1 ... bench: 464,513 ns/iter (+/- 29,935) = 1280 MB/s +test sherlock::name_alt2 ... bench: 470,746 ns/iter (+/- 12,931) = 1263 MB/s +test sherlock::name_alt3 ... bench: 874,352 ns/iter (+/- 38,618) = 680 MB/s +test sherlock::name_alt3_nocase ... bench: 2,821,106 ns/iter (+/- 113,055) = 210 MB/s +test sherlock::name_alt4 ... bench: 78,753 ns/iter (+/- 3,111) = 7554 MB/s +test sherlock::name_alt4_nocase ... bench: 1,596,406 ns/iter (+/- 62,919) = 372 MB/s +test sherlock::name_alt5 ... bench: 655,870 ns/iter (+/- 32,597) = 907 MB/s +test sherlock::name_alt5_nocase ... bench: 1,732,595 ns/iter (+/- 75,827) = 343 MB/s +test sherlock::name_holmes ... bench: 400,037 ns/iter (+/- 16,935) = 1487 MB/s +test sherlock::name_holmes_nocase ... 
bench: 501,467 ns/iter (+/- 20,805) = 1186 MB/s +test sherlock::name_sherlock ... bench: 267,873 ns/iter (+/- 10,199) = 2220 MB/s +test sherlock::name_sherlock_holmes ... bench: 202,107 ns/iter (+/- 10,314) = 2943 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,070,780 ns/iter (+/- 43,144) = 555 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,074,139 ns/iter (+/- 48,270) = 553 MB/s +test sherlock::name_whitespace ... bench: 271,978 ns/iter (+/- 10,137) = 2187 MB/s +test sherlock::no_match_common ... bench: 411,484 ns/iter (+/- 13,213) = 1445 MB/s +test sherlock::no_match_really_common ... bench: 403,709 ns/iter (+/- 12,415) = 1473 MB/s +test sherlock::no_match_uncommon ... bench: 27,730 ns/iter (+/- 928) = 21454 MB/s +test sherlock::quotes ... bench: 515,141 ns/iter (+/- 17,799) = 1154 MB/s +test sherlock::repeated_class_negation ... bench: 5,842,243 ns/iter (+/- 282,478) = 101 MB/s +test sherlock::the_lower ... bench: 725,059 ns/iter (+/- 36,233) = 820 MB/s +test sherlock::the_nocase ... bench: 812,888 ns/iter (+/- 34,200) = 731 MB/s +test sherlock::the_upper ... bench: 56,746 ns/iter (+/- 2,186) = 10484 MB/s +test sherlock::the_whitespace ... bench: 920,705 ns/iter (+/- 37,325) = 646 MB/s +test sherlock::word_ending_n ... bench: 5,625,614 ns/iter (+/- 199,408) = 105 MB/s +test sherlock::words ... bench: 7,122,561 ns/iter (+/- 161,013) = 83 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/06/pcre2 b/third_party/rust/regex/record/old-bench-log/06/pcre2 new file mode 100644 index 0000000000..518530113f --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_long_non_match ... bench: 13 ns/iter (+/- 1) = 30000 MB/s +test misc::anchored_literal_short_match ... 
bench: 16 ns/iter (+/- 1) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 13 ns/iter (+/- 1) = 2000 MB/s +test misc::easy0_1K ... bench: 104 ns/iter (+/- 5) = 10105 MB/s +test misc::easy0_1MB ... bench: 64,102 ns/iter (+/- 4,103) = 16358 MB/s +test misc::easy0_32 ... bench: 32 ns/iter (+/- 4) = 1843 MB/s +test misc::easy0_32K ... bench: 2,042 ns/iter (+/- 152) = 16060 MB/s +test misc::easy1_1K ... bench: 102 ns/iter (+/- 11) = 10235 MB/s +test misc::easy1_1MB ... bench: 63,117 ns/iter (+/- 4,547) = 16613 MB/s +test misc::easy1_32 ... bench: 33 ns/iter (+/- 4) = 1575 MB/s +test misc::easy1_32K ... bench: 2,019 ns/iter (+/- 181) = 16239 MB/s +test misc::hard_1K ... bench: 1,236 ns/iter (+/- 82) = 850 MB/s +test misc::hard_1MB ... bench: 1,041,354 ns/iter (+/- 39,123) = 1006 MB/s +test misc::hard_32 ... bench: 86 ns/iter (+/- 8) = 686 MB/s +test misc::hard_32K ... bench: 33,054 ns/iter (+/- 1,813) = 992 MB/s +test misc::literal ... bench: 20 ns/iter (+/- 2) = 2550 MB/s +test misc::long_needle1 ... bench: 501,732 ns/iter (+/- 52,173) = 199 MB/s +test misc::long_needle2 ... bench: 515,127 ns/iter (+/- 48,790) = 194 MB/s +test misc::match_class ... bench: 55 ns/iter (+/- 7) = 1472 MB/s +test misc::match_class_in_range ... bench: 19 ns/iter (+/- 2) = 4263 MB/s +test misc::match_class_unicode ... bench: 342 ns/iter (+/- 60) = 470 MB/s +test misc::medium_1K ... bench: 106 ns/iter (+/- 4) = 9924 MB/s +test misc::medium_1MB ... bench: 63,011 ns/iter (+/- 4,942) = 16641 MB/s +test misc::medium_32 ... bench: 32 ns/iter (+/- 3) = 1875 MB/s +test misc::medium_32K ... bench: 2,068 ns/iter (+/- 189) = 15858 MB/s +test misc::not_literal ... bench: 147 ns/iter (+/- 13) = 346 MB/s +test misc::one_pass_long_prefix ... bench: 15 ns/iter (+/- 1) = 1733 MB/s +test misc::one_pass_long_prefix_not ... bench: 15 ns/iter (+/- 1) = 1733 MB/s +test misc::one_pass_short ... bench: 42 ns/iter (+/- 3) = 404 MB/s +test misc::one_pass_short_not ... 
bench: 43 ns/iter (+/- 5) = 395 MB/s +test misc::reallyhard2_1K ... bench: 4,356 ns/iter (+/- 499) = 238 MB/s +test misc::reallyhard_1K ... bench: 1,196 ns/iter (+/- 113) = 878 MB/s +test misc::reallyhard_1MB ... bench: 1,070,155 ns/iter (+/- 90,895) = 979 MB/s +test misc::reallyhard_32 ... bench: 93 ns/iter (+/- 12) = 634 MB/s +test misc::reallyhard_32K ... bench: 33,521 ns/iter (+/- 2,663) = 978 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 3,065 ns/iter (+/- 393) = 2610 MB/s +test regexdna::find_new_lines ... bench: 1,891,736 ns/iter (+/- 232,990) = 2687 MB/s +test regexdna::subst1 ... bench: 920,853 ns/iter (+/- 75,276) = 5520 MB/s +test regexdna::subst10 ... bench: 892,533 ns/iter (+/- 77,177) = 5695 MB/s +test regexdna::subst11 ... bench: 869,335 ns/iter (+/- 75,754) = 5847 MB/s +test regexdna::subst2 ... bench: 901,876 ns/iter (+/- 75,287) = 5636 MB/s +test regexdna::subst3 ... bench: 870,185 ns/iter (+/- 53,535) = 5841 MB/s +test regexdna::subst4 ... bench: 859,924 ns/iter (+/- 63,888) = 5911 MB/s +test regexdna::subst5 ... bench: 886,748 ns/iter (+/- 87,929) = 5732 MB/s +test regexdna::subst6 ... bench: 870,428 ns/iter (+/- 47,015) = 5840 MB/s +test regexdna::subst7 ... bench: 865,513 ns/iter (+/- 41,507) = 5873 MB/s +test regexdna::subst8 ... bench: 870,030 ns/iter (+/- 110,449) = 5842 MB/s +test regexdna::subst9 ... bench: 875,649 ns/iter (+/- 32,905) = 5805 MB/s +test regexdna::variant1 ... bench: 9,234,989 ns/iter (+/- 127,076) = 550 MB/s +test regexdna::variant2 ... bench: 11,759,628 ns/iter (+/- 575,788) = 432 MB/s +test regexdna::variant3 ... bench: 11,229,965 ns/iter (+/- 522,759) = 452 MB/s +test regexdna::variant4 ... bench: 10,040,716 ns/iter (+/- 309,357) = 506 MB/s +test regexdna::variant5 ... bench: 10,052,052 ns/iter (+/- 522,338) = 505 MB/s +test regexdna::variant6 ... bench: 10,719,366 ns/iter (+/- 577,988) = 474 MB/s +test regexdna::variant7 ... bench: 11,076,094 ns/iter (+/- 1,291,237) = 458 MB/s +test regexdna::variant8 ... 
bench: 11,855,290 ns/iter (+/- 667,429) = 428 MB/s +test regexdna::variant9 ... bench: 12,531,240 ns/iter (+/- 606,198) = 405 MB/s +test sherlock::before_after_holmes ... bench: 4,169,656 ns/iter (+/- 222,900) = 142 MB/s +test sherlock::before_holmes ... bench: 4,144,394 ns/iter (+/- 170,133) = 143 MB/s +test sherlock::holmes_cochar_watson ... bench: 74,437 ns/iter (+/- 4,266) = 7992 MB/s +test sherlock::ing_suffix ... bench: 1,731,507 ns/iter (+/- 162,892) = 343 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,418,194 ns/iter (+/- 240,220) = 134 MB/s +test sherlock::letters ... bench: 8,847,041 ns/iter (+/- 392,402) = 67 MB/s +test sherlock::letters_lower ... bench: 8,547,432 ns/iter (+/- 304,256) = 69 MB/s +test sherlock::letters_upper ... bench: 1,584,248 ns/iter (+/- 51,331) = 375 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 38,057 ns/iter (+/- 1,666) = 15632 MB/s +test sherlock::name_alt1 ... bench: 50,415 ns/iter (+/- 3,173) = 11800 MB/s +test sherlock::name_alt2 ... bench: 66,062 ns/iter (+/- 2,807) = 9005 MB/s +test sherlock::name_alt3 ... bench: 720,097 ns/iter (+/- 32,351) = 826 MB/s +test sherlock::name_alt3_nocase ... bench: 2,591,049 ns/iter (+/- 86,537) = 229 MB/s +test sherlock::name_alt4 ... bench: 65,860 ns/iter (+/- 2,780) = 9033 MB/s +test sherlock::name_alt4_nocase ... bench: 1,204,839 ns/iter (+/- 41,087) = 493 MB/s +test sherlock::name_alt5 ... bench: 615,483 ns/iter (+/- 24,177) = 966 MB/s +test sherlock::name_alt5_nocase ... bench: 1,467,461 ns/iter (+/- 71,032) = 405 MB/s +test sherlock::name_holmes ... bench: 48,997 ns/iter (+/- 2,471) = 12142 MB/s +test sherlock::name_holmes_nocase ... bench: 88,549 ns/iter (+/- 4,814) = 6718 MB/s +test sherlock::name_sherlock ... bench: 38,309 ns/iter (+/- 1,354) = 15529 MB/s +test sherlock::name_sherlock_holmes ... bench: 39,062 ns/iter (+/- 4,253) = 15230 MB/s +test sherlock::name_sherlock_holmes_nocase ... 
bench: 713,355 ns/iter (+/- 77,990) = 833 MB/s +test sherlock::name_sherlock_nocase ... bench: 719,747 ns/iter (+/- 85,736) = 826 MB/s +test sherlock::name_whitespace ... bench: 39,161 ns/iter (+/- 3,678) = 15191 MB/s +test sherlock::no_match_common ... bench: 35,574 ns/iter (+/- 3,433) = 16723 MB/s +test sherlock::no_match_really_common ... bench: 56,847 ns/iter (+/- 7,068) = 10465 MB/s +test sherlock::no_match_uncommon ... bench: 36,185 ns/iter (+/- 4,938) = 16441 MB/s +test sherlock::quotes ... bench: 454,135 ns/iter (+/- 18,816) = 1310 MB/s +test sherlock::repeated_class_negation ... bench: 5,724,068 ns/iter (+/- 342,211) = 103 MB/s +test sherlock::the_lower ... bench: 256,190 ns/iter (+/- 25,452) = 2322 MB/s +test sherlock::the_nocase ... bench: 284,080 ns/iter (+/- 17,165) = 2094 MB/s +test sherlock::the_upper ... bench: 56,120 ns/iter (+/- 2,826) = 10601 MB/s +test sherlock::the_whitespace ... bench: 456,734 ns/iter (+/- 23,405) = 1302 MB/s +test sherlock::word_ending_n ... bench: 5,079,288 ns/iter (+/- 214,895) = 117 MB/s +test sherlock::words ... bench: 5,200,092 ns/iter (+/- 250,085) = 114 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/06/re2 b/third_party/rust/regex/record/old-bench-log/06/re2 new file mode 100644 index 0000000000..3e1585a646 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 73 ns/iter (+/- 8) = 5342 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 1) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 73 ns/iter (+/- 9) = 356 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 1) = 1625 MB/s +test misc::easy0_1K ... bench: 119 ns/iter (+/- 11) = 8831 MB/s +test misc::easy0_1MB ... bench: 25,312 ns/iter (+/- 875) = 41427 MB/s +test misc::easy0_32 ... 
bench: 112 ns/iter (+/- 5) = 526 MB/s +test misc::easy0_32K ... bench: 534 ns/iter (+/- 43) = 61413 MB/s +test misc::easy1_1K ... bench: 109 ns/iter (+/- 9) = 9577 MB/s +test misc::easy1_1MB ... bench: 23,892 ns/iter (+/- 715) = 43889 MB/s +test misc::easy1_32 ... bench: 102 ns/iter (+/- 8) = 509 MB/s +test misc::easy1_32K ... bench: 519 ns/iter (+/- 54) = 63175 MB/s +test misc::hard_1K ... bench: 1,859 ns/iter (+/- 202) = 565 MB/s +test misc::hard_1MB ... bench: 1,871,446 ns/iter (+/- 99,961) = 560 MB/s +test misc::hard_32 ... bench: 162 ns/iter (+/- 20) = 364 MB/s +test misc::hard_32K ... bench: 57,459 ns/iter (+/- 4,672) = 570 MB/s +test misc::literal ... bench: 70 ns/iter (+/- 8) = 728 MB/s +test misc::long_needle1 ... bench: 130,995 ns/iter (+/- 4,935) = 763 MB/s +test misc::long_needle2 ... bench: 129,668 ns/iter (+/- 8,852) = 771 MB/s +test misc::match_class ... bench: 195 ns/iter (+/- 16) = 415 MB/s +test misc::match_class_in_range ... bench: 194 ns/iter (+/- 22) = 417 MB/s +test misc::match_class_unicode ... bench: 630 ns/iter (+/- 61) = 255 MB/s +test misc::medium_1K ... bench: 1,699 ns/iter (+/- 147) = 619 MB/s +test misc::medium_1MB ... bench: 1,633,131 ns/iter (+/- 65,889) = 642 MB/s +test misc::medium_32 ... bench: 169 ns/iter (+/- 18) = 355 MB/s +test misc::medium_32K ... bench: 51,313 ns/iter (+/- 1,855) = 639 MB/s +test misc::no_exponential ... bench: 216 ns/iter (+/- 13) = 462 MB/s +test misc::not_literal ... bench: 140 ns/iter (+/- 6) = 364 MB/s +test misc::one_pass_long_prefix ... bench: 71 ns/iter (+/- 2) = 366 MB/s +test misc::one_pass_long_prefix_not ... bench: 109 ns/iter (+/- 9) = 238 MB/s +test misc::one_pass_short ... bench: 99 ns/iter (+/- 7) = 171 MB/s +test misc::one_pass_short_not ... bench: 96 ns/iter (+/- 5) = 177 MB/s +test misc::reallyhard2_1K ... bench: 1,405 ns/iter (+/- 134) = 740 MB/s +test misc::reallyhard_1K ... bench: 1,875 ns/iter (+/- 168) = 560 MB/s +test misc::reallyhard_1MB ... 
bench: 1,853,207 ns/iter (+/- 103,218) = 565 MB/s +test misc::reallyhard_32 ... bench: 157 ns/iter (+/- 11) = 375 MB/s +test misc::reallyhard_32K ... bench: 57,880 ns/iter (+/- 5,319) = 566 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 12,686 ns/iter (+/- 536) = 630 MB/s +test regexdna::find_new_lines ... bench: 28,761,913 ns/iter (+/- 1,447,326) = 176 MB/s +test regexdna::subst1 ... bench: 4,629,782 ns/iter (+/- 142,214) = 1097 MB/s +test regexdna::subst10 ... bench: 4,692,819 ns/iter (+/- 156,805) = 1083 MB/s +test regexdna::subst11 ... bench: 4,652,438 ns/iter (+/- 206,457) = 1092 MB/s +test regexdna::subst2 ... bench: 4,682,943 ns/iter (+/- 176,335) = 1085 MB/s +test regexdna::subst3 ... bench: 4,646,162 ns/iter (+/- 241,873) = 1094 MB/s +test regexdna::subst4 ... bench: 4,653,380 ns/iter (+/- 188,899) = 1092 MB/s +test regexdna::subst5 ... bench: 4,770,480 ns/iter (+/- 238,930) = 1065 MB/s +test regexdna::subst6 ... bench: 4,671,427 ns/iter (+/- 286,241) = 1088 MB/s +test regexdna::subst7 ... bench: 4,658,214 ns/iter (+/- 210,723) = 1091 MB/s +test regexdna::subst8 ... bench: 4,909,600 ns/iter (+/- 417,894) = 1035 MB/s +test regexdna::subst9 ... bench: 4,910,285 ns/iter (+/- 587,024) = 1035 MB/s +test regexdna::variant1 ... bench: 20,895,772 ns/iter (+/- 2,313,771) = 243 MB/s +test regexdna::variant2 ... bench: 20,465,984 ns/iter (+/- 1,913,613) = 248 MB/s +test regexdna::variant3 ... bench: 19,469,527 ns/iter (+/- 1,367,226) = 261 MB/s +test regexdna::variant4 ... bench: 21,662,238 ns/iter (+/- 1,489,235) = 234 MB/s +test regexdna::variant5 ... bench: 21,808,098 ns/iter (+/- 2,294,522) = 233 MB/s +test regexdna::variant6 ... bench: 21,208,952 ns/iter (+/- 986,848) = 239 MB/s +test regexdna::variant7 ... bench: 20,289,473 ns/iter (+/- 595,084) = 250 MB/s +test regexdna::variant8 ... bench: 17,765,356 ns/iter (+/- 503,529) = 286 MB/s +test regexdna::variant9 ... bench: 13,222,010 ns/iter (+/- 509,278) = 384 MB/s +test sherlock::before_after_holmes ... 
bench: 1,313,676 ns/iter (+/- 52,992) = 452 MB/s +test sherlock::before_holmes ... bench: 1,337,432 ns/iter (+/- 37,054) = 444 MB/s +test sherlock::everything_greedy ... bench: 6,080,272 ns/iter (+/- 110,011) = 97 MB/s +test sherlock::everything_greedy_nl ... bench: 2,395,932 ns/iter (+/- 123,521) = 248 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,052,245 ns/iter (+/- 33,929) = 565 MB/s +test sherlock::holmes_coword_watson ... bench: 1,063,007 ns/iter (+/- 34,462) = 559 MB/s +test sherlock::ing_suffix ... bench: 2,703,395 ns/iter (+/- 63,263) = 220 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,608,756 ns/iter (+/- 42,100) = 369 MB/s +test sherlock::letters ... bench: 68,220,129 ns/iter (+/- 3,602,216) = 8 MB/s +test sherlock::letters_lower ... bench: 67,390,101 ns/iter (+/- 6,032,867) = 8 MB/s +test sherlock::letters_upper ... bench: 3,708,482 ns/iter (+/- 235,128) = 160 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,816,517 ns/iter (+/- 99,081) = 211 MB/s +test sherlock::name_alt1 ... bench: 53,193 ns/iter (+/- 1,575) = 11184 MB/s +test sherlock::name_alt2 ... bench: 1,133,704 ns/iter (+/- 36,634) = 524 MB/s +test sherlock::name_alt3 ... bench: 1,227,785 ns/iter (+/- 31,742) = 484 MB/s +test sherlock::name_alt3_nocase ... bench: 2,451,285 ns/iter (+/- 103,766) = 242 MB/s +test sherlock::name_alt4 ... bench: 1,168,955 ns/iter (+/- 87,785) = 508 MB/s +test sherlock::name_alt4_nocase ... bench: 1,699,899 ns/iter (+/- 91,762) = 349 MB/s +test sherlock::name_alt5 ... bench: 1,167,232 ns/iter (+/- 51,695) = 509 MB/s +test sherlock::name_alt5_nocase ... bench: 1,805,463 ns/iter (+/- 74,631) = 329 MB/s +test sherlock::name_holmes ... bench: 108,195 ns/iter (+/- 3,815) = 5498 MB/s +test sherlock::name_holmes_nocase ... bench: 1,360,092 ns/iter (+/- 60,416) = 437 MB/s +test sherlock::name_sherlock ... bench: 40,376 ns/iter (+/- 5,104) = 14734 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 41,361 ns/iter (+/- 2,553) = 14383 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,317,594 ns/iter (+/- 168,248) = 451 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,284,026 ns/iter (+/- 92,188) = 463 MB/s +test sherlock::name_whitespace ... bench: 44,973 ns/iter (+/- 5,888) = 13228 MB/s +test sherlock::no_match_common ... bench: 341,867 ns/iter (+/- 32,045) = 1740 MB/s +test sherlock::no_match_really_common ... bench: 331,760 ns/iter (+/- 43,608) = 1793 MB/s +test sherlock::no_match_uncommon ... bench: 14,285 ns/iter (+/- 760) = 41647 MB/s +test sherlock::quotes ... bench: 1,342,144 ns/iter (+/- 96,471) = 443 MB/s +test sherlock::the_lower ... bench: 1,722,919 ns/iter (+/- 83,873) = 345 MB/s +test sherlock::the_nocase ... bench: 2,866,258 ns/iter (+/- 117,349) = 207 MB/s +test sherlock::the_upper ... bench: 151,020 ns/iter (+/- 13,454) = 3939 MB/s +test sherlock::the_whitespace ... bench: 1,597,329 ns/iter (+/- 149,689) = 372 MB/s +test sherlock::word_ending_n ... bench: 2,193,027 ns/iter (+/- 136,408) = 271 MB/s +test sherlock::words ... bench: 20,721,148 ns/iter (+/- 1,968,912) = 28 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/06/rust b/third_party/rust/regex/record/old-bench-log/06/rust new file mode 100644 index 0000000000..53ab22287e --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/06/rust @@ -0,0 +1,113 @@ + +running 108 tests +test misc::anchored_literal_long_match ... bench: 22 ns/iter (+/- 2) = 17727 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 2) = 14444 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 1) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 26 ns/iter (+/- 2) = 1000 MB/s +test misc::easy0_1K ... bench: 16 ns/iter (+/- 1) = 65687 MB/s +test misc::easy0_1MB ... bench: 19 ns/iter (+/- 2) = 55189631 MB/s +test misc::easy0_32 ... 
bench: 15 ns/iter (+/- 1) = 3933 MB/s +test misc::easy0_32K ... bench: 16 ns/iter (+/- 0) = 2049687 MB/s +test misc::easy1_1K ... bench: 43 ns/iter (+/- 2) = 24279 MB/s +test misc::easy1_1MB ... bench: 45 ns/iter (+/- 4) = 23302133 MB/s +test misc::easy1_32 ... bench: 43 ns/iter (+/- 5) = 1209 MB/s +test misc::easy1_32K ... bench: 43 ns/iter (+/- 2) = 762511 MB/s +test misc::hard_1K ... bench: 53 ns/iter (+/- 6) = 19830 MB/s +test misc::hard_1MB ... bench: 57 ns/iter (+/- 1) = 18396543 MB/s +test misc::hard_32 ... bench: 53 ns/iter (+/- 4) = 1113 MB/s +test misc::hard_32K ... bench: 53 ns/iter (+/- 6) = 618773 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 1) = 3923 MB/s +test misc::long_needle1 ... bench: 1,203 ns/iter (+/- 55) = 83126 MB/s +test misc::long_needle2 ... bench: 149,418 ns/iter (+/- 13,825) = 669 MB/s +test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s +test misc::match_class_in_range ... bench: 23 ns/iter (+/- 2) = 3521 MB/s +test misc::match_class_unicode ... bench: 268 ns/iter (+/- 30) = 600 MB/s +test misc::medium_1K ... bench: 16 ns/iter (+/- 0) = 65750 MB/s +test misc::medium_1MB ... bench: 20 ns/iter (+/- 15) = 52430200 MB/s +test misc::medium_32 ... bench: 16 ns/iter (+/- 2) = 3750 MB/s +test misc::medium_32K ... bench: 16 ns/iter (+/- 1) = 2049750 MB/s +test misc::no_exponential ... bench: 353 ns/iter (+/- 26) = 283 MB/s +test misc::not_literal ... bench: 97 ns/iter (+/- 9) = 525 MB/s +test misc::one_pass_long_prefix ... bench: 58 ns/iter (+/- 5) = 448 MB/s +test misc::one_pass_long_prefix_not ... bench: 60 ns/iter (+/- 6) = 433 MB/s +test misc::one_pass_short ... bench: 43 ns/iter (+/- 4) = 395 MB/s +test misc::one_pass_short_not ... bench: 46 ns/iter (+/- 2) = 369 MB/s +test misc::reallyhard2_1K ... bench: 62 ns/iter (+/- 5) = 16774 MB/s +test misc::reallyhard_1K ... bench: 1,650 ns/iter (+/- 176) = 636 MB/s +test misc::reallyhard_1MB ... bench: 1,635,447 ns/iter (+/- 97,611) = 641 MB/s +test misc::reallyhard_32 ... 
bench: 109 ns/iter (+/- 9) = 541 MB/s +test misc::reallyhard_32K ... bench: 50,991 ns/iter (+/- 4,031) = 643 MB/s +test misc::replace_all ... bench: 155 ns/iter (+/- 8) +test misc::reverse_suffix_no_quadratic ... bench: 4,254 ns/iter (+/- 489) = 1880 MB/s +test misc::short_haystack_1000000x ... bench: 91,124 ns/iter (+/- 4,584) = 87792 MB/s +test misc::short_haystack_100000x ... bench: 10,681 ns/iter (+/- 420) = 74900 MB/s +test misc::short_haystack_10000x ... bench: 3,240 ns/iter (+/- 395) = 24694 MB/s +test misc::short_haystack_1000x ... bench: 403 ns/iter (+/- 48) = 19878 MB/s +test misc::short_haystack_100x ... bench: 303 ns/iter (+/- 27) = 2676 MB/s +test misc::short_haystack_10x ... bench: 272 ns/iter (+/- 27) = 334 MB/s +test misc::short_haystack_1x ... bench: 264 ns/iter (+/- 32) = 71 MB/s +test misc::short_haystack_2x ... bench: 269 ns/iter (+/- 25) = 100 MB/s +test misc::short_haystack_3x ... bench: 264 ns/iter (+/- 26) = 132 MB/s +test misc::short_haystack_4x ... bench: 271 ns/iter (+/- 28) = 158 MB/s +test regexdna::find_new_lines ... bench: 13,700,405 ns/iter (+/- 647,840) = 371 MB/s +test regexdna::subst1 ... bench: 806,342 ns/iter (+/- 48,014) = 6304 MB/s +test regexdna::subst10 ... bench: 794,403 ns/iter (+/- 40,393) = 6399 MB/s +test regexdna::subst11 ... bench: 801,963 ns/iter (+/- 46,164) = 6338 MB/s +test regexdna::subst2 ... bench: 779,768 ns/iter (+/- 81,505) = 6519 MB/s +test regexdna::subst3 ... bench: 777,024 ns/iter (+/- 52,795) = 6542 MB/s +test regexdna::subst4 ... bench: 769,862 ns/iter (+/- 48,980) = 6603 MB/s +test regexdna::subst5 ... bench: 779,754 ns/iter (+/- 39,784) = 6519 MB/s +test regexdna::subst6 ... bench: 769,400 ns/iter (+/- 69,980) = 6606 MB/s +test regexdna::subst7 ... bench: 771,457 ns/iter (+/- 40,490) = 6589 MB/s +test regexdna::subst8 ... bench: 808,468 ns/iter (+/- 53,093) = 6287 MB/s +test regexdna::subst9 ... bench: 771,869 ns/iter (+/- 50,966) = 6585 MB/s +test regexdna::variant1 ... 
bench: 3,093,422 ns/iter (+/- 222,818) = 1643 MB/s +test regexdna::variant2 ... bench: 6,520,178 ns/iter (+/- 400,704) = 779 MB/s +test regexdna::variant3 ... bench: 7,297,818 ns/iter (+/- 319,866) = 696 MB/s +test regexdna::variant4 ... bench: 7,356,045 ns/iter (+/- 530,375) = 691 MB/s +test regexdna::variant5 ... bench: 5,977,343 ns/iter (+/- 296,375) = 850 MB/s +test regexdna::variant6 ... bench: 6,045,776 ns/iter (+/- 270,954) = 840 MB/s +test regexdna::variant7 ... bench: 5,447,060 ns/iter (+/- 223,542) = 933 MB/s +test regexdna::variant8 ... bench: 5,615,676 ns/iter (+/- 419,756) = 905 MB/s +test regexdna::variant9 ... bench: 5,457,949 ns/iter (+/- 439,821) = 931 MB/s +test sherlock::before_after_holmes ... bench: 957,660 ns/iter (+/- 96,491) = 621 MB/s +test sherlock::before_holmes ... bench: 65,680 ns/iter (+/- 3,085) = 9058 MB/s +test sherlock::everything_greedy ... bench: 2,151,577 ns/iter (+/- 70,114) = 276 MB/s +test sherlock::everything_greedy_nl ... bench: 836,942 ns/iter (+/- 81,010) = 710 MB/s +test sherlock::holmes_cochar_watson ... bench: 137,441 ns/iter (+/- 14,157) = 4328 MB/s +test sherlock::holmes_coword_watson ... bench: 514,100 ns/iter (+/- 48,210) = 1157 MB/s +test sherlock::ing_suffix ... bench: 409,126 ns/iter (+/- 23,370) = 1454 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,117,457 ns/iter (+/- 53,545) = 532 MB/s +test sherlock::letters ... bench: 23,152,671 ns/iter (+/- 1,002,203) = 25 MB/s +test sherlock::letters_lower ... bench: 22,521,833 ns/iter (+/- 1,178,375) = 26 MB/s +test sherlock::letters_upper ... bench: 1,841,871 ns/iter (+/- 108,471) = 323 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 946,126 ns/iter (+/- 53,405) = 628 MB/s +test sherlock::name_alt1 ... bench: 25,830 ns/iter (+/- 1,054) = 23032 MB/s +test sherlock::name_alt2 ... bench: 116,879 ns/iter (+/- 6,000) = 5090 MB/s +test sherlock::name_alt3 ... bench: 125,746 ns/iter (+/- 7,121) = 4731 MB/s +test sherlock::name_alt3_nocase ... 
bench: 1,203,114 ns/iter (+/- 72,037) = 494 MB/s +test sherlock::name_alt4 ... bench: 156,208 ns/iter (+/- 5,188) = 3808 MB/s +test sherlock::name_alt4_nocase ... bench: 222,618 ns/iter (+/- 30,017) = 2672 MB/s +test sherlock::name_alt5 ... bench: 133,440 ns/iter (+/- 14,831) = 4458 MB/s +test sherlock::name_alt5_nocase ... bench: 558,482 ns/iter (+/- 22,435) = 1065 MB/s +test sherlock::name_holmes ... bench: 30,800 ns/iter (+/- 2,933) = 19316 MB/s +test sherlock::name_holmes_nocase ... bench: 190,736 ns/iter (+/- 24,310) = 3119 MB/s +test sherlock::name_sherlock ... bench: 56,238 ns/iter (+/- 3,310) = 10578 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,129 ns/iter (+/- 2,662) = 24656 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 174,649 ns/iter (+/- 13,487) = 3406 MB/s +test sherlock::name_sherlock_nocase ... bench: 157,674 ns/iter (+/- 7,888) = 3773 MB/s +test sherlock::name_whitespace ... bench: 74,637 ns/iter (+/- 6,523) = 7971 MB/s +test sherlock::no_match_common ... bench: 15,140 ns/iter (+/- 969) = 39295 MB/s +test sherlock::no_match_really_common ... bench: 305,112 ns/iter (+/- 31,314) = 1949 MB/s +test sherlock::no_match_uncommon ... bench: 15,539 ns/iter (+/- 1,269) = 38286 MB/s +test sherlock::quotes ... bench: 482,180 ns/iter (+/- 33,736) = 1233 MB/s +test sherlock::repeated_class_negation ... bench: 78,428,426 ns/iter (+/- 6,705,217) = 7 MB/s +test sherlock::the_lower ... bench: 576,511 ns/iter (+/- 21,735) = 1031 MB/s +test sherlock::the_nocase ... bench: 413,565 ns/iter (+/- 42,941) = 1438 MB/s +test sherlock::the_upper ... bench: 34,491 ns/iter (+/- 1,901) = 17248 MB/s +test sherlock::the_whitespace ... bench: 1,061,365 ns/iter (+/- 66,639) = 560 MB/s +test sherlock::word_ending_n ... bench: 1,763,795 ns/iter (+/- 83,031) = 337 MB/s +test sherlock::words ... bench: 9,281,896 ns/iter (+/- 934,308) = 64 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 108 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/boost b/third_party/rust/regex/record/old-bench-log/07/boost new file mode 100644 index 0000000000..5a13a1047a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/boost @@ -0,0 +1,97 @@ + +running 92 tests +test misc::anchored_literal_long_match ... bench: 174 ns/iter (+/- 0) = 2241 MB/s +test misc::anchored_literal_long_non_match ... bench: 329 ns/iter (+/- 3) = 1185 MB/s +test misc::anchored_literal_short_match ... bench: 168 ns/iter (+/- 0) = 154 MB/s +test misc::anchored_literal_short_non_match ... bench: 121 ns/iter (+/- 0) = 214 MB/s +test misc::easy0_1K ... bench: 660 ns/iter (+/- 3) = 1592 MB/s +test misc::easy0_1MB ... bench: 514,707 ns/iter (+/- 2,689) = 2037 MB/s +test misc::easy0_32 ... bench: 170 ns/iter (+/- 2) = 347 MB/s +test misc::easy0_32K ... bench: 16,208 ns/iter (+/- 99) = 2023 MB/s +test misc::easy1_1K ... bench: 756 ns/iter (+/- 1) = 1380 MB/s +test misc::easy1_1MB ... bench: 514,816 ns/iter (+/- 2,832) = 2036 MB/s +test misc::easy1_32 ... bench: 271 ns/iter (+/- 3) = 191 MB/s +test misc::easy1_32K ... bench: 16,316 ns/iter (+/- 93) = 2009 MB/s +test misc::hard_1K ... bench: 63,089 ns/iter (+/- 594) = 16 MB/s +test misc::hard_1MB ... bench: 66,537,328 ns/iter (+/- 866,695) = 15 MB/s +test misc::hard_32 ... bench: 2,125 ns/iter (+/- 8) = 27 MB/s +test misc::hard_32K ... bench: 2,075,568 ns/iter (+/- 6,634) = 15 MB/s +test misc::literal ... bench: 143 ns/iter (+/- 1) = 356 MB/s +test misc::long_needle1 ... bench: 6,557,839 ns/iter (+/- 27,779) = 15 MB/s +test misc::long_needle2 ... bench: 6,557,332 ns/iter (+/- 101,494) = 15 MB/s +test misc::match_class ... bench: 157 ns/iter (+/- 0) = 515 MB/s +test misc::match_class_in_range ... bench: 157 ns/iter (+/- 4) = 515 MB/s +test misc::medium_1K ... bench: 665 ns/iter (+/- 2) = 1581 MB/s +test misc::medium_1MB ... 
bench: 514,869 ns/iter (+/- 5,832) = 2036 MB/s +test misc::medium_32 ... bench: 167 ns/iter (+/- 1) = 359 MB/s +test misc::medium_32K ... bench: 16,253 ns/iter (+/- 74) = 2017 MB/s +test misc::no_exponential ... bench: 1,717 ns/iter (+/- 13) = 58 MB/s +test misc::not_literal ... bench: 1,084 ns/iter (+/- 16) = 47 MB/s +test misc::one_pass_long_prefix ... bench: 169 ns/iter (+/- 2) = 153 MB/s +test misc::one_pass_long_prefix_not ... bench: 169 ns/iter (+/- 6) = 153 MB/s +test misc::one_pass_short ... bench: 1,105 ns/iter (+/- 2) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,076 ns/iter (+/- 10) = 15 MB/s +test misc::reallyhard2_1K ... bench: 4,935 ns/iter (+/- 39) = 210 MB/s +test misc::reallyhard_1K ... bench: 63,076 ns/iter (+/- 226) = 16 MB/s +test misc::reallyhard_1MB ... bench: 68,534,102 ns/iter (+/- 125,043) = 15 MB/s +test misc::reallyhard_32 ... bench: 2,134 ns/iter (+/- 8) = 27 MB/s +test misc::reallyhard_32K ... bench: 2,074,582 ns/iter (+/- 5,943) = 15 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,001 ns/iter (+/- 5) = 1999 MB/s +test regexdna::find_new_lines ... bench: 12,942,765 ns/iter (+/- 21,828) = 392 MB/s +test regexdna::subst1 ... bench: 6,241,036 ns/iter (+/- 13,806) = 814 MB/s +test regexdna::subst10 ... bench: 6,247,896 ns/iter (+/- 28,406) = 813 MB/s +test regexdna::subst11 ... bench: 6,240,960 ns/iter (+/- 20,660) = 814 MB/s +test regexdna::subst2 ... bench: 6,245,156 ns/iter (+/- 17,639) = 813 MB/s +test regexdna::subst3 ... bench: 6,276,881 ns/iter (+/- 14,851) = 809 MB/s +test regexdna::subst4 ... bench: 6,249,549 ns/iter (+/- 30,600) = 813 MB/s +test regexdna::subst5 ... bench: 6,251,942 ns/iter (+/- 33,889) = 813 MB/s +test regexdna::subst6 ... bench: 6,244,011 ns/iter (+/- 11,642) = 814 MB/s +test regexdna::subst7 ... bench: 6,283,445 ns/iter (+/- 11,693) = 809 MB/s +test regexdna::subst8 ... bench: 6,247,310 ns/iter (+/- 11,590) = 813 MB/s +test regexdna::subst9 ... 
bench: 6,249,184 ns/iter (+/- 8,159) = 813 MB/s +test regexdna::variant1 ... bench: 73,947,890 ns/iter (+/- 930,039) = 68 MB/s +test regexdna::variant2 ... bench: 108,486,922 ns/iter (+/- 181,287) = 46 MB/s +test regexdna::variant3 ... bench: 93,241,161 ns/iter (+/- 143,224) = 54 MB/s +test regexdna::variant4 ... bench: 75,615,061 ns/iter (+/- 107,918) = 67 MB/s +test regexdna::variant5 ... bench: 74,484,623 ns/iter (+/- 121,807) = 68 MB/s +test regexdna::variant6 ... bench: 74,594,078 ns/iter (+/- 121,252) = 68 MB/s +test regexdna::variant7 ... bench: 77,064,066 ns/iter (+/- 123,262) = 65 MB/s +test regexdna::variant8 ... bench: 87,267,656 ns/iter (+/- 128,639) = 58 MB/s +test regexdna::variant9 ... bench: 98,197,000 ns/iter (+/- 149,379) = 51 MB/s +test sherlock::before_after_holmes ... bench: 7,100,744 ns/iter (+/- 29,939) = 83 MB/s +test sherlock::before_holmes ... bench: 7,120,564 ns/iter (+/- 32,659) = 83 MB/s +test sherlock::everything_greedy ... bench: 3,777,458 ns/iter (+/- 8,802) = 157 MB/s +test sherlock::everything_greedy_nl ... bench: 282 ns/iter (+/- 2) = 2109691 MB/s +test sherlock::holmes_cochar_watson ... bench: 389,335 ns/iter (+/- 1,472) = 1528 MB/s +test sherlock::ing_suffix ... bench: 6,256,416 ns/iter (+/- 8,735) = 95 MB/s +test sherlock::ing_suffix_limited_space ... bench: 7,572,167 ns/iter (+/- 15,521) = 78 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 343,402 ns/iter (+/- 3,693) = 1732 MB/s +test sherlock::name_alt1 ... bench: 347,605 ns/iter (+/- 916) = 1711 MB/s +test sherlock::name_alt2 ... bench: 420,500 ns/iter (+/- 2,846) = 1414 MB/s +test sherlock::name_alt3 ... bench: 762,684 ns/iter (+/- 3,834) = 780 MB/s +test sherlock::name_alt3_nocase ... bench: 9,980,804 ns/iter (+/- 49,424) = 59 MB/s +test sherlock::name_alt4 ... bench: 431,744 ns/iter (+/- 682) = 1377 MB/s +test sherlock::name_alt4_nocase ... bench: 3,464,135 ns/iter (+/- 11,476) = 171 MB/s +test sherlock::name_alt5 ... 
bench: 472,923 ns/iter (+/- 846) = 1257 MB/s +test sherlock::name_alt5_nocase ... bench: 4,338,924 ns/iter (+/- 31,118) = 137 MB/s +test sherlock::name_holmes ... bench: 378,950 ns/iter (+/- 865) = 1569 MB/s +test sherlock::name_holmes_nocase ... bench: 1,952,035 ns/iter (+/- 8,233) = 304 MB/s +test sherlock::name_sherlock ... bench: 324,845 ns/iter (+/- 8,376) = 1831 MB/s +test sherlock::name_sherlock_holmes ... bench: 324,647 ns/iter (+/- 2,901) = 1832 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,870,400 ns/iter (+/- 10,609) = 318 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,814,172 ns/iter (+/- 16,455) = 327 MB/s +test sherlock::name_whitespace ... bench: 326,252 ns/iter (+/- 1,557) = 1823 MB/s +test sherlock::no_match_common ... bench: 1,154,445 ns/iter (+/- 8,544) = 515 MB/s +test sherlock::no_match_really_common ... bench: 1,154,314 ns/iter (+/- 5,467) = 515 MB/s +test sherlock::no_match_uncommon ... bench: 295,301 ns/iter (+/- 906) = 2014 MB/s +test sherlock::quotes ... bench: 863,138 ns/iter (+/- 3,072) = 689 MB/s +test sherlock::repeated_class_negation ... bench: 13,594,294 ns/iter (+/- 40,354) = 43 MB/s +test sherlock::the_lower ... bench: 2,171,731 ns/iter (+/- 7,148) = 273 MB/s +test sherlock::the_nocase ... bench: 3,556,278 ns/iter (+/- 7,269) = 167 MB/s +test sherlock::the_upper ... bench: 404,851 ns/iter (+/- 865) = 1469 MB/s +test sherlock::the_whitespace ... bench: 2,139,597 ns/iter (+/- 7,427) = 278 MB/s +test sherlock::word_ending_n ... bench: 7,824,965 ns/iter (+/- 30,691) = 76 MB/s +test sherlock::words ... bench: 18,386,285 ns/iter (+/- 34,161) = 32 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 92 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/dphobos-dmd b/third_party/rust/regex/record/old-bench-log/07/dphobos-dmd new file mode 100644 index 0000000000..835a096e08 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/dphobos-dmd @@ -0,0 +1,100 @@ + +running 95 tests +test misc::anchored_literal_long_match ... bench: 365 ns/iter (+/- 2) = 1068 MB/s +test misc::anchored_literal_long_non_match ... bench: 300 ns/iter (+/- 0) = 1300 MB/s +test misc::anchored_literal_short_match ... bench: 364 ns/iter (+/- 2) = 71 MB/s +test misc::anchored_literal_short_non_match ... bench: 306 ns/iter (+/- 9) = 84 MB/s +test misc::easy0_1K ... bench: 768 ns/iter (+/- 5) = 1368 MB/s +test misc::easy0_1MB ... bench: 17,062 ns/iter (+/- 252) = 61458 MB/s +test misc::easy0_32 ... bench: 759 ns/iter (+/- 7) = 77 MB/s +test misc::easy0_32K ... bench: 1,095 ns/iter (+/- 20) = 29949 MB/s +test misc::easy1_1K ... bench: 723 ns/iter (+/- 1) = 1443 MB/s +test misc::easy1_1MB ... bench: 17,021 ns/iter (+/- 229) = 61606 MB/s +test misc::easy1_32 ... bench: 714 ns/iter (+/- 1) = 72 MB/s +test misc::easy1_32K ... bench: 1,052 ns/iter (+/- 12) = 31167 MB/s +test misc::hard_1K ... bench: 17,044 ns/iter (+/- 176) = 61 MB/s +test misc::hard_1MB ... bench: 17,965,420 ns/iter (+/- 72,226) = 58 MB/s +test misc::hard_32 ... bench: 2,171 ns/iter (+/- 2) = 27 MB/s +test misc::hard_32K ... bench: 561,207 ns/iter (+/- 5,654) = 58 MB/s +test misc::literal ... bench: 240 ns/iter (+/- 0) = 212 MB/s +test misc::long_needle1 ... bench: 76,640 ns/iter (+/- 1,043) = 1304 MB/s +test misc::long_needle2 ... bench: 76,747 ns/iter (+/- 3,299) = 1302 MB/s +test misc::match_class ... bench: 344 ns/iter (+/- 1) = 235 MB/s +test misc::match_class_in_range ... bench: 306 ns/iter (+/- 9) = 264 MB/s +test misc::match_class_unicode ... bench: 1,435 ns/iter (+/- 9) = 112 MB/s +test misc::medium_1K ... 
bench: 1,480 ns/iter (+/- 16) = 710 MB/s +test misc::medium_1MB ... bench: 564,378 ns/iter (+/- 18,695) = 1857 MB/s +test misc::medium_32 ... bench: 941 ns/iter (+/- 32) = 63 MB/s +test misc::medium_32K ... bench: 18,465 ns/iter (+/- 116) = 1776 MB/s +test misc::no_exponential ... bench: 367,476 ns/iter (+/- 15,176) +test misc::not_literal ... bench: 1,165 ns/iter (+/- 9) = 43 MB/s +test misc::one_pass_long_prefix ... bench: 596 ns/iter (+/- 2) = 43 MB/s +test misc::one_pass_long_prefix_not ... bench: 602 ns/iter (+/- 6) = 43 MB/s +test misc::one_pass_short ... bench: 1,068 ns/iter (+/- 3) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,434 ns/iter (+/- 11) = 11 MB/s +test misc::reallyhard2_1K ... bench: 36,539 ns/iter (+/- 281) = 28 MB/s +test misc::reallyhard_1K ... bench: 17,086 ns/iter (+/- 94) = 61 MB/s +test misc::reallyhard_1MB ... bench: 17,973,007 ns/iter (+/- 64,010) = 58 MB/s +test misc::reallyhard_32 ... bench: 2,200 ns/iter (+/- 16) = 26 MB/s +test misc::reallyhard_32K ... bench: 561,371 ns/iter (+/- 8,688) = 58 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,606 ns/iter (+/- 16) = 1736 MB/s +test regexdna::find_new_lines ... bench: 31,579,756 ns/iter (+/- 121,047) = 160 MB/s +test regexdna::subst1 ... bench: 7,930,333 ns/iter (+/- 27,118) = 641 MB/s +test regexdna::subst10 ... bench: 7,942,534 ns/iter (+/- 36,470) = 640 MB/s +test regexdna::subst11 ... bench: 7,934,646 ns/iter (+/- 42,013) = 640 MB/s +test regexdna::subst2 ... bench: 7,947,802 ns/iter (+/- 53,427) = 639 MB/s +test regexdna::subst3 ... bench: 7,941,691 ns/iter (+/- 122,303) = 640 MB/s +test regexdna::subst4 ... bench: 7,928,228 ns/iter (+/- 30,493) = 641 MB/s +test regexdna::subst5 ... bench: 7,936,901 ns/iter (+/- 37,894) = 640 MB/s +test regexdna::subst6 ... bench: 7,936,211 ns/iter (+/- 46,269) = 640 MB/s +test regexdna::subst7 ... bench: 7,946,477 ns/iter (+/- 62,660) = 639 MB/s +test regexdna::subst8 ... 
bench: 7,930,830 ns/iter (+/- 31,234) = 640 MB/s +test regexdna::subst9 ... bench: 7,937,951 ns/iter (+/- 36,425) = 640 MB/s +test regexdna::variant1 ... bench: 5,104,224 ns/iter (+/- 26,612) = 995 MB/s +test regexdna::variant2 ... bench: 6,847,162 ns/iter (+/- 31,233) = 742 MB/s +test regexdna::variant3 ... bench: 11,153,739 ns/iter (+/- 114,193) = 455 MB/s +test regexdna::variant4 ... bench: 9,665,797 ns/iter (+/- 47,148) = 525 MB/s +test regexdna::variant5 ... bench: 9,645,193 ns/iter (+/- 35,250) = 527 MB/s +test regexdna::variant6 ... bench: 7,280,069 ns/iter (+/- 21,171) = 698 MB/s +test regexdna::variant7 ... bench: 7,841,177 ns/iter (+/- 20,797) = 648 MB/s +test regexdna::variant8 ... bench: 9,783,978 ns/iter (+/- 35,231) = 519 MB/s +test regexdna::variant9 ... bench: 19,157,329 ns/iter (+/- 445,911) = 265 MB/s +test sherlock::before_after_holmes ... bench: 20,995,307 ns/iter (+/- 258,419) = 28 MB/s +test sherlock::before_holmes ... bench: 20,899,416 ns/iter (+/- 122,256) = 28 MB/s +test sherlock::holmes_cochar_watson ... bench: 904,439 ns/iter (+/- 6,934) = 657 MB/s +test sherlock::holmes_coword_watson ... bench: 103,706,930 ns/iter (+/- 176,711) = 5 MB/s +test sherlock::ing_suffix ... bench: 14,927,612 ns/iter (+/- 90,346) = 39 MB/s +test sherlock::ing_suffix_limited_space ... bench: 19,743,662 ns/iter (+/- 78,506) = 30 MB/s +test sherlock::letters ... bench: 112,708,213 ns/iter (+/- 251,690) = 5 MB/s +test sherlock::letters_lower ... bench: 111,058,829 ns/iter (+/- 192,793) = 5 MB/s +test sherlock::letters_upper ... bench: 4,072,062 ns/iter (+/- 20,273) = 146 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 399,796 ns/iter (+/- 4,222) = 1488 MB/s +test sherlock::name_alt1 ... bench: 411,415 ns/iter (+/- 2,257) = 1446 MB/s +test sherlock::name_alt2 ... bench: 626,671 ns/iter (+/- 5,745) = 949 MB/s +test sherlock::name_alt3 ... bench: 1,086,570 ns/iter (+/- 6,105) = 547 MB/s +test sherlock::name_alt3_nocase ... 
bench: 1,827,028 ns/iter (+/- 12,788) = 325 MB/s +test sherlock::name_alt4 ... bench: 687,454 ns/iter (+/- 11,421) = 865 MB/s +test sherlock::name_alt4_nocase ... bench: 943,925 ns/iter (+/- 4,108) = 630 MB/s +test sherlock::name_alt5 ... bench: 734,969 ns/iter (+/- 7,215) = 809 MB/s +test sherlock::name_alt5_nocase ... bench: 895,903 ns/iter (+/- 5,647) = 664 MB/s +test sherlock::name_holmes ... bench: 199,880 ns/iter (+/- 1,654) = 2976 MB/s +test sherlock::name_holmes_nocase ... bench: 529,590 ns/iter (+/- 1,288) = 1123 MB/s +test sherlock::name_sherlock ... bench: 57,720 ns/iter (+/- 555) = 10307 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,596 ns/iter (+/- 580) = 8801 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 393,903 ns/iter (+/- 2,700) = 1510 MB/s +test sherlock::name_sherlock_nocase ... bench: 373,411 ns/iter (+/- 3,264) = 1593 MB/s +test sherlock::name_whitespace ... bench: 79,175 ns/iter (+/- 1,288) = 7514 MB/s +test sherlock::no_match_common ... bench: 276,503 ns/iter (+/- 2,155) = 2151 MB/s +test sherlock::no_match_really_common ... bench: 276,535 ns/iter (+/- 416) = 2151 MB/s +test sherlock::no_match_uncommon ... bench: 10,535 ns/iter (+/- 105) = 56472 MB/s +test sherlock::quotes ... bench: 5,746,202 ns/iter (+/- 33,993) = 103 MB/s +test sherlock::repeated_class_negation ... bench: 46,124,528 ns/iter (+/- 125,861) = 12 MB/s +test sherlock::the_lower ... bench: 2,527,960 ns/iter (+/- 12,351) = 235 MB/s +test sherlock::the_nocase ... bench: 3,210,112 ns/iter (+/- 10,799) = 185 MB/s +test sherlock::the_upper ... bench: 240,272 ns/iter (+/- 3,902) = 2476 MB/s +test sherlock::the_whitespace ... bench: 3,511,711 ns/iter (+/- 17,181) = 169 MB/s +test sherlock::word_ending_n ... bench: 29,535,089 ns/iter (+/- 95,201) = 20 MB/s +test sherlock::words ... bench: 43,341,782 ns/iter (+/- 110,038) = 13 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/dphobos-dmd-ct b/third_party/rust/regex/record/old-bench-log/07/dphobos-dmd-ct new file mode 100644 index 0000000000..9dd6d02fec --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/dphobos-dmd-ct @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 354 ns/iter (+/- 25) = 1101 MB/s +test misc::anchored_literal_long_non_match ... bench: 314 ns/iter (+/- 3) = 1242 MB/s +test misc::anchored_literal_short_match ... bench: 331 ns/iter (+/- 0) = 78 MB/s +test misc::anchored_literal_short_non_match ... bench: 314 ns/iter (+/- 4) = 82 MB/s +test misc::easy0_1K ... bench: 430 ns/iter (+/- 2) = 2444 MB/s +test misc::easy0_1MB ... bench: 16,692 ns/iter (+/- 222) = 62820 MB/s +test misc::easy0_32 ... bench: 420 ns/iter (+/- 4) = 140 MB/s +test misc::easy0_32K ... bench: 755 ns/iter (+/- 5) = 43437 MB/s +test misc::easy1_1K ... bench: 407 ns/iter (+/- 10) = 2565 MB/s +test misc::easy1_1MB ... bench: 16,670 ns/iter (+/- 205) = 62903 MB/s +test misc::easy1_32 ... bench: 389 ns/iter (+/- 0) = 133 MB/s +test misc::easy1_32K ... bench: 732 ns/iter (+/- 6) = 44792 MB/s +test misc::hard_1K ... bench: 35,518 ns/iter (+/- 346) = 29 MB/s +test misc::hard_1MB ... bench: 31,657,473 ns/iter (+/- 512,618) = 33 MB/s +test misc::hard_32 ... bench: 1,057 ns/iter (+/- 7) = 55 MB/s +test misc::hard_32K ... bench: 950,905 ns/iter (+/- 13,239) = 34 MB/s +test misc::literal ... bench: 320 ns/iter (+/- 3) = 159 MB/s +test misc::long_needle1 ... bench: 73,954 ns/iter (+/- 331) = 1352 MB/s +test misc::long_needle2 ... bench: 73,915 ns/iter (+/- 199) = 1352 MB/s +test misc::match_class ... bench: 374 ns/iter (+/- 3) = 216 MB/s +test misc::match_class_in_range ... bench: 372 ns/iter (+/- 0) = 217 MB/s +test misc::match_class_unicode ... bench: 1,631 ns/iter (+/- 8) = 98 MB/s +test misc::medium_1K ... 
bench: 965 ns/iter (+/- 10) = 1090 MB/s +test misc::medium_1MB ... bench: 563,242 ns/iter (+/- 6,767) = 1861 MB/s +test misc::medium_32 ... bench: 434 ns/iter (+/- 3) = 138 MB/s +test misc::medium_32K ... bench: 17,976 ns/iter (+/- 137) = 1824 MB/s +test misc::not_literal ... bench: 1,063 ns/iter (+/- 2) = 47 MB/s +test misc::one_pass_long_prefix ... bench: 405 ns/iter (+/- 4) = 64 MB/s +test misc::one_pass_long_prefix_not ... bench: 410 ns/iter (+/- 2) = 63 MB/s +test misc::one_pass_short ... bench: 539 ns/iter (+/- 12) = 31 MB/s +test misc::one_pass_short_not ... bench: 534 ns/iter (+/- 1) = 31 MB/s +test misc::reallyhard2_1K ... bench: 75,108 ns/iter (+/- 699) = 13 MB/s +test misc::reallyhard_1K ... bench: 34,681 ns/iter (+/- 268) = 30 MB/s +test misc::reallyhard_1MB ... bench: 30,579,065 ns/iter (+/- 389,443) = 34 MB/s +test misc::reallyhard_32 ... bench: 1,025 ns/iter (+/- 22) = 57 MB/s +test misc::reallyhard_32K ... bench: 920,515 ns/iter (+/- 26,281) = 35 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,607 ns/iter (+/- 36) = 1736 MB/s +test regexdna::find_new_lines ... bench: 34,122,227 ns/iter (+/- 842,345) = 148 MB/s +test regexdna::subst1 ... bench: 9,932,271 ns/iter (+/- 86,915) = 511 MB/s +test regexdna::subst10 ... bench: 9,977,738 ns/iter (+/- 51,656) = 509 MB/s +test regexdna::subst11 ... bench: 9,945,085 ns/iter (+/- 53,175) = 511 MB/s +test regexdna::subst2 ... bench: 9,928,284 ns/iter (+/- 32,335) = 512 MB/s +test regexdna::subst3 ... bench: 9,968,901 ns/iter (+/- 41,254) = 509 MB/s +test regexdna::subst4 ... bench: 9,912,463 ns/iter (+/- 28,171) = 512 MB/s +test regexdna::subst5 ... bench: 9,948,128 ns/iter (+/- 22,949) = 510 MB/s +test regexdna::subst6 ... bench: 9,916,200 ns/iter (+/- 28,947) = 512 MB/s +test regexdna::subst7 ... bench: 9,996,277 ns/iter (+/- 37,585) = 508 MB/s +test regexdna::subst8 ... bench: 9,974,849 ns/iter (+/- 41,503) = 509 MB/s +test regexdna::subst9 ... 
bench: 9,961,948 ns/iter (+/- 28,254) = 510 MB/s +test regexdna::variant1 ... bench: 3,504,049 ns/iter (+/- 15,090) = 1450 MB/s +test regexdna::variant2 ... bench: 3,800,264 ns/iter (+/- 12,123) = 1337 MB/s +test regexdna::variant3 ... bench: 4,932,027 ns/iter (+/- 15,553) = 1030 MB/s +test regexdna::variant4 ... bench: 4,709,109 ns/iter (+/- 15,213) = 1079 MB/s +test regexdna::variant5 ... bench: 4,918,928 ns/iter (+/- 19,196) = 1033 MB/s +test regexdna::variant6 ... bench: 4,244,250 ns/iter (+/- 24,367) = 1197 MB/s +test regexdna::variant7 ... bench: 4,245,530 ns/iter (+/- 16,178) = 1197 MB/s +test regexdna::variant8 ... bench: 4,205,036 ns/iter (+/- 10,733) = 1208 MB/s +test regexdna::variant9 ... bench: 3,864,771 ns/iter (+/- 11,864) = 1315 MB/s +test sherlock::before_after_holmes ... bench: 22,490,817 ns/iter (+/- 571,510) = 26 MB/s +test sherlock::before_holmes ... bench: 22,603,264 ns/iter (+/- 74,703) = 26 MB/s +test sherlock::holmes_cochar_watson ... bench: 519,710 ns/iter (+/- 5,878) = 1144 MB/s +test sherlock::ing_suffix ... bench: 9,237,783 ns/iter (+/- 30,188) = 64 MB/s +test sherlock::ing_suffix_limited_space ... bench: 12,074,301 ns/iter (+/- 30,860) = 49 MB/s +test sherlock::letters ... bench: 137,678,575 ns/iter (+/- 131,761) = 4 MB/s +test sherlock::letters_lower ... bench: 135,414,657 ns/iter (+/- 134,307) = 4 MB/s +test sherlock::letters_upper ... bench: 5,004,996 ns/iter (+/- 23,224) = 118 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 341,556 ns/iter (+/- 1,500) = 1741 MB/s +test sherlock::name_alt1 ... bench: 378,291 ns/iter (+/- 1,545) = 1572 MB/s +test sherlock::name_alt2 ... bench: 528,403 ns/iter (+/- 2,273) = 1125 MB/s +test sherlock::name_alt3 ... bench: 685,634 ns/iter (+/- 17,205) = 867 MB/s +test sherlock::name_alt3_nocase ... bench: 825,069 ns/iter (+/- 10,490) = 721 MB/s +test sherlock::name_alt4 ... bench: 555,717 ns/iter (+/- 3,223) = 1070 MB/s +test sherlock::name_alt4_nocase ... 
bench: 649,913 ns/iter (+/- 4,543) = 915 MB/s +test sherlock::name_alt5 ... bench: 570,036 ns/iter (+/- 543) = 1043 MB/s +test sherlock::name_alt5_nocase ... bench: 580,445 ns/iter (+/- 2,100) = 1024 MB/s +test sherlock::name_holmes ... bench: 185,140 ns/iter (+/- 2,100) = 3213 MB/s +test sherlock::name_holmes_nocase ... bench: 479,902 ns/iter (+/- 5,898) = 1239 MB/s +test sherlock::name_sherlock ... bench: 51,053 ns/iter (+/- 491) = 11653 MB/s +test sherlock::name_sherlock_holmes ... bench: 50,722 ns/iter (+/- 296) = 11729 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 355,142 ns/iter (+/- 1,424) = 1675 MB/s +test sherlock::name_sherlock_nocase ... bench: 354,932 ns/iter (+/- 1,554) = 1676 MB/s +test sherlock::name_whitespace ... bench: 56,972 ns/iter (+/- 271) = 10442 MB/s +test sherlock::no_match_common ... bench: 274,260 ns/iter (+/- 3,092) = 2169 MB/s +test sherlock::no_match_really_common ... bench: 273,984 ns/iter (+/- 2,202) = 2171 MB/s +test sherlock::no_match_uncommon ... bench: 10,444 ns/iter (+/- 68) = 56964 MB/s +test sherlock::quotes ... bench: 2,755,414 ns/iter (+/- 11,488) = 215 MB/s +test sherlock::repeated_class_negation ... bench: 21,585,138 ns/iter (+/- 50,347) = 27 MB/s +test sherlock::the_lower ... bench: 2,835,360 ns/iter (+/- 10,083) = 209 MB/s +test sherlock::the_nocase ... bench: 3,060,088 ns/iter (+/- 10,321) = 194 MB/s +test sherlock::the_upper ... bench: 272,416 ns/iter (+/- 3,308) = 2183 MB/s +test sherlock::the_whitespace ... bench: 2,991,214 ns/iter (+/- 27,223) = 198 MB/s +test sherlock::word_ending_n ... bench: 30,726,303 ns/iter (+/- 83,743) = 19 MB/s +test sherlock::words ... bench: 42,256,710 ns/iter (+/- 88,302) = 14 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/oniguruma b/third_party/rust/regex/record/old-bench-log/07/oniguruma new file mode 100644 index 0000000000..b9e8e29c42 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/oniguruma @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 129 ns/iter (+/- 3) = 3023 MB/s +test misc::anchored_literal_long_non_match ... bench: 402 ns/iter (+/- 1) = 970 MB/s +test misc::anchored_literal_short_match ... bench: 130 ns/iter (+/- 1) = 200 MB/s +test misc::anchored_literal_short_non_match ... bench: 49 ns/iter (+/- 0) = 530 MB/s +test misc::easy0_1K ... bench: 281 ns/iter (+/- 3) = 3740 MB/s +test misc::easy0_1MB ... bench: 119,847 ns/iter (+/- 713) = 8749 MB/s +test misc::easy0_32 ... bench: 154 ns/iter (+/- 4) = 383 MB/s +test misc::easy0_32K ... bench: 3,985 ns/iter (+/- 24) = 8229 MB/s +test misc::easy1_1K ... bench: 3,472 ns/iter (+/- 11) = 300 MB/s +test misc::easy1_1MB ... bench: 3,385,764 ns/iter (+/- 6,630) = 309 MB/s +test misc::easy1_32 ... bench: 283 ns/iter (+/- 6) = 183 MB/s +test misc::easy1_32K ... bench: 105,977 ns/iter (+/- 319) = 309 MB/s +test misc::hard_1K ... bench: 106,973 ns/iter (+/- 1,091) = 9 MB/s +test misc::hard_1MB ... bench: 114,602,847 ns/iter (+/- 336,051) = 9 MB/s +test misc::hard_32 ... bench: 3,569 ns/iter (+/- 3) = 16 MB/s +test misc::hard_32K ... bench: 3,570,108 ns/iter (+/- 17,057) = 9 MB/s +test misc::literal ... bench: 287 ns/iter (+/- 1) = 177 MB/s +test misc::long_needle1 ... bench: 5,430,190 ns/iter (+/- 271,737) = 18 MB/s +test misc::long_needle2 ... bench: 5,651,748 ns/iter (+/- 260,960) = 17 MB/s +test misc::match_class ... bench: 369 ns/iter (+/- 0) = 219 MB/s +test misc::match_class_in_range ... bench: 370 ns/iter (+/- 8) = 218 MB/s +test misc::match_class_unicode ... bench: 1,600 ns/iter (+/- 24) = 100 MB/s +test misc::medium_1K ... 
bench: 295 ns/iter (+/- 4) = 3566 MB/s +test misc::medium_1MB ... bench: 119,845 ns/iter (+/- 707) = 8749 MB/s +test misc::medium_32 ... bench: 166 ns/iter (+/- 0) = 361 MB/s +test misc::medium_32K ... bench: 3,995 ns/iter (+/- 30) = 8209 MB/s +test misc::not_literal ... bench: 365 ns/iter (+/- 1) = 139 MB/s +test misc::one_pass_long_prefix ... bench: 155 ns/iter (+/- 0) = 167 MB/s +test misc::one_pass_long_prefix_not ... bench: 162 ns/iter (+/- 0) = 160 MB/s +test misc::one_pass_short ... bench: 279 ns/iter (+/- 0) = 60 MB/s +test misc::one_pass_short_not ... bench: 269 ns/iter (+/- 3) = 63 MB/s +test misc::reallyhard2_1K ... bench: 227,630 ns/iter (+/- 963) = 4 MB/s +test misc::reallyhard_1K ... bench: 106,964 ns/iter (+/- 1,199) = 9 MB/s +test misc::reallyhard_1MB ... bench: 114,622,989 ns/iter (+/- 206,430) = 9 MB/s +test misc::reallyhard_32 ... bench: 3,477 ns/iter (+/- 13) = 16 MB/s +test misc::reallyhard_32K ... bench: 3,580,927 ns/iter (+/- 15,784) = 9 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 23,518 ns/iter (+/- 105) = 340 MB/s +test regexdna::find_new_lines ... bench: 33,300,039 ns/iter (+/- 827,837) = 152 MB/s +test regexdna::subst1 ... bench: 22,829,688 ns/iter (+/- 81,653) = 222 MB/s +test regexdna::subst10 ... bench: 22,843,694 ns/iter (+/- 94,299) = 222 MB/s +test regexdna::subst11 ... bench: 22,827,872 ns/iter (+/- 84,129) = 222 MB/s +test regexdna::subst2 ... bench: 22,841,925 ns/iter (+/- 84,394) = 222 MB/s +test regexdna::subst3 ... bench: 22,885,409 ns/iter (+/- 114,277) = 222 MB/s +test regexdna::subst4 ... bench: 22,837,475 ns/iter (+/- 58,938) = 222 MB/s +test regexdna::subst5 ... bench: 22,835,207 ns/iter (+/- 39,862) = 222 MB/s +test regexdna::subst6 ... bench: 22,833,199 ns/iter (+/- 77,142) = 222 MB/s +test regexdna::subst7 ... bench: 22,851,757 ns/iter (+/- 322,186) = 222 MB/s +test regexdna::subst8 ... bench: 22,842,892 ns/iter (+/- 86,166) = 222 MB/s +test regexdna::subst9 ... 
bench: 22,840,862 ns/iter (+/- 105,926) = 222 MB/s +test regexdna::variant1 ... bench: 91,691,325 ns/iter (+/- 194,247) = 55 MB/s +test regexdna::variant2 ... bench: 105,586,659 ns/iter (+/- 320,354) = 48 MB/s +test regexdna::variant3 ... bench: 94,437,485 ns/iter (+/- 277,744) = 53 MB/s +test regexdna::variant4 ... bench: 90,399,600 ns/iter (+/- 184,588) = 56 MB/s +test regexdna::variant5 ... bench: 90,332,232 ns/iter (+/- 174,254) = 56 MB/s +test regexdna::variant6 ... bench: 90,519,504 ns/iter (+/- 227,643) = 56 MB/s +test regexdna::variant7 ... bench: 90,881,562 ns/iter (+/- 221,861) = 55 MB/s +test regexdna::variant8 ... bench: 96,962,980 ns/iter (+/- 180,002) = 52 MB/s +test regexdna::variant9 ... bench: 109,558,711 ns/iter (+/- 166,337) = 46 MB/s +test sherlock::before_after_holmes ... bench: 31,530,493 ns/iter (+/- 112,639) = 18 MB/s +test sherlock::before_holmes ... bench: 30,420,729 ns/iter (+/- 114,072) = 19 MB/s +test sherlock::everything_greedy ... bench: 6,656,677 ns/iter (+/- 167,110) = 89 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,992,839 ns/iter (+/- 8,037) = 298 MB/s +test sherlock::ing_suffix ... bench: 15,878,331 ns/iter (+/- 150,901) = 37 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,085,471 ns/iter (+/- 8,438) = 285 MB/s +test sherlock::letters ... bench: 89,091,241 ns/iter (+/- 182,225) = 6 MB/s +test sherlock::letters_lower ... bench: 55,634,237 ns/iter (+/- 115,097) = 10 MB/s +test sherlock::letters_upper ... bench: 10,126,641 ns/iter (+/- 36,015) = 58 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 191,963 ns/iter (+/- 687) = 3099 MB/s +test sherlock::name_alt1 ... bench: 1,983,070 ns/iter (+/- 5,863) = 300 MB/s +test sherlock::name_alt2 ... bench: 1,972,746 ns/iter (+/- 14,082) = 301 MB/s +test sherlock::name_alt3 ... bench: 2,424,033 ns/iter (+/- 13,209) = 245 MB/s +test sherlock::name_alt3_nocase ... bench: 16,876,942 ns/iter (+/- 77,218) = 35 MB/s +test sherlock::name_alt4 ... 
bench: 1,986,579 ns/iter (+/- 9,195) = 299 MB/s +test sherlock::name_alt4_nocase ... bench: 4,992,277 ns/iter (+/- 10,882) = 119 MB/s +test sherlock::name_alt5 ... bench: 2,033,937 ns/iter (+/- 13,627) = 292 MB/s +test sherlock::name_alt5_nocase ... bench: 6,292,627 ns/iter (+/- 14,666) = 94 MB/s +test sherlock::name_holmes ... bench: 450,290 ns/iter (+/- 1,882) = 1321 MB/s +test sherlock::name_holmes_nocase ... bench: 3,032,489 ns/iter (+/- 8,728) = 196 MB/s +test sherlock::name_sherlock ... bench: 265,379 ns/iter (+/- 865) = 2241 MB/s +test sherlock::name_sherlock_holmes ... bench: 201,375 ns/iter (+/- 2,146) = 2954 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 3,010,059 ns/iter (+/- 7,093) = 197 MB/s +test sherlock::name_sherlock_nocase ... bench: 3,016,713 ns/iter (+/- 11,280) = 197 MB/s +test sherlock::name_whitespace ... bench: 266,706 ns/iter (+/- 908) = 2230 MB/s +test sherlock::no_match_common ... bench: 544,428 ns/iter (+/- 7,562) = 1092 MB/s +test sherlock::no_match_really_common ... bench: 626,986 ns/iter (+/- 2,959) = 948 MB/s +test sherlock::no_match_uncommon ... bench: 534,517 ns/iter (+/- 4,342) = 1113 MB/s +test sherlock::quotes ... bench: 3,210,614 ns/iter (+/- 15,699) = 185 MB/s +test sherlock::repeated_class_negation ... bench: 31,147,103 ns/iter (+/- 117,471) = 19 MB/s +test sherlock::the_lower ... bench: 2,275,468 ns/iter (+/- 19,220) = 261 MB/s +test sherlock::the_nocase ... bench: 4,999,086 ns/iter (+/- 20,184) = 119 MB/s +test sherlock::the_upper ... bench: 893,288 ns/iter (+/- 11,368) = 666 MB/s +test sherlock::the_whitespace ... bench: 2,364,893 ns/iter (+/- 21,124) = 251 MB/s +test sherlock::word_ending_n ... bench: 18,221,921 ns/iter (+/- 62,927) = 32 MB/s +test sherlock::words ... bench: 27,552,543 ns/iter (+/- 89,437) = 21 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/pcre1 b/third_party/rust/regex/record/old-bench-log/07/pcre1 new file mode 100644 index 0000000000..a28d3cb665 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 23 ns/iter (+/- 0) = 16956 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 0) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 223 ns/iter (+/- 2) = 4713 MB/s +test misc::easy0_1MB ... bench: 178,098 ns/iter (+/- 3,124) = 5887 MB/s +test misc::easy0_32 ... bench: 39 ns/iter (+/- 0) = 1512 MB/s +test misc::easy0_32K ... bench: 5,600 ns/iter (+/- 27) = 5856 MB/s +test misc::easy1_1K ... bench: 210 ns/iter (+/- 7) = 4971 MB/s +test misc::easy1_1MB ... bench: 178,177 ns/iter (+/- 1,024) = 5885 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 0) = 1300 MB/s +test misc::easy1_32K ... bench: 5,592 ns/iter (+/- 52) = 5863 MB/s +test misc::hard_1K ... bench: 1,223 ns/iter (+/- 14) = 859 MB/s +test misc::hard_1MB ... bench: 983,169 ns/iter (+/- 13,398) = 1066 MB/s +test misc::hard_32 ... bench: 99 ns/iter (+/- 0) = 595 MB/s +test misc::hard_32K ... bench: 31,422 ns/iter (+/- 326) = 1043 MB/s +test misc::literal ... bench: 23 ns/iter (+/- 0) = 2217 MB/s +test misc::long_needle1 ... bench: 464,932 ns/iter (+/- 1,869) = 215 MB/s +test misc::long_needle2 ... bench: 462,587 ns/iter (+/- 6,375) = 216 MB/s +test misc::match_class ... bench: 73 ns/iter (+/- 0) = 1109 MB/s +test misc::match_class_in_range ... bench: 25 ns/iter (+/- 0) = 3240 MB/s +test misc::match_class_unicode ... bench: 263 ns/iter (+/- 2) = 612 MB/s +test misc::medium_1K ... 
bench: 213 ns/iter (+/- 3) = 4938 MB/s +test misc::medium_1MB ... bench: 178,077 ns/iter (+/- 1,844) = 5888 MB/s +test misc::medium_32 ... bench: 48 ns/iter (+/- 0) = 1250 MB/s +test misc::medium_32K ... bench: 5,598 ns/iter (+/- 38) = 5858 MB/s +test misc::not_literal ... bench: 131 ns/iter (+/- 0) = 389 MB/s +test misc::one_pass_long_prefix ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_long_prefix_not ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_short ... bench: 44 ns/iter (+/- 0) = 386 MB/s +test misc::one_pass_short_not ... bench: 44 ns/iter (+/- 0) = 386 MB/s +test misc::reallyhard2_1K ... bench: 3,503 ns/iter (+/- 29) = 296 MB/s +test misc::reallyhard_1K ... bench: 1,276 ns/iter (+/- 14) = 823 MB/s +test misc::reallyhard_1MB ... bench: 1,003,152 ns/iter (+/- 10,884) = 1045 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 31,035 ns/iter (+/- 221) = 1056 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 3,924 ns/iter (+/- 23) = 2038 MB/s +test regexdna::find_new_lines ... bench: 2,398,578 ns/iter (+/- 28,663) = 2119 MB/s +test regexdna::subst1 ... bench: 1,073,632 ns/iter (+/- 7,567) = 4734 MB/s +test regexdna::subst10 ... bench: 1,068,696 ns/iter (+/- 14,896) = 4756 MB/s +test regexdna::subst11 ... bench: 1,071,991 ns/iter (+/- 21,623) = 4742 MB/s +test regexdna::subst2 ... bench: 1,064,244 ns/iter (+/- 22,701) = 4776 MB/s +test regexdna::subst3 ... bench: 1,081,402 ns/iter (+/- 25,919) = 4700 MB/s +test regexdna::subst4 ... bench: 1,078,319 ns/iter (+/- 8,278) = 4714 MB/s +test regexdna::subst5 ... bench: 1,067,600 ns/iter (+/- 6,079) = 4761 MB/s +test regexdna::subst6 ... bench: 1,073,509 ns/iter (+/- 8,068) = 4735 MB/s +test regexdna::subst7 ... bench: 1,075,462 ns/iter (+/- 9,145) = 4726 MB/s +test regexdna::subst8 ... bench: 1,073,592 ns/iter (+/- 10,284) = 4734 MB/s +test regexdna::subst9 ... 
bench: 1,074,960 ns/iter (+/- 11,802) = 4728 MB/s +test regexdna::variant1 ... bench: 14,120,901 ns/iter (+/- 85,462) = 359 MB/s +test regexdna::variant2 ... bench: 15,606,152 ns/iter (+/- 128,452) = 325 MB/s +test regexdna::variant3 ... bench: 18,892,502 ns/iter (+/- 82,790) = 269 MB/s +test regexdna::variant4 ... bench: 17,988,621 ns/iter (+/- 50,462) = 282 MB/s +test regexdna::variant5 ... bench: 15,854,890 ns/iter (+/- 54,745) = 320 MB/s +test regexdna::variant6 ... bench: 16,126,069 ns/iter (+/- 76,013) = 315 MB/s +test regexdna::variant7 ... bench: 17,997,470 ns/iter (+/- 94,472) = 282 MB/s +test regexdna::variant8 ... bench: 23,004,949 ns/iter (+/- 81,626) = 220 MB/s +test regexdna::variant9 ... bench: 20,272,633 ns/iter (+/- 99,674) = 250 MB/s +test sherlock::before_after_holmes ... bench: 3,660,138 ns/iter (+/- 41,095) = 162 MB/s +test sherlock::before_holmes ... bench: 3,632,955 ns/iter (+/- 25,761) = 163 MB/s +test sherlock::holmes_cochar_watson ... bench: 458,639 ns/iter (+/- 9,185) = 1297 MB/s +test sherlock::ing_suffix ... bench: 1,746,052 ns/iter (+/- 31,762) = 340 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,067,141 ns/iter (+/- 12,943) = 146 MB/s +test sherlock::letters ... bench: 11,360,188 ns/iter (+/- 22,264) = 52 MB/s +test sherlock::letters_lower ... bench: 11,137,940 ns/iter (+/- 35,225) = 53 MB/s +test sherlock::letters_upper ... bench: 1,505,435 ns/iter (+/- 10,318) = 395 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 181,084 ns/iter (+/- 3,121) = 3285 MB/s +test sherlock::name_alt1 ... bench: 427,474 ns/iter (+/- 1,601) = 1391 MB/s +test sherlock::name_alt2 ... bench: 434,858 ns/iter (+/- 6,444) = 1368 MB/s +test sherlock::name_alt3 ... bench: 747,274 ns/iter (+/- 7,303) = 796 MB/s +test sherlock::name_alt3_nocase ... bench: 2,574,102 ns/iter (+/- 44,203) = 231 MB/s +test sherlock::name_alt4 ... bench: 66,428 ns/iter (+/- 336) = 8956 MB/s +test sherlock::name_alt4_nocase ... 
bench: 1,333,932 ns/iter (+/- 6,683) = 445 MB/s +test sherlock::name_alt5 ... bench: 598,062 ns/iter (+/- 4,936) = 994 MB/s +test sherlock::name_alt5_nocase ... bench: 1,496,292 ns/iter (+/- 6,595) = 397 MB/s +test sherlock::name_holmes ... bench: 359,203 ns/iter (+/- 6,202) = 1656 MB/s +test sherlock::name_holmes_nocase ... bench: 454,624 ns/iter (+/- 2,658) = 1308 MB/s +test sherlock::name_sherlock ... bench: 243,450 ns/iter (+/- 2,435) = 2443 MB/s +test sherlock::name_sherlock_holmes ... bench: 182,407 ns/iter (+/- 878) = 3261 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 940,244 ns/iter (+/- 6,064) = 632 MB/s +test sherlock::name_sherlock_nocase ... bench: 904,285 ns/iter (+/- 9,405) = 657 MB/s +test sherlock::name_whitespace ... bench: 244,114 ns/iter (+/- 1,875) = 2437 MB/s +test sherlock::no_match_common ... bench: 358,735 ns/iter (+/- 4,090) = 1658 MB/s +test sherlock::no_match_really_common ... bench: 348,964 ns/iter (+/- 6,060) = 1704 MB/s +test sherlock::no_match_uncommon ... bench: 21,256 ns/iter (+/- 144) = 27988 MB/s +test sherlock::quotes ... bench: 422,149 ns/iter (+/- 1,540) = 1409 MB/s +test sherlock::repeated_class_negation ... bench: 5,232,683 ns/iter (+/- 21,609) = 113 MB/s +test sherlock::the_lower ... bench: 651,539 ns/iter (+/- 1,763) = 913 MB/s +test sherlock::the_nocase ... bench: 693,506 ns/iter (+/- 13,143) = 857 MB/s +test sherlock::the_upper ... bench: 46,904 ns/iter (+/- 657) = 12684 MB/s +test sherlock::the_whitespace ... bench: 788,070 ns/iter (+/- 17,403) = 754 MB/s +test sherlock::word_ending_n ... bench: 4,545,774 ns/iter (+/- 26,965) = 130 MB/s +test sherlock::words ... bench: 5,493,039 ns/iter (+/- 16,767) = 108 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/pcre2 b/third_party/rust/regex/record/old-bench-log/07/pcre2 new file mode 100644 index 0000000000..c2bbd39e2f --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 0) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 11 ns/iter (+/- 0) = 2363 MB/s +test misc::easy0_1K ... bench: 81 ns/iter (+/- 4) = 12975 MB/s +test misc::easy0_1MB ... bench: 60,199 ns/iter (+/- 658) = 17418 MB/s +test misc::easy0_32 ... bench: 28 ns/iter (+/- 0) = 2107 MB/s +test misc::easy0_32K ... bench: 1,878 ns/iter (+/- 25) = 17462 MB/s +test misc::easy1_1K ... bench: 81 ns/iter (+/- 0) = 12888 MB/s +test misc::easy1_1MB ... bench: 59,222 ns/iter (+/- 598) = 17706 MB/s +test misc::easy1_32 ... bench: 28 ns/iter (+/- 0) = 1857 MB/s +test misc::easy1_32K ... bench: 1,819 ns/iter (+/- 6) = 18025 MB/s +test misc::hard_1K ... bench: 1,147 ns/iter (+/- 13) = 916 MB/s +test misc::hard_1MB ... bench: 990,924 ns/iter (+/- 6,065) = 1058 MB/s +test misc::hard_32 ... bench: 82 ns/iter (+/- 3) = 719 MB/s +test misc::hard_32K ... bench: 32,218 ns/iter (+/- 471) = 1017 MB/s +test misc::literal ... bench: 15 ns/iter (+/- 0) = 3400 MB/s +test misc::long_needle1 ... bench: 464,061 ns/iter (+/- 2,241) = 215 MB/s +test misc::long_needle2 ... bench: 465,191 ns/iter (+/- 823) = 214 MB/s +test misc::match_class ... bench: 46 ns/iter (+/- 1) = 1760 MB/s +test misc::match_class_in_range ... bench: 16 ns/iter (+/- 0) = 5062 MB/s +test misc::match_class_unicode ... bench: 246 ns/iter (+/- 0) = 654 MB/s +test misc::medium_1K ... bench: 102 ns/iter (+/- 9) = 10313 MB/s +test misc::medium_1MB ... 
bench: 60,042 ns/iter (+/- 585) = 17464 MB/s +test misc::medium_32 ... bench: 29 ns/iter (+/- 1) = 2068 MB/s +test misc::medium_32K ... bench: 1,901 ns/iter (+/- 23) = 17251 MB/s +test misc::not_literal ... bench: 122 ns/iter (+/- 2) = 418 MB/s +test misc::one_pass_long_prefix ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::one_pass_long_prefix_not ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 0) = 472 MB/s +test misc::one_pass_short_not ... bench: 36 ns/iter (+/- 0) = 472 MB/s +test misc::reallyhard2_1K ... bench: 3,517 ns/iter (+/- 39) = 295 MB/s +test misc::reallyhard_1K ... bench: 1,123 ns/iter (+/- 12) = 935 MB/s +test misc::reallyhard_1MB ... bench: 992,521 ns/iter (+/- 13,407) = 1056 MB/s +test misc::reallyhard_32 ... bench: 71 ns/iter (+/- 0) = 830 MB/s +test misc::reallyhard_32K ... bench: 30,626 ns/iter (+/- 206) = 1070 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,824 ns/iter (+/- 21) = 2832 MB/s +test regexdna::find_new_lines ... bench: 1,500,377 ns/iter (+/- 8,152) = 3388 MB/s +test regexdna::subst1 ... bench: 815,769 ns/iter (+/- 14,286) = 6231 MB/s +test regexdna::subst10 ... bench: 820,459 ns/iter (+/- 57,098) = 6195 MB/s +test regexdna::subst11 ... bench: 810,986 ns/iter (+/- 4,270) = 6268 MB/s +test regexdna::subst2 ... bench: 815,568 ns/iter (+/- 35,148) = 6232 MB/s +test regexdna::subst3 ... bench: 812,590 ns/iter (+/- 6,782) = 6255 MB/s +test regexdna::subst4 ... bench: 831,679 ns/iter (+/- 12,372) = 6112 MB/s +test regexdna::subst5 ... bench: 823,207 ns/iter (+/- 12,977) = 6175 MB/s +test regexdna::subst6 ... bench: 815,506 ns/iter (+/- 11,610) = 6233 MB/s +test regexdna::subst7 ... bench: 818,104 ns/iter (+/- 4,807) = 6213 MB/s +test regexdna::subst8 ... bench: 815,265 ns/iter (+/- 21,504) = 6235 MB/s +test regexdna::subst9 ... bench: 809,236 ns/iter (+/- 7,003) = 6281 MB/s +test regexdna::variant1 ... 
bench: 8,375,573 ns/iter (+/- 80,345) = 606 MB/s +test regexdna::variant2 ... bench: 11,207,698 ns/iter (+/- 45,582) = 453 MB/s +test regexdna::variant3 ... bench: 10,505,744 ns/iter (+/- 69,756) = 483 MB/s +test regexdna::variant4 ... bench: 9,276,177 ns/iter (+/- 50,904) = 548 MB/s +test regexdna::variant5 ... bench: 9,333,446 ns/iter (+/- 41,108) = 544 MB/s +test regexdna::variant6 ... bench: 9,865,395 ns/iter (+/- 26,010) = 515 MB/s +test regexdna::variant7 ... bench: 10,033,179 ns/iter (+/- 28,272) = 506 MB/s +test regexdna::variant8 ... bench: 10,752,604 ns/iter (+/- 37,714) = 472 MB/s +test regexdna::variant9 ... bench: 11,397,272 ns/iter (+/- 41,200) = 446 MB/s +test sherlock::before_after_holmes ... bench: 3,627,616 ns/iter (+/- 18,202) = 164 MB/s +test sherlock::before_holmes ... bench: 3,614,713 ns/iter (+/- 18,191) = 164 MB/s +test sherlock::holmes_cochar_watson ... bench: 68,419 ns/iter (+/- 918) = 8695 MB/s +test sherlock::ing_suffix ... bench: 1,766,571 ns/iter (+/- 16,612) = 336 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,018,396 ns/iter (+/- 11,822) = 148 MB/s +test sherlock::letters ... bench: 8,058,390 ns/iter (+/- 39,083) = 73 MB/s +test sherlock::letters_lower ... bench: 8,014,051 ns/iter (+/- 33,500) = 74 MB/s +test sherlock::letters_upper ... bench: 1,452,421 ns/iter (+/- 157,023) = 409 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 36,248 ns/iter (+/- 252) = 16412 MB/s +test sherlock::name_alt1 ... bench: 45,538 ns/iter (+/- 235) = 13064 MB/s +test sherlock::name_alt2 ... bench: 62,202 ns/iter (+/- 892) = 9564 MB/s +test sherlock::name_alt3 ... bench: 623,900 ns/iter (+/- 3,139) = 953 MB/s +test sherlock::name_alt3_nocase ... bench: 2,518,464 ns/iter (+/- 31,943) = 236 MB/s +test sherlock::name_alt4 ... bench: 62,015 ns/iter (+/- 712) = 9593 MB/s +test sherlock::name_alt4_nocase ... bench: 1,162,489 ns/iter (+/- 14,622) = 511 MB/s +test sherlock::name_alt5 ... 
bench: 589,686 ns/iter (+/- 6,775) = 1008 MB/s +test sherlock::name_alt5_nocase ... bench: 1,359,066 ns/iter (+/- 7,487) = 437 MB/s +test sherlock::name_holmes ... bench: 45,993 ns/iter (+/- 812) = 12935 MB/s +test sherlock::name_holmes_nocase ... bench: 82,326 ns/iter (+/- 758) = 7226 MB/s +test sherlock::name_sherlock ... bench: 36,848 ns/iter (+/- 50) = 16145 MB/s +test sherlock::name_sherlock_holmes ... bench: 36,778 ns/iter (+/- 621) = 16176 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 636,825 ns/iter (+/- 2,957) = 934 MB/s +test sherlock::name_sherlock_nocase ... bench: 635,313 ns/iter (+/- 10,776) = 936 MB/s +test sherlock::name_whitespace ... bench: 37,360 ns/iter (+/- 132) = 15924 MB/s +test sherlock::no_match_common ... bench: 34,545 ns/iter (+/- 239) = 17221 MB/s +test sherlock::no_match_really_common ... bench: 49,019 ns/iter (+/- 590) = 12136 MB/s +test sherlock::no_match_uncommon ... bench: 34,410 ns/iter (+/- 182) = 17289 MB/s +test sherlock::quotes ... bench: 414,599 ns/iter (+/- 3,528) = 1434 MB/s +test sherlock::repeated_class_negation ... bench: 5,106,885 ns/iter (+/- 23,660) = 116 MB/s +test sherlock::the_lower ... bench: 234,135 ns/iter (+/- 3,821) = 2540 MB/s +test sherlock::the_nocase ... bench: 261,765 ns/iter (+/- 2,272) = 2272 MB/s +test sherlock::the_upper ... bench: 50,816 ns/iter (+/- 583) = 11707 MB/s +test sherlock::the_whitespace ... bench: 408,355 ns/iter (+/- 5,463) = 1456 MB/s +test sherlock::word_ending_n ... bench: 4,367,721 ns/iter (+/- 55,474) = 136 MB/s +test sherlock::words ... bench: 4,640,171 ns/iter (+/- 20,462) = 128 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/re2 b/third_party/rust/regex/record/old-bench-log/07/re2 new file mode 100644 index 0000000000..6888bea06a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 69 ns/iter (+/- 0) = 5652 MB/s +test misc::anchored_literal_long_non_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_short_match ... bench: 69 ns/iter (+/- 0) = 376 MB/s +test misc::anchored_literal_short_non_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::easy0_1K ... bench: 106 ns/iter (+/- 0) = 9915 MB/s +test misc::easy0_1MB ... bench: 15,311 ns/iter (+/- 113) = 68486 MB/s +test misc::easy0_32 ... bench: 100 ns/iter (+/- 3) = 590 MB/s +test misc::easy0_32K ... bench: 426 ns/iter (+/- 1) = 76983 MB/s +test misc::easy1_1K ... bench: 98 ns/iter (+/- 0) = 10653 MB/s +test misc::easy1_1MB ... bench: 15,299 ns/iter (+/- 136) = 68540 MB/s +test misc::easy1_32 ... bench: 91 ns/iter (+/- 0) = 571 MB/s +test misc::easy1_32K ... bench: 413 ns/iter (+/- 5) = 79389 MB/s +test misc::hard_1K ... bench: 1,815 ns/iter (+/- 43) = 579 MB/s +test misc::hard_1MB ... bench: 1,842,293 ns/iter (+/- 10,227) = 569 MB/s +test misc::hard_32 ... bench: 146 ns/iter (+/- 4) = 404 MB/s +test misc::hard_32K ... bench: 57,638 ns/iter (+/- 481) = 568 MB/s +test misc::literal ... bench: 64 ns/iter (+/- 1) = 796 MB/s +test misc::long_needle1 ... bench: 122,154 ns/iter (+/- 840) = 818 MB/s +test misc::long_needle2 ... bench: 122,105 ns/iter (+/- 578) = 818 MB/s +test misc::match_class ... bench: 178 ns/iter (+/- 0) = 455 MB/s +test misc::match_class_in_range ... bench: 178 ns/iter (+/- 2) = 455 MB/s +test misc::match_class_unicode ... bench: 293 ns/iter (+/- 2) = 549 MB/s +test misc::medium_1K ... bench: 1,610 ns/iter (+/- 26) = 653 MB/s +test misc::medium_1MB ... 
bench: 1,537,932 ns/iter (+/- 4,134) = 681 MB/s +test misc::medium_32 ... bench: 158 ns/iter (+/- 1) = 379 MB/s +test misc::medium_32K ... bench: 48,172 ns/iter (+/- 390) = 680 MB/s +test misc::no_exponential ... bench: 216 ns/iter (+/- 1) = 462 MB/s +test misc::not_literal ... bench: 127 ns/iter (+/- 1) = 401 MB/s +test misc::one_pass_long_prefix ... bench: 64 ns/iter (+/- 0) = 406 MB/s +test misc::one_pass_long_prefix_not ... bench: 100 ns/iter (+/- 1) = 260 MB/s +test misc::one_pass_short ... bench: 88 ns/iter (+/- 0) = 193 MB/s +test misc::one_pass_short_not ... bench: 86 ns/iter (+/- 0) = 197 MB/s +test misc::reallyhard2_1K ... bench: 1,332 ns/iter (+/- 41) = 780 MB/s +test misc::reallyhard_1K ... bench: 1,815 ns/iter (+/- 16) = 579 MB/s +test misc::reallyhard_1MB ... bench: 1,842,206 ns/iter (+/- 9,086) = 569 MB/s +test misc::reallyhard_32 ... bench: 149 ns/iter (+/- 1) = 395 MB/s +test misc::reallyhard_32K ... bench: 57,591 ns/iter (+/- 101) = 569 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 11,753 ns/iter (+/- 130) = 680 MB/s +test regexdna::find_new_lines ... bench: 24,330,235 ns/iter (+/- 374,274) = 208 MB/s +test regexdna::subst1 ... bench: 4,079,631 ns/iter (+/- 51,348) = 1246 MB/s +test regexdna::subst10 ... bench: 4,080,803 ns/iter (+/- 30,966) = 1245 MB/s +test regexdna::subst11 ... bench: 4,154,389 ns/iter (+/- 34,825) = 1223 MB/s +test regexdna::subst2 ... bench: 4,076,784 ns/iter (+/- 102,863) = 1246 MB/s +test regexdna::subst3 ... bench: 4,074,850 ns/iter (+/- 52,106) = 1247 MB/s +test regexdna::subst4 ... bench: 4,078,456 ns/iter (+/- 12,343) = 1246 MB/s +test regexdna::subst5 ... bench: 4,075,812 ns/iter (+/- 24,524) = 1247 MB/s +test regexdna::subst6 ... bench: 4,097,009 ns/iter (+/- 13,240) = 1240 MB/s +test regexdna::subst7 ... bench: 4,069,096 ns/iter (+/- 29,794) = 1249 MB/s +test regexdna::subst8 ... bench: 4,078,838 ns/iter (+/- 20,713) = 1246 MB/s +test regexdna::subst9 ... 
bench: 4,116,905 ns/iter (+/- 14,130) = 1234 MB/s +test regexdna::variant1 ... bench: 21,411,252 ns/iter (+/- 568,076) = 237 MB/s +test regexdna::variant2 ... bench: 21,082,571 ns/iter (+/- 92,912) = 241 MB/s +test regexdna::variant3 ... bench: 20,302,954 ns/iter (+/- 118,421) = 250 MB/s +test regexdna::variant4 ... bench: 21,290,669 ns/iter (+/- 102,527) = 238 MB/s +test regexdna::variant5 ... bench: 21,451,671 ns/iter (+/- 99,524) = 236 MB/s +test regexdna::variant6 ... bench: 21,057,017 ns/iter (+/- 530,904) = 241 MB/s +test regexdna::variant7 ... bench: 20,394,037 ns/iter (+/- 128,973) = 249 MB/s +test regexdna::variant8 ... bench: 17,839,069 ns/iter (+/- 122,671) = 284 MB/s +test regexdna::variant9 ... bench: 12,720,049 ns/iter (+/- 76,816) = 399 MB/s +test sherlock::before_after_holmes ... bench: 1,044,129 ns/iter (+/- 4,967) = 569 MB/s +test sherlock::before_holmes ... bench: 1,067,879 ns/iter (+/- 11,345) = 557 MB/s +test sherlock::everything_greedy ... bench: 5,193,222 ns/iter (+/- 10,990) = 114 MB/s +test sherlock::everything_greedy_nl ... bench: 2,038,599 ns/iter (+/- 18,946) = 291 MB/s +test sherlock::holmes_cochar_watson ... bench: 909,342 ns/iter (+/- 5,295) = 654 MB/s +test sherlock::holmes_coword_watson ... bench: 939,154 ns/iter (+/- 6,087) = 633 MB/s +test sherlock::ing_suffix ... bench: 2,729,081 ns/iter (+/- 15,969) = 217 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,373,143 ns/iter (+/- 17,068) = 433 MB/s +test sherlock::letters ... bench: 56,266,035 ns/iter (+/- 165,398) = 10 MB/s +test sherlock::letters_lower ... bench: 54,590,671 ns/iter (+/- 138,842) = 10 MB/s +test sherlock::letters_upper ... bench: 2,702,242 ns/iter (+/- 9,889) = 220 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,430,065 ns/iter (+/- 27,756) = 244 MB/s +test sherlock::name_alt1 ... bench: 45,514 ns/iter (+/- 403) = 13071 MB/s +test sherlock::name_alt2 ... bench: 975,861 ns/iter (+/- 11,553) = 609 MB/s +test sherlock::name_alt3 ... 
bench: 1,070,967 ns/iter (+/- 11,065) = 555 MB/s +test sherlock::name_alt3_nocase ... bench: 2,574,585 ns/iter (+/- 39,816) = 231 MB/s +test sherlock::name_alt4 ... bench: 978,776 ns/iter (+/- 25,503) = 607 MB/s +test sherlock::name_alt4_nocase ... bench: 1,643,230 ns/iter (+/- 27,685) = 362 MB/s +test sherlock::name_alt5 ... bench: 998,349 ns/iter (+/- 6,658) = 595 MB/s +test sherlock::name_alt5_nocase ... bench: 1,781,006 ns/iter (+/- 22,507) = 334 MB/s +test sherlock::name_holmes ... bench: 92,561 ns/iter (+/- 1,358) = 6427 MB/s +test sherlock::name_holmes_nocase ... bench: 1,281,827 ns/iter (+/- 7,651) = 464 MB/s +test sherlock::name_sherlock ... bench: 31,994 ns/iter (+/- 326) = 18595 MB/s +test sherlock::name_sherlock_holmes ... bench: 34,272 ns/iter (+/- 445) = 17359 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,218,006 ns/iter (+/- 19,301) = 488 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,214,744 ns/iter (+/- 11,473) = 489 MB/s +test sherlock::name_whitespace ... bench: 35,455 ns/iter (+/- 412) = 16779 MB/s +test sherlock::no_match_common ... bench: 299,771 ns/iter (+/- 7,799) = 1984 MB/s +test sherlock::no_match_really_common ... bench: 299,595 ns/iter (+/- 926) = 1985 MB/s +test sherlock::no_match_uncommon ... bench: 9,803 ns/iter (+/- 139) = 60688 MB/s +test sherlock::quotes ... bench: 1,033,423 ns/iter (+/- 9,177) = 575 MB/s +test sherlock::the_lower ... bench: 1,454,358 ns/iter (+/- 75,304) = 409 MB/s +test sherlock::the_nocase ... bench: 2,486,681 ns/iter (+/- 9,026) = 239 MB/s +test sherlock::the_upper ... bench: 123,989 ns/iter (+/- 1,097) = 4798 MB/s +test sherlock::the_whitespace ... bench: 1,454,732 ns/iter (+/- 118,006) = 408 MB/s +test sherlock::word_ending_n ... bench: 1,922,008 ns/iter (+/- 15,040) = 309 MB/s +test sherlock::words ... bench: 16,054,888 ns/iter (+/- 90,684) = 37 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/rust b/third_party/rust/regex/record/old-bench-log/07/rust new file mode 100644 index 0000000000..f5e73a2c8a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/rust @@ -0,0 +1,113 @@ + +running 108 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 0) = 1625 MB/s +test misc::easy0_1K ... bench: 11 ns/iter (+/- 0) = 95545 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 11 ns/iter (+/- 0) = 2981363 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 0) = 29000 MB/s +test misc::easy1_1MB ... bench: 38 ns/iter (+/- 0) = 27594631 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 0) = 1333 MB/s +test misc::easy1_32K ... bench: 36 ns/iter (+/- 0) = 910777 MB/s +test misc::hard_1K ... bench: 48 ns/iter (+/- 0) = 21895 MB/s +test misc::hard_1MB ... bench: 51 ns/iter (+/- 0) = 20560843 MB/s +test misc::hard_32 ... bench: 48 ns/iter (+/- 1) = 1229 MB/s +test misc::hard_32K ... bench: 48 ns/iter (+/- 0) = 683229 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 956 ns/iter (+/- 14) = 104603 MB/s +test misc::long_needle2 ... bench: 538,237 ns/iter (+/- 8,739) = 185 MB/s +test misc::match_class ... bench: 57 ns/iter (+/- 0) = 1421 MB/s +test misc::match_class_in_range ... bench: 22 ns/iter (+/- 0) = 3681 MB/s +test misc::match_class_unicode ... bench: 245 ns/iter (+/- 4) = 657 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... 
bench: 15 ns/iter (+/- 0) = 69906933 MB/s +test misc::medium_32 ... bench: 12 ns/iter (+/- 0) = 5000 MB/s +test misc::medium_32K ... bench: 12 ns/iter (+/- 0) = 2733000 MB/s +test misc::no_exponential ... bench: 318 ns/iter (+/- 0) = 314 MB/s +test misc::not_literal ... bench: 85 ns/iter (+/- 0) = 600 MB/s +test misc::one_pass_long_prefix ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_long_prefix_not ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_short ... bench: 34 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_short_not ... bench: 37 ns/iter (+/- 0) = 459 MB/s +test misc::reallyhard2_1K ... bench: 51 ns/iter (+/- 0) = 20392 MB/s +test misc::reallyhard_1K ... bench: 1,547 ns/iter (+/- 19) = 679 MB/s +test misc::reallyhard_1MB ... bench: 1,533,883 ns/iter (+/- 9,553) = 683 MB/s +test misc::reallyhard_32 ... bench: 96 ns/iter (+/- 0) = 614 MB/s +test misc::reallyhard_32K ... bench: 47,989 ns/iter (+/- 198) = 683 MB/s +test misc::replace_all ... bench: 136 ns/iter (+/- 0) +test misc::reverse_suffix_no_quadratic ... bench: 4,016 ns/iter (+/- 21) = 1992 MB/s +test misc::short_haystack_1000000x ... bench: 64,033 ns/iter (+/- 470) = 124935 MB/s +test misc::short_haystack_100000x ... bench: 6,472 ns/iter (+/- 44) = 123611 MB/s +test misc::short_haystack_10000x ... bench: 1,906 ns/iter (+/- 49) = 41978 MB/s +test misc::short_haystack_1000x ... bench: 362 ns/iter (+/- 1) = 22129 MB/s +test misc::short_haystack_100x ... bench: 259 ns/iter (+/- 2) = 3131 MB/s +test misc::short_haystack_10x ... bench: 228 ns/iter (+/- 0) = 399 MB/s +test misc::short_haystack_1x ... bench: 223 ns/iter (+/- 2) = 85 MB/s +test misc::short_haystack_2x ... bench: 224 ns/iter (+/- 2) = 120 MB/s +test misc::short_haystack_3x ... bench: 221 ns/iter (+/- 2) = 158 MB/s +test misc::short_haystack_4x ... bench: 223 ns/iter (+/- 2) = 192 MB/s +test regexdna::find_new_lines ... bench: 11,885,905 ns/iter (+/- 23,501) = 427 MB/s +test regexdna::subst1 ... 
bench: 712,544 ns/iter (+/- 16,100) = 7134 MB/s +test regexdna::subst10 ... bench: 709,739 ns/iter (+/- 8,467) = 7162 MB/s +test regexdna::subst11 ... bench: 714,261 ns/iter (+/- 8,495) = 7117 MB/s +test regexdna::subst2 ... bench: 711,197 ns/iter (+/- 14,736) = 7147 MB/s +test regexdna::subst3 ... bench: 718,083 ns/iter (+/- 5,050) = 7079 MB/s +test regexdna::subst4 ... bench: 725,196 ns/iter (+/- 20,044) = 7009 MB/s +test regexdna::subst5 ... bench: 709,301 ns/iter (+/- 10,961) = 7166 MB/s +test regexdna::subst6 ... bench: 715,658 ns/iter (+/- 16,431) = 7103 MB/s +test regexdna::subst7 ... bench: 707,472 ns/iter (+/- 5,764) = 7185 MB/s +test regexdna::subst8 ... bench: 707,300 ns/iter (+/- 19,545) = 7187 MB/s +test regexdna::subst9 ... bench: 709,950 ns/iter (+/- 11,319) = 7160 MB/s +test regexdna::variant1 ... bench: 2,498,980 ns/iter (+/- 67,933) = 2034 MB/s +test regexdna::variant2 ... bench: 5,544,923 ns/iter (+/- 31,911) = 916 MB/s +test regexdna::variant3 ... bench: 6,441,568 ns/iter (+/- 20,197) = 789 MB/s +test regexdna::variant4 ... bench: 6,421,276 ns/iter (+/- 161,499) = 791 MB/s +test regexdna::variant5 ... bench: 5,093,567 ns/iter (+/- 18,696) = 998 MB/s +test regexdna::variant6 ... bench: 5,094,859 ns/iter (+/- 22,894) = 997 MB/s +test regexdna::variant7 ... bench: 4,540,111 ns/iter (+/- 11,863) = 1119 MB/s +test regexdna::variant8 ... bench: 4,636,741 ns/iter (+/- 23,448) = 1096 MB/s +test regexdna::variant9 ... bench: 4,557,500 ns/iter (+/- 16,168) = 1115 MB/s +test sherlock::before_after_holmes ... bench: 880,959 ns/iter (+/- 3,004) = 675 MB/s +test sherlock::before_holmes ... bench: 54,416 ns/iter (+/- 1,099) = 10933 MB/s +test sherlock::everything_greedy ... bench: 1,736,180 ns/iter (+/- 9,410) = 342 MB/s +test sherlock::everything_greedy_nl ... bench: 783,848 ns/iter (+/- 19,640) = 758 MB/s +test sherlock::holmes_cochar_watson ... bench: 90,085 ns/iter (+/- 499) = 6604 MB/s +test sherlock::holmes_coword_watson ... 
bench: 459,431 ns/iter (+/- 830) = 1294 MB/s +test sherlock::ing_suffix ... bench: 348,103 ns/iter (+/- 9,052) = 1709 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,017,387 ns/iter (+/- 3,906) = 584 MB/s +test sherlock::letters ... bench: 18,265,074 ns/iter (+/- 463,241) = 32 MB/s +test sherlock::letters_lower ... bench: 17,846,209 ns/iter (+/- 431,089) = 33 MB/s +test sherlock::letters_upper ... bench: 1,594,743 ns/iter (+/- 3,151) = 373 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 873,308 ns/iter (+/- 1,379) = 681 MB/s +test sherlock::name_alt1 ... bench: 21,144 ns/iter (+/- 315) = 28137 MB/s +test sherlock::name_alt2 ... bench: 71,354 ns/iter (+/- 1,432) = 8337 MB/s +test sherlock::name_alt3 ... bench: 79,167 ns/iter (+/- 294) = 7514 MB/s +test sherlock::name_alt3_nocase ... bench: 1,111,300 ns/iter (+/- 4,434) = 535 MB/s +test sherlock::name_alt4 ... bench: 100,864 ns/iter (+/- 570) = 5898 MB/s +test sherlock::name_alt4_nocase ... bench: 157,266 ns/iter (+/- 4,048) = 3782 MB/s +test sherlock::name_alt5 ... bench: 74,375 ns/iter (+/- 576) = 7999 MB/s +test sherlock::name_alt5_nocase ... bench: 467,879 ns/iter (+/- 2,115) = 1271 MB/s +test sherlock::name_holmes ... bench: 26,856 ns/iter (+/- 345) = 22152 MB/s +test sherlock::name_holmes_nocase ... bench: 124,140 ns/iter (+/- 1,111) = 4792 MB/s +test sherlock::name_sherlock ... bench: 52,330 ns/iter (+/- 316) = 11368 MB/s +test sherlock::name_sherlock_holmes ... bench: 19,646 ns/iter (+/- 355) = 30282 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 107,973 ns/iter (+/- 508) = 5510 MB/s +test sherlock::name_sherlock_nocase ... bench: 105,141 ns/iter (+/- 426) = 5658 MB/s +test sherlock::name_whitespace ... bench: 61,149 ns/iter (+/- 350) = 9729 MB/s +test sherlock::no_match_common ... bench: 11,735 ns/iter (+/- 185) = 50697 MB/s +test sherlock::no_match_really_common ... bench: 274,089 ns/iter (+/- 617) = 2170 MB/s +test sherlock::no_match_uncommon ... 
bench: 11,581 ns/iter (+/- 298) = 51371 MB/s +test sherlock::quotes ... bench: 447,749 ns/iter (+/- 1,173) = 1328 MB/s +test sherlock::repeated_class_negation ... bench: 69,119,491 ns/iter (+/- 117,739) = 8 MB/s +test sherlock::the_lower ... bench: 492,559 ns/iter (+/- 1,674) = 1207 MB/s +test sherlock::the_nocase ... bench: 341,445 ns/iter (+/- 6,455) = 1742 MB/s +test sherlock::the_upper ... bench: 30,555 ns/iter (+/- 168) = 19470 MB/s +test sherlock::the_whitespace ... bench: 950,630 ns/iter (+/- 25,179) = 625 MB/s +test sherlock::word_ending_n ... bench: 1,551,930 ns/iter (+/- 17,792) = 383 MB/s +test sherlock::words ... bench: 7,229,870 ns/iter (+/- 25,046) = 82 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 108 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/rust-bytes b/third_party/rust/regex/record/old-bench-log/07/rust-bytes new file mode 100644 index 0000000000..310d775d7c --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/rust-bytes @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 0) = 1625 MB/s +test misc::easy0_1K ... bench: 11 ns/iter (+/- 0) = 95545 MB/s +test misc::easy0_1MB ... bench: 14 ns/iter (+/- 0) = 74900214 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 11 ns/iter (+/- 0) = 2981363 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 0) = 29000 MB/s +test misc::easy1_1MB ... bench: 38 ns/iter (+/- 0) = 27594631 MB/s +test misc::easy1_32 ... bench: 36 ns/iter (+/- 0) = 1444 MB/s +test misc::easy1_32K ... bench: 36 ns/iter (+/- 0) = 910777 MB/s +test misc::hard_1K ... 
bench: 46 ns/iter (+/- 0) = 22847 MB/s +test misc::hard_1MB ... bench: 49 ns/iter (+/- 0) = 21400061 MB/s +test misc::hard_32 ... bench: 46 ns/iter (+/- 0) = 1282 MB/s +test misc::hard_32K ... bench: 46 ns/iter (+/- 0) = 712934 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 1,119 ns/iter (+/- 22) = 89366 MB/s +test misc::long_needle2 ... bench: 535,168 ns/iter (+/- 2,976) = 186 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 0) = 1208 MB/s +test misc::match_class_in_range ... bench: 21 ns/iter (+/- 0) = 3857 MB/s +test misc::medium_1K ... bench: 12 ns/iter (+/- 0) = 87666 MB/s +test misc::medium_1MB ... bench: 16 ns/iter (+/- 0) = 65537750 MB/s +test misc::medium_32 ... bench: 12 ns/iter (+/- 0) = 5000 MB/s +test misc::medium_32K ... bench: 12 ns/iter (+/- 0) = 2733000 MB/s +test misc::no_exponential ... bench: 320 ns/iter (+/- 3) = 312 MB/s +test misc::not_literal ... bench: 86 ns/iter (+/- 0) = 593 MB/s +test misc::one_pass_long_prefix ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_long_prefix_not ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_short ... bench: 34 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_short_not ... bench: 37 ns/iter (+/- 0) = 459 MB/s +test misc::reallyhard2_1K ... bench: 50 ns/iter (+/- 0) = 20800 MB/s +test misc::reallyhard_1K ... bench: 1,548 ns/iter (+/- 0) = 678 MB/s +test misc::reallyhard_1MB ... bench: 1,534,068 ns/iter (+/- 14,813) = 683 MB/s +test misc::reallyhard_32 ... bench: 98 ns/iter (+/- 1) = 602 MB/s +test misc::reallyhard_32K ... bench: 48,003 ns/iter (+/- 128) = 683 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,015 ns/iter (+/- 11) = 1992 MB/s +test regexdna::find_new_lines ... bench: 11,859,603 ns/iter (+/- 22,707) = 428 MB/s +test regexdna::subst1 ... bench: 717,255 ns/iter (+/- 3,261) = 7087 MB/s +test regexdna::subst10 ... bench: 719,600 ns/iter (+/- 4,712) = 7064 MB/s +test regexdna::subst11 ... 
bench: 708,612 ns/iter (+/- 6,314) = 7173 MB/s +test regexdna::subst2 ... bench: 715,174 ns/iter (+/- 5,097) = 7107 MB/s +test regexdna::subst3 ... bench: 711,261 ns/iter (+/- 12,051) = 7147 MB/s +test regexdna::subst4 ... bench: 761,920 ns/iter (+/- 4,924) = 6671 MB/s +test regexdna::subst5 ... bench: 740,755 ns/iter (+/- 12,762) = 6862 MB/s +test regexdna::subst6 ... bench: 713,936 ns/iter (+/- 7,103) = 7120 MB/s +test regexdna::subst7 ... bench: 710,142 ns/iter (+/- 5,377) = 7158 MB/s +test regexdna::subst8 ... bench: 712,154 ns/iter (+/- 4,485) = 7138 MB/s +test regexdna::subst9 ... bench: 713,214 ns/iter (+/- 6,830) = 7127 MB/s +test regexdna::variant1 ... bench: 2,448,709 ns/iter (+/- 10,799) = 2075 MB/s +test regexdna::variant2 ... bench: 5,541,606 ns/iter (+/- 26,197) = 917 MB/s +test regexdna::variant3 ... bench: 6,563,736 ns/iter (+/- 163,805) = 774 MB/s +test regexdna::variant4 ... bench: 6,428,096 ns/iter (+/- 38,372) = 790 MB/s +test regexdna::variant5 ... bench: 5,110,667 ns/iter (+/- 141,363) = 994 MB/s +test regexdna::variant6 ... bench: 5,086,936 ns/iter (+/- 25,675) = 999 MB/s +test regexdna::variant7 ... bench: 4,607,360 ns/iter (+/- 31,834) = 1103 MB/s +test regexdna::variant8 ... bench: 4,636,550 ns/iter (+/- 11,143) = 1096 MB/s +test regexdna::variant9 ... bench: 4,534,765 ns/iter (+/- 18,435) = 1120 MB/s +test sherlock::before_after_holmes ... bench: 880,980 ns/iter (+/- 1,386) = 675 MB/s +test sherlock::before_holmes ... bench: 56,626 ns/iter (+/- 612) = 10506 MB/s +test sherlock::everything_greedy ... bench: 1,715,022 ns/iter (+/- 7,374) = 346 MB/s +test sherlock::everything_greedy_nl ... bench: 778,398 ns/iter (+/- 6,195) = 764 MB/s +test sherlock::holmes_cochar_watson ... bench: 91,093 ns/iter (+/- 266) = 6531 MB/s +test sherlock::holmes_coword_watson ... bench: 457,793 ns/iter (+/- 3,094) = 1299 MB/s +test sherlock::ing_suffix ... bench: 348,696 ns/iter (+/- 2,174) = 1706 MB/s +test sherlock::ing_suffix_limited_space ... 
bench: 1,017,664 ns/iter (+/- 8,581) = 584 MB/s +test sherlock::letters ... bench: 19,098,779 ns/iter (+/- 36,233) = 31 MB/s +test sherlock::letters_lower ... bench: 17,748,386 ns/iter (+/- 37,835) = 33 MB/s +test sherlock::letters_upper ... bench: 1,592,729 ns/iter (+/- 2,977) = 373 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 873,365 ns/iter (+/- 1,399) = 681 MB/s +test sherlock::name_alt1 ... bench: 21,965 ns/iter (+/- 336) = 27085 MB/s +test sherlock::name_alt2 ... bench: 73,887 ns/iter (+/- 107) = 8051 MB/s +test sherlock::name_alt3 ... bench: 79,186 ns/iter (+/- 274) = 7513 MB/s +test sherlock::name_alt3_nocase ... bench: 1,111,949 ns/iter (+/- 3,589) = 535 MB/s +test sherlock::name_alt4 ... bench: 102,493 ns/iter (+/- 959) = 5804 MB/s +test sherlock::name_alt4_nocase ... bench: 158,438 ns/iter (+/- 946) = 3754 MB/s +test sherlock::name_alt5 ... bench: 74,362 ns/iter (+/- 139) = 8000 MB/s +test sherlock::name_alt5_nocase ... bench: 469,720 ns/iter (+/- 5,941) = 1266 MB/s +test sherlock::name_holmes ... bench: 28,919 ns/iter (+/- 372) = 20572 MB/s +test sherlock::name_holmes_nocase ... bench: 123,251 ns/iter (+/- 786) = 4827 MB/s +test sherlock::name_sherlock ... bench: 53,032 ns/iter (+/- 487) = 11218 MB/s +test sherlock::name_sherlock_holmes ... bench: 20,566 ns/iter (+/- 280) = 28927 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 108,166 ns/iter (+/- 303) = 5500 MB/s +test sherlock::name_sherlock_nocase ... bench: 105,034 ns/iter (+/- 797) = 5664 MB/s +test sherlock::name_whitespace ... bench: 60,968 ns/iter (+/- 490) = 9758 MB/s +test sherlock::no_match_common ... bench: 12,191 ns/iter (+/- 128) = 48801 MB/s +test sherlock::no_match_really_common ... bench: 274,528 ns/iter (+/- 1,101) = 2167 MB/s +test sherlock::no_match_uncommon ... bench: 12,197 ns/iter (+/- 191) = 48776 MB/s +test sherlock::quotes ... bench: 446,264 ns/iter (+/- 5,936) = 1333 MB/s +test sherlock::repeated_class_negation ... 
bench: 69,728,764 ns/iter (+/- 155,104) = 8 MB/s +test sherlock::the_lower ... bench: 493,734 ns/iter (+/- 5,997) = 1204 MB/s +test sherlock::the_nocase ... bench: 339,088 ns/iter (+/- 3,760) = 1754 MB/s +test sherlock::the_upper ... bench: 30,957 ns/iter (+/- 313) = 19218 MB/s +test sherlock::the_whitespace ... bench: 921,059 ns/iter (+/- 8,102) = 645 MB/s +test sherlock::word_ending_n ... bench: 1,530,899 ns/iter (+/- 18,006) = 388 MB/s +test sherlock::words ... bench: 6,959,355 ns/iter (+/- 31,671) = 85 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/stdcpp b/third_party/rust/regex/record/old-bench-log/07/stdcpp new file mode 100644 index 0000000000..57c25ae602 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/stdcpp @@ -0,0 +1,87 @@ + +running 82 tests +test misc::anchored_literal_long_match ... bench: 142 ns/iter (+/- 0) = 2746 MB/s +test misc::anchored_literal_long_non_match ... bench: 5,504 ns/iter (+/- 20) = 70 MB/s +test misc::anchored_literal_short_match ... bench: 143 ns/iter (+/- 0) = 181 MB/s +test misc::anchored_literal_short_non_match ... bench: 471 ns/iter (+/- 1) = 55 MB/s +test misc::easy0_1K ... bench: 14,534 ns/iter (+/- 87) = 72 MB/s +test misc::easy0_1MB ... bench: 14,554,912 ns/iter (+/- 33,264) = 72 MB/s +test misc::easy0_32 ... bench: 730 ns/iter (+/- 1) = 80 MB/s +test misc::easy0_32K ... bench: 454,911 ns/iter (+/- 526) = 72 MB/s +test misc::easy1_1K ... bench: 14,486 ns/iter (+/- 45) = 72 MB/s +test misc::easy1_1MB ... bench: 14,555,850 ns/iter (+/- 108,290) = 72 MB/s +test misc::easy1_32 ... bench: 692 ns/iter (+/- 1) = 75 MB/s +test misc::easy1_32K ... bench: 456,269 ns/iter (+/- 2,856) = 71 MB/s +test misc::hard_1K ... bench: 299,581 ns/iter (+/- 7,493) = 3 MB/s +test misc::hard_1MB ... bench: 314,289,240 ns/iter (+/- 128,869) = 3 MB/s +test misc::hard_32 ... bench: 9,202 ns/iter (+/- 17) = 6 MB/s +test misc::hard_32K ... 
bench: 9,777,807 ns/iter (+/- 19,451) = 3 MB/s +test misc::literal ... bench: 804 ns/iter (+/- 2) = 63 MB/s +test misc::long_needle1 ... bench: 15,712,941 ns/iter (+/- 23,893) = 6 MB/s +test misc::long_needle2 ... bench: 15,955,109 ns/iter (+/- 26,652) = 6 MB/s +test misc::match_class ... bench: 1,250 ns/iter (+/- 4) = 64 MB/s +test misc::match_class_in_range ... bench: 1,250 ns/iter (+/- 4) = 64 MB/s +test misc::medium_1K ... bench: 14,913 ns/iter (+/- 108) = 70 MB/s +test misc::medium_1MB ... bench: 14,929,542 ns/iter (+/- 38,890) = 70 MB/s +test misc::medium_32 ... bench: 736 ns/iter (+/- 0) = 81 MB/s +test misc::medium_32K ... bench: 466,504 ns/iter (+/- 1,488) = 70 MB/s +test misc::not_literal ... bench: 1,015 ns/iter (+/- 8) = 50 MB/s +test misc::one_pass_long_prefix ... bench: 262 ns/iter (+/- 0) = 99 MB/s +test misc::one_pass_long_prefix_not ... bench: 263 ns/iter (+/- 3) = 98 MB/s +test misc::one_pass_short ... bench: 502 ns/iter (+/- 2) = 33 MB/s +test misc::one_pass_short_not ... bench: 498 ns/iter (+/- 0) = 34 MB/s +test misc::reallyhard2_1K ... bench: 304,485 ns/iter (+/- 762) = 3 MB/s +test misc::reallyhard_1K ... bench: 292,315 ns/iter (+/- 1,985) = 3 MB/s +test misc::reallyhard_1MB ... bench: 313,208,610 ns/iter (+/- 163,013) = 3 MB/s +test misc::reallyhard_32 ... bench: 9,232 ns/iter (+/- 21) = 6 MB/s +test misc::reallyhard_32K ... bench: 9,952,463 ns/iter (+/- 22,317) = 3 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 114,029 ns/iter (+/- 734) = 70 MB/s +test regexdna::find_new_lines ... bench: 121,481,845 ns/iter (+/- 289,966) = 41 MB/s +test regexdna::subst1 ... bench: 73,580,323 ns/iter (+/- 82,998) = 69 MB/s +test regexdna::subst10 ... bench: 73,588,543 ns/iter (+/- 95,250) = 69 MB/s +test regexdna::subst11 ... bench: 73,592,436 ns/iter (+/- 86,358) = 69 MB/s +test regexdna::subst2 ... bench: 73,581,323 ns/iter (+/- 88,210) = 69 MB/s +test regexdna::subst3 ... bench: 73,577,422 ns/iter (+/- 48,215) = 69 MB/s +test regexdna::subst4 ... 
bench: 73,586,896 ns/iter (+/- 82,117) = 69 MB/s +test regexdna::subst5 ... bench: 73,652,696 ns/iter (+/- 95,155) = 69 MB/s +test regexdna::subst6 ... bench: 74,633,620 ns/iter (+/- 74,754) = 68 MB/s +test regexdna::subst7 ... bench: 73,586,338 ns/iter (+/- 82,645) = 69 MB/s +test regexdna::subst8 ... bench: 75,009,572 ns/iter (+/- 116,800) = 67 MB/s +test regexdna::subst9 ... bench: 73,581,469 ns/iter (+/- 146,286) = 69 MB/s +test regexdna::variant1 ... bench: 140,768,740 ns/iter (+/- 113,580) = 36 MB/s +test regexdna::variant2 ... bench: 153,330,005 ns/iter (+/- 11,581,095) = 33 MB/s +test regexdna::variant3 ... bench: 145,484,512 ns/iter (+/- 150,566) = 34 MB/s +test regexdna::variant4 ... bench: 141,659,767 ns/iter (+/- 123,940) = 35 MB/s +test regexdna::variant5 ... bench: 145,309,207 ns/iter (+/- 129,675) = 34 MB/s +test regexdna::variant6 ... bench: 141,145,017 ns/iter (+/- 164,414) = 36 MB/s +test regexdna::variant7 ... bench: 141,897,206 ns/iter (+/- 212,981) = 35 MB/s +test regexdna::variant8 ... bench: 150,467,139 ns/iter (+/- 120,619) = 33 MB/s +test regexdna::variant9 ... bench: 151,635,430 ns/iter (+/- 128,912) = 33 MB/s +test sherlock::before_after_holmes ... bench: 36,941,681 ns/iter (+/- 36,199) = 16 MB/s +test sherlock::before_holmes ... bench: 36,920,860 ns/iter (+/- 38,258) = 16 MB/s +test sherlock::everything_greedy ... bench: 9,047,684 ns/iter (+/- 18,290) = 65 MB/s +test sherlock::holmes_cochar_watson ... bench: 12,634,723 ns/iter (+/- 36,086) = 47 MB/s +test sherlock::ing_suffix ... bench: 30,232,323 ns/iter (+/- 49,084) = 19 MB/s +test sherlock::ing_suffix_limited_space ... bench: 18,837,733 ns/iter (+/- 39,569) = 31 MB/s +test sherlock::name_alt1 ... bench: 12,462,918 ns/iter (+/- 17,158) = 47 MB/s +test sherlock::name_alt2 ... bench: 12,490,419 ns/iter (+/- 26,214) = 47 MB/s +test sherlock::name_alt3 ... bench: 33,156,941 ns/iter (+/- 47,236) = 17 MB/s +test sherlock::name_alt4 ... 
bench: 12,583,828 ns/iter (+/- 26,121) = 47 MB/s +test sherlock::name_alt5 ... bench: 16,615,345 ns/iter (+/- 22,930) = 35 MB/s +test sherlock::name_holmes ... bench: 8,307,917 ns/iter (+/- 17,452) = 71 MB/s +test sherlock::name_sherlock ... bench: 8,273,395 ns/iter (+/- 25,717) = 71 MB/s +test sherlock::name_sherlock_holmes ... bench: 8,270,000 ns/iter (+/- 19,702) = 71 MB/s +test sherlock::name_whitespace ... bench: 8,453,784 ns/iter (+/- 19,604) = 70 MB/s +test sherlock::no_match_common ... bench: 8,679,069 ns/iter (+/- 27,721) = 68 MB/s +test sherlock::no_match_really_common ... bench: 8,679,099 ns/iter (+/- 17,665) = 68 MB/s +test sherlock::no_match_uncommon ... bench: 8,260,259 ns/iter (+/- 147,913) = 72 MB/s +test sherlock::quotes ... bench: 10,257,367 ns/iter (+/- 25,054) = 58 MB/s +test sherlock::repeated_class_negation ... bench: 25,374,678 ns/iter (+/- 23,494) = 23 MB/s +test sherlock::the_lower ... bench: 9,424,206 ns/iter (+/- 23,231) = 63 MB/s +test sherlock::the_upper ... bench: 8,350,015 ns/iter (+/- 23,176) = 71 MB/s +test sherlock::the_whitespace ... bench: 9,285,991 ns/iter (+/- 16,835) = 64 MB/s +test sherlock::word_ending_n ... bench: 69,609,427 ns/iter (+/- 52,974) = 8 MB/s +test sherlock::words ... bench: 20,107,601 ns/iter (+/- 36,086) = 29 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 82 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/stdcpp-libcxx b/third_party/rust/regex/record/old-bench-log/07/stdcpp-libcxx new file mode 100644 index 0000000000..ff21e67881 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/stdcpp-libcxx @@ -0,0 +1,87 @@ + +running 82 tests +test misc::anchored_literal_long_match ... bench: 162 ns/iter (+/- 0) = 2407 MB/s +test misc::anchored_literal_long_non_match ... bench: 21,901 ns/iter (+/- 140) = 17 MB/s +test misc::anchored_literal_short_match ... bench: 162 ns/iter (+/- 0) = 160 MB/s +test misc::anchored_literal_short_non_match ... 
bench: 1,501 ns/iter (+/- 1) = 17 MB/s +test misc::easy0_1K ... bench: 39,405 ns/iter (+/- 250) = 26 MB/s +test misc::easy0_1MB ... bench: 39,706,678 ns/iter (+/- 103,211) = 26 MB/s +test misc::easy0_32 ... bench: 1,415 ns/iter (+/- 3) = 41 MB/s +test misc::easy0_32K ... bench: 1,241,085 ns/iter (+/- 5,625) = 26 MB/s +test misc::easy1_1K ... bench: 39,421 ns/iter (+/- 275) = 26 MB/s +test misc::easy1_1MB ... bench: 39,725,158 ns/iter (+/- 64,488) = 26 MB/s +test misc::easy1_32 ... bench: 1,421 ns/iter (+/- 8) = 36 MB/s +test misc::easy1_32K ... bench: 1,240,953 ns/iter (+/- 5,794) = 26 MB/s +test misc::hard_1K ... bench: 1,263,948 ns/iter (+/- 31,771) +test misc::hard_1MB ... bench: 1,331,000,673 ns/iter (+/- 7,401,131) +test misc::hard_32 ... bench: 37,752 ns/iter (+/- 109) = 1 MB/s +test misc::hard_32K ... bench: 41,044,286 ns/iter (+/- 57,765) +test misc::literal ... bench: 1,980 ns/iter (+/- 7) = 25 MB/s +test misc::long_needle1 ... bench: 12,425,121 ns/iter (+/- 36,611) = 8 MB/s +test misc::long_needle2 ... bench: 12,568,992 ns/iter (+/- 28,513) = 7 MB/s +test misc::match_class ... bench: 3,918 ns/iter (+/- 67) = 20 MB/s +test misc::match_class_in_range ... bench: 3,534 ns/iter (+/- 11) = 22 MB/s +test misc::medium_1K ... bench: 44,910 ns/iter (+/- 167) = 23 MB/s +test misc::medium_1MB ... bench: 45,558,328 ns/iter (+/- 77,166) = 23 MB/s +test misc::medium_32 ... bench: 1,599 ns/iter (+/- 12) = 37 MB/s +test misc::medium_32K ... bench: 1,423,945 ns/iter (+/- 9,468) = 23 MB/s +test misc::not_literal ... bench: 2,051 ns/iter (+/- 16) = 24 MB/s +test misc::one_pass_long_prefix ... bench: 222 ns/iter (+/- 0) = 117 MB/s +test misc::one_pass_long_prefix_not ... bench: 223 ns/iter (+/- 0) = 116 MB/s +test misc::one_pass_short ... bench: 2,002 ns/iter (+/- 37) = 8 MB/s +test misc::one_pass_short_not ... bench: 1,990 ns/iter (+/- 6) = 8 MB/s +test misc::reallyhard2_1K ... bench: 1,335,845 ns/iter (+/- 6,233) +test misc::reallyhard_1K ... 
bench: 1,208,846 ns/iter (+/- 6,070) +test misc::reallyhard_1MB ... bench: 1,291,183,401 ns/iter (+/- 4,281,775) +test misc::reallyhard_32 ... bench: 36,521 ns/iter (+/- 157) = 1 MB/s +test misc::reallyhard_32K ... bench: 40,131,467 ns/iter (+/- 66,846) +test misc::reverse_suffix_no_quadratic ... bench: 506,352 ns/iter (+/- 632) = 15 MB/s +test regexdna::find_new_lines ... bench: 510,954,670 ns/iter (+/- 1,946,366) = 9 MB/s +test regexdna::subst1 ... bench: 198,786,137 ns/iter (+/- 240,963) = 25 MB/s +test regexdna::subst10 ... bench: 198,733,597 ns/iter (+/- 770,484) = 25 MB/s +test regexdna::subst11 ... bench: 198,734,922 ns/iter (+/- 198,116) = 25 MB/s +test regexdna::subst2 ... bench: 198,735,715 ns/iter (+/- 235,337) = 25 MB/s +test regexdna::subst3 ... bench: 198,736,727 ns/iter (+/- 157,633) = 25 MB/s +test regexdna::subst4 ... bench: 198,811,880 ns/iter (+/- 1,502,214) = 25 MB/s +test regexdna::subst5 ... bench: 198,697,281 ns/iter (+/- 211,978) = 25 MB/s +test regexdna::subst6 ... bench: 198,714,239 ns/iter (+/- 1,187,050) = 25 MB/s +test regexdna::subst7 ... bench: 199,021,730 ns/iter (+/- 1,555,969) = 25 MB/s +test regexdna::subst8 ... bench: 199,033,133 ns/iter (+/- 213,859) = 25 MB/s +test regexdna::subst9 ... bench: 199,466,527 ns/iter (+/- 1,394,750) = 25 MB/s +test regexdna::variant1 ... bench: 403,588,578 ns/iter (+/- 493,905) = 12 MB/s +test regexdna::variant2 ... bench: 440,582,945 ns/iter (+/- 305,836) = 11 MB/s +test regexdna::variant3 ... bench: 417,460,804 ns/iter (+/- 1,858,105) = 12 MB/s +test regexdna::variant4 ... bench: 407,209,088 ns/iter (+/- 1,374,513) = 12 MB/s +test regexdna::variant5 ... bench: 408,665,895 ns/iter (+/- 338,946) = 12 MB/s +test regexdna::variant6 ... bench: 408,640,565 ns/iter (+/- 1,895,287) = 12 MB/s +test regexdna::variant7 ... bench: 406,340,097 ns/iter (+/- 2,309,358) = 12 MB/s +test regexdna::variant8 ... bench: 413,195,331 ns/iter (+/- 2,178,194) = 12 MB/s +test regexdna::variant9 ... 
bench: 438,844,927 ns/iter (+/- 2,589,599) = 11 MB/s +test sherlock::before_after_holmes ... bench: 165,435,560 ns/iter (+/- 165,901) = 3 MB/s +test sherlock::before_holmes ... bench: 164,466,984 ns/iter (+/- 178,082) = 3 MB/s +test sherlock::everything_greedy ... bench: 34,680,745 ns/iter (+/- 862,671) = 17 MB/s +test sherlock::holmes_cochar_watson ... bench: 59,712,596 ns/iter (+/- 85,049) = 9 MB/s +test sherlock::ing_suffix ... bench: 135,611,524 ns/iter (+/- 383,869) = 4 MB/s +test sherlock::ing_suffix_limited_space ... bench: 73,398,446 ns/iter (+/- 112,893) = 8 MB/s +test sherlock::name_alt1 ... bench: 42,274,906 ns/iter (+/- 60,836) = 14 MB/s +test sherlock::name_alt2 ... bench: 42,159,449 ns/iter (+/- 56,642) = 14 MB/s +test sherlock::name_alt3 ... bench: 121,926,811 ns/iter (+/- 624,877) = 4 MB/s +test sherlock::name_alt4 ... bench: 58,912,788 ns/iter (+/- 101,576) = 10 MB/s +test sherlock::name_alt5 ... bench: 63,891,303 ns/iter (+/- 79,754) = 9 MB/s +test sherlock::name_holmes ... bench: 22,995,759 ns/iter (+/- 45,074) = 25 MB/s +test sherlock::name_sherlock ... bench: 23,024,135 ns/iter (+/- 86,982) = 25 MB/s +test sherlock::name_sherlock_holmes ... bench: 23,026,357 ns/iter (+/- 42,271) = 25 MB/s +test sherlock::name_whitespace ... bench: 32,485,572 ns/iter (+/- 77,736) = 18 MB/s +test sherlock::no_match_common ... bench: 23,544,207 ns/iter (+/- 590,037) = 25 MB/s +test sherlock::no_match_really_common ... bench: 23,543,480 ns/iter (+/- 51,838) = 25 MB/s +test sherlock::no_match_uncommon ... bench: 23,024,692 ns/iter (+/- 78,358) = 25 MB/s +test sherlock::quotes ... bench: 42,376,602 ns/iter (+/- 49,060) = 14 MB/s +test sherlock::repeated_class_negation ... bench: 92,701,274 ns/iter (+/- 208,063) = 6 MB/s +test sherlock::the_lower ... bench: 23,553,163 ns/iter (+/- 61,446) = 25 MB/s +test sherlock::the_upper ... bench: 23,281,951 ns/iter (+/- 35,811) = 25 MB/s +test sherlock::the_whitespace ... 
bench: 33,011,779 ns/iter (+/- 65,085) = 18 MB/s +test sherlock::word_ending_n ... bench: 64,965,762 ns/iter (+/- 106,103) = 9 MB/s +test sherlock::words ... bench: 47,466,153 ns/iter (+/- 773,222) = 12 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 82 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/07/tcl b/third_party/rust/regex/record/old-bench-log/07/tcl new file mode 100644 index 0000000000..0586935c03 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/07/tcl @@ -0,0 +1,94 @@ + +running 89 tests +test misc::anchored_literal_long_match ... bench: 452 ns/iter (+/- 6) = 862 MB/s +test misc::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 2) = 4239 MB/s +test misc::anchored_literal_short_match ... bench: 454 ns/iter (+/- 6) = 57 MB/s +test misc::anchored_literal_short_non_match ... bench: 92 ns/iter (+/- 1) = 282 MB/s +test misc::easy0_1K ... bench: 9,231 ns/iter (+/- 59) = 113 MB/s +test misc::easy0_1MB ... bench: 2,828,050 ns/iter (+/- 9,104) = 370 MB/s +test misc::easy0_32 ... bench: 6,527 ns/iter (+/- 78) = 9 MB/s +test misc::easy0_32K ... bench: 94,825 ns/iter (+/- 410) = 345 MB/s +test misc::easy1_1K ... bench: 5,420 ns/iter (+/- 54) = 192 MB/s +test misc::easy1_1MB ... bench: 2,823,597 ns/iter (+/- 8,534) = 371 MB/s +test misc::easy1_32 ... bench: 2,727 ns/iter (+/- 80) = 19 MB/s +test misc::easy1_32K ... bench: 93,382 ns/iter (+/- 108) = 351 MB/s +test misc::hard_1K ... bench: 12,046 ns/iter (+/- 88) = 87 MB/s +test misc::hard_1MB ... bench: 2,831,445 ns/iter (+/- 9,713) = 370 MB/s +test misc::hard_32 ... bench: 9,257 ns/iter (+/- 63) = 6 MB/s +test misc::hard_32K ... bench: 97,613 ns/iter (+/- 533) = 335 MB/s +test misc::literal ... bench: 398 ns/iter (+/- 14) = 128 MB/s +test misc::long_needle1 ... bench: 18,459,088 ns/iter (+/- 162,391) = 5 MB/s +test misc::long_needle2 ... bench: 18,390,595 ns/iter (+/- 96,143) = 5 MB/s +test misc::match_class ... 
bench: 480 ns/iter (+/- 1) = 168 MB/s +test misc::match_class_in_range ... bench: 477 ns/iter (+/- 10) = 169 MB/s +test misc::medium_1K ... bench: 9,573 ns/iter (+/- 94) = 109 MB/s +test misc::medium_1MB ... bench: 2,828,512 ns/iter (+/- 28,270) = 370 MB/s +test misc::medium_32 ... bench: 6,874 ns/iter (+/- 68) = 8 MB/s +test misc::medium_32K ... bench: 95,040 ns/iter (+/- 517) = 345 MB/s +test misc::no_exponential ... bench: 1,976,788 ns/iter (+/- 20,661) +test misc::not_literal ... bench: 1,548 ns/iter (+/- 15) = 32 MB/s +test misc::one_pass_long_prefix ... bench: 5,063 ns/iter (+/- 76) = 5 MB/s +test misc::one_pass_long_prefix_not ... bench: 4,933 ns/iter (+/- 62) = 5 MB/s +test misc::one_pass_short ... bench: 486 ns/iter (+/- 4) = 34 MB/s +test misc::one_pass_short_not ... bench: 579 ns/iter (+/- 3) = 29 MB/s +test misc::reallyhard2_1K ... bench: 88,153 ns/iter (+/- 2,317) = 11 MB/s +test misc::reallyhard_1K ... bench: 12,157 ns/iter (+/- 51) = 86 MB/s +test misc::reallyhard_1MB ... bench: 2,866,126 ns/iter (+/- 71,338) = 365 MB/s +test misc::reallyhard_32 ... bench: 9,321 ns/iter (+/- 138) = 6 MB/s +test misc::reallyhard_32K ... bench: 97,799 ns/iter (+/- 1,087) = 335 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 22,679 ns/iter (+/- 293) = 352 MB/s +test regexdna::find_new_lines ... bench: 38,700,951 ns/iter (+/- 105,197) = 131 MB/s +test regexdna::subst1 ... bench: 22,123,470 ns/iter (+/- 96,738) = 229 MB/s +test regexdna::subst10 ... bench: 22,125,412 ns/iter (+/- 65,856) = 229 MB/s +test regexdna::subst11 ... bench: 22,178,791 ns/iter (+/- 75,853) = 229 MB/s +test regexdna::subst2 ... bench: 22,348,278 ns/iter (+/- 228,790) = 227 MB/s +test regexdna::subst3 ... bench: 22,187,493 ns/iter (+/- 69,149) = 229 MB/s +test regexdna::subst4 ... bench: 22,134,373 ns/iter (+/- 71,979) = 229 MB/s +test regexdna::subst5 ... bench: 22,183,169 ns/iter (+/- 66,220) = 229 MB/s +test regexdna::subst6 ... 
bench: 22,263,432 ns/iter (+/- 91,605) = 228 MB/s +test regexdna::subst7 ... bench: 22,256,481 ns/iter (+/- 62,794) = 228 MB/s +test regexdna::subst8 ... bench: 22,134,314 ns/iter (+/- 75,199) = 229 MB/s +test regexdna::subst9 ... bench: 22,144,129 ns/iter (+/- 76,744) = 229 MB/s +test regexdna::variant1 ... bench: 13,846,793 ns/iter (+/- 33,520) = 367 MB/s +test regexdna::variant2 ... bench: 14,248,239 ns/iter (+/- 62,252) = 356 MB/s +test regexdna::variant3 ... bench: 15,702,520 ns/iter (+/- 339,738) = 323 MB/s +test regexdna::variant4 ... bench: 15,143,136 ns/iter (+/- 52,300) = 335 MB/s +test regexdna::variant5 ... bench: 16,324,698 ns/iter (+/- 50,942) = 311 MB/s +test regexdna::variant6 ... bench: 14,508,593 ns/iter (+/- 46,251) = 350 MB/s +test regexdna::variant7 ... bench: 14,443,485 ns/iter (+/- 80,444) = 351 MB/s +test regexdna::variant8 ... bench: 14,430,571 ns/iter (+/- 63,143) = 352 MB/s +test regexdna::variant9 ... bench: 14,883,129 ns/iter (+/- 76,837) = 341 MB/s +test sherlock::before_after_holmes ... bench: 2,227,807 ns/iter (+/- 9,119) = 267 MB/s +test sherlock::before_holmes ... bench: 2,700,579 ns/iter (+/- 24,875) = 220 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,211,847 ns/iter (+/- 15,027) = 268 MB/s +test sherlock::ing_suffix ... bench: 4,398,150 ns/iter (+/- 27,219) = 135 MB/s +test sherlock::ing_suffix_limited_space ... bench: 17,992,130 ns/iter (+/- 457,978) = 33 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 1,845,704 ns/iter (+/- 9,382) = 322 MB/s +test sherlock::name_alt1 ... bench: 1,890,373 ns/iter (+/- 9,971) = 314 MB/s +test sherlock::name_alt2 ... bench: 2,626,524 ns/iter (+/- 18,261) = 226 MB/s +test sherlock::name_alt3 ... bench: 4,468,643 ns/iter (+/- 11,946) = 133 MB/s +test sherlock::name_alt3_nocase ... bench: 7,226,342 ns/iter (+/- 57,220) = 82 MB/s +test sherlock::name_alt4 ... bench: 2,395,105 ns/iter (+/- 31,101) = 248 MB/s +test sherlock::name_alt4_nocase ... 
bench: 2,895,153 ns/iter (+/- 12,446) = 205 MB/s +test sherlock::name_alt5 ... bench: 3,253,560 ns/iter (+/- 33,725) = 182 MB/s +test sherlock::name_alt5_nocase ... bench: 4,008,656 ns/iter (+/- 39,415) = 148 MB/s +test sherlock::name_holmes ... bench: 2,076,117 ns/iter (+/- 6,376) = 286 MB/s +test sherlock::name_holmes_nocase ... bench: 2,157,634 ns/iter (+/- 6,494) = 275 MB/s +test sherlock::name_sherlock ... bench: 1,757,317 ns/iter (+/- 5,935) = 338 MB/s +test sherlock::name_sherlock_holmes ... bench: 1,897,004 ns/iter (+/- 12,012) = 313 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,939,722 ns/iter (+/- 6,273) = 306 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,801,334 ns/iter (+/- 3,179) = 330 MB/s +test sherlock::name_whitespace ... bench: 1,910,996 ns/iter (+/- 6,429) = 311 MB/s +test sherlock::no_match_common ... bench: 1,601,431 ns/iter (+/- 7,131) = 371 MB/s +test sherlock::no_match_really_common ... bench: 1,601,153 ns/iter (+/- 4,375) = 371 MB/s +test sherlock::no_match_uncommon ... bench: 1,600,840 ns/iter (+/- 8,348) = 371 MB/s +test sherlock::quotes ... bench: 7,620,650 ns/iter (+/- 48,467) = 78 MB/s +test sherlock::repeated_class_negation ... bench: 55,564,521 ns/iter (+/- 210,324) = 10 MB/s +test sherlock::the_lower ... bench: 5,628,558 ns/iter (+/- 19,934) = 105 MB/s +test sherlock::the_nocase ... bench: 6,063,195 ns/iter (+/- 28,534) = 98 MB/s +test sherlock::the_upper ... bench: 1,992,703 ns/iter (+/- 6,736) = 298 MB/s +test sherlock::the_whitespace ... bench: 7,159,423 ns/iter (+/- 38,306) = 83 MB/s +test sherlock::words ... bench: 38,358,421 ns/iter (+/- 99,230) = 15 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 89 measured; 0 filtered out + diff --git a/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-after-01 b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-after-01 new file mode 100644 index 0000000000..521e935f43 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-after-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 2) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 2) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 0) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 2) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 40 ns/iter (+/- 2) = 26100 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 5) = 23831727 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 5) = 1333 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 3) = 799707 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 7) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 6) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 7) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 2) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 2) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 1) = 4250 MB/s +test misc::long_needle1 ... bench: 3,252 ns/iter (+/- 168) = 30750 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 34,074) = 281 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 2) = 1208 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... 
bench: 256 ns/iter (+/- 36) = 628 MB/s +test misc::matches_set ... bench: 458 ns/iter (+/- 65) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 1) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 406 ns/iter (+/- 32) = 246 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_short ... bench: 37 ns/iter (+/- 1) = 459 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 5) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,591 ns/iter (+/- 227) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,602 ns/iter (+/- 204,573) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,327 ns/iter (+/- 4,812) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 4,190 ns/iter (+/- 581) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,982 ns/iter (+/- 18,045) = 60158 MB/s +test misc::short_haystack_100000x ... bench: 14,720 ns/iter (+/- 946) = 54348 MB/s +test misc::short_haystack_10000x ... bench: 5,993 ns/iter (+/- 381) = 13350 MB/s +test misc::short_haystack_1000x ... bench: 476 ns/iter (+/- 58) = 16829 MB/s +test misc::short_haystack_100x ... bench: 227 ns/iter (+/- 22) = 3572 MB/s +test misc::short_haystack_10x ... bench: 211 ns/iter (+/- 13) = 431 MB/s +test misc::short_haystack_1x ... bench: 204 ns/iter (+/- 29) = 93 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 7) = 131 MB/s +test misc::short_haystack_3x ... 
bench: 212 ns/iter (+/- 16) = 165 MB/s +test misc::short_haystack_4x ... bench: 207 ns/iter (+/- 29) = 207 MB/s +test regexdna::find_new_lines ... bench: 12,053,740 ns/iter (+/- 393,644) = 421 MB/s +test regexdna::subst1 ... bench: 786,112 ns/iter (+/- 91,136) = 6466 MB/s +test regexdna::subst10 ... bench: 831,353 ns/iter (+/- 67,293) = 6114 MB/s +test regexdna::subst11 ... bench: 784,021 ns/iter (+/- 28,112) = 6483 MB/s +test regexdna::subst2 ... bench: 785,838 ns/iter (+/- 108,510) = 6468 MB/s +test regexdna::subst3 ... bench: 791,789 ns/iter (+/- 37,364) = 6420 MB/s +test regexdna::subst4 ... bench: 784,224 ns/iter (+/- 23,802) = 6482 MB/s +test regexdna::subst5 ... bench: 788,368 ns/iter (+/- 75,171) = 6448 MB/s +test regexdna::subst6 ... bench: 784,730 ns/iter (+/- 48,594) = 6477 MB/s +test regexdna::subst7 ... bench: 788,067 ns/iter (+/- 88,333) = 6450 MB/s +test regexdna::subst8 ... bench: 810,784 ns/iter (+/- 111,836) = 6269 MB/s +test regexdna::subst9 ... bench: 788,854 ns/iter (+/- 66,496) = 6444 MB/s +test regexdna::variant1 ... bench: 2,238,677 ns/iter (+/- 144,752) = 2270 MB/s +test regexdna::variant2 ... bench: 3,258,761 ns/iter (+/- 205,012) = 1559 MB/s +test regexdna::variant3 ... bench: 3,818,146 ns/iter (+/- 254,877) = 1331 MB/s +test regexdna::variant4 ... bench: 3,837,323 ns/iter (+/- 349,373) = 1324 MB/s +test regexdna::variant5 ... bench: 2,698,901 ns/iter (+/- 111,145) = 1883 MB/s +test regexdna::variant6 ... bench: 2,687,854 ns/iter (+/- 184,039) = 1891 MB/s +test regexdna::variant7 ... bench: 3,291,211 ns/iter (+/- 220,992) = 1544 MB/s +test regexdna::variant8 ... bench: 3,359,262 ns/iter (+/- 185,610) = 1513 MB/s +test regexdna::variant9 ... bench: 3,293,953 ns/iter (+/- 245,454) = 1543 MB/s +test rust_compile::compile_huge ... bench: 95,142 ns/iter (+/- 10,195) +test rust_compile::compile_huge_bytes ... bench: 5,650,680 ns/iter (+/- 252,936) +test rust_compile::compile_huge_full ... 
bench: 10,867,986 ns/iter (+/- 275,259) +test rust_compile::compile_simple ... bench: 3,751 ns/iter (+/- 310) +test rust_compile::compile_simple_bytes ... bench: 3,664 ns/iter (+/- 172) +test rust_compile::compile_simple_full ... bench: 22,078 ns/iter (+/- 3,259) +test rust_compile::compile_small ... bench: 8,499 ns/iter (+/- 942) +test rust_compile::compile_small_bytes ... bench: 151,196 ns/iter (+/- 16,322) +test rust_compile::compile_small_full ... bench: 309,597 ns/iter (+/- 32,622) +test sherlock::before_after_holmes ... bench: 917,591 ns/iter (+/- 55,643) = 648 MB/s +test sherlock::before_holmes ... bench: 62,726 ns/iter (+/- 8,861) = 9484 MB/s +test sherlock::everything_greedy ... bench: 2,036,050 ns/iter (+/- 152,461) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,690 ns/iter (+/- 71,089) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,258 ns/iter (+/- 8,294) = 5598 MB/s +test sherlock::holmes_coword_watson ... bench: 481,086 ns/iter (+/- 60,212) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,033 ns/iter (+/- 8,912) = 1847 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,523 ns/iter (+/- 89,630) = 557 MB/s +test sherlock::letters ... bench: 22,745,932 ns/iter (+/- 428,787) = 26 MB/s +test sherlock::letters_lower ... bench: 22,228,365 ns/iter (+/- 495,287) = 26 MB/s +test sherlock::letters_upper ... bench: 1,775,941 ns/iter (+/- 158,985) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,327 ns/iter (+/- 49,085) = 663 MB/s +test sherlock::name_alt1 ... bench: 32,008 ns/iter (+/- 4,011) = 18587 MB/s +test sherlock::name_alt2 ... bench: 86,850 ns/iter (+/- 5,463) = 6850 MB/s +test sherlock::name_alt3 ... bench: 98,359 ns/iter (+/- 14,052) = 6048 MB/s +test sherlock::name_alt3_nocase ... bench: 381,147 ns/iter (+/- 16,996) = 1560 MB/s +test sherlock::name_alt4 ... bench: 121,025 ns/iter (+/- 16,654) = 4915 MB/s +test sherlock::name_alt4_nocase ... 
bench: 188,972 ns/iter (+/- 26,145) = 3148 MB/s +test sherlock::name_alt5 ... bench: 91,832 ns/iter (+/- 6,188) = 6478 MB/s +test sherlock::name_alt5_nocase ... bench: 351,422 ns/iter (+/- 49,084) = 1692 MB/s +test sherlock::name_holmes ... bench: 33,405 ns/iter (+/- 3,113) = 17809 MB/s +test sherlock::name_holmes_nocase ... bench: 134,899 ns/iter (+/- 10,883) = 4410 MB/s +test sherlock::name_sherlock ... bench: 22,455 ns/iter (+/- 2,027) = 26494 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,283 ns/iter (+/- 2,281) = 26698 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,492 ns/iter (+/- 6,496) = 6102 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,627 ns/iter (+/- 8,442) = 6221 MB/s +test sherlock::name_whitespace ... bench: 30,702 ns/iter (+/- 4,194) = 19377 MB/s +test sherlock::no_match_common ... bench: 19,616 ns/iter (+/- 2,677) = 30328 MB/s +test sherlock::no_match_really_common ... bench: 25,601 ns/iter (+/- 2,506) = 23238 MB/s +test sherlock::no_match_uncommon ... bench: 19,641 ns/iter (+/- 2,175) = 30290 MB/s +test sherlock::quotes ... bench: 369,048 ns/iter (+/- 25,898) = 1612 MB/s +test sherlock::repeated_class_negation ... bench: 75,780,396 ns/iter (+/- 1,032,817) = 7 MB/s +test sherlock::the_lower ... bench: 327,762 ns/iter (+/- 48,769) = 1815 MB/s +test sherlock::the_nocase ... bench: 532,075 ns/iter (+/- 40,117) = 1118 MB/s +test sherlock::the_upper ... bench: 45,197 ns/iter (+/- 1,621) = 13163 MB/s +test sherlock::the_whitespace ... bench: 819,239 ns/iter (+/- 81,388) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,716,625 ns/iter (+/- 120,247) = 346 MB/s +test sherlock::words ... bench: 8,690,764 ns/iter (+/- 322,915) = 68 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 114.31s + diff --git a/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-after-02 b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-after-02 new file mode 100644 index 0000000000..60d057836c --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-after-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 2) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 2) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 1) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 1) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 1) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 4) = 26769 MB/s +test misc::easy1_1MB ... bench: 43 ns/iter (+/- 3) = 24385953 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 4) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 3) = 840717 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 5) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 7) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 5) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 6) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 4) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 0) = 4250 MB/s +test misc::long_needle1 ... bench: 3,251 ns/iter (+/- 333) = 30760 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 24,612) = 281 MB/s +test misc::match_class ... bench: 66 ns/iter (+/- 1) = 1227 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 1) = 5785 MB/s +test misc::match_class_unicode ... 
bench: 254 ns/iter (+/- 25) = 633 MB/s +test misc::matches_set ... bench: 456 ns/iter (+/- 17) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 2) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 2) = 2186400 MB/s +test misc::no_exponential ... bench: 403 ns/iter (+/- 55) = 248 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 5) = 509 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 1) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 148) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,299 ns/iter (+/- 142,145) = 665 MB/s +test misc::reallyhard_32 ... bench: 103 ns/iter (+/- 8) = 572 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 3,202) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 16) +test misc::reverse_suffix_no_quadratic ... bench: 4,168 ns/iter (+/- 227) = 1919 MB/s +test misc::short_haystack_1000000x ... bench: 132,733 ns/iter (+/- 18,141) = 60271 MB/s +test misc::short_haystack_100000x ... bench: 14,468 ns/iter (+/- 1,777) = 55295 MB/s +test misc::short_haystack_10000x ... bench: 6,316 ns/iter (+/- 360) = 12667 MB/s +test misc::short_haystack_1000x ... bench: 474 ns/iter (+/- 69) = 16900 MB/s +test misc::short_haystack_100x ... bench: 229 ns/iter (+/- 32) = 3541 MB/s +test misc::short_haystack_10x ... bench: 212 ns/iter (+/- 18) = 429 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 28) = 92 MB/s +test misc::short_haystack_2x ... bench: 207 ns/iter (+/- 20) = 130 MB/s +test misc::short_haystack_3x ... 
bench: 213 ns/iter (+/- 7) = 164 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 9) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,050,847 ns/iter (+/- 346,484) = 421 MB/s +test regexdna::subst1 ... bench: 817,689 ns/iter (+/- 104,629) = 6216 MB/s +test regexdna::subst10 ... bench: 788,728 ns/iter (+/- 66,497) = 6445 MB/s +test regexdna::subst11 ... bench: 787,188 ns/iter (+/- 49,158) = 6457 MB/s +test regexdna::subst2 ... bench: 787,143 ns/iter (+/- 108,541) = 6458 MB/s +test regexdna::subst3 ... bench: 792,452 ns/iter (+/- 32,963) = 6414 MB/s +test regexdna::subst4 ... bench: 820,043 ns/iter (+/- 71,037) = 6198 MB/s +test regexdna::subst5 ... bench: 790,043 ns/iter (+/- 39,234) = 6434 MB/s +test regexdna::subst6 ... bench: 785,007 ns/iter (+/- 18,701) = 6475 MB/s +test regexdna::subst7 ... bench: 789,393 ns/iter (+/- 51,525) = 6439 MB/s +test regexdna::subst8 ... bench: 784,190 ns/iter (+/- 90,675) = 6482 MB/s +test regexdna::subst9 ... bench: 789,021 ns/iter (+/- 88,256) = 6442 MB/s +test regexdna::variant1 ... bench: 2,237,592 ns/iter (+/- 146,174) = 2271 MB/s +test regexdna::variant2 ... bench: 3,255,382 ns/iter (+/- 179,473) = 1561 MB/s +test regexdna::variant3 ... bench: 3,812,799 ns/iter (+/- 210,786) = 1333 MB/s +test regexdna::variant4 ... bench: 3,853,476 ns/iter (+/- 263,442) = 1319 MB/s +test regexdna::variant5 ... bench: 2,696,756 ns/iter (+/- 161,353) = 1885 MB/s +test regexdna::variant6 ... bench: 2,683,221 ns/iter (+/- 149,650) = 1894 MB/s +test regexdna::variant7 ... bench: 3,289,426 ns/iter (+/- 209,217) = 1545 MB/s +test regexdna::variant8 ... bench: 3,362,858 ns/iter (+/- 274,273) = 1511 MB/s +test regexdna::variant9 ... bench: 3,287,253 ns/iter (+/- 188,894) = 1546 MB/s +test rust_compile::compile_huge ... bench: 94,912 ns/iter (+/- 12,311) +test rust_compile::compile_huge_bytes ... bench: 5,534,281 ns/iter (+/- 192,069) +test rust_compile::compile_huge_full ... 
bench: 10,969,970 ns/iter (+/- 312,230) +test rust_compile::compile_simple ... bench: 3,523 ns/iter (+/- 525) +test rust_compile::compile_simple_bytes ... bench: 3,564 ns/iter (+/- 355) +test rust_compile::compile_simple_full ... bench: 19,887 ns/iter (+/- 1,885) +test rust_compile::compile_small ... bench: 8,294 ns/iter (+/- 1,123) +test rust_compile::compile_small_bytes ... bench: 153,070 ns/iter (+/- 20,825) +test rust_compile::compile_small_full ... bench: 313,318 ns/iter (+/- 28,271) +test sherlock::before_after_holmes ... bench: 907,585 ns/iter (+/- 86,027) = 655 MB/s +test sherlock::before_holmes ... bench: 62,765 ns/iter (+/- 6,413) = 9478 MB/s +test sherlock::everything_greedy ... bench: 2,033,519 ns/iter (+/- 97,963) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,514 ns/iter (+/- 48,247) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 107,788 ns/iter (+/- 15,545) = 5519 MB/s +test sherlock::holmes_coword_watson ... bench: 482,686 ns/iter (+/- 49,033) = 1232 MB/s +test sherlock::ing_suffix ... bench: 322,901 ns/iter (+/- 46,329) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,799 ns/iter (+/- 57,022) = 557 MB/s +test sherlock::letters ... bench: 22,823,246 ns/iter (+/- 472,094) = 26 MB/s +test sherlock::letters_lower ... bench: 22,137,278 ns/iter (+/- 443,188) = 26 MB/s +test sherlock::letters_upper ... bench: 1,773,598 ns/iter (+/- 96,994) = 335 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,623 ns/iter (+/- 48,509) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,882 ns/iter (+/- 3,354) = 18660 MB/s +test sherlock::name_alt2 ... bench: 86,500 ns/iter (+/- 7,997) = 6877 MB/s +test sherlock::name_alt3 ... bench: 98,159 ns/iter (+/- 6,106) = 6060 MB/s +test sherlock::name_alt3_nocase ... bench: 383,858 ns/iter (+/- 19,224) = 1549 MB/s +test sherlock::name_alt4 ... bench: 122,489 ns/iter (+/- 17,271) = 4857 MB/s +test sherlock::name_alt4_nocase ... 
bench: 192,081 ns/iter (+/- 10,999) = 3097 MB/s +test sherlock::name_alt5 ... bench: 91,396 ns/iter (+/- 6,399) = 6509 MB/s +test sherlock::name_alt5_nocase ... bench: 354,804 ns/iter (+/- 26,158) = 1676 MB/s +test sherlock::name_holmes ... bench: 33,569 ns/iter (+/- 4,647) = 17722 MB/s +test sherlock::name_holmes_nocase ... bench: 136,387 ns/iter (+/- 14,005) = 4362 MB/s +test sherlock::name_sherlock ... bench: 22,468 ns/iter (+/- 1,144) = 26479 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,279 ns/iter (+/- 1,563) = 26703 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 98,003 ns/iter (+/- 10,978) = 6070 MB/s +test sherlock::name_sherlock_nocase ... bench: 96,130 ns/iter (+/- 4,373) = 6188 MB/s +test sherlock::name_whitespace ... bench: 30,532 ns/iter (+/- 3,125) = 19485 MB/s +test sherlock::no_match_common ... bench: 19,644 ns/iter (+/- 2,118) = 30285 MB/s +test sherlock::no_match_really_common ... bench: 25,374 ns/iter (+/- 1,538) = 23446 MB/s +test sherlock::no_match_uncommon ... bench: 19,602 ns/iter (+/- 427) = 30350 MB/s +test sherlock::quotes ... bench: 369,657 ns/iter (+/- 52,406) = 1609 MB/s +test sherlock::repeated_class_negation ... bench: 76,922,839 ns/iter (+/- 1,261,770) = 7 MB/s +test sherlock::the_lower ... bench: 326,221 ns/iter (+/- 35,683) = 1823 MB/s +test sherlock::the_nocase ... bench: 525,254 ns/iter (+/- 26,000) = 1132 MB/s +test sherlock::the_upper ... bench: 44,702 ns/iter (+/- 5,012) = 13308 MB/s +test sherlock::the_whitespace ... bench: 814,494 ns/iter (+/- 66,715) = 730 MB/s +test sherlock::word_ending_n ... bench: 1,705,139 ns/iter (+/- 97,420) = 348 MB/s +test sherlock::words ... bench: 8,632,437 ns/iter (+/- 278,177) = 68 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 106.01s + diff --git a/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-before-01 b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-before-01 new file mode 100644 index 0000000000..1316e6d695 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-before-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 17 ns/iter (+/- 1) = 22941 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 1) = 87583 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 2) = 2732916 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 5) = 26769 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 6) = 26214900 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 3) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 5) = 840717 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 1) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 2) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 2) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 3) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 1) = 4636 MB/s +test misc::long_needle1 ... bench: 1,161 ns/iter (+/- 54) = 86133 MB/s +test misc::long_needle2 ... bench: 680,687 ns/iter (+/- 63,713) = 146 MB/s +test misc::match_class ... bench: 69 ns/iter (+/- 4) = 1173 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::match_class_unicode ... 
bench: 253 ns/iter (+/- 9) = 636 MB/s +test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 9) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 2) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 4) = 472 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 2) = 435 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 225) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,580,163 ns/iter (+/- 224,935) = 663 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 6) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,318 ns/iter (+/- 6,046) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 14) +test misc::reverse_suffix_no_quadratic ... bench: 4,240 ns/iter (+/- 117) = 1886 MB/s +test misc::short_haystack_1000000x ... bench: 89,004 ns/iter (+/- 2,927) = 89883 MB/s +test misc::short_haystack_100000x ... bench: 10,349 ns/iter (+/- 334) = 77303 MB/s +test misc::short_haystack_10000x ... bench: 5,835 ns/iter (+/- 700) = 13712 MB/s +test misc::short_haystack_1000x ... bench: 563 ns/iter (+/- 33) = 14229 MB/s +test misc::short_haystack_100x ... bench: 260 ns/iter (+/- 21) = 3119 MB/s +test misc::short_haystack_10x ... bench: 221 ns/iter (+/- 31) = 411 MB/s +test misc::short_haystack_1x ... bench: 211 ns/iter (+/- 30) = 90 MB/s +test misc::short_haystack_2x ... bench: 213 ns/iter (+/- 19) = 126 MB/s +test misc::short_haystack_3x ... 
bench: 212 ns/iter (+/- 7) = 165 MB/s +test misc::short_haystack_4x ... bench: 221 ns/iter (+/- 26) = 194 MB/s +test regexdna::find_new_lines ... bench: 12,035,248 ns/iter (+/- 362,122) = 422 MB/s +test regexdna::subst1 ... bench: 787,853 ns/iter (+/- 29,667) = 6452 MB/s +test regexdna::subst10 ... bench: 750,718 ns/iter (+/- 103,118) = 6771 MB/s +test regexdna::subst11 ... bench: 749,377 ns/iter (+/- 103,312) = 6783 MB/s +test regexdna::subst2 ... bench: 748,785 ns/iter (+/- 83,175) = 6788 MB/s +test regexdna::subst3 ... bench: 755,004 ns/iter (+/- 75,589) = 6732 MB/s +test regexdna::subst4 ... bench: 747,617 ns/iter (+/- 70,600) = 6799 MB/s +test regexdna::subst5 ... bench: 752,458 ns/iter (+/- 86,154) = 6755 MB/s +test regexdna::subst6 ... bench: 749,801 ns/iter (+/- 102,642) = 6779 MB/s +test regexdna::subst7 ... bench: 760,975 ns/iter (+/- 105,159) = 6680 MB/s +test regexdna::subst8 ... bench: 749,002 ns/iter (+/- 82,082) = 6786 MB/s +test regexdna::subst9 ... bench: 751,248 ns/iter (+/- 100,152) = 6766 MB/s +test regexdna::variant1 ... bench: 2,211,035 ns/iter (+/- 150,147) = 2299 MB/s +test regexdna::variant2 ... bench: 3,210,193 ns/iter (+/- 161,942) = 1583 MB/s +test regexdna::variant3 ... bench: 3,793,641 ns/iter (+/- 203,795) = 1339 MB/s +test regexdna::variant4 ... bench: 3,799,721 ns/iter (+/- 140,933) = 1337 MB/s +test regexdna::variant5 ... bench: 2,652,750 ns/iter (+/- 185,489) = 1916 MB/s +test regexdna::variant6 ... bench: 2,633,257 ns/iter (+/- 211,323) = 1930 MB/s +test regexdna::variant7 ... bench: 3,268,111 ns/iter (+/- 176,273) = 1555 MB/s +test regexdna::variant8 ... bench: 3,331,333 ns/iter (+/- 264,431) = 1525 MB/s +test regexdna::variant9 ... bench: 3,268,398 ns/iter (+/- 298,223) = 1555 MB/s +test rust_compile::compile_huge ... bench: 94,562 ns/iter (+/- 2,194) +test rust_compile::compile_huge_bytes ... bench: 5,611,428 ns/iter (+/- 202,365) +test rust_compile::compile_huge_full ... 
bench: 10,933,505 ns/iter (+/- 325,078) +test rust_compile::compile_simple ... bench: 3,496 ns/iter (+/- 156) +test rust_compile::compile_simple_bytes ... bench: 3,572 ns/iter (+/- 389) +test rust_compile::compile_simple_full ... bench: 20,283 ns/iter (+/- 1,894) +test rust_compile::compile_small ... bench: 8,475 ns/iter (+/- 1,008) +test rust_compile::compile_small_bytes ... bench: 157,446 ns/iter (+/- 11,319) +test rust_compile::compile_small_full ... bench: 316,041 ns/iter (+/- 23,620) +test sherlock::before_after_holmes ... bench: 906,578 ns/iter (+/- 129,507) = 656 MB/s +test sherlock::before_holmes ... bench: 64,715 ns/iter (+/- 9,107) = 9193 MB/s +test sherlock::everything_greedy ... bench: 2,065,017 ns/iter (+/- 156,855) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 810,672 ns/iter (+/- 100,547) = 733 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,124 ns/iter (+/- 10,948) = 5606 MB/s +test sherlock::holmes_coword_watson ... bench: 488,503 ns/iter (+/- 63,243) = 1217 MB/s +test sherlock::ing_suffix ... bench: 384,936 ns/iter (+/- 25,316) = 1545 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,060,294 ns/iter (+/- 152,263) = 561 MB/s +test sherlock::letters ... bench: 22,127,059 ns/iter (+/- 413,502) = 26 MB/s +test sherlock::letters_lower ... bench: 21,535,012 ns/iter (+/- 463,835) = 27 MB/s +test sherlock::letters_upper ... bench: 1,758,480 ns/iter (+/- 130,352) = 338 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,329 ns/iter (+/- 96,625) = 663 MB/s +test sherlock::name_alt1 ... bench: 31,585 ns/iter (+/- 2,796) = 18835 MB/s +test sherlock::name_alt2 ... bench: 86,223 ns/iter (+/- 9,553) = 6899 MB/s +test sherlock::name_alt3 ... bench: 97,177 ns/iter (+/- 11,479) = 6122 MB/s +test sherlock::name_alt3_nocase ... bench: 381,511 ns/iter (+/- 55,025) = 1559 MB/s +test sherlock::name_alt4 ... bench: 121,672 ns/iter (+/- 9,253) = 4889 MB/s +test sherlock::name_alt4_nocase ... 
bench: 187,887 ns/iter (+/- 26,932) = 3166 MB/s +test sherlock::name_alt5 ... bench: 90,732 ns/iter (+/- 7,251) = 6557 MB/s +test sherlock::name_alt5_nocase ... bench: 352,388 ns/iter (+/- 50,408) = 1688 MB/s +test sherlock::name_holmes ... bench: 33,836 ns/iter (+/- 3,388) = 17582 MB/s +test sherlock::name_holmes_nocase ... bench: 133,068 ns/iter (+/- 7,602) = 4470 MB/s +test sherlock::name_sherlock ... bench: 62,719 ns/iter (+/- 8,927) = 9485 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,688 ns/iter (+/- 2,482) = 24098 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,793 ns/iter (+/- 12,078) = 6083 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,772 ns/iter (+/- 13,713) = 6211 MB/s +test sherlock::name_whitespace ... bench: 70,942 ns/iter (+/- 5,565) = 8386 MB/s +test sherlock::no_match_common ... bench: 14,645 ns/iter (+/- 1,430) = 40623 MB/s +test sherlock::no_match_really_common ... bench: 239,346 ns/iter (+/- 17,292) = 2485 MB/s +test sherlock::no_match_uncommon ... bench: 14,637 ns/iter (+/- 1,360) = 40645 MB/s +test sherlock::quotes ... bench: 367,945 ns/iter (+/- 35,370) = 1616 MB/s +test sherlock::repeated_class_negation ... bench: 74,367,046 ns/iter (+/- 1,114,875) = 7 MB/s +test sherlock::the_lower ... bench: 463,888 ns/iter (+/- 67,551) = 1282 MB/s +test sherlock::the_nocase ... bench: 520,822 ns/iter (+/- 76,131) = 1142 MB/s +test sherlock::the_upper ... bench: 37,354 ns/iter (+/- 4,110) = 15926 MB/s +test sherlock::the_whitespace ... bench: 922,312 ns/iter (+/- 95,082) = 645 MB/s +test sherlock::word_ending_n ... bench: 1,679,343 ns/iter (+/- 165,580) = 354 MB/s +test sherlock::words ... bench: 8,280,082 ns/iter (+/- 290,280) = 71 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 113.49s + diff --git a/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-before-02 b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-before-02 new file mode 100644 index 0000000000..5d75102189 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/08-new-memmem/rust-before-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 17 ns/iter (+/- 0) = 22941 MB/s +test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 0) = 87583 MB/s +test misc::easy0_1MB ... bench: 14 ns/iter (+/- 1) = 74900214 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 1) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 1) = 2732916 MB/s +test misc::easy1_1K ... bench: 38 ns/iter (+/- 5) = 27473 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 5) = 26214900 MB/s +test misc::easy1_32 ... bench: 38 ns/iter (+/- 1) = 1368 MB/s +test misc::easy1_32K ... bench: 38 ns/iter (+/- 1) = 862842 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 4) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 7) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 1) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 6) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 0) = 4636 MB/s +test misc::long_needle1 ... bench: 1,179 ns/iter (+/- 92) = 84818 MB/s +test misc::long_needle2 ... bench: 680,418 ns/iter (+/- 27,142) = 146 MB/s +test misc::match_class ... bench: 68 ns/iter (+/- 6) = 1191 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 1) = 6230 MB/s +test misc::match_class_unicode ... 
bench: 253 ns/iter (+/- 33) = 636 MB/s +test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 1) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 12) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 6) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 2) = 472 MB/s +test misc::one_pass_short_not ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 64) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,581,975 ns/iter (+/- 126,709) = 662 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 4) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,323 ns/iter (+/- 7,063) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 624) = 1918 MB/s +test misc::short_haystack_1000000x ... bench: 88,960 ns/iter (+/- 7,710) = 89928 MB/s +test misc::short_haystack_100000x ... bench: 10,193 ns/iter (+/- 952) = 78486 MB/s +test misc::short_haystack_10000x ... bench: 5,798 ns/iter (+/- 636) = 13799 MB/s +test misc::short_haystack_1000x ... bench: 418 ns/iter (+/- 60) = 19165 MB/s +test misc::short_haystack_100x ... bench: 258 ns/iter (+/- 21) = 3143 MB/s +test misc::short_haystack_10x ... bench: 216 ns/iter (+/- 21) = 421 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 29) = 92 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 22) = 131 MB/s +test misc::short_haystack_3x ... 
bench: 205 ns/iter (+/- 29) = 170 MB/s +test misc::short_haystack_4x ... bench: 214 ns/iter (+/- 6) = 200 MB/s +test regexdna::find_new_lines ... bench: 12,039,715 ns/iter (+/- 410,515) = 422 MB/s +test regexdna::subst1 ... bench: 750,454 ns/iter (+/- 65,358) = 6773 MB/s +test regexdna::subst10 ... bench: 748,321 ns/iter (+/- 93,416) = 6793 MB/s +test regexdna::subst11 ... bench: 747,906 ns/iter (+/- 92,141) = 6796 MB/s +test regexdna::subst2 ... bench: 755,082 ns/iter (+/- 88,044) = 6732 MB/s +test regexdna::subst3 ... bench: 753,496 ns/iter (+/- 70,987) = 6746 MB/s +test regexdna::subst4 ... bench: 747,103 ns/iter (+/- 102,992) = 6804 MB/s +test regexdna::subst5 ... bench: 750,805 ns/iter (+/- 72,572) = 6770 MB/s +test regexdna::subst6 ... bench: 748,419 ns/iter (+/- 47,272) = 6792 MB/s +test regexdna::subst7 ... bench: 752,556 ns/iter (+/- 95,329) = 6754 MB/s +test regexdna::subst8 ... bench: 756,009 ns/iter (+/- 78,049) = 6724 MB/s +test regexdna::subst9 ... bench: 749,278 ns/iter (+/- 70,259) = 6784 MB/s +test regexdna::variant1 ... bench: 2,215,182 ns/iter (+/- 114,543) = 2294 MB/s +test regexdna::variant2 ... bench: 3,207,983 ns/iter (+/- 184,419) = 1584 MB/s +test regexdna::variant3 ... bench: 3,791,716 ns/iter (+/- 192,185) = 1340 MB/s +test regexdna::variant4 ... bench: 3,809,934 ns/iter (+/- 222,872) = 1334 MB/s +test regexdna::variant5 ... bench: 2,651,345 ns/iter (+/- 183,673) = 1917 MB/s +test regexdna::variant6 ... bench: 2,635,566 ns/iter (+/- 170,288) = 1928 MB/s +test regexdna::variant7 ... bench: 3,265,519 ns/iter (+/- 234,923) = 1556 MB/s +test regexdna::variant8 ... bench: 3,340,830 ns/iter (+/- 183,129) = 1521 MB/s +test regexdna::variant9 ... bench: 3,267,141 ns/iter (+/- 185,543) = 1555 MB/s +test rust_compile::compile_huge ... bench: 94,368 ns/iter (+/- 13,293) +test rust_compile::compile_huge_bytes ... bench: 5,616,594 ns/iter (+/- 243,462) +test rust_compile::compile_huge_full ... 
bench: 10,862,100 ns/iter (+/- 260,207) +test rust_compile::compile_simple ... bench: 3,463 ns/iter (+/- 350) +test rust_compile::compile_simple_bytes ... bench: 3,542 ns/iter (+/- 504) +test rust_compile::compile_simple_full ... bench: 20,562 ns/iter (+/- 3,117) +test rust_compile::compile_small ... bench: 8,325 ns/iter (+/- 641) +test rust_compile::compile_small_bytes ... bench: 153,450 ns/iter (+/- 11,174) +test rust_compile::compile_small_full ... bench: 315,871 ns/iter (+/- 33,828) +test sherlock::before_after_holmes ... bench: 906,423 ns/iter (+/- 34,801) = 656 MB/s +test sherlock::before_holmes ... bench: 64,457 ns/iter (+/- 8,343) = 9229 MB/s +test sherlock::everything_greedy ... bench: 2,058,675 ns/iter (+/- 208,885) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 810,638 ns/iter (+/- 39,955) = 733 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,048 ns/iter (+/- 8,158) = 5610 MB/s +test sherlock::holmes_coword_watson ... bench: 482,243 ns/iter (+/- 30,955) = 1233 MB/s +test sherlock::ing_suffix ... bench: 385,767 ns/iter (+/- 24,902) = 1542 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,060,762 ns/iter (+/- 94,273) = 560 MB/s +test sherlock::letters ... bench: 22,127,007 ns/iter (+/- 467,539) = 26 MB/s +test sherlock::letters_lower ... bench: 21,719,871 ns/iter (+/- 459,587) = 27 MB/s +test sherlock::letters_upper ... bench: 1,753,028 ns/iter (+/- 172,914) = 339 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,091 ns/iter (+/- 109,954) = 663 MB/s +test sherlock::name_alt1 ... bench: 31,636 ns/iter (+/- 2,323) = 18805 MB/s +test sherlock::name_alt2 ... bench: 85,898 ns/iter (+/- 10,486) = 6926 MB/s +test sherlock::name_alt3 ... bench: 97,104 ns/iter (+/- 8,851) = 6126 MB/s +test sherlock::name_alt3_nocase ... bench: 381,487 ns/iter (+/- 14,829) = 1559 MB/s +test sherlock::name_alt4 ... bench: 121,301 ns/iter (+/- 17,178) = 4904 MB/s +test sherlock::name_alt4_nocase ... 
bench: 187,262 ns/iter (+/- 17,478) = 3177 MB/s +test sherlock::name_alt5 ... bench: 90,773 ns/iter (+/- 2,791) = 6554 MB/s +test sherlock::name_alt5_nocase ... bench: 351,900 ns/iter (+/- 40,408) = 1690 MB/s +test sherlock::name_holmes ... bench: 34,767 ns/iter (+/- 3,334) = 17112 MB/s +test sherlock::name_holmes_nocase ... bench: 132,953 ns/iter (+/- 15,747) = 4474 MB/s +test sherlock::name_sherlock ... bench: 66,566 ns/iter (+/- 6,822) = 8937 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,481 ns/iter (+/- 2,330) = 24301 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,531 ns/iter (+/- 12,331) = 6099 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,808 ns/iter (+/- 13,250) = 6209 MB/s +test sherlock::name_whitespace ... bench: 71,342 ns/iter (+/- 9,877) = 8339 MB/s +test sherlock::no_match_common ... bench: 14,704 ns/iter (+/- 1,241) = 40460 MB/s +test sherlock::no_match_really_common ... bench: 238,731 ns/iter (+/- 31,179) = 2492 MB/s +test sherlock::no_match_uncommon ... bench: 14,620 ns/iter (+/- 1,250) = 40693 MB/s +test sherlock::quotes ... bench: 367,740 ns/iter (+/- 10,107) = 1617 MB/s +test sherlock::repeated_class_negation ... bench: 76,315,217 ns/iter (+/- 940,903) = 7 MB/s +test sherlock::the_lower ... bench: 464,322 ns/iter (+/- 14,654) = 1281 MB/s +test sherlock::the_nocase ... bench: 519,069 ns/iter (+/- 59,161) = 1146 MB/s +test sherlock::the_upper ... bench: 37,575 ns/iter (+/- 2,455) = 15833 MB/s +test sherlock::the_whitespace ... bench: 939,412 ns/iter (+/- 60,941) = 633 MB/s +test sherlock::word_ending_n ... bench: 1,681,192 ns/iter (+/- 156,265) = 353 MB/s +test sherlock::words ... bench: 8,213,141 ns/iter (+/- 322,533) = 72 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 94.52s + diff --git a/third_party/rust/regex/record/old-bench-log/09-new-baseline/pcre2 b/third_party/rust/regex/record/old-bench-log/09-new-baseline/pcre2 new file mode 100644 index 0000000000..595365d50a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/09-new-baseline/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_short_match ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::anchored_literal_short_non_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::easy0_1K ... bench: 32 ns/iter (+/- 1) = 32843 MB/s +test misc::easy0_1MB ... bench: 22,160 ns/iter (+/- 3,887) = 47319 MB/s +test misc::easy0_32 ... bench: 10 ns/iter (+/- 0) = 5900 MB/s +test misc::easy0_32K ... bench: 651 ns/iter (+/- 2) = 50376 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 1) = 29000 MB/s +test misc::easy1_1MB ... bench: 22,982 ns/iter (+/- 2,839) = 45626 MB/s +test misc::easy1_32 ... bench: 12 ns/iter (+/- 0) = 4333 MB/s +test misc::easy1_32K ... bench: 654 ns/iter (+/- 2) = 50134 MB/s +test misc::hard_1K ... bench: 469 ns/iter (+/- 9) = 2240 MB/s +test misc::hard_1MB ... bench: 733,962 ns/iter (+/- 28,297) = 1428 MB/s +test misc::hard_32 ... bench: 34 ns/iter (+/- 4) = 1735 MB/s +test misc::hard_32K ... bench: 19,567 ns/iter (+/- 363) = 1676 MB/s +test misc::literal ... bench: 8 ns/iter (+/- 0) = 6375 MB/s +test misc::long_needle1 ... bench: 257,858 ns/iter (+/- 646) = 387 MB/s +test misc::long_needle2 ... bench: 259,045 ns/iter (+/- 2,220) = 386 MB/s +test misc::match_class ... bench: 34 ns/iter (+/- 1) = 2382 MB/s +test misc::match_class_in_range ... bench: 9 ns/iter (+/- 0) = 9000 MB/s +test misc::match_class_unicode ... bench: 125 ns/iter (+/- 3) = 1288 MB/s +test misc::medium_1K ... 
bench: 35 ns/iter (+/- 3) = 30057 MB/s +test misc::medium_1MB ... bench: 21,126 ns/iter (+/- 4,036) = 49635 MB/s +test misc::medium_32 ... bench: 10 ns/iter (+/- 0) = 6000 MB/s +test misc::medium_32K ... bench: 714 ns/iter (+/- 122) = 45932 MB/s +test misc::not_literal ... bench: 62 ns/iter (+/- 2) = 822 MB/s +test misc::one_pass_long_prefix ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::one_pass_long_prefix_not ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::one_pass_short ... bench: 19 ns/iter (+/- 1) = 894 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 1) = 894 MB/s +test misc::reallyhard2_1K ... bench: 1,704 ns/iter (+/- 17) = 610 MB/s +test misc::reallyhard_1K ... bench: 495 ns/iter (+/- 9) = 2123 MB/s +test misc::reallyhard_1MB ... bench: 682,371 ns/iter (+/- 31,284) = 1536 MB/s +test misc::reallyhard_32 ... bench: 34 ns/iter (+/- 2) = 1735 MB/s +test misc::reallyhard_32K ... bench: 17,994 ns/iter (+/- 540) = 1822 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 1,507 ns/iter (+/- 10) = 5308 MB/s +test regexdna::find_new_lines ... bench: 849,983 ns/iter (+/- 25,898) = 5980 MB/s +test regexdna::subst1 ... bench: 520,602 ns/iter (+/- 12,170) = 9764 MB/s +test regexdna::subst10 ... bench: 517,151 ns/iter (+/- 19,060) = 9829 MB/s +test regexdna::subst11 ... bench: 519,209 ns/iter (+/- 12,477) = 9790 MB/s +test regexdna::subst2 ... bench: 513,418 ns/iter (+/- 19,803) = 9901 MB/s +test regexdna::subst3 ... bench: 514,166 ns/iter (+/- 13,019) = 9886 MB/s +test regexdna::subst4 ... bench: 517,808 ns/iter (+/- 30,655) = 9817 MB/s +test regexdna::subst5 ... bench: 516,922 ns/iter (+/- 17,204) = 9834 MB/s +test regexdna::subst6 ... bench: 509,430 ns/iter (+/- 20,608) = 9978 MB/s +test regexdna::subst7 ... bench: 519,437 ns/iter (+/- 10,537) = 9786 MB/s +test regexdna::subst8 ... bench: 520,282 ns/iter (+/- 25,742) = 9770 MB/s +test regexdna::subst9 ... bench: 512,819 ns/iter (+/- 11,443) = 9912 MB/s +test regexdna::variant1 ... 
bench: 5,302,526 ns/iter (+/- 158,370) = 958 MB/s +test regexdna::variant2 ... bench: 7,421,107 ns/iter (+/- 105,716) = 684 MB/s +test regexdna::variant3 ... bench: 7,310,968 ns/iter (+/- 103,989) = 695 MB/s +test regexdna::variant4 ... bench: 6,152,891 ns/iter (+/- 144,194) = 826 MB/s +test regexdna::variant5 ... bench: 5,717,515 ns/iter (+/- 42,902) = 889 MB/s +test regexdna::variant6 ... bench: 5,840,938 ns/iter (+/- 47,730) = 870 MB/s +test regexdna::variant7 ... bench: 6,624,859 ns/iter (+/- 37,376) = 767 MB/s +test regexdna::variant8 ... bench: 7,308,342 ns/iter (+/- 58,395) = 695 MB/s +test regexdna::variant9 ... bench: 7,372,260 ns/iter (+/- 76,966) = 689 MB/s +test sherlock::before_after_holmes ... bench: 2,817,108 ns/iter (+/- 18,002) = 211 MB/s +test sherlock::before_holmes ... bench: 2,841,515 ns/iter (+/- 14,677) = 209 MB/s +test sherlock::holmes_cochar_watson ... bench: 33,066 ns/iter (+/- 1,766) = 17992 MB/s +test sherlock::ing_suffix ... bench: 1,299,382 ns/iter (+/- 19,674) = 457 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,799,189 ns/iter (+/- 33,841) = 212 MB/s +test sherlock::letters ... bench: 4,923,399 ns/iter (+/- 111,904) = 120 MB/s +test sherlock::letters_lower ... bench: 5,057,224 ns/iter (+/- 102,860) = 117 MB/s +test sherlock::letters_upper ... bench: 874,306 ns/iter (+/- 10,587) = 680 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,876 ns/iter (+/- 58) = 37473 MB/s +test sherlock::name_alt1 ... bench: 19,349 ns/iter (+/- 201) = 30747 MB/s +test sherlock::name_alt2 ... bench: 29,916 ns/iter (+/- 581) = 19886 MB/s +test sherlock::name_alt3 ... bench: 461,887 ns/iter (+/- 5,337) = 1288 MB/s +test sherlock::name_alt3_nocase ... bench: 1,813,574 ns/iter (+/- 27,519) = 328 MB/s +test sherlock::name_alt4 ... bench: 30,155 ns/iter (+/- 1,407) = 19729 MB/s +test sherlock::name_alt4_nocase ... bench: 822,605 ns/iter (+/- 56,624) = 723 MB/s +test sherlock::name_alt5 ... 
bench: 426,318 ns/iter (+/- 12,233) = 1395 MB/s +test sherlock::name_alt5_nocase ... bench: 1,012,097 ns/iter (+/- 27,806) = 587 MB/s +test sherlock::name_holmes ... bench: 19,833 ns/iter (+/- 499) = 29997 MB/s +test sherlock::name_holmes_nocase ... bench: 40,266 ns/iter (+/- 2,089) = 14775 MB/s +test sherlock::name_sherlock ... bench: 14,589 ns/iter (+/- 115) = 40779 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,213 ns/iter (+/- 81) = 41858 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 602,296 ns/iter (+/- 98,066) = 987 MB/s +test sherlock::name_sherlock_nocase ... bench: 479,745 ns/iter (+/- 18,070) = 1240 MB/s +test sherlock::name_whitespace ... bench: 14,584 ns/iter (+/- 44) = 40793 MB/s +test sherlock::no_match_common ... bench: 13,499 ns/iter (+/- 1,090) = 44072 MB/s +test sherlock::no_match_really_common ... bench: 12,507 ns/iter (+/- 1,238) = 47568 MB/s +test sherlock::no_match_uncommon ... bench: 11,534 ns/iter (+/- 9) = 51580 MB/s +test sherlock::quotes ... bench: 251,867 ns/iter (+/- 11,818) = 2362 MB/s +test sherlock::repeated_class_negation ... bench: 2,969,330 ns/iter (+/- 287,150) = 200 MB/s +test sherlock::the_lower ... bench: 206,513 ns/iter (+/- 3,294) = 2880 MB/s +test sherlock::the_nocase ... bench: 237,655 ns/iter (+/- 6,616) = 2503 MB/s +test sherlock::the_upper ... bench: 23,922 ns/iter (+/- 510) = 24869 MB/s +test sherlock::the_whitespace ... bench: 326,257 ns/iter (+/- 10,038) = 1823 MB/s +test sherlock::word_ending_n ... bench: 3,264,085 ns/iter (+/- 57,242) = 182 MB/s +test sherlock::words ... bench: 3,161,731 ns/iter (+/- 45,794) = 188 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out; finished in 184.16s + diff --git a/third_party/rust/regex/record/old-bench-log/09-new-baseline/re2 b/third_party/rust/regex/record/old-bench-log/09-new-baseline/re2 new file mode 100644 index 0000000000..9bae2a1747 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/09-new-baseline/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 26 ns/iter (+/- 0) = 15000 MB/s +test misc::anchored_literal_long_non_match ... bench: 4 ns/iter (+/- 0) = 97500 MB/s +test misc::anchored_literal_short_match ... bench: 26 ns/iter (+/- 0) = 1000 MB/s +test misc::anchored_literal_short_non_match ... bench: 4 ns/iter (+/- 0) = 6500 MB/s +test misc::easy0_1K ... bench: 50 ns/iter (+/- 0) = 21020 MB/s +test misc::easy0_1MB ... bench: 51 ns/iter (+/- 0) = 20560843 MB/s +test misc::easy0_32 ... bench: 50 ns/iter (+/- 0) = 1180 MB/s +test misc::easy0_32K ... bench: 50 ns/iter (+/- 0) = 655900 MB/s +test misc::easy1_1K ... bench: 43 ns/iter (+/- 1) = 24279 MB/s +test misc::easy1_1MB ... bench: 43 ns/iter (+/- 0) = 24385953 MB/s +test misc::easy1_32 ... bench: 43 ns/iter (+/- 1) = 1209 MB/s +test misc::easy1_32K ... bench: 43 ns/iter (+/- 0) = 762511 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 0) = 21020 MB/s +test misc::hard_1MB ... bench: 50 ns/iter (+/- 0) = 20972060 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 0) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 0) = 655900 MB/s +test misc::literal ... bench: 25 ns/iter (+/- 0) = 2040 MB/s +test misc::long_needle1 ... bench: 356,319 ns/iter (+/- 680) = 280 MB/s +test misc::long_needle2 ... bench: 356,384 ns/iter (+/- 3,126) = 280 MB/s +test misc::match_class ... bench: 94 ns/iter (+/- 0) = 861 MB/s +test misc::match_class_in_range ... bench: 94 ns/iter (+/- 0) = 861 MB/s +test misc::match_class_unicode ... bench: 168 ns/iter (+/- 1) = 958 MB/s +test misc::medium_1K ... 
bench: 51 ns/iter (+/- 0) = 20627 MB/s +test misc::medium_1MB ... bench: 51 ns/iter (+/- 0) = 20560862 MB/s +test misc::medium_32 ... bench: 51 ns/iter (+/- 0) = 1176 MB/s +test misc::medium_32K ... bench: 51 ns/iter (+/- 1) = 643058 MB/s +test misc::no_exponential ... bench: 112 ns/iter (+/- 0) = 892 MB/s +test misc::not_literal ... bench: 66 ns/iter (+/- 0) = 772 MB/s +test misc::one_pass_long_prefix ... bench: 25 ns/iter (+/- 0) = 1040 MB/s +test misc::one_pass_long_prefix_not ... bench: 44 ns/iter (+/- 0) = 590 MB/s +test misc::one_pass_short ... bench: 43 ns/iter (+/- 0) = 395 MB/s +test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 0) = 414 MB/s +test misc::reallyhard2_1K ... bench: 978 ns/iter (+/- 7) = 1063 MB/s +test misc::reallyhard_1K ... bench: 987 ns/iter (+/- 11) = 1064 MB/s +test misc::reallyhard_1MB ... bench: 957,501 ns/iter (+/- 8,247) = 1095 MB/s +test misc::reallyhard_32 ... bench: 73 ns/iter (+/- 0) = 808 MB/s +test misc::reallyhard_32K ... bench: 30,057 ns/iter (+/- 315) = 1091 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 7,368 ns/iter (+/- 105) = 1085 MB/s +test regexdna::find_new_lines ... bench: 15,567,882 ns/iter (+/- 48,213) = 326 MB/s +test regexdna::subst1 ... bench: 2,011,288 ns/iter (+/- 23,092) = 2527 MB/s +test regexdna::subst10 ... bench: 2,013,337 ns/iter (+/- 33,388) = 2524 MB/s +test regexdna::subst11 ... bench: 2,005,968 ns/iter (+/- 25,799) = 2534 MB/s +test regexdna::subst2 ... bench: 2,022,572 ns/iter (+/- 23,311) = 2513 MB/s +test regexdna::subst3 ... bench: 2,018,386 ns/iter (+/- 32,071) = 2518 MB/s +test regexdna::subst4 ... bench: 2,013,345 ns/iter (+/- 32,599) = 2524 MB/s +test regexdna::subst5 ... bench: 2,015,871 ns/iter (+/- 25,081) = 2521 MB/s +test regexdna::subst6 ... bench: 2,008,492 ns/iter (+/- 24,502) = 2530 MB/s +test regexdna::subst7 ... bench: 2,018,804 ns/iter (+/- 38,700) = 2518 MB/s +test regexdna::subst8 ... bench: 2,010,856 ns/iter (+/- 23,695) = 2527 MB/s +test regexdna::subst9 ... 
bench: 2,023,767 ns/iter (+/- 17,040) = 2511 MB/s +test regexdna::variant1 ... bench: 4,688,839 ns/iter (+/- 19,258) = 1084 MB/s +test regexdna::variant2 ... bench: 4,693,463 ns/iter (+/- 31,741) = 1083 MB/s +test regexdna::variant3 ... bench: 4,674,020 ns/iter (+/- 15,755) = 1087 MB/s +test regexdna::variant4 ... bench: 4,666,017 ns/iter (+/- 16,318) = 1089 MB/s +test regexdna::variant5 ... bench: 4,682,965 ns/iter (+/- 17,552) = 1085 MB/s +test regexdna::variant6 ... bench: 4,661,825 ns/iter (+/- 21,667) = 1090 MB/s +test regexdna::variant7 ... bench: 4,697,959 ns/iter (+/- 24,282) = 1082 MB/s +test regexdna::variant8 ... bench: 4,700,703 ns/iter (+/- 21,377) = 1081 MB/s +test regexdna::variant9 ... bench: 4,665,298 ns/iter (+/- 19,086) = 1089 MB/s +test sherlock::before_after_holmes ... bench: 560,350 ns/iter (+/- 3,852) = 1061 MB/s +test sherlock::before_holmes ... bench: 574,423 ns/iter (+/- 4,638) = 1035 MB/s +test sherlock::everything_greedy ... bench: 2,688,852 ns/iter (+/- 16,320) = 221 MB/s +test sherlock::everything_greedy_nl ... bench: 1,206,136 ns/iter (+/- 6,173) = 493 MB/s +test sherlock::holmes_cochar_watson ... bench: 547,910 ns/iter (+/- 7,147) = 1085 MB/s +test sherlock::holmes_coword_watson ... bench: 610,803 ns/iter (+/- 1,029) = 974 MB/s +test sherlock::ing_suffix ... bench: 777,478 ns/iter (+/- 3,028) = 765 MB/s +test sherlock::ing_suffix_limited_space ... bench: 725,653 ns/iter (+/- 4,746) = 819 MB/s +test sherlock::letters ... bench: 25,265,004 ns/iter (+/- 120,234) = 23 MB/s +test sherlock::letters_lower ... bench: 24,615,621 ns/iter (+/- 134,875) = 24 MB/s +test sherlock::letters_upper ... bench: 1,485,920 ns/iter (+/- 21,446) = 400 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 547,229 ns/iter (+/- 3,726) = 1087 MB/s +test sherlock::name_alt1 ... bench: 18,148 ns/iter (+/- 131) = 32782 MB/s +test sherlock::name_alt2 ... bench: 586,335 ns/iter (+/- 3,679) = 1014 MB/s +test sherlock::name_alt3 ... 
bench: 601,096 ns/iter (+/- 3,781) = 989 MB/s +test sherlock::name_alt3_nocase ... bench: 602,319 ns/iter (+/- 7,872) = 987 MB/s +test sherlock::name_alt4 ... bench: 586,762 ns/iter (+/- 3,465) = 1013 MB/s +test sherlock::name_alt4_nocase ... bench: 595,539 ns/iter (+/- 3,240) = 998 MB/s +test sherlock::name_alt5 ... bench: 592,474 ns/iter (+/- 6,361) = 1004 MB/s +test sherlock::name_alt5_nocase ... bench: 593,214 ns/iter (+/- 4,667) = 1002 MB/s +test sherlock::name_holmes ... bench: 40,236 ns/iter (+/- 514) = 14786 MB/s +test sherlock::name_holmes_nocase ... bench: 215,216 ns/iter (+/- 4,822) = 2764 MB/s +test sherlock::name_sherlock ... bench: 14,064 ns/iter (+/- 159) = 42301 MB/s +test sherlock::name_sherlock_holmes ... bench: 15,727 ns/iter (+/- 166) = 37828 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 552,042 ns/iter (+/- 6,395) = 1077 MB/s +test sherlock::name_sherlock_nocase ... bench: 552,475 ns/iter (+/- 5,365) = 1076 MB/s +test sherlock::name_whitespace ... bench: 16,210 ns/iter (+/- 194) = 36701 MB/s +test sherlock::no_match_common ... bench: 147,489 ns/iter (+/- 602) = 4033 MB/s +test sherlock::no_match_really_common ... bench: 157,205 ns/iter (+/- 350) = 3784 MB/s +test sherlock::no_match_uncommon ... bench: 4,849 ns/iter (+/- 5) = 122691 MB/s +test sherlock::quotes ... bench: 619,880 ns/iter (+/- 5,189) = 959 MB/s +test sherlock::the_lower ... bench: 685,396 ns/iter (+/- 12,559) = 868 MB/s +test sherlock::the_nocase ... bench: 771,051 ns/iter (+/- 18,470) = 771 MB/s +test sherlock::the_upper ... bench: 59,139 ns/iter (+/- 1,604) = 10059 MB/s +test sherlock::the_whitespace ... bench: 736,147 ns/iter (+/- 7,668) = 808 MB/s +test sherlock::word_ending_n ... bench: 1,200,401 ns/iter (+/- 11,206) = 495 MB/s +test sherlock::words ... bench: 8,024,768 ns/iter (+/- 93,051) = 74 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out; finished in 86.80s + diff --git a/third_party/rust/regex/record/old-bench-log/09-new-baseline/rust b/third_party/rust/regex/record/old-bench-log/09-new-baseline/rust new file mode 100644 index 0000000000..30924d4bd5 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/09-new-baseline/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 8 ns/iter (+/- 0) = 131375 MB/s +test misc::easy0_1MB ... bench: 12 ns/iter (+/- 0) = 87383583 MB/s +test misc::easy0_32 ... bench: 8 ns/iter (+/- 0) = 7375 MB/s +test misc::easy0_32K ... bench: 8 ns/iter (+/- 0) = 4099375 MB/s +test misc::easy1_1K ... bench: 25 ns/iter (+/- 0) = 41760 MB/s +test misc::easy1_1MB ... bench: 26 ns/iter (+/- 0) = 40330615 MB/s +test misc::easy1_32 ... bench: 25 ns/iter (+/- 0) = 2080 MB/s +test misc::easy1_32K ... bench: 26 ns/iter (+/- 0) = 1261076 MB/s +test misc::hard_1K ... bench: 33 ns/iter (+/- 0) = 31848 MB/s +test misc::hard_1MB ... bench: 33 ns/iter (+/- 0) = 31775848 MB/s +test misc::hard_32 ... bench: 34 ns/iter (+/- 0) = 1735 MB/s +test misc::hard_32K ... bench: 33 ns/iter (+/- 0) = 993787 MB/s +test misc::is_match_set ... bench: 35 ns/iter (+/- 0) = 714 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,517 ns/iter (+/- 25) = 65920 MB/s +test misc::long_needle2 ... bench: 186,131 ns/iter (+/- 1,191) = 537 MB/s +test misc::match_class ... bench: 37 ns/iter (+/- 0) = 2189 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::match_class_unicode ... 
bench: 160 ns/iter (+/- 1) = 1006 MB/s +test misc::matches_set ... bench: 200 ns/iter (+/- 4) = 125 MB/s +test misc::medium_1K ... bench: 8 ns/iter (+/- 0) = 131500 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 8 ns/iter (+/- 0) = 4099500 MB/s +test misc::no_exponential ... bench: 262 ns/iter (+/- 6) = 381 MB/s +test misc::not_literal ... bench: 43 ns/iter (+/- 1) = 1186 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 1) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 1) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 18 ns/iter (+/- 0) = 944 MB/s +test misc::reallyhard2_1K ... bench: 36 ns/iter (+/- 1) = 28888 MB/s +test misc::reallyhard_1K ... bench: 1,155 ns/iter (+/- 11) = 909 MB/s +test misc::reallyhard_1MB ... bench: 1,152,983 ns/iter (+/- 6,607) = 909 MB/s +test misc::reallyhard_32 ... bench: 52 ns/iter (+/- 2) = 1134 MB/s +test misc::reallyhard_32K ... bench: 36,194 ns/iter (+/- 327) = 906 MB/s +test misc::replace_all ... bench: 81 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... bench: 2,269 ns/iter (+/- 3) = 3525 MB/s +test misc::short_haystack_1000000x ... bench: 63,956 ns/iter (+/- 209) = 125086 MB/s +test misc::short_haystack_100000x ... bench: 5,877 ns/iter (+/- 66) = 136125 MB/s +test misc::short_haystack_10000x ... bench: 2,414 ns/iter (+/- 10) = 33144 MB/s +test misc::short_haystack_1000x ... bench: 195 ns/iter (+/- 11) = 41082 MB/s +test misc::short_haystack_100x ... bench: 96 ns/iter (+/- 7) = 8447 MB/s +test misc::short_haystack_10x ... bench: 85 ns/iter (+/- 8) = 1070 MB/s +test misc::short_haystack_1x ... bench: 85 ns/iter (+/- 6) = 223 MB/s +test misc::short_haystack_2x ... bench: 86 ns/iter (+/- 12) = 313 MB/s +test misc::short_haystack_3x ... 
bench: 85 ns/iter (+/- 22) = 411 MB/s +test misc::short_haystack_4x ... bench: 85 ns/iter (+/- 12) = 505 MB/s +test regexdna::find_new_lines ... bench: 6,977,678 ns/iter (+/- 90,937) = 728 MB/s +test regexdna::subst1 ... bench: 423,846 ns/iter (+/- 41,460) = 11993 MB/s +test regexdna::subst10 ... bench: 424,043 ns/iter (+/- 55,743) = 11987 MB/s +test regexdna::subst11 ... bench: 418,549 ns/iter (+/- 12,106) = 12145 MB/s +test regexdna::subst2 ... bench: 430,056 ns/iter (+/- 8,862) = 11820 MB/s +test regexdna::subst3 ... bench: 429,634 ns/iter (+/- 26,807) = 11831 MB/s +test regexdna::subst4 ... bench: 419,313 ns/iter (+/- 42,070) = 12123 MB/s +test regexdna::subst5 ... bench: 425,299 ns/iter (+/- 43,161) = 11952 MB/s +test regexdna::subst6 ... bench: 420,177 ns/iter (+/- 49,394) = 12098 MB/s +test regexdna::subst7 ... bench: 425,118 ns/iter (+/- 46,952) = 11957 MB/s +test regexdna::subst8 ... bench: 420,840 ns/iter (+/- 11,623) = 12079 MB/s +test regexdna::subst9 ... bench: 420,752 ns/iter (+/- 10,186) = 12081 MB/s +test regexdna::variant1 ... bench: 1,445,103 ns/iter (+/- 29,436) = 3517 MB/s +test regexdna::variant2 ... bench: 2,234,423 ns/iter (+/- 24,502) = 2275 MB/s +test regexdna::variant3 ... bench: 2,730,972 ns/iter (+/- 26,961) = 1861 MB/s +test regexdna::variant4 ... bench: 2,708,975 ns/iter (+/- 36,517) = 1876 MB/s +test regexdna::variant5 ... bench: 1,663,458 ns/iter (+/- 39,508) = 3055 MB/s +test regexdna::variant6 ... bench: 1,673,873 ns/iter (+/- 14,846) = 3036 MB/s +test regexdna::variant7 ... bench: 2,322,347 ns/iter (+/- 33,731) = 2188 MB/s +test regexdna::variant8 ... bench: 2,350,779 ns/iter (+/- 54,976) = 2162 MB/s +test regexdna::variant9 ... bench: 2,326,741 ns/iter (+/- 20,836) = 2184 MB/s +test rust_compile::compile_huge ... bench: 47,700 ns/iter (+/- 230) +test rust_compile::compile_huge_bytes ... bench: 2,987,898 ns/iter (+/- 32,819) +test rust_compile::compile_huge_full ... 
bench: 5,705,551 ns/iter (+/- 63,483) +test rust_compile::compile_simple ... bench: 1,963 ns/iter (+/- 44) +test rust_compile::compile_simple_bytes ... bench: 1,970 ns/iter (+/- 32) +test rust_compile::compile_simple_full ... bench: 9,677 ns/iter (+/- 69) +test rust_compile::compile_small ... bench: 4,501 ns/iter (+/- 70) +test rust_compile::compile_small_bytes ... bench: 75,372 ns/iter (+/- 2,007) +test rust_compile::compile_small_full ... bench: 151,733 ns/iter (+/- 2,378) +test sherlock::before_after_holmes ... bench: 655,827 ns/iter (+/- 1,426) = 907 MB/s +test sherlock::before_holmes ... bench: 24,653 ns/iter (+/- 224) = 24132 MB/s +test sherlock::everything_greedy ... bench: 1,026,254 ns/iter (+/- 27,926) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 469,676 ns/iter (+/- 62,296) = 1266 MB/s +test sherlock::holmes_cochar_watson ... bench: 47,578 ns/iter (+/- 1,730) = 12504 MB/s +test sherlock::holmes_coword_watson ... bench: 321,318 ns/iter (+/- 3,235) = 1851 MB/s +test sherlock::ing_suffix ... bench: 150,908 ns/iter (+/- 3,952) = 3942 MB/s +test sherlock::ing_suffix_limited_space ... bench: 726,848 ns/iter (+/- 5,314) = 818 MB/s +test sherlock::letters ... bench: 9,719,997 ns/iter (+/- 67,717) = 61 MB/s +test sherlock::letters_lower ... bench: 9,559,105 ns/iter (+/- 79,257) = 62 MB/s +test sherlock::letters_upper ... bench: 1,066,791 ns/iter (+/- 13,193) = 557 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 653,228 ns/iter (+/- 881) = 910 MB/s +test sherlock::name_alt1 ... bench: 10,663 ns/iter (+/- 76) = 55794 MB/s +test sherlock::name_alt2 ... bench: 33,831 ns/iter (+/- 967) = 17585 MB/s +test sherlock::name_alt3 ... bench: 38,061 ns/iter (+/- 1,123) = 15631 MB/s +test sherlock::name_alt3_nocase ... bench: 218,691 ns/iter (+/- 2,345) = 2720 MB/s +test sherlock::name_alt4 ... bench: 52,408 ns/iter (+/- 1,315) = 11351 MB/s +test sherlock::name_alt4_nocase ... 
bench: 84,212 ns/iter (+/- 2,708) = 7064 MB/s +test sherlock::name_alt5 ... bench: 35,272 ns/iter (+/- 1,784) = 16867 MB/s +test sherlock::name_alt5_nocase ... bench: 193,585 ns/iter (+/- 5,057) = 3073 MB/s +test sherlock::name_holmes ... bench: 15,018 ns/iter (+/- 440) = 39614 MB/s +test sherlock::name_holmes_nocase ... bench: 60,207 ns/iter (+/- 1,046) = 9881 MB/s +test sherlock::name_sherlock ... bench: 10,344 ns/iter (+/- 52) = 57514 MB/s +test sherlock::name_sherlock_holmes ... bench: 10,374 ns/iter (+/- 98) = 57348 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 42,037 ns/iter (+/- 1,363) = 14152 MB/s +test sherlock::name_sherlock_nocase ... bench: 41,192 ns/iter (+/- 881) = 14442 MB/s +test sherlock::name_whitespace ... bench: 13,893 ns/iter (+/- 55) = 42822 MB/s +test sherlock::no_match_common ... bench: 8,700 ns/iter (+/- 10) = 68383 MB/s +test sherlock::no_match_really_common ... bench: 10,368 ns/iter (+/- 123) = 57381 MB/s +test sherlock::no_match_uncommon ... bench: 8,695 ns/iter (+/- 7) = 68422 MB/s +test sherlock::quotes ... bench: 222,526 ns/iter (+/- 5,362) = 2673 MB/s +test sherlock::repeated_class_negation ... bench: 35,869,193 ns/iter (+/- 551,212) = 16 MB/s +test sherlock::the_lower ... bench: 187,208 ns/iter (+/- 4,374) = 3177 MB/s +test sherlock::the_nocase ... bench: 280,625 ns/iter (+/- 10,142) = 2120 MB/s +test sherlock::the_upper ... bench: 19,742 ns/iter (+/- 692) = 30135 MB/s +test sherlock::the_whitespace ... bench: 396,099 ns/iter (+/- 10,400) = 1501 MB/s +test sherlock::word_ending_n ... bench: 1,055,639 ns/iter (+/- 6,627) = 563 MB/s +test sherlock::words ... bench: 4,280,471 ns/iter (+/- 53,841) = 138 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 141.25s + diff --git a/third_party/rust/regex/record/old-bench-log/09-new-baseline/rust-bytes b/third_party/rust/regex/record/old-bench-log/09-new-baseline/rust-bytes new file mode 100644 index 0000000000..ff08ed188a --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/09-new-baseline/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 9 ns/iter (+/- 0) = 2888 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 1) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 0) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 17 ns/iter (+/- 0) = 3058 MB/s +test misc::easy1_32K ... bench: 17 ns/iter (+/- 1) = 1928705 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 27 ns/iter (+/- 0) = 38837148 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 35 ns/iter (+/- 0) = 714 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,325 ns/iter (+/- 18) = 75472 MB/s +test misc::long_needle2 ... bench: 186,021 ns/iter (+/- 1,157) = 537 MB/s +test misc::match_class ... bench: 38 ns/iter (+/- 3) = 2131 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... 
bench: 172 ns/iter (+/- 4) = 145 MB/s +test misc::medium_1K ... bench: 7 ns/iter (+/- 0) = 150285 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 7 ns/iter (+/- 0) = 4685142 MB/s +test misc::no_exponential ... bench: 272 ns/iter (+/- 10) = 367 MB/s +test misc::not_literal ... bench: 42 ns/iter (+/- 1) = 1214 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 1) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_short ... bench: 15 ns/iter (+/- 0) = 1133 MB/s +test misc::one_pass_short_not ... bench: 18 ns/iter (+/- 0) = 944 MB/s +test misc::reallyhard2_1K ... bench: 36 ns/iter (+/- 0) = 28888 MB/s +test misc::reallyhard_1K ... bench: 1,152 ns/iter (+/- 14) = 912 MB/s +test misc::reallyhard_1MB ... bench: 1,155,496 ns/iter (+/- 7,722) = 907 MB/s +test misc::reallyhard_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s +test misc::reallyhard_32K ... bench: 36,202 ns/iter (+/- 167) = 905 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,269 ns/iter (+/- 2) = 3525 MB/s +test regexdna::find_new_lines ... bench: 6,982,142 ns/iter (+/- 70,086) = 728 MB/s +test regexdna::subst1 ... bench: 425,753 ns/iter (+/- 15,075) = 11939 MB/s +test regexdna::subst10 ... bench: 431,401 ns/iter (+/- 19,346) = 11783 MB/s +test regexdna::subst11 ... bench: 427,131 ns/iter (+/- 38,166) = 11901 MB/s +test regexdna::subst2 ... bench: 423,284 ns/iter (+/- 9,016) = 12009 MB/s +test regexdna::subst3 ... bench: 425,850 ns/iter (+/- 7,324) = 11937 MB/s +test regexdna::subst4 ... bench: 426,013 ns/iter (+/- 6,922) = 11932 MB/s +test regexdna::subst5 ... bench: 426,029 ns/iter (+/- 8,697) = 11932 MB/s +test regexdna::subst6 ... bench: 427,781 ns/iter (+/- 8,166) = 11883 MB/s +test regexdna::subst7 ... bench: 426,589 ns/iter (+/- 13,274) = 11916 MB/s +test regexdna::subst8 ... 
bench: 424,152 ns/iter (+/- 14,879) = 11984 MB/s +test regexdna::subst9 ... bench: 428,066 ns/iter (+/- 8,773) = 11875 MB/s +test regexdna::variant1 ... bench: 1,446,630 ns/iter (+/- 53,195) = 3513 MB/s +test regexdna::variant2 ... bench: 2,241,934 ns/iter (+/- 42,563) = 2267 MB/s +test regexdna::variant3 ... bench: 2,741,736 ns/iter (+/- 28,424) = 1854 MB/s +test regexdna::variant4 ... bench: 2,725,768 ns/iter (+/- 37,801) = 1864 MB/s +test regexdna::variant5 ... bench: 1,686,366 ns/iter (+/- 25,054) = 3014 MB/s +test regexdna::variant6 ... bench: 1,689,225 ns/iter (+/- 24,479) = 3009 MB/s +test regexdna::variant7 ... bench: 2,343,567 ns/iter (+/- 34,646) = 2169 MB/s +test regexdna::variant8 ... bench: 2,363,133 ns/iter (+/- 69,696) = 2151 MB/s +test regexdna::variant9 ... bench: 2,337,512 ns/iter (+/- 32,958) = 2174 MB/s +test rust_compile::compile_huge ... bench: 53,055 ns/iter (+/- 88) +test rust_compile::compile_huge_bytes ... bench: 2,979,724 ns/iter (+/- 43,904) +test rust_compile::compile_huge_full ... bench: 5,825,193 ns/iter (+/- 61,322) +test rust_compile::compile_simple ... bench: 1,927 ns/iter (+/- 39) +test rust_compile::compile_simple_bytes ... bench: 1,924 ns/iter (+/- 29) +test rust_compile::compile_simple_full ... bench: 9,830 ns/iter (+/- 108) +test rust_compile::compile_small ... bench: 4,569 ns/iter (+/- 70) +test rust_compile::compile_small_bytes ... bench: 74,875 ns/iter (+/- 1,337) +test rust_compile::compile_small_full ... bench: 151,485 ns/iter (+/- 3,063) +test sherlock::before_after_holmes ... bench: 655,632 ns/iter (+/- 801) = 907 MB/s +test sherlock::before_holmes ... bench: 24,576 ns/iter (+/- 307) = 24207 MB/s +test sherlock::everything_greedy ... bench: 1,026,410 ns/iter (+/- 57,265) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 424,490 ns/iter (+/- 7,188) = 1401 MB/s +test sherlock::holmes_cochar_watson ... bench: 46,935 ns/iter (+/- 1,007) = 12675 MB/s +test sherlock::holmes_coword_watson ... 
bench: 322,497 ns/iter (+/- 3,680) = 1844 MB/s +test sherlock::ing_suffix ... bench: 149,923 ns/iter (+/- 2,936) = 3968 MB/s +test sherlock::ing_suffix_limited_space ... bench: 732,021 ns/iter (+/- 10,242) = 812 MB/s +test sherlock::letters ... bench: 9,716,641 ns/iter (+/- 56,270) = 61 MB/s +test sherlock::letters_lower ... bench: 9,541,922 ns/iter (+/- 63,715) = 62 MB/s +test sherlock::letters_upper ... bench: 1,070,240 ns/iter (+/- 10,505) = 555 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 652,312 ns/iter (+/- 546) = 912 MB/s +test sherlock::name_alt1 ... bench: 10,832 ns/iter (+/- 499) = 54923 MB/s +test sherlock::name_alt2 ... bench: 33,528 ns/iter (+/- 484) = 17744 MB/s +test sherlock::name_alt3 ... bench: 37,352 ns/iter (+/- 1,173) = 15927 MB/s +test sherlock::name_alt3_nocase ... bench: 217,570 ns/iter (+/- 3,401) = 2734 MB/s +test sherlock::name_alt4 ... bench: 52,711 ns/iter (+/- 1,257) = 11286 MB/s +test sherlock::name_alt4_nocase ... bench: 81,635 ns/iter (+/- 1,740) = 7287 MB/s +test sherlock::name_alt5 ... bench: 34,935 ns/iter (+/- 1,190) = 17029 MB/s +test sherlock::name_alt5_nocase ... bench: 194,600 ns/iter (+/- 3,742) = 3057 MB/s +test sherlock::name_holmes ... bench: 14,670 ns/iter (+/- 153) = 40554 MB/s +test sherlock::name_holmes_nocase ... bench: 59,906 ns/iter (+/- 898) = 9931 MB/s +test sherlock::name_sherlock ... bench: 10,470 ns/iter (+/- 74) = 56822 MB/s +test sherlock::name_sherlock_holmes ... bench: 10,291 ns/iter (+/- 29) = 57810 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,316 ns/iter (+/- 1,350) = 14399 MB/s +test sherlock::name_sherlock_nocase ... bench: 40,748 ns/iter (+/- 724) = 14600 MB/s +test sherlock::name_whitespace ... bench: 14,682 ns/iter (+/- 52) = 40521 MB/s +test sherlock::no_match_common ... bench: 8,822 ns/iter (+/- 310) = 67437 MB/s +test sherlock::no_match_really_common ... bench: 8,990 ns/iter (+/- 129) = 66177 MB/s +test sherlock::no_match_uncommon ... 
bench: 8,649 ns/iter (+/- 192) = 68786 MB/s +test sherlock::quotes ... bench: 218,225 ns/iter (+/- 4,267) = 2726 MB/s +test sherlock::repeated_class_negation ... bench: 35,771,807 ns/iter (+/- 640,817) = 16 MB/s +test sherlock::the_lower ... bench: 190,205 ns/iter (+/- 9,051) = 3127 MB/s +test sherlock::the_nocase ... bench: 280,386 ns/iter (+/- 5,346) = 2121 MB/s +test sherlock::the_upper ... bench: 19,325 ns/iter (+/- 695) = 30785 MB/s +test sherlock::the_whitespace ... bench: 409,665 ns/iter (+/- 7,657) = 1452 MB/s +test sherlock::word_ending_n ... bench: 1,066,052 ns/iter (+/- 7,072) = 558 MB/s +test sherlock::words ... bench: 4,330,659 ns/iter (+/- 53,403) = 137 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 131.99s + diff --git a/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log new file mode 100644 index 0000000000..c45b55cac7 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s +test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::easy0_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s +test misc::easy0_32K ... bench: 53 ns/iter (+/- 1) = 618773 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 1) = 1300 MB/s +test misc::easy1_32K ... 
bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 2) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 3,242 ns/iter (+/- 79) = 30845 MB/s +test misc::long_needle2 ... bench: 350,572 ns/iter (+/- 6,860) = 285 MB/s +test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 259 ns/iter (+/- 15) = 621 MB/s +test misc::matches_set ... bench: 462 ns/iter (+/- 9) = 54 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 1) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 1) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 1) = 618792 MB/s +test misc::no_exponential ... bench: 423 ns/iter (+/- 13) = 236 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 0) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 1) = 447 MB/s +test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 1) = 414 MB/s +test misc::reallyhard2_1K ... bench: 81 ns/iter (+/- 1) = 12839 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,822 ns/iter (+/- 39,203) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 0) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,328 ns/iter (+/- 2,598) = 664 MB/s +test misc::replace_all ... 
bench: 132 ns/iter (+/- 3) +test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 134) = 1918 MB/s +test misc::short_haystack_1000000x ... bench: 132,251 ns/iter (+/- 729) = 60491 MB/s +test misc::short_haystack_100000x ... bench: 13,184 ns/iter (+/- 408) = 60680 MB/s +test misc::short_haystack_10000x ... bench: 6,036 ns/iter (+/- 167) = 13255 MB/s +test misc::short_haystack_1000x ... bench: 602 ns/iter (+/- 14) = 13307 MB/s +test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s +test misc::short_haystack_10x ... bench: 218 ns/iter (+/- 3) = 417 MB/s +test misc::short_haystack_1x ... bench: 210 ns/iter (+/- 8) = 90 MB/s +test misc::short_haystack_2x ... bench: 225 ns/iter (+/- 6) = 120 MB/s +test misc::short_haystack_3x ... bench: 211 ns/iter (+/- 8) = 165 MB/s +test misc::short_haystack_4x ... bench: 212 ns/iter (+/- 6) = 202 MB/s +test regexdna::find_new_lines ... bench: 12,245,066 ns/iter (+/- 117,141) = 415 MB/s +test regexdna::subst1 ... bench: 786,357 ns/iter (+/- 14,200) = 6464 MB/s +test regexdna::subst10 ... bench: 788,550 ns/iter (+/- 26,456) = 6446 MB/s +test regexdna::subst11 ... bench: 782,161 ns/iter (+/- 15,583) = 6499 MB/s +test regexdna::subst2 ... bench: 784,902 ns/iter (+/- 23,379) = 6476 MB/s +test regexdna::subst3 ... bench: 786,640 ns/iter (+/- 27,063) = 6462 MB/s +test regexdna::subst4 ... bench: 785,591 ns/iter (+/- 20,498) = 6470 MB/s +test regexdna::subst5 ... bench: 787,447 ns/iter (+/- 20,892) = 6455 MB/s +test regexdna::subst6 ... bench: 784,994 ns/iter (+/- 19,687) = 6475 MB/s +test regexdna::subst7 ... bench: 801,921 ns/iter (+/- 15,391) = 6339 MB/s +test regexdna::subst8 ... bench: 785,541 ns/iter (+/- 11,908) = 6471 MB/s +test regexdna::subst9 ... bench: 785,848 ns/iter (+/- 28,020) = 6468 MB/s +test regexdna::variant1 ... bench: 2,195,058 ns/iter (+/- 44,066) = 2315 MB/s +test regexdna::variant2 ... bench: 3,219,968 ns/iter (+/- 59,372) = 1578 MB/s +test regexdna::variant3 ... 
bench: 3,776,467 ns/iter (+/- 54,326) = 1346 MB/s +test regexdna::variant4 ... bench: 3,803,674 ns/iter (+/- 95,281) = 1336 MB/s +test regexdna::variant5 ... bench: 2,661,333 ns/iter (+/- 46,408) = 1910 MB/s +test regexdna::variant6 ... bench: 2,645,716 ns/iter (+/- 38,659) = 1921 MB/s +test regexdna::variant7 ... bench: 3,228,352 ns/iter (+/- 69,155) = 1574 MB/s +test regexdna::variant8 ... bench: 3,305,563 ns/iter (+/- 59,321) = 1537 MB/s +test regexdna::variant9 ... bench: 3,225,039 ns/iter (+/- 49,720) = 1576 MB/s +test rust_compile::compile_huge ... bench: 100,381 ns/iter (+/- 2,052) +test rust_compile::compile_huge_bytes ... bench: 5,899,989 ns/iter (+/- 114,363) +test rust_compile::compile_huge_full ... bench: 11,650,995 ns/iter (+/- 172,285) +test rust_compile::compile_simple ... bench: 4,082 ns/iter (+/- 88) +test rust_compile::compile_simple_bytes ... bench: 4,153 ns/iter (+/- 120) +test rust_compile::compile_simple_full ... bench: 20,414 ns/iter (+/- 1,860) +test rust_compile::compile_small ... bench: 9,114 ns/iter (+/- 216) +test rust_compile::compile_small_bytes ... bench: 183,049 ns/iter (+/- 9,917) +test rust_compile::compile_small_full ... bench: 361,291 ns/iter (+/- 11,045) +test sherlock::before_after_holmes ... bench: 907,103 ns/iter (+/- 12,165) = 655 MB/s +test sherlock::before_holmes ... bench: 62,501 ns/iter (+/- 1,880) = 9518 MB/s +test sherlock::everything_greedy ... bench: 2,062,116 ns/iter (+/- 41,900) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 894,529 ns/iter (+/- 38,723) = 665 MB/s +test sherlock::holmes_cochar_watson ... bench: 103,305 ns/iter (+/- 3,798) = 5758 MB/s +test sherlock::holmes_coword_watson ... bench: 479,423 ns/iter (+/- 13,924) = 1240 MB/s +test sherlock::ing_suffix ... bench: 318,300 ns/iter (+/- 6,846) = 1869 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,066,300 ns/iter (+/- 19,375) = 557 MB/s +test sherlock::letters ... 
bench: 21,777,358 ns/iter (+/- 230,478) = 27 MB/s +test sherlock::letters_lower ... bench: 21,152,019 ns/iter (+/- 203,617) = 28 MB/s +test sherlock::letters_upper ... bench: 1,777,626 ns/iter (+/- 26,243) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,509 ns/iter (+/- 24,983) = 662 MB/s +test sherlock::name_alt1 ... bench: 32,255 ns/iter (+/- 681) = 18444 MB/s +test sherlock::name_alt2 ... bench: 86,369 ns/iter (+/- 2,494) = 6888 MB/s +test sherlock::name_alt3 ... bench: 97,618 ns/iter (+/- 564) = 6094 MB/s +test sherlock::name_alt3_nocase ... bench: 944,848 ns/iter (+/- 31,039) = 629 MB/s +test sherlock::name_alt4 ... bench: 122,029 ns/iter (+/- 2,716) = 4875 MB/s +test sherlock::name_alt4_nocase ... bench: 225,544 ns/iter (+/- 5,783) = 2637 MB/s +test sherlock::name_alt5 ... bench: 91,897 ns/iter (+/- 3,796) = 6473 MB/s +test sherlock::name_alt5_nocase ... bench: 936,420 ns/iter (+/- 15,092) = 635 MB/s +test sherlock::name_holmes ... bench: 33,448 ns/iter (+/- 959) = 17786 MB/s +test sherlock::name_holmes_nocase ... bench: 115,864 ns/iter (+/- 1,645) = 5134 MB/s +test sherlock::name_sherlock ... bench: 22,474 ns/iter (+/- 674) = 26472 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,184 ns/iter (+/- 497) = 26818 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 99,629 ns/iter (+/- 2,398) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 99,523 ns/iter (+/- 2,674) = 5977 MB/s +test sherlock::name_whitespace ... bench: 30,815 ns/iter (+/- 107) = 19306 MB/s +test sherlock::no_match_common ... bench: 19,661 ns/iter (+/- 656) = 30259 MB/s +test sherlock::no_match_really_common ... bench: 27,544 ns/iter (+/- 527) = 21599 MB/s +test sherlock::no_match_uncommon ... bench: 19,553 ns/iter (+/- 31) = 30426 MB/s +test sherlock::quotes ... bench: 369,144 ns/iter (+/- 45,316) = 1611 MB/s +test sherlock::repeated_class_negation ... bench: 68,838,857 ns/iter (+/- 330,544) = 8 MB/s +test sherlock::the_lower ... 
bench: 321,692 ns/iter (+/- 5,418) = 1849 MB/s +test sherlock::the_nocase ... bench: 507,936 ns/iter (+/- 3,080) = 1171 MB/s +test sherlock::the_upper ... bench: 43,705 ns/iter (+/- 788) = 13612 MB/s +test sherlock::the_whitespace ... bench: 819,179 ns/iter (+/- 20,071) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,700,300 ns/iter (+/- 36,623) = 349 MB/s +test sherlock::words ... bench: 8,249,767 ns/iter (+/- 75,015) = 72 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 111.55s + diff --git a/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log new file mode 100644 index 0000000000..98b3496ae9 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 0) = 70066 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 15 ns/iter (+/- 0) = 3933 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 40 ns/iter (+/- 1) = 26100 MB/s +test misc::easy1_1MB ... bench: 45 ns/iter (+/- 1) = 23302133 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 5) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 0) = 18725053 MB/s +test misc::hard_32 ... 
bench: 51 ns/iter (+/- 3) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 3,259 ns/iter (+/- 86) = 30684 MB/s +test misc::long_needle2 ... bench: 350,722 ns/iter (+/- 6,984) = 285 MB/s +test misc::match_class ... bench: 60 ns/iter (+/- 1) = 1350 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 255 ns/iter (+/- 0) = 631 MB/s +test misc::matches_set ... bench: 481 ns/iter (+/- 11) = 51 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 442 ns/iter (+/- 13) = 226 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 54 ns/iter (+/- 1) = 481 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 39 ns/iter (+/- 0) = 435 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 6) = 12530 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 4) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,691 ns/iter (+/- 29,668) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 5) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,325 ns/iter (+/- 1,734) = 664 MB/s +test misc::replace_all ... bench: 134 ns/iter (+/- 2) +test misc::reverse_suffix_no_quadratic ... bench: 4,189 ns/iter (+/- 274) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,182 ns/iter (+/- 4,966) = 60522 MB/s +test misc::short_haystack_100000x ... 
bench: 13,344 ns/iter (+/- 275) = 59952 MB/s +test misc::short_haystack_10000x ... bench: 6,119 ns/iter (+/- 285) = 13075 MB/s +test misc::short_haystack_1000x ... bench: 617 ns/iter (+/- 15) = 12983 MB/s +test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s +test misc::short_haystack_10x ... bench: 207 ns/iter (+/- 8) = 439 MB/s +test misc::short_haystack_1x ... bench: 213 ns/iter (+/- 7) = 89 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 6) = 131 MB/s +test misc::short_haystack_3x ... bench: 207 ns/iter (+/- 10) = 169 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 7) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,275,804 ns/iter (+/- 145,331) = 414 MB/s +test regexdna::subst1 ... bench: 793,517 ns/iter (+/- 44,203) = 6406 MB/s +test regexdna::subst10 ... bench: 794,922 ns/iter (+/- 23,459) = 6394 MB/s +test regexdna::subst11 ... bench: 790,525 ns/iter (+/- 23,010) = 6430 MB/s +test regexdna::subst2 ... bench: 790,637 ns/iter (+/- 17,962) = 6429 MB/s +test regexdna::subst3 ... bench: 793,559 ns/iter (+/- 17,575) = 6405 MB/s +test regexdna::subst4 ... bench: 792,738 ns/iter (+/- 15,237) = 6412 MB/s +test regexdna::subst5 ... bench: 795,060 ns/iter (+/- 26,172) = 6393 MB/s +test regexdna::subst6 ... bench: 792,357 ns/iter (+/- 15,067) = 6415 MB/s +test regexdna::subst7 ... bench: 797,006 ns/iter (+/- 27,928) = 6378 MB/s +test regexdna::subst8 ... bench: 790,603 ns/iter (+/- 22,754) = 6429 MB/s +test regexdna::subst9 ... bench: 793,055 ns/iter (+/- 13,202) = 6409 MB/s +test regexdna::variant1 ... bench: 2,204,304 ns/iter (+/- 50,669) = 2306 MB/s +test regexdna::variant2 ... bench: 3,224,798 ns/iter (+/- 45,705) = 1576 MB/s +test regexdna::variant3 ... bench: 3,802,774 ns/iter (+/- 86,530) = 1336 MB/s +test regexdna::variant4 ... bench: 3,805,916 ns/iter (+/- 69,737) = 1335 MB/s +test regexdna::variant5 ... bench: 2,662,373 ns/iter (+/- 61,259) = 1909 MB/s +test regexdna::variant6 ... 
bench: 2,654,072 ns/iter (+/- 51,095) = 1915 MB/s +test regexdna::variant7 ... bench: 3,232,369 ns/iter (+/- 67,147) = 1572 MB/s +test regexdna::variant8 ... bench: 3,311,225 ns/iter (+/- 66,086) = 1535 MB/s +test regexdna::variant9 ... bench: 3,241,601 ns/iter (+/- 68,394) = 1568 MB/s +test rust_compile::compile_huge ... bench: 100,955 ns/iter (+/- 2,466) +test rust_compile::compile_huge_bytes ... bench: 5,936,732 ns/iter (+/- 126,993) +test rust_compile::compile_huge_full ... bench: 11,880,838 ns/iter (+/- 211,387) +test rust_compile::compile_simple ... bench: 4,575 ns/iter (+/- 139) +test rust_compile::compile_simple_bytes ... bench: 4,653 ns/iter (+/- 122) +test rust_compile::compile_simple_full ... bench: 20,656 ns/iter (+/- 535) +test rust_compile::compile_small ... bench: 9,613 ns/iter (+/- 992) +test rust_compile::compile_small_bytes ... bench: 188,349 ns/iter (+/- 4,733) +test rust_compile::compile_small_full ... bench: 341,554 ns/iter (+/- 9,774) +test sherlock::before_after_holmes ... bench: 907,419 ns/iter (+/- 11,645) = 655 MB/s +test sherlock::before_holmes ... bench: 62,036 ns/iter (+/- 1,854) = 9590 MB/s +test sherlock::everything_greedy ... bench: 2,072,694 ns/iter (+/- 45,192) = 287 MB/s +test sherlock::everything_greedy_nl ... bench: 884,483 ns/iter (+/- 25,710) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 103,873 ns/iter (+/- 1,310) = 5727 MB/s +test sherlock::holmes_coword_watson ... bench: 481,491 ns/iter (+/- 11,516) = 1235 MB/s +test sherlock::ing_suffix ... bench: 323,119 ns/iter (+/- 7,438) = 1841 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,293 ns/iter (+/- 18,661) = 557 MB/s +test sherlock::letters ... bench: 21,732,526 ns/iter (+/- 253,563) = 27 MB/s +test sherlock::letters_lower ... bench: 21,187,465 ns/iter (+/- 191,023) = 28 MB/s +test sherlock::letters_upper ... bench: 1,766,003 ns/iter (+/- 17,494) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... 
bench: 897,387 ns/iter (+/- 26,674) = 662 MB/s +test sherlock::name_alt1 ... bench: 34,183 ns/iter (+/- 885) = 17404 MB/s +test sherlock::name_alt2 ... bench: 87,151 ns/iter (+/- 2,139) = 6826 MB/s +test sherlock::name_alt3 ... bench: 99,293 ns/iter (+/- 1,938) = 5991 MB/s +test sherlock::name_alt3_nocase ... bench: 379,228 ns/iter (+/- 22,539) = 1568 MB/s +test sherlock::name_alt4 ... bench: 123,040 ns/iter (+/- 2,676) = 4835 MB/s +test sherlock::name_alt4_nocase ... bench: 186,045 ns/iter (+/- 403) = 3197 MB/s +test sherlock::name_alt5 ... bench: 91,679 ns/iter (+/- 2,543) = 6489 MB/s +test sherlock::name_alt5_nocase ... bench: 343,668 ns/iter (+/- 6,807) = 1731 MB/s +test sherlock::name_holmes ... bench: 33,802 ns/iter (+/- 936) = 17600 MB/s +test sherlock::name_holmes_nocase ... bench: 136,208 ns/iter (+/- 4,317) = 4367 MB/s +test sherlock::name_sherlock ... bench: 22,534 ns/iter (+/- 462) = 26401 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,514 ns/iter (+/- 697) = 26425 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,796 ns/iter (+/- 2,037) = 6083 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,809 ns/iter (+/- 1,538) = 6209 MB/s +test sherlock::name_whitespace ... bench: 30,959 ns/iter (+/- 968) = 19216 MB/s +test sherlock::no_match_common ... bench: 19,568 ns/iter (+/- 616) = 30403 MB/s +test sherlock::no_match_really_common ... bench: 26,273 ns/iter (+/- 1,143) = 22644 MB/s +test sherlock::no_match_uncommon ... bench: 19,643 ns/iter (+/- 496) = 30287 MB/s +test sherlock::quotes ... bench: 371,876 ns/iter (+/- 2,494) = 1599 MB/s +test sherlock::repeated_class_negation ... bench: 76,963,104 ns/iter (+/- 277,311) = 7 MB/s +test sherlock::the_lower ... bench: 331,250 ns/iter (+/- 8,588) = 1796 MB/s +test sherlock::the_nocase ... bench: 516,528 ns/iter (+/- 40,826) = 1151 MB/s +test sherlock::the_upper ... bench: 44,206 ns/iter (+/- 1,277) = 13458 MB/s +test sherlock::the_whitespace ... 
bench: 822,577 ns/iter (+/- 23,649) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,685,110 ns/iter (+/- 34,615) = 353 MB/s +test sherlock::words ... bench: 8,333,499 ns/iter (+/- 152,757) = 71 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 124.94s + diff --git a/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log new file mode 100644 index 0000000000..470e09b9c8 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 1) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 54 ns/iter (+/- 2) = 19462 MB/s +test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::easy0_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s +test misc::easy0_32K ... bench: 51 ns/iter (+/- 2) = 643039 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 1) = 25463 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 2) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 52 ns/iter (+/- 1) = 20211 MB/s +test misc::hard_1MB ... bench: 57 ns/iter (+/- 0) = 18396543 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 3) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 14 ns/iter (+/- 0) = 3642 MB/s +test misc::long_needle1 ... 
bench: 3,249 ns/iter (+/- 87) = 30779 MB/s +test misc::long_needle2 ... bench: 350,559 ns/iter (+/- 7,154) = 285 MB/s +test misc::match_class ... bench: 61 ns/iter (+/- 4) = 1327 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::matches_set ... bench: 401 ns/iter (+/- 17) = 62 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 0) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 0) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 2) = 618792 MB/s +test misc::no_exponential ... bench: 421 ns/iter (+/- 8) = 237 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 53 ns/iter (+/- 0) = 490 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 0) = 447 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 3) = 404 MB/s +test misc::reallyhard2_1K ... bench: 77 ns/iter (+/- 1) = 13506 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,759 ns/iter (+/- 49,997) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 2) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 1,055) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,161 ns/iter (+/- 94) = 1922 MB/s +test regexdna::find_new_lines ... bench: 12,344,799 ns/iter (+/- 188,054) = 411 MB/s +test regexdna::subst1 ... bench: 780,449 ns/iter (+/- 14,474) = 6513 MB/s +test regexdna::subst10 ... bench: 795,203 ns/iter (+/- 40,742) = 6392 MB/s +test regexdna::subst11 ... bench: 816,444 ns/iter (+/- 23,334) = 6226 MB/s +test regexdna::subst2 ... bench: 777,546 ns/iter (+/- 19,625) = 6537 MB/s +test regexdna::subst3 ... bench: 783,295 ns/iter (+/- 8,266) = 6489 MB/s +test regexdna::subst4 ... 
bench: 775,154 ns/iter (+/- 21,350) = 6557 MB/s +test regexdna::subst5 ... bench: 781,414 ns/iter (+/- 21,057) = 6505 MB/s +test regexdna::subst6 ... bench: 783,595 ns/iter (+/- 23,835) = 6487 MB/s +test regexdna::subst7 ... bench: 821,620 ns/iter (+/- 46,131) = 6187 MB/s +test regexdna::subst8 ... bench: 818,402 ns/iter (+/- 21,350) = 6211 MB/s +test regexdna::subst9 ... bench: 779,115 ns/iter (+/- 21,335) = 6524 MB/s +test regexdna::variant1 ... bench: 2,189,308 ns/iter (+/- 32,528) = 2321 MB/s +test regexdna::variant2 ... bench: 3,217,478 ns/iter (+/- 36,011) = 1579 MB/s +test regexdna::variant3 ... bench: 3,771,330 ns/iter (+/- 74,944) = 1347 MB/s +test regexdna::variant4 ... bench: 3,787,593 ns/iter (+/- 37,825) = 1342 MB/s +test regexdna::variant5 ... bench: 2,669,799 ns/iter (+/- 69,777) = 1904 MB/s +test regexdna::variant6 ... bench: 2,651,559 ns/iter (+/- 33,895) = 1917 MB/s +test regexdna::variant7 ... bench: 3,222,991 ns/iter (+/- 41,014) = 1577 MB/s +test regexdna::variant8 ... bench: 3,298,048 ns/iter (+/- 41,331) = 1541 MB/s +test regexdna::variant9 ... bench: 3,218,486 ns/iter (+/- 50,318) = 1579 MB/s +test rust_compile::compile_huge ... bench: 100,031 ns/iter (+/- 3,464) +test rust_compile::compile_huge_bytes ... bench: 5,885,102 ns/iter (+/- 130,016) +test rust_compile::compile_huge_full ... bench: 11,641,251 ns/iter (+/- 147,700) +test rust_compile::compile_simple ... bench: 4,263 ns/iter (+/- 116) +test rust_compile::compile_simple_bytes ... bench: 4,236 ns/iter (+/- 91) +test rust_compile::compile_simple_full ... bench: 22,349 ns/iter (+/- 2,085) +test rust_compile::compile_small ... bench: 9,537 ns/iter (+/- 298) +test rust_compile::compile_small_bytes ... bench: 178,561 ns/iter (+/- 3,796) +test rust_compile::compile_small_full ... bench: 363,343 ns/iter (+/- 9,481) +test sherlock::before_after_holmes ... bench: 907,022 ns/iter (+/- 19,133) = 655 MB/s +test sherlock::before_holmes ... 
bench: 63,729 ns/iter (+/- 1,830) = 9335 MB/s +test sherlock::everything_greedy ... bench: 2,181,593 ns/iter (+/- 46,002) = 272 MB/s +test sherlock::everything_greedy_nl ... bench: 884,811 ns/iter (+/- 26,211) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,610 ns/iter (+/- 3,120) = 5633 MB/s +test sherlock::holmes_coword_watson ... bench: 480,986 ns/iter (+/- 13,228) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,921 ns/iter (+/- 3,555) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,065,372 ns/iter (+/- 21,242) = 558 MB/s +test sherlock::letters ... bench: 22,109,015 ns/iter (+/- 146,243) = 26 MB/s +test sherlock::letters_lower ... bench: 21,686,153 ns/iter (+/- 206,041) = 27 MB/s +test sherlock::letters_upper ... bench: 1,778,225 ns/iter (+/- 25,935) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,355 ns/iter (+/- 26,781) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,927 ns/iter (+/- 633) = 18634 MB/s +test sherlock::name_alt2 ... bench: 87,040 ns/iter (+/- 1,859) = 6835 MB/s +test sherlock::name_alt3 ... bench: 97,715 ns/iter (+/- 2,109) = 6088 MB/s +test sherlock::name_alt3_nocase ... bench: 944,955 ns/iter (+/- 26,503) = 629 MB/s +test sherlock::name_alt4 ... bench: 120,935 ns/iter (+/- 2,399) = 4919 MB/s +test sherlock::name_alt4_nocase ... bench: 228,597 ns/iter (+/- 7,137) = 2602 MB/s +test sherlock::name_alt5 ... bench: 91,174 ns/iter (+/- 1,096) = 6525 MB/s +test sherlock::name_alt5_nocase ... bench: 937,189 ns/iter (+/- 23,839) = 634 MB/s +test sherlock::name_holmes ... bench: 34,020 ns/iter (+/- 752) = 17487 MB/s +test sherlock::name_holmes_nocase ... bench: 117,194 ns/iter (+/- 3,444) = 5076 MB/s +test sherlock::name_sherlock ... bench: 22,557 ns/iter (+/- 388) = 26374 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,428 ns/iter (+/- 683) = 26526 MB/s +test sherlock::name_sherlock_holmes_nocase ... 
bench: 99,637 ns/iter (+/- 636) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 97,895 ns/iter (+/- 1,875) = 6077 MB/s +test sherlock::name_whitespace ... bench: 30,772 ns/iter (+/- 1,591) = 19333 MB/s +test sherlock::no_match_common ... bench: 19,665 ns/iter (+/- 296) = 30253 MB/s +test sherlock::no_match_really_common ... bench: 27,403 ns/iter (+/- 2,507) = 21710 MB/s +test sherlock::no_match_uncommon ... bench: 19,601 ns/iter (+/- 293) = 30352 MB/s +test sherlock::quotes ... bench: 370,323 ns/iter (+/- 1,345) = 1606 MB/s +test sherlock::repeated_class_negation ... bench: 68,414,794 ns/iter (+/- 342,428) = 8 MB/s +test sherlock::the_lower ... bench: 327,767 ns/iter (+/- 5,493) = 1815 MB/s +test sherlock::the_nocase ... bench: 507,818 ns/iter (+/- 1,796) = 1171 MB/s +test sherlock::the_upper ... bench: 45,045 ns/iter (+/- 1,400) = 13207 MB/s +test sherlock::the_whitespace ... bench: 822,080 ns/iter (+/- 16,581) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,690,084 ns/iter (+/- 40,361) = 352 MB/s +test sherlock::words ... bench: 8,573,617 ns/iter (+/- 143,313) = 69 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 110.03s + diff --git a/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log new file mode 100644 index 0000000000..7016e3c565 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... 
bench: 14 ns/iter (+/- 0) = 75071 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 0) = 4214 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 0) = 21845750 MB/s +test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 1) = 799707 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 2) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 6) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 62 ns/iter (+/- 2) = 403 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 2,825 ns/iter (+/- 57) = 35398 MB/s +test misc::long_needle2 ... bench: 350,755 ns/iter (+/- 11,905) = 285 MB/s +test misc::match_class ... bench: 64 ns/iter (+/- 1) = 1265 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::matches_set ... bench: 422 ns/iter (+/- 12) = 59 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 0) = 49933523 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 14 ns/iter (+/- 0) = 2342571 MB/s +test misc::no_exponential ... bench: 443 ns/iter (+/- 12) = 225 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 40 ns/iter (+/- 1) = 425 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... 
bench: 80 ns/iter (+/- 0) = 13000 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,789 ns/iter (+/- 34,236) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 2) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,321 ns/iter (+/- 2,718) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,158 ns/iter (+/- 93) = 1924 MB/s +test regexdna::find_new_lines ... bench: 12,391,732 ns/iter (+/- 180,913) = 410 MB/s +test regexdna::subst1 ... bench: 781,690 ns/iter (+/- 29,637) = 6503 MB/s +test regexdna::subst10 ... bench: 778,306 ns/iter (+/- 22,706) = 6531 MB/s +test regexdna::subst11 ... bench: 777,716 ns/iter (+/- 24,635) = 6536 MB/s +test regexdna::subst2 ... bench: 791,786 ns/iter (+/- 15,778) = 6420 MB/s +test regexdna::subst3 ... bench: 783,470 ns/iter (+/- 25,543) = 6488 MB/s +test regexdna::subst4 ... bench: 814,902 ns/iter (+/- 14,146) = 6238 MB/s +test regexdna::subst5 ... bench: 781,464 ns/iter (+/- 19,532) = 6504 MB/s +test regexdna::subst6 ... bench: 780,116 ns/iter (+/- 16,558) = 6516 MB/s +test regexdna::subst7 ... bench: 795,982 ns/iter (+/- 11,254) = 6386 MB/s +test regexdna::subst8 ... bench: 781,746 ns/iter (+/- 24,996) = 6502 MB/s +test regexdna::subst9 ... bench: 783,793 ns/iter (+/- 14,943) = 6485 MB/s +test regexdna::variant1 ... bench: 2,188,940 ns/iter (+/- 42,308) = 2322 MB/s +test regexdna::variant2 ... bench: 3,218,011 ns/iter (+/- 50,700) = 1579 MB/s +test regexdna::variant3 ... bench: 3,778,907 ns/iter (+/- 90,543) = 1345 MB/s +test regexdna::variant4 ... bench: 3,803,852 ns/iter (+/- 68,319) = 1336 MB/s +test regexdna::variant5 ... bench: 2,660,949 ns/iter (+/- 55,488) = 1910 MB/s +test regexdna::variant6 ... bench: 2,647,131 ns/iter (+/- 26,846) = 1920 MB/s +test regexdna::variant7 ... bench: 3,235,032 ns/iter (+/- 37,599) = 1571 MB/s +test regexdna::variant8 ... bench: 3,305,124 ns/iter (+/- 67,109) = 1538 MB/s +test regexdna::variant9 ... 
bench: 3,231,033 ns/iter (+/- 55,626) = 1573 MB/s +test rust_compile::compile_huge ... bench: 99,387 ns/iter (+/- 2,366) +test rust_compile::compile_huge_bytes ... bench: 5,865,693 ns/iter (+/- 62,255) +test rust_compile::compile_huge_full ... bench: 11,752,845 ns/iter (+/- 195,440) +test rust_compile::compile_simple ... bench: 4,117 ns/iter (+/- 141) +test rust_compile::compile_simple_bytes ... bench: 4,162 ns/iter (+/- 67) +test rust_compile::compile_simple_full ... bench: 19,955 ns/iter (+/- 622) +test rust_compile::compile_small ... bench: 9,140 ns/iter (+/- 112) +test rust_compile::compile_small_bytes ... bench: 165,990 ns/iter (+/- 5,876) +test rust_compile::compile_small_full ... bench: 342,897 ns/iter (+/- 13,730) +test sherlock::before_after_holmes ... bench: 906,789 ns/iter (+/- 13,931) = 656 MB/s +test sherlock::before_holmes ... bench: 62,319 ns/iter (+/- 790) = 9546 MB/s +test sherlock::everything_greedy ... bench: 2,175,424 ns/iter (+/- 47,720) = 273 MB/s +test sherlock::everything_greedy_nl ... bench: 884,406 ns/iter (+/- 22,679) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,261 ns/iter (+/- 3,536) = 5651 MB/s +test sherlock::holmes_coword_watson ... bench: 479,524 ns/iter (+/- 7,749) = 1240 MB/s +test sherlock::ing_suffix ... bench: 321,401 ns/iter (+/- 9,123) = 1851 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,069,722 ns/iter (+/- 16,366) = 556 MB/s +test sherlock::letters ... bench: 21,959,896 ns/iter (+/- 204,695) = 27 MB/s +test sherlock::letters_lower ... bench: 21,462,457 ns/iter (+/- 207,449) = 27 MB/s +test sherlock::letters_upper ... bench: 1,768,026 ns/iter (+/- 41,459) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,197 ns/iter (+/- 14,349) = 663 MB/s +test sherlock::name_alt1 ... bench: 34,037 ns/iter (+/- 719) = 17479 MB/s +test sherlock::name_alt2 ... bench: 86,788 ns/iter (+/- 1,203) = 6855 MB/s +test sherlock::name_alt3 ... 
bench: 98,225 ns/iter (+/- 1,589) = 6056 MB/s +test sherlock::name_alt3_nocase ... bench: 377,597 ns/iter (+/- 14,840) = 1575 MB/s +test sherlock::name_alt4 ... bench: 122,440 ns/iter (+/- 8,123) = 4858 MB/s +test sherlock::name_alt4_nocase ... bench: 187,282 ns/iter (+/- 5,176) = 3176 MB/s +test sherlock::name_alt5 ... bench: 91,429 ns/iter (+/- 1,944) = 6507 MB/s +test sherlock::name_alt5_nocase ... bench: 348,111 ns/iter (+/- 12,721) = 1709 MB/s +test sherlock::name_holmes ... bench: 33,547 ns/iter (+/- 1,119) = 17734 MB/s +test sherlock::name_holmes_nocase ... bench: 132,342 ns/iter (+/- 3,974) = 4495 MB/s +test sherlock::name_sherlock ... bench: 22,562 ns/iter (+/- 364) = 26368 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,313 ns/iter (+/- 579) = 26663 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,556 ns/iter (+/- 2,092) = 6098 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,917 ns/iter (+/- 4,054) = 6202 MB/s +test sherlock::name_whitespace ... bench: 30,997 ns/iter (+/- 1,039) = 19193 MB/s +test sherlock::no_match_common ... bench: 19,690 ns/iter (+/- 378) = 30214 MB/s +test sherlock::no_match_really_common ... bench: 27,629 ns/iter (+/- 465) = 21532 MB/s +test sherlock::no_match_uncommon ... bench: 19,681 ns/iter (+/- 291) = 30228 MB/s +test sherlock::quotes ... bench: 368,290 ns/iter (+/- 1,508) = 1615 MB/s +test sherlock::repeated_class_negation ... bench: 73,004,024 ns/iter (+/- 1,040,743) = 8 MB/s +test sherlock::the_lower ... bench: 320,929 ns/iter (+/- 12,287) = 1853 MB/s +test sherlock::the_nocase ... bench: 514,946 ns/iter (+/- 11,241) = 1155 MB/s +test sherlock::the_upper ... bench: 43,816 ns/iter (+/- 1,719) = 13577 MB/s +test sherlock::the_whitespace ... bench: 825,245 ns/iter (+/- 20,797) = 720 MB/s +test sherlock::word_ending_n ... bench: 1,676,908 ns/iter (+/- 40,650) = 354 MB/s +test sherlock::words ... bench: 8,449,099 ns/iter (+/- 123,842) = 70 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 128.47s + diff --git a/third_party/rust/regex/record/old-bench-log/11-regex-1.7.3/rust b/third_party/rust/regex/record/old-bench-log/11-regex-1.7.3/rust new file mode 100644 index 0000000000..aed99af927 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/11-regex-1.7.3/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 1) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 1) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 1) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 8 ns/iter (+/- 1) = 6375 MB/s +test misc::long_needle1 ... bench: 1,785 ns/iter (+/- 1) = 56022 MB/s +test misc::long_needle2 ... bench: 193,595 ns/iter (+/- 1,486) = 516 MB/s +test misc::match_class ... bench: 37 ns/iter (+/- 1) = 2189 MB/s +test misc::match_class_in_range ... bench: 8 ns/iter (+/- 0) = 10125 MB/s +test misc::match_class_unicode ... 
bench: 181 ns/iter (+/- 1) = 889 MB/s +test misc::matches_set ... bench: 216 ns/iter (+/- 9) = 115 MB/s +test misc::medium_1K ... bench: 7 ns/iter (+/- 0) = 150285 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 1) = 87383666 MB/s +test misc::medium_32 ... bench: 7 ns/iter (+/- 0) = 8571 MB/s +test misc::medium_32K ... bench: 7 ns/iter (+/- 0) = 4685142 MB/s +test misc::no_exponential ... bench: 283 ns/iter (+/- 7) = 353 MB/s +test misc::not_literal ... bench: 53 ns/iter (+/- 1) = 962 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 41 ns/iter (+/- 0) = 25365 MB/s +test misc::reallyhard_1K ... bench: 1,208 ns/iter (+/- 2) = 870 MB/s +test misc::reallyhard_1MB ... bench: 1,213,959 ns/iter (+/- 7,198) = 863 MB/s +test misc::reallyhard_32 ... bench: 62 ns/iter (+/- 0) = 951 MB/s +test misc::reallyhard_32K ... bench: 38,231 ns/iter (+/- 354) = 857 MB/s +test misc::replace_all ... bench: 86 ns/iter (+/- 3) +test misc::reverse_suffix_no_quadratic ... bench: 2,351 ns/iter (+/- 31) = 3402 MB/s +test misc::short_haystack_1000000x ... bench: 91,018 ns/iter (+/- 203) = 87894 MB/s +test misc::short_haystack_100000x ... bench: 9,277 ns/iter (+/- 40) = 86235 MB/s +test misc::short_haystack_10000x ... bench: 2,863 ns/iter (+/- 4) = 27946 MB/s +test misc::short_haystack_1000x ... bench: 201 ns/iter (+/- 3) = 39855 MB/s +test misc::short_haystack_100x ... bench: 100 ns/iter (+/- 2) = 8110 MB/s +test misc::short_haystack_10x ... bench: 88 ns/iter (+/- 0) = 1034 MB/s +test misc::short_haystack_1x ... bench: 86 ns/iter (+/- 1) = 220 MB/s +test misc::short_haystack_2x ... bench: 87 ns/iter (+/- 0) = 310 MB/s +test misc::short_haystack_3x ... 
bench: 88 ns/iter (+/- 1) = 397 MB/s +test misc::short_haystack_4x ... bench: 88 ns/iter (+/- 1) = 488 MB/s +test regexdna::find_new_lines ... bench: 7,348,651 ns/iter (+/- 40,559) = 691 MB/s +test regexdna::subst1 ... bench: 493,624 ns/iter (+/- 10,315) = 10298 MB/s +test regexdna::subst10 ... bench: 489,573 ns/iter (+/- 18,151) = 10383 MB/s +test regexdna::subst11 ... bench: 492,501 ns/iter (+/- 11,650) = 10321 MB/s +test regexdna::subst2 ... bench: 492,283 ns/iter (+/- 12,363) = 10326 MB/s +test regexdna::subst3 ... bench: 496,795 ns/iter (+/- 20,704) = 10232 MB/s +test regexdna::subst4 ... bench: 489,245 ns/iter (+/- 10,289) = 10390 MB/s +test regexdna::subst5 ... bench: 499,701 ns/iter (+/- 11,359) = 10172 MB/s +test regexdna::subst6 ... bench: 490,460 ns/iter (+/- 8,758) = 10364 MB/s +test regexdna::subst7 ... bench: 496,398 ns/iter (+/- 18,774) = 10240 MB/s +test regexdna::subst8 ... bench: 497,077 ns/iter (+/- 24,767) = 10226 MB/s +test regexdna::subst9 ... bench: 496,763 ns/iter (+/- 12,477) = 10233 MB/s +test regexdna::variant1 ... bench: 1,454,747 ns/iter (+/- 48,995) = 3494 MB/s +test regexdna::variant2 ... bench: 2,311,001 ns/iter (+/- 63,347) = 2199 MB/s +test regexdna::variant3 ... bench: 2,832,483 ns/iter (+/- 33,976) = 1794 MB/s +test regexdna::variant4 ... bench: 2,796,710 ns/iter (+/- 56,279) = 1817 MB/s +test regexdna::variant5 ... bench: 1,708,634 ns/iter (+/- 25,749) = 2975 MB/s +test regexdna::variant6 ... bench: 1,706,259 ns/iter (+/- 22,151) = 2979 MB/s +test regexdna::variant7 ... bench: 2,400,436 ns/iter (+/- 24,655) = 2117 MB/s +test regexdna::variant8 ... bench: 2,413,765 ns/iter (+/- 50,326) = 2106 MB/s +test regexdna::variant9 ... bench: 2,402,528 ns/iter (+/- 26,150) = 2115 MB/s +test rust_compile::compile_huge ... bench: 51,936 ns/iter (+/- 834) +test rust_compile::compile_huge_bytes ... bench: 3,294,633 ns/iter (+/- 40,585) +test rust_compile::compile_huge_full ... 
bench: 6,323,294 ns/iter (+/- 66,684) +test rust_compile::compile_simple ... bench: 1,992 ns/iter (+/- 25) +test rust_compile::compile_simple_bytes ... bench: 2,004 ns/iter (+/- 20) +test rust_compile::compile_simple_full ... bench: 9,697 ns/iter (+/- 68) +test rust_compile::compile_small ... bench: 4,261 ns/iter (+/- 72) +test rust_compile::compile_small_bytes ... bench: 83,908 ns/iter (+/- 1,405) +test rust_compile::compile_small_full ... bench: 166,152 ns/iter (+/- 3,508) +test sherlock::before_after_holmes ... bench: 699,767 ns/iter (+/- 6,201) = 850 MB/s +test sherlock::before_holmes ... bench: 29,284 ns/iter (+/- 573) = 20315 MB/s +test sherlock::everything_greedy ... bench: 1,070,812 ns/iter (+/- 18,795) = 555 MB/s +test sherlock::everything_greedy_nl ... bench: 445,517 ns/iter (+/- 7,760) = 1335 MB/s +test sherlock::holmes_cochar_watson ... bench: 43,459 ns/iter (+/- 901) = 13689 MB/s +test sherlock::holmes_coword_watson ... bench: 335,772 ns/iter (+/- 6,348) = 1771 MB/s +test sherlock::ing_suffix ... bench: 153,546 ns/iter (+/- 3,075) = 3874 MB/s +test sherlock::ing_suffix_limited_space ... bench: 777,388 ns/iter (+/- 8,447) = 765 MB/s +test sherlock::letters ... bench: 10,123,374 ns/iter (+/- 90,059) = 58 MB/s +test sherlock::letters_lower ... bench: 9,957,916 ns/iter (+/- 63,766) = 59 MB/s +test sherlock::letters_upper ... bench: 1,123,119 ns/iter (+/- 17,972) = 529 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 694,714 ns/iter (+/- 7,006) = 856 MB/s +test sherlock::name_alt1 ... bench: 13,427 ns/iter (+/- 331) = 44308 MB/s +test sherlock::name_alt2 ... bench: 33,171 ns/iter (+/- 1,029) = 17935 MB/s +test sherlock::name_alt3 ... bench: 36,816 ns/iter (+/- 1,138) = 16159 MB/s +test sherlock::name_alt3_nocase ... bench: 221,185 ns/iter (+/- 3,268) = 2689 MB/s +test sherlock::name_alt4 ... bench: 49,883 ns/iter (+/- 1,150) = 11926 MB/s +test sherlock::name_alt4_nocase ... 
bench: 74,967 ns/iter (+/- 1,807) = 7935 MB/s +test sherlock::name_alt5 ... bench: 34,675 ns/iter (+/- 1,335) = 17157 MB/s +test sherlock::name_alt5_nocase ... bench: 192,109 ns/iter (+/- 6,194) = 3096 MB/s +test sherlock::name_holmes ... bench: 18,355 ns/iter (+/- 389) = 32412 MB/s +test sherlock::name_holmes_nocase ... bench: 58,179 ns/iter (+/- 917) = 10225 MB/s +test sherlock::name_sherlock ... bench: 14,307 ns/iter (+/- 74) = 41583 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,332 ns/iter (+/- 144) = 41510 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,336 ns/iter (+/- 736) = 14392 MB/s +test sherlock::name_sherlock_nocase ... bench: 40,029 ns/iter (+/- 1,393) = 14862 MB/s +test sherlock::name_whitespace ... bench: 17,807 ns/iter (+/- 105) = 33410 MB/s +test sherlock::no_match_common ... bench: 13,625 ns/iter (+/- 15) = 43664 MB/s +test sherlock::no_match_really_common ... bench: 13,818 ns/iter (+/- 282) = 43054 MB/s +test sherlock::no_match_uncommon ... bench: 13,628 ns/iter (+/- 27) = 43655 MB/s +test sherlock::quotes ... bench: 232,910 ns/iter (+/- 1,883) = 2554 MB/s +test sherlock::repeated_class_negation ... bench: 36,892,964 ns/iter (+/- 629,538) = 16 MB/s +test sherlock::the_lower ... bench: 203,077 ns/iter (+/- 2,574) = 2929 MB/s +test sherlock::the_nocase ... bench: 290,781 ns/iter (+/- 6,597) = 2045 MB/s +test sherlock::the_upper ... bench: 22,731 ns/iter (+/- 439) = 26172 MB/s +test sherlock::the_whitespace ... bench: 423,983 ns/iter (+/- 10,849) = 1403 MB/s +test sherlock::word_ending_n ... bench: 1,109,013 ns/iter (+/- 12,645) = 536 MB/s +test sherlock::words ... bench: 4,529,451 ns/iter (+/- 44,285) = 131 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 164.08s + diff --git a/third_party/rust/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes b/third_party/rust/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes new file mode 100644 index 0000000000..e9f750ebdf --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 0) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 0) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 0) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 2,186 ns/iter (+/- 19) = 45746 MB/s +test misc::long_needle2 ... bench: 210,378 ns/iter (+/- 61,574) = 475 MB/s +test misc::match_class ... bench: 39 ns/iter (+/- 1) = 2076 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... 
bench: 176 ns/iter (+/- 12) = 142 MB/s +test misc::medium_1K ... bench: 8 ns/iter (+/- 0) = 131500 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 8 ns/iter (+/- 0) = 4099500 MB/s +test misc::no_exponential ... bench: 274 ns/iter (+/- 7) = 364 MB/s +test misc::not_literal ... bench: 53 ns/iter (+/- 0) = 962 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 1) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 5) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,220 ns/iter (+/- 15) = 861 MB/s +test misc::reallyhard_1MB ... bench: 1,215,297 ns/iter (+/- 5,229) = 862 MB/s +test misc::reallyhard_32 ... bench: 63 ns/iter (+/- 1) = 936 MB/s +test misc::reallyhard_32K ... bench: 38,164 ns/iter (+/- 232) = 859 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,353 ns/iter (+/- 8) = 3399 MB/s +test regexdna::find_new_lines ... bench: 7,346,276 ns/iter (+/- 46,149) = 691 MB/s +test regexdna::subst1 ... bench: 486,203 ns/iter (+/- 21,159) = 10455 MB/s +test regexdna::subst10 ... bench: 494,356 ns/iter (+/- 6,423) = 10282 MB/s +test regexdna::subst11 ... bench: 481,930 ns/iter (+/- 19,639) = 10548 MB/s +test regexdna::subst2 ... bench: 486,672 ns/iter (+/- 22,184) = 10445 MB/s +test regexdna::subst3 ... bench: 487,152 ns/iter (+/- 19,776) = 10434 MB/s +test regexdna::subst4 ... bench: 486,534 ns/iter (+/- 23,897) = 10448 MB/s +test regexdna::subst5 ... bench: 481,412 ns/iter (+/- 26,310) = 10559 MB/s +test regexdna::subst6 ... bench: 479,498 ns/iter (+/- 20,310) = 10601 MB/s +test regexdna::subst7 ... bench: 481,960 ns/iter (+/- 18,492) = 10547 MB/s +test regexdna::subst8 ... 
bench: 482,282 ns/iter (+/- 22,522) = 10540 MB/s +test regexdna::subst9 ... bench: 489,224 ns/iter (+/- 25,264) = 10390 MB/s +test regexdna::variant1 ... bench: 1,470,068 ns/iter (+/- 65,563) = 3457 MB/s +test regexdna::variant2 ... bench: 2,298,112 ns/iter (+/- 27,688) = 2211 MB/s +test regexdna::variant3 ... bench: 2,818,539 ns/iter (+/- 31,432) = 1803 MB/s +test regexdna::variant4 ... bench: 2,786,226 ns/iter (+/- 30,699) = 1824 MB/s +test regexdna::variant5 ... bench: 1,716,429 ns/iter (+/- 20,264) = 2961 MB/s +test regexdna::variant6 ... bench: 1,719,420 ns/iter (+/- 23,944) = 2956 MB/s +test regexdna::variant7 ... bench: 2,391,022 ns/iter (+/- 23,192) = 2126 MB/s +test regexdna::variant8 ... bench: 2,418,744 ns/iter (+/- 44,152) = 2101 MB/s +test regexdna::variant9 ... bench: 2,400,918 ns/iter (+/- 24,041) = 2117 MB/s +test rust_compile::compile_huge ... bench: 57,745 ns/iter (+/- 816) +test rust_compile::compile_huge_bytes ... bench: 3,346,952 ns/iter (+/- 39,488) +test rust_compile::compile_huge_full ... bench: 6,344,293 ns/iter (+/- 53,114) +test rust_compile::compile_simple ... bench: 2,040 ns/iter (+/- 32) +test rust_compile::compile_simple_bytes ... bench: 2,010 ns/iter (+/- 34) +test rust_compile::compile_simple_full ... bench: 9,632 ns/iter (+/- 464) +test rust_compile::compile_small ... bench: 4,445 ns/iter (+/- 77) +test rust_compile::compile_small_bytes ... bench: 83,791 ns/iter (+/- 1,929) +test rust_compile::compile_small_full ... bench: 164,948 ns/iter (+/- 2,595) +test sherlock::before_after_holmes ... bench: 699,996 ns/iter (+/- 6,647) = 849 MB/s +test sherlock::before_holmes ... bench: 28,208 ns/iter (+/- 233) = 21090 MB/s +test sherlock::everything_greedy ... bench: 1,033,048 ns/iter (+/- 9,790) = 575 MB/s +test sherlock::everything_greedy_nl ... bench: 424,081 ns/iter (+/- 22,574) = 1402 MB/s +test sherlock::holmes_cochar_watson ... bench: 43,131 ns/iter (+/- 827) = 13793 MB/s +test sherlock::holmes_coword_watson ... 
bench: 336,678 ns/iter (+/- 6,985) = 1767 MB/s +test sherlock::ing_suffix ... bench: 153,589 ns/iter (+/- 3,193) = 3873 MB/s +test sherlock::ing_suffix_limited_space ... bench: 776,911 ns/iter (+/- 8,815) = 765 MB/s +test sherlock::letters ... bench: 10,056,702 ns/iter (+/- 49,688) = 59 MB/s +test sherlock::letters_lower ... bench: 9,900,568 ns/iter (+/- 76,118) = 60 MB/s +test sherlock::letters_upper ... bench: 1,120,456 ns/iter (+/- 13,538) = 530 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,727 ns/iter (+/- 6,840) = 857 MB/s +test sherlock::name_alt1 ... bench: 11,101 ns/iter (+/- 65) = 53592 MB/s +test sherlock::name_alt2 ... bench: 34,003 ns/iter (+/- 966) = 17496 MB/s +test sherlock::name_alt3 ... bench: 37,975 ns/iter (+/- 1,313) = 15666 MB/s +test sherlock::name_alt3_nocase ... bench: 214,299 ns/iter (+/- 3,026) = 2776 MB/s +test sherlock::name_alt4 ... bench: 50,551 ns/iter (+/- 1,377) = 11768 MB/s +test sherlock::name_alt4_nocase ... bench: 74,713 ns/iter (+/- 1,359) = 7962 MB/s +test sherlock::name_alt5 ... bench: 35,426 ns/iter (+/- 625) = 16793 MB/s +test sherlock::name_alt5_nocase ... bench: 190,521 ns/iter (+/- 4,903) = 3122 MB/s +test sherlock::name_holmes ... bench: 18,070 ns/iter (+/- 763) = 32923 MB/s +test sherlock::name_holmes_nocase ... bench: 58,454 ns/iter (+/- 1,228) = 10177 MB/s +test sherlock::name_sherlock ... bench: 14,380 ns/iter (+/- 227) = 41372 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,491 ns/iter (+/- 116) = 41055 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 40,722 ns/iter (+/- 231) = 14609 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,937 ns/iter (+/- 623) = 14896 MB/s +test sherlock::name_whitespace ... bench: 17,979 ns/iter (+/- 140) = 33090 MB/s +test sherlock::no_match_common ... bench: 13,650 ns/iter (+/- 112) = 43584 MB/s +test sherlock::no_match_really_common ... bench: 13,623 ns/iter (+/- 295) = 43671 MB/s +test sherlock::no_match_uncommon ... 
bench: 13,641 ns/iter (+/- 55) = 43613 MB/s +test sherlock::quotes ... bench: 232,451 ns/iter (+/- 6,555) = 2559 MB/s +test sherlock::repeated_class_negation ... bench: 36,984,199 ns/iter (+/- 623,153) = 16 MB/s +test sherlock::the_lower ... bench: 189,502 ns/iter (+/- 4,870) = 3139 MB/s +test sherlock::the_nocase ... bench: 294,945 ns/iter (+/- 9,381) = 2017 MB/s +test sherlock::the_upper ... bench: 21,591 ns/iter (+/- 680) = 27554 MB/s +test sherlock::the_whitespace ... bench: 424,862 ns/iter (+/- 7,197) = 1400 MB/s +test sherlock::word_ending_n ... bench: 1,126,768 ns/iter (+/- 13,900) = 527 MB/s +test sherlock::words ... bench: 4,517,167 ns/iter (+/- 55,809) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 150.58s + diff --git a/third_party/rust/regex/record/old-bench-log/12-regex-1.8.1/rust b/third_party/rust/regex/record/old-bench-log/12-regex-1.8.1/rust new file mode 100644 index 0000000000..282893e55e --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/12-regex-1.8.1/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 9 ns/iter (+/- 0) = 43333 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 9 ns/iter (+/- 0) = 2888 MB/s +test misc::easy0_1K ... bench: 24 ns/iter (+/- 1) = 43791 MB/s +test misc::easy0_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::easy0_32 ... bench: 25 ns/iter (+/- 0) = 2360 MB/s +test misc::easy0_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::easy1_1K ... bench: 18 ns/iter (+/- 1) = 58000 MB/s +test misc::easy1_1MB ... bench: 21 ns/iter (+/- 0) = 49933142 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 0) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... 
bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 29 ns/iter (+/- 0) = 36158724 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,802 ns/iter (+/- 6) = 55494 MB/s +test misc::long_needle2 ... bench: 207,353 ns/iter (+/- 165) = 482 MB/s +test misc::match_class ... bench: 41 ns/iter (+/- 2) = 1975 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::match_class_unicode ... bench: 168 ns/iter (+/- 3) = 958 MB/s +test misc::matches_set ... bench: 210 ns/iter (+/- 5) = 119 MB/s +test misc::medium_1K ... bench: 25 ns/iter (+/- 0) = 42080 MB/s +test misc::medium_1MB ... bench: 29 ns/iter (+/- 0) = 36158758 MB/s +test misc::medium_32 ... bench: 25 ns/iter (+/- 0) = 2400 MB/s +test misc::medium_32K ... bench: 25 ns/iter (+/- 0) = 1311840 MB/s +test misc::no_exponential ... bench: 268 ns/iter (+/- 7) = 373 MB/s +test misc::not_literal ... bench: 44 ns/iter (+/- 4) = 1159 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 2) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 1) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,215 ns/iter (+/- 12) = 865 MB/s +test misc::reallyhard_1MB ... bench: 1,215,907 ns/iter (+/- 6,442) = 862 MB/s +test misc::reallyhard_32 ... bench: 53 ns/iter (+/- 2) = 1113 MB/s +test misc::reallyhard_32K ... bench: 38,162 ns/iter (+/- 464) = 859 MB/s +test misc::replace_all ... bench: 86 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... 
bench: 2,355 ns/iter (+/- 470) = 3397 MB/s +test misc::short_haystack_1000000x ... bench: 91,039 ns/iter (+/- 157) = 87874 MB/s +test misc::short_haystack_100000x ... bench: 7,595 ns/iter (+/- 33) = 105333 MB/s +test misc::short_haystack_10000x ... bench: 2,865 ns/iter (+/- 9) = 27927 MB/s +test misc::short_haystack_1000x ... bench: 211 ns/iter (+/- 2) = 37966 MB/s +test misc::short_haystack_100x ... bench: 98 ns/iter (+/- 3) = 8275 MB/s +test misc::short_haystack_10x ... bench: 92 ns/iter (+/- 4) = 989 MB/s +test misc::short_haystack_1x ... bench: 90 ns/iter (+/- 2) = 211 MB/s +test misc::short_haystack_2x ... bench: 88 ns/iter (+/- 3) = 306 MB/s +test misc::short_haystack_3x ... bench: 91 ns/iter (+/- 3) = 384 MB/s +test misc::short_haystack_4x ... bench: 90 ns/iter (+/- 3) = 477 MB/s +test regexdna::find_new_lines ... bench: 7,323,399 ns/iter (+/- 24,661) = 694 MB/s +test regexdna::subst1 ... bench: 473,671 ns/iter (+/- 16,963) = 10731 MB/s +test regexdna::subst10 ... bench: 463,672 ns/iter (+/- 13,433) = 10963 MB/s +test regexdna::subst11 ... bench: 470,891 ns/iter (+/- 28,305) = 10795 MB/s +test regexdna::subst2 ... bench: 469,218 ns/iter (+/- 26,181) = 10833 MB/s +test regexdna::subst3 ... bench: 467,417 ns/iter (+/- 30,700) = 10875 MB/s +test regexdna::subst4 ... bench: 469,373 ns/iter (+/- 17,254) = 10830 MB/s +test regexdna::subst5 ... bench: 467,035 ns/iter (+/- 30,365) = 10884 MB/s +test regexdna::subst6 ... bench: 466,540 ns/iter (+/- 18,283) = 10895 MB/s +test regexdna::subst7 ... bench: 470,291 ns/iter (+/- 23,930) = 10809 MB/s +test regexdna::subst8 ... bench: 466,425 ns/iter (+/- 27,080) = 10898 MB/s +test regexdna::subst9 ... bench: 468,192 ns/iter (+/- 17,296) = 10857 MB/s +test regexdna::variant1 ... bench: 653,471 ns/iter (+/- 8,898) = 7779 MB/s +test regexdna::variant2 ... bench: 902,852 ns/iter (+/- 12,549) = 5630 MB/s +test regexdna::variant3 ... bench: 1,158,000 ns/iter (+/- 14,075) = 4389 MB/s +test regexdna::variant4 ... 
bench: 1,149,520 ns/iter (+/- 13,482) = 4422 MB/s +test regexdna::variant5 ... bench: 1,132,121 ns/iter (+/- 7,624) = 4490 MB/s +test regexdna::variant6 ... bench: 1,069,227 ns/iter (+/- 13,436) = 4754 MB/s +test regexdna::variant7 ... bench: 1,150,436 ns/iter (+/- 28,302) = 4418 MB/s +test regexdna::variant8 ... bench: 1,148,923 ns/iter (+/- 49,063) = 4424 MB/s +test regexdna::variant9 ... bench: 1,190,858 ns/iter (+/- 15,044) = 4268 MB/s +test rust_compile::compile_huge ... bench: 52,168 ns/iter (+/- 827) +test rust_compile::compile_huge_bytes ... bench: 3,330,456 ns/iter (+/- 57,242) +test rust_compile::compile_huge_full ... bench: 6,378,126 ns/iter (+/- 85,019) +test rust_compile::compile_simple ... bench: 2,291 ns/iter (+/- 39) +test rust_compile::compile_simple_bytes ... bench: 2,355 ns/iter (+/- 37) +test rust_compile::compile_simple_full ... bench: 14,581 ns/iter (+/- 103) +test rust_compile::compile_small ... bench: 10,443 ns/iter (+/- 114) +test rust_compile::compile_small_bytes ... bench: 11,269 ns/iter (+/- 150) +test rust_compile::compile_small_full ... bench: 14,746 ns/iter (+/- 212) +test sherlock::before_after_holmes ... bench: 699,736 ns/iter (+/- 6,402) = 850 MB/s +test sherlock::before_holmes ... bench: 28,001 ns/iter (+/- 198) = 21246 MB/s +test sherlock::everything_greedy ... bench: 1,029,174 ns/iter (+/- 33,321) = 578 MB/s +test sherlock::everything_greedy_nl ... bench: 460,103 ns/iter (+/- 23,290) = 1293 MB/s +test sherlock::holmes_cochar_watson ... bench: 57,666 ns/iter (+/- 907) = 10316 MB/s +test sherlock::holmes_coword_watson ... bench: 345,016 ns/iter (+/- 4,672) = 1724 MB/s +test sherlock::ing_suffix ... bench: 150,499 ns/iter (+/- 4,855) = 3953 MB/s +test sherlock::ing_suffix_limited_space ... bench: 777,723 ns/iter (+/- 8,076) = 764 MB/s +test sherlock::letters ... bench: 10,022,203 ns/iter (+/- 77,897) = 59 MB/s +test sherlock::letters_lower ... bench: 9,861,816 ns/iter (+/- 76,172) = 60 MB/s +test sherlock::letters_upper ... 
bench: 1,134,201 ns/iter (+/- 11,926) = 524 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,533 ns/iter (+/- 6,686) = 857 MB/s +test sherlock::name_alt1 ... bench: 11,974 ns/iter (+/- 292) = 49685 MB/s +test sherlock::name_alt2 ... bench: 44,708 ns/iter (+/- 573) = 13307 MB/s +test sherlock::name_alt3 ... bench: 49,873 ns/iter (+/- 785) = 11928 MB/s +test sherlock::name_alt3_nocase ... bench: 190,194 ns/iter (+/- 2,944) = 3128 MB/s +test sherlock::name_alt4 ... bench: 52,028 ns/iter (+/- 1,102) = 11434 MB/s +test sherlock::name_alt4_nocase ... bench: 119,891 ns/iter (+/- 921) = 4962 MB/s +test sherlock::name_alt5 ... bench: 47,139 ns/iter (+/- 1,617) = 12620 MB/s +test sherlock::name_alt5_nocase ... bench: 200,159 ns/iter (+/- 3,992) = 2972 MB/s +test sherlock::name_holmes ... bench: 17,902 ns/iter (+/- 577) = 33232 MB/s +test sherlock::name_holmes_nocase ... bench: 58,219 ns/iter (+/- 1,215) = 10218 MB/s +test sherlock::name_sherlock ... bench: 14,314 ns/iter (+/- 45) = 41563 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,399 ns/iter (+/- 45) = 41317 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,418 ns/iter (+/- 591) = 14364 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,877 ns/iter (+/- 545) = 14919 MB/s +test sherlock::name_whitespace ... bench: 17,883 ns/iter (+/- 151) = 33268 MB/s +test sherlock::no_match_common ... bench: 13,696 ns/iter (+/- 123) = 43438 MB/s +test sherlock::no_match_really_common ... bench: 10,157 ns/iter (+/- 222) = 58573 MB/s +test sherlock::no_match_uncommon ... bench: 13,663 ns/iter (+/- 53) = 43543 MB/s +test sherlock::quotes ... bench: 234,890 ns/iter (+/- 4,574) = 2532 MB/s +test sherlock::repeated_class_negation ... bench: 36,406,680 ns/iter (+/- 397,378) = 16 MB/s +test sherlock::the_lower ... bench: 192,028 ns/iter (+/- 5,315) = 3098 MB/s +test sherlock::the_nocase ... bench: 311,087 ns/iter (+/- 6,723) = 1912 MB/s +test sherlock::the_upper ... 
bench: 21,710 ns/iter (+/- 1,269) = 27403 MB/s +test sherlock::the_whitespace ... bench: 425,246 ns/iter (+/- 7,741) = 1399 MB/s +test sherlock::word_ending_n ... bench: 1,116,412 ns/iter (+/- 11,753) = 532 MB/s +test sherlock::words ... bench: 4,452,805 ns/iter (+/- 84,309) = 133 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 142.33s + diff --git a/third_party/rust/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes b/third_party/rust/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes new file mode 100644 index 0000000000..f5380a765d --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::easy0_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::easy0_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::easy0_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::easy0_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::easy1_1K ... bench: 18 ns/iter (+/- 0) = 58000 MB/s +test misc::easy1_1MB ... bench: 21 ns/iter (+/- 0) = 49933142 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 2) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... 
bench: 1,801 ns/iter (+/- 2) = 55525 MB/s +test misc::long_needle2 ... bench: 212,892 ns/iter (+/- 206) = 469 MB/s +test misc::match_class ... bench: 40 ns/iter (+/- 0) = 2025 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... bench: 174 ns/iter (+/- 2) = 143 MB/s +test misc::medium_1K ... bench: 25 ns/iter (+/- 0) = 42080 MB/s +test misc::medium_1MB ... bench: 29 ns/iter (+/- 0) = 36158758 MB/s +test misc::medium_32 ... bench: 25 ns/iter (+/- 0) = 2400 MB/s +test misc::medium_32K ... bench: 25 ns/iter (+/- 0) = 1311840 MB/s +test misc::no_exponential ... bench: 270 ns/iter (+/- 8) = 370 MB/s +test misc::not_literal ... bench: 44 ns/iter (+/- 1) = 1159 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 1) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 2) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,215 ns/iter (+/- 15) = 865 MB/s +test misc::reallyhard_1MB ... bench: 1,217,631 ns/iter (+/- 11,216) = 861 MB/s +test misc::reallyhard_32 ... bench: 53 ns/iter (+/- 4) = 1113 MB/s +test misc::reallyhard_32K ... bench: 38,251 ns/iter (+/- 364) = 857 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,353 ns/iter (+/- 4) = 3399 MB/s +test regexdna::find_new_lines ... bench: 7,322,463 ns/iter (+/- 37,966) = 694 MB/s +test regexdna::subst1 ... bench: 466,849 ns/iter (+/- 12,252) = 10888 MB/s +test regexdna::subst10 ... bench: 465,011 ns/iter (+/- 19,693) = 10931 MB/s +test regexdna::subst11 ... bench: 457,806 ns/iter (+/- 13,453) = 11103 MB/s +test regexdna::subst2 ... bench: 456,878 ns/iter (+/- 32,828) = 11126 MB/s +test regexdna::subst3 ... bench: 465,531 ns/iter (+/- 21,786) = 10919 MB/s +test regexdna::subst4 ... 
bench: 454,553 ns/iter (+/- 12,698) = 11183 MB/s +test regexdna::subst5 ... bench: 456,977 ns/iter (+/- 13,155) = 11123 MB/s +test regexdna::subst6 ... bench: 466,105 ns/iter (+/- 15,667) = 10906 MB/s +test regexdna::subst7 ... bench: 462,655 ns/iter (+/- 18,871) = 10987 MB/s +test regexdna::subst8 ... bench: 456,642 ns/iter (+/- 19,218) = 11132 MB/s +test regexdna::subst9 ... bench: 456,307 ns/iter (+/- 15,369) = 11140 MB/s +test regexdna::variant1 ... bench: 655,033 ns/iter (+/- 7,901) = 7760 MB/s +test regexdna::variant2 ... bench: 902,675 ns/iter (+/- 15,165) = 5631 MB/s +test regexdna::variant3 ... bench: 1,159,521 ns/iter (+/- 14,489) = 4384 MB/s +test regexdna::variant4 ... bench: 1,147,781 ns/iter (+/- 16,536) = 4428 MB/s +test regexdna::variant5 ... bench: 1,133,068 ns/iter (+/- 13,938) = 4486 MB/s +test regexdna::variant6 ... bench: 1,061,174 ns/iter (+/- 14,478) = 4790 MB/s +test regexdna::variant7 ... bench: 1,151,637 ns/iter (+/- 35,753) = 4414 MB/s +test regexdna::variant8 ... bench: 1,137,068 ns/iter (+/- 37,678) = 4470 MB/s +test regexdna::variant9 ... bench: 1,185,082 ns/iter (+/- 14,355) = 4289 MB/s +test rust_compile::compile_huge ... bench: 66,894 ns/iter (+/- 2,425) +test rust_compile::compile_huge_bytes ... bench: 3,331,663 ns/iter (+/- 47,261) +test rust_compile::compile_huge_full ... bench: 6,446,254 ns/iter (+/- 65,334) +test rust_compile::compile_simple ... bench: 2,351 ns/iter (+/- 71) +test rust_compile::compile_simple_bytes ... bench: 2,350 ns/iter (+/- 49) +test rust_compile::compile_simple_full ... bench: 14,460 ns/iter (+/- 144) +test rust_compile::compile_small ... bench: 10,350 ns/iter (+/- 120) +test rust_compile::compile_small_bytes ... bench: 10,993 ns/iter (+/- 89) +test rust_compile::compile_small_full ... bench: 14,201 ns/iter (+/- 139) +test sherlock::before_after_holmes ... bench: 698,092 ns/iter (+/- 6,907) = 852 MB/s +test sherlock::before_holmes ... 
bench: 29,127 ns/iter (+/- 1,001) = 20425 MB/s +test sherlock::everything_greedy ... bench: 1,026,902 ns/iter (+/- 86,299) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 433,157 ns/iter (+/- 10,129) = 1373 MB/s +test sherlock::holmes_cochar_watson ... bench: 57,103 ns/iter (+/- 509) = 10418 MB/s +test sherlock::holmes_coword_watson ... bench: 344,973 ns/iter (+/- 3,288) = 1724 MB/s +test sherlock::ing_suffix ... bench: 158,337 ns/iter (+/- 2,492) = 3757 MB/s +test sherlock::ing_suffix_limited_space ... bench: 776,703 ns/iter (+/- 8,000) = 765 MB/s +test sherlock::letters ... bench: 10,179,909 ns/iter (+/- 55,188) = 58 MB/s +test sherlock::letters_lower ... bench: 10,007,465 ns/iter (+/- 75,168) = 59 MB/s +test sherlock::letters_upper ... bench: 1,116,201 ns/iter (+/- 11,571) = 532 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,124 ns/iter (+/- 6,540) = 858 MB/s +test sherlock::name_alt1 ... bench: 12,079 ns/iter (+/- 192) = 49253 MB/s +test sherlock::name_alt2 ... bench: 44,336 ns/iter (+/- 1,424) = 13418 MB/s +test sherlock::name_alt3 ... bench: 49,569 ns/iter (+/- 721) = 12002 MB/s +test sherlock::name_alt3_nocase ... bench: 189,812 ns/iter (+/- 2,952) = 3134 MB/s +test sherlock::name_alt4 ... bench: 52,132 ns/iter (+/- 1,182) = 11412 MB/s +test sherlock::name_alt4_nocase ... bench: 120,591 ns/iter (+/- 2,521) = 4933 MB/s +test sherlock::name_alt5 ... bench: 46,956 ns/iter (+/- 545) = 12670 MB/s +test sherlock::name_alt5_nocase ... bench: 199,252 ns/iter (+/- 2,212) = 2985 MB/s +test sherlock::name_holmes ... bench: 17,983 ns/iter (+/- 591) = 33083 MB/s +test sherlock::name_holmes_nocase ... bench: 58,139 ns/iter (+/- 919) = 10232 MB/s +test sherlock::name_sherlock ... bench: 14,283 ns/iter (+/- 113) = 41653 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,587 ns/iter (+/- 82) = 40785 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 40,947 ns/iter (+/- 385) = 14529 MB/s +test sherlock::name_sherlock_nocase ... 
bench: 39,607 ns/iter (+/- 567) = 15020 MB/s +test sherlock::name_whitespace ... bench: 18,803 ns/iter (+/- 232) = 31640 MB/s +test sherlock::no_match_common ... bench: 13,704 ns/iter (+/- 73) = 43413 MB/s +test sherlock::no_match_really_common ... bench: 14,166 ns/iter (+/- 191) = 41997 MB/s +test sherlock::no_match_uncommon ... bench: 13,702 ns/iter (+/- 36) = 43419 MB/s +test sherlock::quotes ... bench: 232,609 ns/iter (+/- 3,217) = 2557 MB/s +test sherlock::repeated_class_negation ... bench: 36,167,769 ns/iter (+/- 592,579) = 16 MB/s +test sherlock::the_lower ... bench: 188,281 ns/iter (+/- 2,966) = 3159 MB/s +test sherlock::the_nocase ... bench: 312,853 ns/iter (+/- 23,145) = 1901 MB/s +test sherlock::the_upper ... bench: 20,987 ns/iter (+/- 909) = 28347 MB/s +test sherlock::the_whitespace ... bench: 427,154 ns/iter (+/- 6,396) = 1392 MB/s +test sherlock::word_ending_n ... bench: 1,112,964 ns/iter (+/- 15,393) = 534 MB/s +test sherlock::words ... bench: 4,513,468 ns/iter (+/- 35,410) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 143.96s + diff --git a/third_party/rust/regex/record/old-bench-log/13-regex-1.9.0/rust b/third_party/rust/regex/record/old-bench-log/13-regex-1.9.0/rust new file mode 100644 index 0000000000..b46bdf9152 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/13-regex-1.9.0/rust @@ -0,0 +1,115 @@ + +running 110 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 1) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::anchored_literal_short_non_match ... bench: 12 ns/iter (+/- 1) = 2166 MB/s +test misc::easy0_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::easy0_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::easy0_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::easy0_32K ... 
bench: 43 ns/iter (+/- 0) = 762674 MB/s +test misc::easy1_1K ... bench: 35 ns/iter (+/- 0) = 29828 MB/s +test misc::easy1_1MB ... bench: 35 ns/iter (+/- 0) = 29959885 MB/s +test misc::easy1_32 ... bench: 35 ns/iter (+/- 0) = 1485 MB/s +test misc::easy1_32K ... bench: 35 ns/iter (+/- 0) = 936800 MB/s +test misc::hard_1K ... bench: 43 ns/iter (+/- 0) = 24441 MB/s +test misc::hard_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::hard_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::hard_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::is_match_set ... bench: 46 ns/iter (+/- 1) = 543 MB/s +test misc::literal ... bench: 9 ns/iter (+/- 0) = 5666 MB/s +test misc::long_needle1 ... bench: 1,801 ns/iter (+/- 24) = 55525 MB/s +test misc::long_needle2 ... bench: 194,124 ns/iter (+/- 289) = 515 MB/s +test misc::match_class ... bench: 22 ns/iter (+/- 1) = 3681 MB/s +test misc::match_class_in_range ... bench: 10 ns/iter (+/- 0) = 8100 MB/s +test misc::match_class_unicode ... bench: 196 ns/iter (+/- 0) = 821 MB/s +test misc::matches_set ... bench: 55 ns/iter (+/- 3) = 454 MB/s +test misc::medium_1K ... bench: 43 ns/iter (+/- 0) = 24465 MB/s +test misc::medium_1MB ... bench: 43 ns/iter (+/- 0) = 24386139 MB/s +test misc::medium_32 ... bench: 43 ns/iter (+/- 0) = 1395 MB/s +test misc::medium_32K ... bench: 43 ns/iter (+/- 0) = 762697 MB/s +test misc::no_exponential ... bench: 167 ns/iter (+/- 0) = 598 MB/s +test misc::not_literal ... bench: 26 ns/iter (+/- 1) = 1961 MB/s +test misc::one_pass_long_prefix ... bench: 40 ns/iter (+/- 0) = 650 MB/s +test misc::one_pass_long_prefix_not ... bench: 40 ns/iter (+/- 0) = 650 MB/s +test misc::one_pass_short ... bench: 30 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_short_not ... bench: 31 ns/iter (+/- 0) = 548 MB/s +test misc::reallyhard2_1K ... bench: 67 ns/iter (+/- 1) = 15522 MB/s +test misc::reallyhard_1K ... bench: 78 ns/iter (+/- 1) = 13474 MB/s +test misc::reallyhard_1MB ... 
bench: 19,310 ns/iter (+/- 80) = 54303 MB/s +test misc::reallyhard_32 ... bench: 62 ns/iter (+/- 2) = 951 MB/s +test misc::reallyhard_32K ... bench: 543 ns/iter (+/- 4) = 60395 MB/s +test misc::replace_all ... bench: 151 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 9,302 ns/iter (+/- 25) = 860 MB/s +test misc::short_haystack_1000000x ... bench: 90,868 ns/iter (+/- 354) = 88039 MB/s +test misc::short_haystack_100000x ... bench: 7,215 ns/iter (+/- 18) = 110881 MB/s +test misc::short_haystack_10000x ... bench: 605 ns/iter (+/- 2) = 132249 MB/s +test misc::short_haystack_1000x ... bench: 148 ns/iter (+/- 2) = 54128 MB/s +test misc::short_haystack_100x ... bench: 83 ns/iter (+/- 3) = 9771 MB/s +test misc::short_haystack_10x ... bench: 89 ns/iter (+/- 1) = 1022 MB/s +test misc::short_haystack_1x ... bench: 79 ns/iter (+/- 1) = 240 MB/s +test misc::short_haystack_2x ... bench: 79 ns/iter (+/- 1) = 341 MB/s +test misc::short_haystack_3x ... bench: 80 ns/iter (+/- 2) = 437 MB/s +test misc::short_haystack_4x ... bench: 79 ns/iter (+/- 1) = 544 MB/s +test regexdna::find_new_lines ... bench: 1,748,215 ns/iter (+/- 25,793) = 2907 MB/s +test regexdna::subst1 ... bench: 486,169 ns/iter (+/- 11,425) = 10456 MB/s +test regexdna::subst10 ... bench: 479,019 ns/iter (+/- 7,468) = 10612 MB/s +test regexdna::subst11 ... bench: 481,118 ns/iter (+/- 10,305) = 10565 MB/s +test regexdna::subst2 ... bench: 484,508 ns/iter (+/- 11,753) = 10491 MB/s +test regexdna::subst3 ... bench: 481,861 ns/iter (+/- 7,991) = 10549 MB/s +test regexdna::subst4 ... bench: 477,043 ns/iter (+/- 12,101) = 10656 MB/s +test regexdna::subst5 ... bench: 483,954 ns/iter (+/- 7,728) = 10503 MB/s +test regexdna::subst6 ... bench: 479,564 ns/iter (+/- 13,514) = 10600 MB/s +test regexdna::subst7 ... bench: 481,345 ns/iter (+/- 11,205) = 10560 MB/s +test regexdna::subst8 ... bench: 479,772 ns/iter (+/- 13,266) = 10595 MB/s +test regexdna::subst9 ... 
bench: 480,299 ns/iter (+/- 9,997) = 10583 MB/s +test regexdna::variant1 ... bench: 693,230 ns/iter (+/- 21,808) = 7332 MB/s +test regexdna::variant2 ... bench: 936,552 ns/iter (+/- 9,916) = 5427 MB/s +test regexdna::variant3 ... bench: 1,192,921 ns/iter (+/- 11,038) = 4261 MB/s +test regexdna::variant4 ... bench: 1,170,341 ns/iter (+/- 27,745) = 4343 MB/s +test regexdna::variant5 ... bench: 1,166,877 ns/iter (+/- 8,369) = 4356 MB/s +test regexdna::variant6 ... bench: 1,085,919 ns/iter (+/- 9,594) = 4681 MB/s +test regexdna::variant7 ... bench: 1,248,718 ns/iter (+/- 13,480) = 4070 MB/s +test regexdna::variant8 ... bench: 1,216,643 ns/iter (+/- 15,505) = 4178 MB/s +test regexdna::variant9 ... bench: 1,219,951 ns/iter (+/- 14,109) = 4166 MB/s +test sherlock::before_after_holmes ... bench: 27,363 ns/iter (+/- 604) = 21742 MB/s +test sherlock::before_holmes ... bench: 31,147 ns/iter (+/- 876) = 19100 MB/s +test sherlock::everything_greedy ... bench: 1,326,354 ns/iter (+/- 22,628) = 448 MB/s +test sherlock::everything_greedy_nl ... bench: 801,343 ns/iter (+/- 895) = 742 MB/s +test sherlock::holmes_cochar_watson ... bench: 56,328 ns/iter (+/- 1,009) = 10561 MB/s +test sherlock::holmes_coword_watson ... bench: 301,186 ns/iter (+/- 3,615) = 1975 MB/s +test sherlock::ing_suffix ... bench: 176,428 ns/iter (+/- 2,182) = 3372 MB/s +test sherlock::ing_suffix_limited_space ... bench: 173,948 ns/iter (+/- 5,073) = 3420 MB/s +test sherlock::letters ... bench: 7,226,608 ns/iter (+/- 261,849) = 82 MB/s +test sherlock::letters_lower ... bench: 7,024,589 ns/iter (+/- 145,281) = 84 MB/s +test sherlock::letters_upper ... bench: 1,004,841 ns/iter (+/- 6,857) = 592 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,978 ns/iter (+/- 90) = 37234 MB/s +test sherlock::name_alt1 ... bench: 11,151 ns/iter (+/- 289) = 53352 MB/s +test sherlock::name_alt2 ... bench: 45,441 ns/iter (+/- 960) = 13092 MB/s +test sherlock::name_alt3 ... 
bench: 51,934 ns/iter (+/- 806) = 11455 MB/s +test sherlock::name_alt3_nocase ... bench: 171,844 ns/iter (+/- 4,176) = 3462 MB/s +test sherlock::name_alt4 ... bench: 46,611 ns/iter (+/- 1,072) = 12763 MB/s +test sherlock::name_alt4_nocase ... bench: 74,956 ns/iter (+/- 2,098) = 7937 MB/s +test sherlock::name_alt5 ... bench: 47,595 ns/iter (+/- 595) = 12499 MB/s +test sherlock::name_alt5_nocase ... bench: 100,636 ns/iter (+/- 814) = 5911 MB/s +test sherlock::name_holmes ... bench: 19,293 ns/iter (+/- 687) = 30836 MB/s +test sherlock::name_holmes_nocase ... bench: 52,310 ns/iter (+/- 1,024) = 11373 MB/s +test sherlock::name_sherlock ... bench: 16,080 ns/iter (+/- 327) = 36998 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,605 ns/iter (+/- 120) = 40734 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 38,662 ns/iter (+/- 360) = 15388 MB/s +test sherlock::name_sherlock_nocase ... bench: 37,650 ns/iter (+/- 316) = 15801 MB/s +test sherlock::name_whitespace ... bench: 16,234 ns/iter (+/- 125) = 36647 MB/s +test sherlock::no_match_common ... bench: 13,709 ns/iter (+/- 72) = 43397 MB/s +test sherlock::no_match_really_common ... bench: 9,870 ns/iter (+/- 133) = 60276 MB/s +test sherlock::no_match_uncommon ... bench: 13,735 ns/iter (+/- 57) = 43315 MB/s +test sherlock::quotes ... bench: 189,377 ns/iter (+/- 2,105) = 3141 MB/s +test sherlock::repeated_class_negation ... bench: 29,934 ns/iter (+/- 1,249) = 19874 MB/s +test sherlock::the_lower ... bench: 213,236 ns/iter (+/- 3,823) = 2790 MB/s +test sherlock::the_nocase ... bench: 322,922 ns/iter (+/- 5,946) = 1842 MB/s +test sherlock::the_upper ... bench: 23,494 ns/iter (+/- 718) = 25322 MB/s +test sherlock::the_whitespace ... bench: 392,113 ns/iter (+/- 6,046) = 1517 MB/s +test sherlock::word_ending_n ... bench: 673,618 ns/iter (+/- 12,865) = 883 MB/s +test sherlock::words ... bench: 3,632,096 ns/iter (+/- 56,944) = 163 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 110 measured; 0 filtered out; finished in 117.87s + diff --git a/third_party/rust/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes b/third_party/rust/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes new file mode 100644 index 0000000000..8ac6c046c8 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes @@ -0,0 +1,103 @@ + +running 98 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 0) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::anchored_literal_short_non_match ... bench: 12 ns/iter (+/- 0) = 2166 MB/s +test misc::easy0_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::easy0_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::easy0_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::easy0_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::easy1_1K ... bench: 34 ns/iter (+/- 1) = 30705 MB/s +test misc::easy1_1MB ... bench: 34 ns/iter (+/- 0) = 30841058 MB/s +test misc::easy1_32 ... bench: 34 ns/iter (+/- 0) = 1529 MB/s +test misc::easy1_32K ... bench: 34 ns/iter (+/- 0) = 964352 MB/s +test misc::hard_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::hard_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::hard_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::hard_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::is_match_set ... bench: 47 ns/iter (+/- 1) = 531 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 1,808 ns/iter (+/- 7) = 55310 MB/s +test misc::long_needle2 ... bench: 213,106 ns/iter (+/- 416) = 469 MB/s +test misc::match_class ... bench: 23 ns/iter (+/- 1) = 3521 MB/s +test misc::match_class_in_range ... bench: 11 ns/iter (+/- 0) = 7363 MB/s +test misc::matches_set ... 
bench: 56 ns/iter (+/- 3) = 446 MB/s +test misc::medium_1K ... bench: 43 ns/iter (+/- 0) = 24465 MB/s +test misc::medium_1MB ... bench: 43 ns/iter (+/- 0) = 24386139 MB/s +test misc::medium_32 ... bench: 43 ns/iter (+/- 0) = 1395 MB/s +test misc::medium_32K ... bench: 43 ns/iter (+/- 0) = 762697 MB/s +test misc::no_exponential ... bench: 162 ns/iter (+/- 4) = 617 MB/s +test misc::not_literal ... bench: 27 ns/iter (+/- 1) = 1888 MB/s +test misc::one_pass_long_prefix ... bench: 41 ns/iter (+/- 0) = 634 MB/s +test misc::one_pass_long_prefix_not ... bench: 41 ns/iter (+/- 0) = 634 MB/s +test misc::one_pass_short ... bench: 30 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_short_not ... bench: 31 ns/iter (+/- 0) = 548 MB/s +test misc::reallyhard2_1K ... bench: 70 ns/iter (+/- 1) = 14857 MB/s +test misc::reallyhard_1K ... bench: 78 ns/iter (+/- 3) = 13474 MB/s +test misc::reallyhard_1MB ... bench: 19,850 ns/iter (+/- 345) = 52826 MB/s +test misc::reallyhard_32 ... bench: 61 ns/iter (+/- 2) = 967 MB/s +test misc::reallyhard_32K ... bench: 546 ns/iter (+/- 8) = 60064 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 9,304 ns/iter (+/- 29) = 859 MB/s +test regexdna::find_new_lines ... bench: 1,733,767 ns/iter (+/- 66,699) = 2932 MB/s +test regexdna::subst1 ... bench: 486,442 ns/iter (+/- 11,929) = 10450 MB/s +test regexdna::subst10 ... bench: 486,073 ns/iter (+/- 12,157) = 10458 MB/s +test regexdna::subst11 ... bench: 483,485 ns/iter (+/- 11,703) = 10514 MB/s +test regexdna::subst2 ... bench: 487,298 ns/iter (+/- 9,184) = 10431 MB/s +test regexdna::subst3 ... bench: 491,219 ns/iter (+/- 9,614) = 10348 MB/s +test regexdna::subst4 ... bench: 482,668 ns/iter (+/- 9,576) = 10531 MB/s +test regexdna::subst5 ... bench: 489,673 ns/iter (+/- 8,331) = 10381 MB/s +test regexdna::subst6 ... bench: 484,707 ns/iter (+/- 5,276) = 10487 MB/s +test regexdna::subst7 ... bench: 485,109 ns/iter (+/- 9,360) = 10478 MB/s +test regexdna::subst8 ... 
bench: 485,790 ns/iter (+/- 9,298) = 10464 MB/s +test regexdna::subst9 ... bench: 483,255 ns/iter (+/- 12,434) = 10519 MB/s +test regexdna::variant1 ... bench: 654,757 ns/iter (+/- 8,719) = 7763 MB/s +test regexdna::variant2 ... bench: 905,052 ns/iter (+/- 9,599) = 5616 MB/s +test regexdna::variant3 ... bench: 1,161,187 ns/iter (+/- 13,798) = 4377 MB/s +test regexdna::variant4 ... bench: 1,144,656 ns/iter (+/- 15,198) = 4440 MB/s +test regexdna::variant5 ... bench: 1,136,222 ns/iter (+/- 9,112) = 4473 MB/s +test regexdna::variant6 ... bench: 1,062,124 ns/iter (+/- 12,336) = 4786 MB/s +test regexdna::variant7 ... bench: 1,144,371 ns/iter (+/- 44,700) = 4442 MB/s +test regexdna::variant8 ... bench: 1,143,064 ns/iter (+/- 53,456) = 4447 MB/s +test regexdna::variant9 ... bench: 1,187,063 ns/iter (+/- 14,341) = 4282 MB/s +test sherlock::before_after_holmes ... bench: 27,804 ns/iter (+/- 598) = 21397 MB/s +test sherlock::before_holmes ... bench: 31,197 ns/iter (+/- 933) = 19070 MB/s +test sherlock::everything_greedy ... bench: 1,272,335 ns/iter (+/- 12,466) = 467 MB/s +test sherlock::everything_greedy_nl ... bench: 801,469 ns/iter (+/- 955) = 742 MB/s +test sherlock::holmes_cochar_watson ... bench: 56,790 ns/iter (+/- 1,606) = 10476 MB/s +test sherlock::holmes_coword_watson ... bench: 300,554 ns/iter (+/- 3,460) = 1979 MB/s +test sherlock::ing_suffix ... bench: 179,355 ns/iter (+/- 5,486) = 3317 MB/s +test sherlock::ing_suffix_limited_space ... bench: 175,703 ns/iter (+/- 2,380) = 3386 MB/s +test sherlock::letters ... bench: 7,197,094 ns/iter (+/- 181,502) = 82 MB/s +test sherlock::letters_lower ... bench: 7,100,979 ns/iter (+/- 155,898) = 83 MB/s +test sherlock::letters_upper ... bench: 1,018,217 ns/iter (+/- 21,695) = 584 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,931 ns/iter (+/- 140) = 37344 MB/s +test sherlock::name_alt1 ... bench: 10,932 ns/iter (+/- 96) = 54421 MB/s +test sherlock::name_alt2 ... 
bench: 45,580 ns/iter (+/- 829) = 13052 MB/s +test sherlock::name_alt3 ... bench: 51,942 ns/iter (+/- 1,418) = 11453 MB/s +test sherlock::name_alt3_nocase ... bench: 171,749 ns/iter (+/- 1,451) = 3463 MB/s +test sherlock::name_alt4 ... bench: 45,705 ns/iter (+/- 1,536) = 13016 MB/s +test sherlock::name_alt4_nocase ... bench: 73,782 ns/iter (+/- 1,679) = 8063 MB/s +test sherlock::name_alt5 ... bench: 48,045 ns/iter (+/- 1,261) = 12382 MB/s +test sherlock::name_alt5_nocase ... bench: 100,307 ns/iter (+/- 553) = 5931 MB/s +test sherlock::name_holmes ... bench: 18,916 ns/iter (+/- 662) = 31451 MB/s +test sherlock::name_holmes_nocase ... bench: 52,714 ns/iter (+/- 774) = 11286 MB/s +test sherlock::name_sherlock ... bench: 14,575 ns/iter (+/- 163) = 40818 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,625 ns/iter (+/- 166) = 40679 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 39,024 ns/iter (+/- 361) = 15245 MB/s +test sherlock::name_sherlock_nocase ... bench: 38,025 ns/iter (+/- 418) = 15645 MB/s +test sherlock::name_whitespace ... bench: 16,247 ns/iter (+/- 88) = 36618 MB/s +test sherlock::no_match_common ... bench: 13,724 ns/iter (+/- 28) = 43349 MB/s +test sherlock::no_match_really_common ... bench: 13,798 ns/iter (+/- 93) = 43117 MB/s +test sherlock::no_match_uncommon ... bench: 13,671 ns/iter (+/- 80) = 43517 MB/s +test sherlock::quotes ... bench: 189,359 ns/iter (+/- 2,334) = 3141 MB/s +test sherlock::repeated_class_negation ... bench: 29,083 ns/iter (+/- 708) = 20456 MB/s +test sherlock::the_lower ... bench: 204,122 ns/iter (+/- 4,256) = 2914 MB/s +test sherlock::the_nocase ... bench: 319,388 ns/iter (+/- 6,790) = 1862 MB/s +test sherlock::the_upper ... bench: 22,706 ns/iter (+/- 961) = 26201 MB/s +test sherlock::the_whitespace ... bench: 386,276 ns/iter (+/- 4,950) = 1540 MB/s +test sherlock::word_ending_n ... bench: 690,010 ns/iter (+/- 8,516) = 862 MB/s +test sherlock::words ... 
bench: 3,659,990 ns/iter (+/- 104,505) = 162 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 98 measured; 0 filtered out; finished in 105.65s + diff --git a/third_party/rust/regex/record/old-bench-log/README.md b/third_party/rust/regex/record/old-bench-log/README.md new file mode 100644 index 0000000000..aab290e8e3 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/README.md @@ -0,0 +1,11 @@ +These represent an old log of benchmarks from regex 1.7.3 and older. New +and much more comprehensive benchmarks are now maintained as part of the +[rebar] project. + +We keep these old benchmark recordings for posterity, but they may be removed +in the future. + +Measurements can be compared using the [`cargo-benchcmp`][cargo-benchcmp] tool. + +[rebar]: https://github.com/BurntSushi/rebar +[cargo-benchcmp]: https://github.com/BurntSushi/cargo-benchcmp diff --git a/third_party/rust/regex/record/old-bench-log/old/01-before b/third_party/rust/regex/record/old-bench-log/old/01-before new file mode 100644 index 0000000000..74890a34fe --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/01-before @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 520 ns/iter (+/- 1) +test bench::anchored_literal_long_non_match ... bench: 236 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 519 ns/iter (+/- 2) +test bench::anchored_literal_short_non_match ... bench: 238 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 7742 ns/iter (+/- 97) = 132 MB/s +test bench::easy0_32 ... bench: 4989 ns/iter (+/- 20) = 6 MB/s +test bench::easy0_32K ... bench: 96347 ns/iter (+/- 997) = 340 MB/s +test bench::easy1_1K ... bench: 9805 ns/iter (+/- 1846) = 104 MB/s +test bench::easy1_32 ... bench: 4930 ns/iter (+/- 202) = 6 MB/s +test bench::easy1_32K ... bench: 163332 ns/iter (+/- 9207) = 200 MB/s +test bench::hard_1K ... bench: 97455 ns/iter (+/- 1089) = 10 MB/s +test bench::hard_32 ... 
bench: 8256 ns/iter (+/- 148) = 3 MB/s +test bench::hard_32K ... bench: 2948095 ns/iter (+/- 11988) = 11 MB/s +test bench::literal ... bench: 371 ns/iter (+/- 5) +test bench::match_class ... bench: 2168 ns/iter (+/- 12) +test bench::match_class_in_range ... bench: 2379 ns/iter (+/- 13) +test bench::medium_1K ... bench: 37073 ns/iter (+/- 1100) = 27 MB/s +test bench::medium_32 ... bench: 6183 ns/iter (+/- 218) = 5 MB/s +test bench::medium_32K ... bench: 1032000 ns/iter (+/- 8278) = 31 MB/s +test bench::no_exponential ... bench: 727975 ns/iter (+/- 2970) +test bench::not_literal ... bench: 4670 ns/iter (+/- 29) +test bench::one_pass_long_prefix ... bench: 1562 ns/iter (+/- 24) +test bench::one_pass_long_prefix_not ... bench: 1539 ns/iter (+/- 40) +test bench::one_pass_short_a ... bench: 2688 ns/iter (+/- 21) +test bench::one_pass_short_a_not ... bench: 4197 ns/iter (+/- 36) +test bench::one_pass_short_b ... bench: 2198 ns/iter (+/- 22) +test bench::one_pass_short_b_not ... bench: 3761 ns/iter (+/- 41) +test bench::replace_all ... bench: 2874 ns/iter (+/- 25) diff --git a/third_party/rust/regex/record/old-bench-log/old/02-new-syntax-crate b/third_party/rust/regex/record/old-bench-log/old/02-new-syntax-crate new file mode 100644 index 0000000000..267808ffb7 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/02-new-syntax-crate @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 545 ns/iter (+/- 12) +test bench::anchored_literal_long_non_match ... bench: 251 ns/iter (+/- 11) +test bench::anchored_literal_short_match ... bench: 521 ns/iter (+/- 31) +test bench::anchored_literal_short_non_match ... bench: 231 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 7465 ns/iter (+/- 102) = 137 MB/s +test bench::easy0_32 ... bench: 4995 ns/iter (+/- 27) = 6 MB/s +test bench::easy0_32K ... bench: 86985 ns/iter (+/- 755) = 376 MB/s +test bench::easy1_1K ... bench: 9493 ns/iter (+/- 1727) = 107 MB/s +test bench::easy1_32 ... 
bench: 4955 ns/iter (+/- 324) = 6 MB/s +test bench::easy1_32K ... bench: 155288 ns/iter (+/- 13016) = 210 MB/s +test bench::hard_1K ... bench: 95925 ns/iter (+/- 1674) = 10 MB/s +test bench::hard_32 ... bench: 8264 ns/iter (+/- 151) = 3 MB/s +test bench::hard_32K ... bench: 2886440 ns/iter (+/- 25807) = 11 MB/s +test bench::literal ... bench: 365 ns/iter (+/- 12) +test bench::match_class ... bench: 2313 ns/iter (+/- 8) +test bench::match_class_in_range ... bench: 2596 ns/iter (+/- 8) +test bench::medium_1K ... bench: 38136 ns/iter (+/- 941) = 26 MB/s +test bench::medium_32 ... bench: 6178 ns/iter (+/- 147) = 5 MB/s +test bench::medium_32K ... bench: 1065698 ns/iter (+/- 6815) = 30 MB/s +test bench::no_exponential ... bench: 682461 ns/iter (+/- 2860) +test bench::not_literal ... bench: 4525 ns/iter (+/- 67) +test bench::one_pass_long_prefix ... bench: 1459 ns/iter (+/- 13) +test bench::one_pass_long_prefix_not ... bench: 1463 ns/iter (+/- 8) +test bench::one_pass_short_a ... bench: 2615 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 4066 ns/iter (+/- 48) +test bench::one_pass_short_b ... bench: 2064 ns/iter (+/- 10) +test bench::one_pass_short_b_not ... bench: 3502 ns/iter (+/- 24) +test bench::replace_all ... bench: 2949 ns/iter (+/- 15) diff --git a/third_party/rust/regex/record/old-bench-log/old/03-new-syntax-crate b/third_party/rust/regex/record/old-bench-log/old/03-new-syntax-crate new file mode 100644 index 0000000000..a50005d85e --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/03-new-syntax-crate @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 373 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 204 ns/iter (+/- 3) +test bench::anchored_literal_short_match ... bench: 376 ns/iter (+/- 5) +test bench::anchored_literal_short_non_match ... bench: 206 ns/iter (+/- 3) +test bench::easy0_1K ... bench: 9136 ns/iter (+/- 177) = 112 MB/s +test bench::easy0_32 ... 
bench: 6641 ns/iter (+/- 86) = 4 MB/s +test bench::easy0_32K ... bench: 88826 ns/iter (+/- 1366) = 368 MB/s +test bench::easy1_1K ... bench: 10937 ns/iter (+/- 737) = 93 MB/s +test bench::easy1_32 ... bench: 7366 ns/iter (+/- 219) = 4 MB/s +test bench::easy1_32K ... bench: 122324 ns/iter (+/- 4628) = 267 MB/s +test bench::hard_1K ... bench: 59998 ns/iter (+/- 965) = 17 MB/s +test bench::hard_32 ... bench: 9058 ns/iter (+/- 123) = 3 MB/s +test bench::hard_32K ... bench: 1694326 ns/iter (+/- 27226) = 19 MB/s +test bench::literal ... bench: 336 ns/iter (+/- 6) +test bench::match_class ... bench: 2109 ns/iter (+/- 27) +test bench::match_class_in_range ... bench: 2274 ns/iter (+/- 32) +test bench::medium_1K ... bench: 38317 ns/iter (+/- 1075) = 26 MB/s +test bench::medium_32 ... bench: 7969 ns/iter (+/- 115) = 4 MB/s +test bench::medium_32K ... bench: 1028260 ns/iter (+/- 12905) = 31 MB/s +test bench::no_exponential ... bench: 257719 ns/iter (+/- 4939) +test bench::not_literal ... bench: 1699 ns/iter (+/- 31) +test bench::one_pass_long_prefix ... bench: 750 ns/iter (+/- 9) +test bench::one_pass_long_prefix_not ... bench: 747 ns/iter (+/- 12) +test bench::one_pass_short_a ... bench: 1844 ns/iter (+/- 22) +test bench::one_pass_short_a_not ... bench: 2395 ns/iter (+/- 21) +test bench::one_pass_short_b ... bench: 1270 ns/iter (+/- 26) +test bench::one_pass_short_b_not ... bench: 1869 ns/iter (+/- 25) +test bench::replace_all ... bench: 3124 ns/iter (+/- 53) diff --git a/third_party/rust/regex/record/old-bench-log/old/04-fixed-benchmark b/third_party/rust/regex/record/old-bench-log/old/04-fixed-benchmark new file mode 100644 index 0000000000..1956e98466 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/04-fixed-benchmark @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 373 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 202 ns/iter (+/- 12) +test bench::anchored_literal_short_match ... 
bench: 380 ns/iter (+/- 135) +test bench::anchored_literal_short_non_match ... bench: 211 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 2,723 ns/iter (+/- 101) = 376 MB/s +test bench::easy0_32 ... bench: 255 ns/iter (+/- 2) = 125 MB/s +test bench::easy0_32K ... bench: 81,845 ns/iter (+/- 598) = 400 MB/s +test bench::easy1_1K ... bench: 3,872 ns/iter (+/- 783) = 264 MB/s +test bench::easy1_32 ... bench: 287 ns/iter (+/- 143) = 111 MB/s +test bench::easy1_32K ... bench: 115,340 ns/iter (+/- 4,717) = 284 MB/s +test bench::hard_1K ... bench: 52,484 ns/iter (+/- 472) = 19 MB/s +test bench::hard_32 ... bench: 1,923 ns/iter (+/- 49) = 16 MB/s +test bench::hard_32K ... bench: 1,710,214 ns/iter (+/- 9,733) = 19 MB/s +test bench::literal ... bench: 337 ns/iter (+/- 13) +test bench::match_class ... bench: 2,141 ns/iter (+/- 7) +test bench::match_class_in_range ... bench: 2,301 ns/iter (+/- 7) +test bench::medium_1K ... bench: 31,696 ns/iter (+/- 961) = 32 MB/s +test bench::medium_32 ... bench: 1,155 ns/iter (+/- 71) = 27 MB/s +test bench::medium_32K ... bench: 1,016,101 ns/iter (+/- 12,090) = 32 MB/s +test bench::no_exponential ... bench: 262,801 ns/iter (+/- 1,332) +test bench::not_literal ... bench: 1,729 ns/iter (+/- 3) +test bench::one_pass_long_prefix ... bench: 779 ns/iter (+/- 4) +test bench::one_pass_long_prefix_not ... bench: 779 ns/iter (+/- 6) +test bench::one_pass_short_a ... bench: 1,943 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 2,545 ns/iter (+/- 9) +test bench::one_pass_short_b ... bench: 1,364 ns/iter (+/- 4) +test bench::one_pass_short_b_not ... bench: 2,029 ns/iter (+/- 22) +test bench::replace_all ... 
bench: 3,185 ns/iter (+/- 12) diff --git a/third_party/rust/regex/record/old-bench-log/old/05-thread-caching b/third_party/rust/regex/record/old-bench-log/old/05-thread-caching new file mode 100644 index 0000000000..238f978b39 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/05-thread-caching @@ -0,0 +1,29 @@ +test bench::anchored_literal_long_match ... bench: 287 ns/iter (+/- 11) +test bench::anchored_literal_long_non_match ... bench: 111 ns/iter (+/- 0) +test bench::anchored_literal_short_match ... bench: 286 ns/iter (+/- 4) +test bench::anchored_literal_short_non_match ... bench: 114 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 2562 ns/iter (+/- 94) = 399 MB/s +test bench::easy0_32 ... bench: 95 ns/iter (+/- 1) = 336 MB/s +test bench::easy0_32K ... bench: 81755 ns/iter (+/- 576) = 400 MB/s +test bench::easy1_1K ... bench: 3586 ns/iter (+/- 917) = 285 MB/s +test bench::easy1_32 ... bench: 155 ns/iter (+/- 132) = 206 MB/s +test bench::easy1_32K ... bench: 113980 ns/iter (+/- 9331) = 287 MB/s +test bench::hard_1K ... bench: 54573 ns/iter (+/- 565) = 18 MB/s +test bench::hard_32 ... bench: 1806 ns/iter (+/- 44) = 17 MB/s +test bench::hard_32K ... bench: 1754465 ns/iter (+/- 7867) = 18 MB/s +test bench::literal ... bench: 299 ns/iter (+/- 1) +test bench::match_class ... bench: 2399 ns/iter (+/- 23) +test bench::match_class_in_range ... bench: 2142 ns/iter (+/- 8) +test bench::match_class_unicode ... bench: 2804 ns/iter (+/- 9) +test bench::medium_1K ... bench: 29536 ns/iter (+/- 537) = 34 MB/s +test bench::medium_32 ... bench: 962 ns/iter (+/- 59) = 33 MB/s +test bench::medium_32K ... bench: 946483 ns/iter (+/- 7106) = 34 MB/s +test bench::no_exponential ... bench: 274301 ns/iter (+/- 552) +test bench::not_literal ... bench: 2039 ns/iter (+/- 13) +test bench::one_pass_long_prefix ... bench: 573 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 577 ns/iter (+/- 4) +test bench::one_pass_short_a ... 
bench: 1951 ns/iter (+/- 29) +test bench::one_pass_short_a_not ... bench: 2464 ns/iter (+/- 10) +test bench::one_pass_short_b ... bench: 1301 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 1785 ns/iter (+/- 6) +test bench::replace_all ... bench: 2168 ns/iter (+/- 152) diff --git a/third_party/rust/regex/record/old-bench-log/old/06-major-dynamic b/third_party/rust/regex/record/old-bench-log/old/06-major-dynamic new file mode 100644 index 0000000000..123efdde31 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/06-major-dynamic @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 206 ns/iter (+/- 7) +test bench::anchored_literal_long_non_match ... bench: 97 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 193 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 86 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 356 ns/iter (+/- 136) = 2876 MB/s +test bench::easy0_1MB ... bench: 352,434 ns/iter (+/- 7,874) = 2974 MB/s +test bench::easy0_32 ... bench: 72 ns/iter (+/- 21) = 444 MB/s +test bench::easy0_32K ... bench: 11,053 ns/iter (+/- 1,388) = 2964 MB/s +test bench::easy1_1K ... bench: 331 ns/iter (+/- 162) = 3093 MB/s +test bench::easy1_1MB ... bench: 353,723 ns/iter (+/- 6,836) = 2964 MB/s +test bench::easy1_32 ... bench: 73 ns/iter (+/- 20) = 438 MB/s +test bench::easy1_32K ... bench: 10,297 ns/iter (+/- 1,137) = 3182 MB/s +test bench::hard_1K ... bench: 34,951 ns/iter (+/- 171) = 29 MB/s +test bench::hard_1MB ... bench: 63,323,613 ns/iter (+/- 279,582) = 15 MB/s +test bench::hard_32 ... bench: 1,131 ns/iter (+/- 13) = 28 MB/s +test bench::hard_32K ... bench: 1,099,921 ns/iter (+/- 1,338) = 29 MB/s +test bench::literal ... bench: 16 ns/iter (+/- 0) +test bench::match_class ... bench: 188 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 188 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 1,940 ns/iter (+/- 10) +test bench::medium_1K ... 
bench: 5,262 ns/iter (+/- 256) = 194 MB/s +test bench::medium_1MB ... bench: 5,295,539 ns/iter (+/- 9,808) = 197 MB/s +test bench::medium_32 ... bench: 217 ns/iter (+/- 19) = 147 MB/s +test bench::medium_32K ... bench: 169,169 ns/iter (+/- 1,606) = 193 MB/s +test bench::no_exponential ... bench: 293,739 ns/iter (+/- 1,632) +test bench::not_literal ... bench: 1,371 ns/iter (+/- 136) +test bench::one_pass_long_prefix ... bench: 337 ns/iter (+/- 6) +test bench::one_pass_long_prefix_not ... bench: 341 ns/iter (+/- 6) +test bench::one_pass_short_a ... bench: 1,399 ns/iter (+/- 16) +test bench::one_pass_short_a_not ... bench: 1,229 ns/iter (+/- 13) +test bench::one_pass_short_b ... bench: 844 ns/iter (+/- 24) +test bench::one_pass_short_b_not ... bench: 849 ns/iter (+/- 45) +test bench::replace_all ... bench: 579 ns/iter (+/- 3) diff --git a/third_party/rust/regex/record/old-bench-log/old/06-major-macro b/third_party/rust/regex/record/old-bench-log/old/06-major-macro new file mode 100644 index 0000000000..199561dfef --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/06-major-macro @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 225 ns/iter (+/- 22) +test bench::anchored_literal_long_non_match ... bench: 62 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 225 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 60 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 29,984 ns/iter (+/- 190) = 34 MB/s +test bench::easy0_1MB ... bench: 30,641,690 ns/iter (+/- 110,535) = 33 MB/s +test bench::easy0_32 ... bench: 981 ns/iter (+/- 12) = 32 MB/s +test bench::easy0_32K ... bench: 957,358 ns/iter (+/- 2,633) = 34 MB/s +test bench::easy1_1K ... bench: 29,636 ns/iter (+/- 150) = 34 MB/s +test bench::easy1_1MB ... bench: 30,295,321 ns/iter (+/- 98,181) = 34 MB/s +test bench::easy1_32 ... bench: 971 ns/iter (+/- 30) = 32 MB/s +test bench::easy1_32K ... 
bench: 947,307 ns/iter (+/- 4,258) = 34 MB/s +test bench::hard_1K ... bench: 54,856 ns/iter (+/- 209) = 18 MB/s +test bench::hard_1MB ... bench: 56,126,571 ns/iter (+/- 224,163) = 17 MB/s +test bench::hard_32 ... bench: 1,776 ns/iter (+/- 23) = 18 MB/s +test bench::hard_32K ... bench: 1,753,833 ns/iter (+/- 54,427) = 18 MB/s +test bench::literal ... bench: 1,516 ns/iter (+/- 6) +test bench::match_class ... bench: 2,429 ns/iter (+/- 11) +test bench::match_class_in_range ... bench: 2,398 ns/iter (+/- 4) +test bench::match_class_unicode ... bench: 12,915 ns/iter (+/- 29) +test bench::medium_1K ... bench: 31,914 ns/iter (+/- 276) = 32 MB/s +test bench::medium_1MB ... bench: 32,617,173 ns/iter (+/- 68,114) = 31 MB/s +test bench::medium_32 ... bench: 1,046 ns/iter (+/- 42) = 30 MB/s +test bench::medium_32K ... bench: 1,019,516 ns/iter (+/- 3,788) = 32 MB/s +test bench::no_exponential ... bench: 303,239 ns/iter (+/- 518) +test bench::not_literal ... bench: 1,756 ns/iter (+/- 115) +test bench::one_pass_long_prefix ... bench: 834 ns/iter (+/- 7) +test bench::one_pass_long_prefix_not ... bench: 858 ns/iter (+/- 15) +test bench::one_pass_short_a ... bench: 1,597 ns/iter (+/- 9) +test bench::one_pass_short_a_not ... bench: 1,950 ns/iter (+/- 21) +test bench::one_pass_short_b ... bench: 1,077 ns/iter (+/- 5) +test bench::one_pass_short_b_not ... bench: 1,596 ns/iter (+/- 9) +test bench::replace_all ... bench: 1,288 ns/iter (+/- 13) diff --git a/third_party/rust/regex/record/old-bench-log/old/07-prefix-improvements b/third_party/rust/regex/record/old-bench-log/old/07-prefix-improvements new file mode 100644 index 0000000000..55477fdcef --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/07-prefix-improvements @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 197 ns/iter (+/- 9) +test bench::anchored_literal_long_non_match ... bench: 95 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... 
bench: 193 ns/iter (+/- 2) +test bench::anchored_literal_short_non_match ... bench: 85 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 304 ns/iter (+/- 119) = 3368 MB/s +test bench::easy0_1MB ... bench: 281,912 ns/iter (+/- 5,274) = 3719 MB/s +test bench::easy0_32 ... bench: 74 ns/iter (+/- 16) = 432 MB/s +test bench::easy0_32K ... bench: 8,909 ns/iter (+/- 667) = 3678 MB/s +test bench::easy1_1K ... bench: 300 ns/iter (+/- 111) = 3413 MB/s +test bench::easy1_1MB ... bench: 282,250 ns/iter (+/- 5,556) = 3714 MB/s +test bench::easy1_32 ... bench: 98 ns/iter (+/- 17) = 326 MB/s +test bench::easy1_32K ... bench: 8,105 ns/iter (+/- 593) = 4042 MB/s +test bench::hard_1K ... bench: 34,562 ns/iter (+/- 211) = 29 MB/s +test bench::hard_1MB ... bench: 64,510,947 ns/iter (+/- 308,627) = 15 MB/s +test bench::hard_32 ... bench: 1,139 ns/iter (+/- 26) = 28 MB/s +test bench::hard_32K ... bench: 1,102,562 ns/iter (+/- 1,850) = 29 MB/s +test bench::literal ... bench: 15 ns/iter (+/- 0) +test bench::match_class ... bench: 105 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 105 ns/iter (+/- 1) +test bench::match_class_unicode ... bench: 2,270 ns/iter (+/- 185) +test bench::medium_1K ... bench: 2,262 ns/iter (+/- 73) = 452 MB/s +test bench::medium_1MB ... bench: 2,185,098 ns/iter (+/- 3,007) = 479 MB/s +test bench::medium_32 ... bench: 139 ns/iter (+/- 1) = 230 MB/s +test bench::medium_32K ... bench: 72,320 ns/iter (+/- 193) = 453 MB/s +test bench::no_exponential ... bench: 300,699 ns/iter (+/- 494) +test bench::not_literal ... bench: 1,462 ns/iter (+/- 89) +test bench::one_pass_long_prefix ... bench: 283 ns/iter (+/- 1) +test bench::one_pass_long_prefix_not ... bench: 287 ns/iter (+/- 0) +test bench::one_pass_short_a ... bench: 1,131 ns/iter (+/- 11) +test bench::one_pass_short_a_not ... bench: 1,259 ns/iter (+/- 12) +test bench::one_pass_short_b ... bench: 883 ns/iter (+/- 15) +test bench::one_pass_short_b_not ... 
bench: 799 ns/iter (+/- 28) +test bench::replace_all ... bench: 170 ns/iter (+/- 1) diff --git a/third_party/rust/regex/record/old-bench-log/old/08-case-fixes b/third_party/rust/regex/record/old-bench-log/old/08-case-fixes new file mode 100644 index 0000000000..7609f6c940 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/08-case-fixes @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 192 ns/iter (+/- 11) +test bench::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 4) +test bench::anchored_literal_short_match ... bench: 182 ns/iter (+/- 6) +test bench::anchored_literal_short_non_match ... bench: 82 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 277 ns/iter (+/- 79) = 3696 MB/s +test bench::easy0_1MB ... bench: 230,829 ns/iter (+/- 5,712) = 4542 MB/s +test bench::easy0_32 ... bench: 70 ns/iter (+/- 4) = 457 MB/s +test bench::easy0_32K ... bench: 8,444 ns/iter (+/- 492) = 3880 MB/s +test bench::easy1_1K ... bench: 272 ns/iter (+/- 98) = 3764 MB/s +test bench::easy1_1MB ... bench: 273,867 ns/iter (+/- 6,351) = 3828 MB/s +test bench::easy1_32 ... bench: 72 ns/iter (+/- 15) = 444 MB/s +test bench::easy1_32K ... bench: 8,109 ns/iter (+/- 540) = 4040 MB/s +test bench::hard_1K ... bench: 31,043 ns/iter (+/- 1,237) = 32 MB/s +test bench::hard_1MB ... bench: 60,077,413 ns/iter (+/- 129,611) = 16 MB/s +test bench::hard_32 ... bench: 1,036 ns/iter (+/- 20) = 30 MB/s +test bench::hard_32K ... bench: 996,238 ns/iter (+/- 3,181) = 32 MB/s +test bench::literal ... bench: 15 ns/iter (+/- 0) +test bench::match_class ... bench: 75 ns/iter (+/- 7) +test bench::match_class_in_range ... bench: 77 ns/iter (+/- 7) +test bench::match_class_unicode ... bench: 2,057 ns/iter (+/- 102) +test bench::medium_1K ... bench: 2,252 ns/iter (+/- 63) = 454 MB/s +test bench::medium_1MB ... bench: 2,186,091 ns/iter (+/- 7,496) = 479 MB/s +test bench::medium_32 ... bench: 132 ns/iter (+/- 2) = 242 MB/s +test bench::medium_32K ... 
bench: 72,394 ns/iter (+/- 342) = 452 MB/s +test bench::no_exponential ... bench: 286,662 ns/iter (+/- 1,150) +test bench::not_literal ... bench: 1,130 ns/iter (+/- 10) +test bench::one_pass_long_prefix ... bench: 271 ns/iter (+/- 0) +test bench::one_pass_long_prefix_not ... bench: 276 ns/iter (+/- 3) +test bench::one_pass_short_a ... bench: 1,147 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 901 ns/iter (+/- 8) +test bench::one_pass_short_b ... bench: 887 ns/iter (+/- 7) +test bench::one_pass_short_b_not ... bench: 777 ns/iter (+/- 6) +test bench::replace_all ... bench: 154 ns/iter (+/- 0) diff --git a/third_party/rust/regex/record/old-bench-log/old/09-before-compiler-rewrite b/third_party/rust/regex/record/old-bench-log/old/09-before-compiler-rewrite new file mode 100644 index 0000000000..fe67d096f9 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/09-before-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 156 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 7) +test bench::anchored_literal_short_match ... bench: 145 ns/iter (+/- 3) +test bench::anchored_literal_short_non_match ... bench: 76 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 269 ns/iter (+/- 63) = 3806 MB/s +test bench::easy0_1MB ... bench: 232,461 ns/iter (+/- 13,022) = 4509 MB/s +test bench::easy0_32 ... bench: 63 ns/iter (+/- 6) = 507 MB/s +test bench::easy0_32K ... bench: 8,358 ns/iter (+/- 430) = 3920 MB/s +test bench::easy1_1K ... bench: 274 ns/iter (+/- 101) = 3737 MB/s +test bench::easy1_1MB ... bench: 278,949 ns/iter (+/- 11,324) = 3758 MB/s +test bench::easy1_32 ... bench: 63 ns/iter (+/- 15) = 507 MB/s +test bench::easy1_32K ... bench: 7,731 ns/iter (+/- 488) = 4238 MB/s +test bench::hard_1K ... bench: 44,685 ns/iter (+/- 661) = 22 MB/s +test bench::hard_1MB ... bench: 60,108,237 ns/iter (+/- 814,810) = 16 MB/s +test bench::hard_32 ... 
bench: 1,412 ns/iter (+/- 38) = 22 MB/s +test bench::hard_32K ... bench: 1,363,335 ns/iter (+/- 21,316) = 24 MB/s +test bench::literal ... bench: 14 ns/iter (+/- 0) +test bench::match_class ... bench: 81 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 81 ns/iter (+/- 2) +test bench::match_class_unicode ... bench: 2,978 ns/iter (+/- 64) +test bench::medium_1K ... bench: 2,239 ns/iter (+/- 68) = 457 MB/s +test bench::medium_1MB ... bench: 2,215,729 ns/iter (+/- 20,897) = 472 MB/s +test bench::medium_32 ... bench: 124 ns/iter (+/- 2) = 258 MB/s +test bench::medium_32K ... bench: 72,486 ns/iter (+/- 1,027) = 452 MB/s +test bench::no_exponential ... bench: 282,992 ns/iter (+/- 8,102) +test bench::not_literal ... bench: 1,526 ns/iter (+/- 32) +test bench::one_pass_long_prefix ... bench: 307 ns/iter (+/- 7) +test bench::one_pass_long_prefix_not ... bench: 311 ns/iter (+/- 8) +test bench::one_pass_short_a ... bench: 623 ns/iter (+/- 12) +test bench::one_pass_short_a_not ... bench: 920 ns/iter (+/- 19) +test bench::one_pass_short_b ... bench: 554 ns/iter (+/- 13) +test bench::one_pass_short_b_not ... bench: 740 ns/iter (+/- 12) +test bench::replace_all ... bench: 155 ns/iter (+/- 5) diff --git a/third_party/rust/regex/record/old-bench-log/old/10-compiler-rewrite b/third_party/rust/regex/record/old-bench-log/old/10-compiler-rewrite new file mode 100644 index 0000000000..e25a602d2c --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/10-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 145 ns/iter (+/- 1) +test bench::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 129 ns/iter (+/- 3) +test bench::anchored_literal_short_non_match ... bench: 72 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 268 ns/iter (+/- 88) = 3820 MB/s +test bench::easy0_1MB ... bench: 234,067 ns/iter (+/- 4,663) = 4479 MB/s +test bench::easy0_32 ... 
bench: 64 ns/iter (+/- 4) = 500 MB/s +test bench::easy0_32K ... bench: 8,298 ns/iter (+/- 521) = 3948 MB/s +test bench::easy1_1K ... bench: 275 ns/iter (+/- 95) = 3723 MB/s +test bench::easy1_1MB ... bench: 280,466 ns/iter (+/- 5,938) = 3738 MB/s +test bench::easy1_32 ... bench: 64 ns/iter (+/- 16) = 500 MB/s +test bench::easy1_32K ... bench: 7,693 ns/iter (+/- 595) = 4259 MB/s +test bench::hard_1K ... bench: 27,844 ns/iter (+/- 1,012) = 36 MB/s +test bench::hard_1MB ... bench: 52,323,489 ns/iter (+/- 1,251,665) = 19 MB/s +test bench::hard_32 ... bench: 970 ns/iter (+/- 92) = 32 MB/s +test bench::hard_32K ... bench: 896,945 ns/iter (+/- 29,977) = 36 MB/s +test bench::literal ... bench: 13 ns/iter (+/- 1) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,150 ns/iter (+/- 18) +test bench::medium_1K ... bench: 2,241 ns/iter (+/- 55) = 456 MB/s +test bench::medium_1MB ... bench: 2,186,354 ns/iter (+/- 9,134) = 479 MB/s +test bench::medium_32 ... bench: 125 ns/iter (+/- 1) = 256 MB/s +test bench::medium_32K ... bench: 72,156 ns/iter (+/- 145) = 454 MB/s +test bench::no_exponential ... bench: 305,034 ns/iter (+/- 1,134) +test bench::not_literal ... bench: 1,169 ns/iter (+/- 105) +test bench::one_pass_long_prefix ... bench: 257 ns/iter (+/- 4) +test bench::one_pass_long_prefix_not ... bench: 276 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 680 ns/iter (+/- 3) +test bench::one_pass_short_a_not ... bench: 804 ns/iter (+/- 48) +test bench::one_pass_short_b ... bench: 337 ns/iter (+/- 3) +test bench::one_pass_short_b_not ... bench: 339 ns/iter (+/- 5) +test bench::replace_all ... 
bench: 150 ns/iter (+/- 1) diff --git a/third_party/rust/regex/record/old-bench-log/old/11-compiler-rewrite b/third_party/rust/regex/record/old-bench-log/old/11-compiler-rewrite new file mode 100644 index 0000000000..3296d4376b --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/11-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 171 ns/iter (+/- 20) +test bench::anchored_literal_long_non_match ... bench: 90 ns/iter (+/- 8) +test bench::anchored_literal_short_match ... bench: 180 ns/iter (+/- 33) +test bench::anchored_literal_short_non_match ... bench: 78 ns/iter (+/- 9) +test bench::easy0_1K ... bench: 272 ns/iter (+/- 82) = 3764 MB/s +test bench::easy0_1MB ... bench: 233,014 ns/iter (+/- 22,144) = 4500 MB/s +test bench::easy0_32 ... bench: 62 ns/iter (+/- 6) = 516 MB/s +test bench::easy0_32K ... bench: 8,490 ns/iter (+/- 905) = 3859 MB/s +test bench::easy1_1K ... bench: 273 ns/iter (+/- 100) = 3750 MB/s +test bench::easy1_1MB ... bench: 279,901 ns/iter (+/- 5,598) = 3746 MB/s +test bench::easy1_32 ... bench: 62 ns/iter (+/- 6) = 516 MB/s +test bench::easy1_32K ... bench: 7,713 ns/iter (+/- 566) = 4248 MB/s +test bench::hard_1K ... bench: 38,641 ns/iter (+/- 605) = 26 MB/s +test bench::hard_1MB ... bench: 56,579,116 ns/iter (+/- 1,193,231) = 18 MB/s +test bench::hard_32 ... bench: 1,252 ns/iter (+/- 24) = 25 MB/s +test bench::hard_32K ... bench: 1,247,639 ns/iter (+/- 12,774) = 26 MB/s +test bench::literal ... bench: 13 ns/iter (+/- 1) +test bench::match_class ... bench: 80 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,459 ns/iter (+/- 77) +test bench::medium_1K ... bench: 2,244 ns/iter (+/- 63) = 456 MB/s +test bench::medium_1MB ... bench: 2,192,052 ns/iter (+/- 21,460) = 478 MB/s +test bench::medium_32 ... bench: 122 ns/iter (+/- 3) = 262 MB/s +test bench::medium_32K ... 
bench: 73,167 ns/iter (+/- 15,655) = 447 MB/s +test bench::no_exponential ... bench: 289,292 ns/iter (+/- 1,488) +test bench::not_literal ... bench: 1,480 ns/iter (+/- 18) +test bench::one_pass_long_prefix ... bench: 324 ns/iter (+/- 15) +test bench::one_pass_long_prefix_not ... bench: 337 ns/iter (+/- 5) +test bench::one_pass_short_a ... bench: 1,161 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 798 ns/iter (+/- 6) +test bench::one_pass_short_b ... bench: 456 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 452 ns/iter (+/- 33) +test bench::replace_all ... bench: 148 ns/iter (+/- 0) diff --git a/third_party/rust/regex/record/old-bench-log/old/12-executor b/third_party/rust/regex/record/old-bench-log/old/12-executor new file mode 100644 index 0000000000..8ec8561b56 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/12-executor @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 179 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 90 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 164 ns/iter (+/- 16) +test bench::anchored_literal_short_non_match ... bench: 79 ns/iter (+/- 1) +test bench::compile_simple ... bench: 3,708 ns/iter (+/- 225) +test bench::compile_unicode ... bench: 5,871 ns/iter (+/- 264) +test bench::easy0_1K ... bench: 263 ns/iter (+/- 92) = 3893 MB/s +test bench::easy0_1MB ... bench: 217,835 ns/iter (+/- 4,074) = 4813 MB/s +test bench::easy0_32 ... bench: 67 ns/iter (+/- 1) = 477 MB/s +test bench::easy0_32K ... bench: 8,204 ns/iter (+/- 426) = 3994 MB/s +test bench::easy1_1K ... bench: 276 ns/iter (+/- 100) = 3710 MB/s +test bench::easy1_1MB ... bench: 284,086 ns/iter (+/- 6,516) = 3691 MB/s +test bench::easy1_32 ... bench: 70 ns/iter (+/- 15) = 457 MB/s +test bench::easy1_32K ... bench: 7,844 ns/iter (+/- 556) = 4177 MB/s +test bench::hard_1K ... bench: 30,062 ns/iter (+/- 1,684) = 34 MB/s +test bench::hard_1MB ... 
bench: 50,839,701 ns/iter (+/- 104,343) = 20 MB/s +test bench::hard_32 ... bench: 1,009 ns/iter (+/- 48) = 31 MB/s +test bench::hard_32K ... bench: 965,341 ns/iter (+/- 45,075) = 33 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 1) +test bench::match_class_unicode ... bench: 2,150 ns/iter (+/- 22) +test bench::medium_1K ... bench: 2,262 ns/iter (+/- 66) = 452 MB/s +test bench::medium_1MB ... bench: 2,193,428 ns/iter (+/- 6,147) = 478 MB/s +test bench::medium_32 ... bench: 129 ns/iter (+/- 1) = 248 MB/s +test bench::medium_32K ... bench: 72,629 ns/iter (+/- 348) = 451 MB/s +test bench::no_exponential ... bench: 289,043 ns/iter (+/- 2,478) +test bench::not_literal ... bench: 1,195 ns/iter (+/- 10) +test bench::one_pass_long_prefix ... bench: 265 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 270 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 730 ns/iter (+/- 4) +test bench::one_pass_short_a_not ... bench: 712 ns/iter (+/- 4) +test bench::one_pass_short_b ... bench: 445 ns/iter (+/- 49) +test bench::one_pass_short_b_not ... bench: 406 ns/iter (+/- 72) +test bench::replace_all ... bench: 136 ns/iter (+/- 2) diff --git a/third_party/rust/regex/record/old-bench-log/old/12-executor-bytes b/third_party/rust/regex/record/old-bench-log/old/12-executor-bytes new file mode 100644 index 0000000000..c036920c2b --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/12-executor-bytes @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 190 ns/iter (+/- 12) +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 147 ns/iter (+/- 9) +test bench::anchored_literal_short_non_match ... bench: 74 ns/iter (+/- 5) +test bench::compile_simple ... bench: 4,218 ns/iter (+/- 201) +test bench::compile_unicode ... 
bench: 402,353 ns/iter (+/- 2,642) +test bench::easy0_1K ... bench: 253 ns/iter (+/- 79) = 4047 MB/s +test bench::easy0_1MB ... bench: 215,308 ns/iter (+/- 3,474) = 4870 MB/s +test bench::easy0_32 ... bench: 64 ns/iter (+/- 4) = 500 MB/s +test bench::easy0_32K ... bench: 8,134 ns/iter (+/- 435) = 4028 MB/s +test bench::easy1_1K ... bench: 277 ns/iter (+/- 105) = 3696 MB/s +test bench::easy1_1MB ... bench: 283,435 ns/iter (+/- 5,975) = 3699 MB/s +test bench::easy1_32 ... bench: 64 ns/iter (+/- 14) = 500 MB/s +test bench::easy1_32K ... bench: 7,832 ns/iter (+/- 575) = 4183 MB/s +test bench::hard_1K ... bench: 35,380 ns/iter (+/- 772) = 28 MB/s +test bench::hard_1MB ... bench: 46,639,535 ns/iter (+/- 456,010) = 22 MB/s +test bench::hard_32 ... bench: 1,110 ns/iter (+/- 53) = 28 MB/s +test bench::hard_32K ... bench: 1,146,751 ns/iter (+/- 17,290) = 28 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,487,088 ns/iter (+/- 103,259) +test bench::medium_1K ... bench: 2,253 ns/iter (+/- 52) = 454 MB/s +test bench::medium_1MB ... bench: 2,193,344 ns/iter (+/- 7,582) = 478 MB/s +test bench::medium_32 ... bench: 119 ns/iter (+/- 5) = 268 MB/s +test bench::medium_32K ... bench: 72,569 ns/iter (+/- 283) = 451 MB/s +test bench::no_exponential ... bench: 292,840 ns/iter (+/- 2,823) +test bench::not_literal ... bench: 6,417 ns/iter (+/- 26) +test bench::one_pass_long_prefix ... bench: 304 ns/iter (+/- 0) +test bench::one_pass_long_prefix_not ... bench: 943 ns/iter (+/- 44) +test bench::one_pass_short_a ... bench: 688 ns/iter (+/- 11) +test bench::one_pass_short_a_not ... bench: 687 ns/iter (+/- 7) +test bench::one_pass_short_b ... bench: 589 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 357 ns/iter (+/- 11) +test bench::replace_all ... 
bench: 131 ns/iter (+/- 1) diff --git a/third_party/rust/regex/record/old-bench-log/old/13-cache-byte-range-suffixes b/third_party/rust/regex/record/old-bench-log/old/13-cache-byte-range-suffixes new file mode 100644 index 0000000000..5a2ec09d50 --- /dev/null +++ b/third_party/rust/regex/record/old-bench-log/old/13-cache-byte-range-suffixes @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 174 ns/iter (+/- 65) +test bench::anchored_literal_long_non_match ... bench: 94 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 142 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 82 ns/iter (+/- 0) +test bench::compile_simple ... bench: 4,878 ns/iter (+/- 207) +test bench::compile_unicode ... bench: 679,701 ns/iter (+/- 10,264) +test bench::easy0_1K ... bench: 257 ns/iter (+/- 83) = 3984 MB/s +test bench::easy0_1MB ... bench: 217,698 ns/iter (+/- 3,307) = 4816 MB/s +test bench::easy0_32 ... bench: 61 ns/iter (+/- 3) = 524 MB/s +test bench::easy0_32K ... bench: 8,144 ns/iter (+/- 449) = 4023 MB/s +test bench::easy1_1K ... bench: 276 ns/iter (+/- 106) = 3710 MB/s +test bench::easy1_1MB ... bench: 285,518 ns/iter (+/- 4,933) = 3672 MB/s +test bench::easy1_32 ... bench: 61 ns/iter (+/- 12) = 524 MB/s +test bench::easy1_32K ... bench: 7,896 ns/iter (+/- 508) = 4149 MB/s +test bench::hard_1K ... bench: 35,361 ns/iter (+/- 684) = 28 MB/s +test bench::hard_1MB ... bench: 48,691,236 ns/iter (+/- 2,316,446) = 21 MB/s +test bench::hard_32 ... bench: 1,087 ns/iter (+/- 33) = 29 MB/s +test bench::hard_32K ... bench: 1,147,627 ns/iter (+/- 4,982) = 28 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,431,592 ns/iter (+/- 89,268) +test bench::medium_1K ... bench: 2,245 ns/iter (+/- 93) = 456 MB/s +test bench::medium_1MB ... 
bench: 2,192,828 ns/iter (+/- 4,343) = 478 MB/s +test bench::medium_32 ... bench: 120 ns/iter (+/- 2) = 266 MB/s +test bench::medium_32K ... bench: 72,996 ns/iter (+/- 627) = 448 MB/s +test bench::no_exponential ... bench: 290,775 ns/iter (+/- 1,176) +test bench::not_literal ... bench: 5,282 ns/iter (+/- 199) +test bench::one_pass_long_prefix ... bench: 294 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 315 ns/iter (+/- 7) +test bench::one_pass_short_a ... bench: 708 ns/iter (+/- 21) +test bench::one_pass_short_a_not ... bench: 861 ns/iter (+/- 9) +test bench::one_pass_short_b ... bench: 607 ns/iter (+/- 2) +test bench::one_pass_short_b_not ... bench: 344 ns/iter (+/- 11) +test bench::replace_all ... bench: 135 ns/iter (+/- 1) diff --git a/third_party/rust/regex/rustfmt.toml b/third_party/rust/regex/rustfmt.toml new file mode 100644 index 0000000000..aa37a218b9 --- /dev/null +++ b/third_party/rust/regex/rustfmt.toml @@ -0,0 +1,2 @@ +max_width = 79 +use_small_heuristics = "max" diff --git a/third_party/rust/regex/src/builders.rs b/third_party/rust/regex/src/builders.rs new file mode 100644 index 0000000000..46c4824c56 --- /dev/null +++ b/third_party/rust/regex/src/builders.rs @@ -0,0 +1,2535 @@ +#![allow(warnings)] + +// This module defines an internal builder that encapsulates all interaction +// with meta::Regex construction, and then 4 public API builders that wrap +// around it. The docs are essentially repeated on each of the 4 public +// builders, with tweaks to the examples as needed. +// +// The reason why there are so many builders is partially because of a misstep +// in the initial API design: the builder constructor takes in the pattern +// strings instead of using the `build` method to accept the pattern strings. +// This means `new` has a different signature for each builder. It probably +// would have been nicer to use one builder with `fn new()`, and then add +// `build(pat)` and `build_many(pats)` constructors. 
+// +// The other reason is because I think the `bytes` module should probably +// have its own builder type. That way, it is completely isolated from the +// top-level API. +// +// If I could do it again, I'd probably have a `regex::Builder` and a +// `regex::bytes::Builder`. Each would have `build` and `build_set` (or +// `build_many`) methods for constructing a single pattern `Regex` and a +// multi-pattern `RegexSet`, respectively. + +use alloc::{ + string::{String, ToString}, + sync::Arc, + vec, + vec::Vec, +}; + +use regex_automata::{ + meta, nfa::thompson::WhichCaptures, util::syntax, MatchKind, +}; + +use crate::error::Error; + +/// A builder for constructing a `Regex`, `bytes::Regex`, `RegexSet` or a +/// `bytes::RegexSet`. +/// +/// This is essentially the implementation of the four different builder types +/// in the public API: `RegexBuilder`, `bytes::RegexBuilder`, `RegexSetBuilder` +/// and `bytes::RegexSetBuilder`. +#[derive(Clone, Debug)] +struct Builder { + pats: Vec<String>, + metac: meta::Config, + syntaxc: syntax::Config, +} + +impl Default for Builder { + fn default() -> Builder { + let metac = meta::Config::new() + .nfa_size_limit(Some(10 * (1 << 20))) + .hybrid_cache_capacity(2 * (1 << 20)); + Builder { pats: vec![], metac, syntaxc: syntax::Config::default() } + } +} + +impl Builder { + fn new<I, S>(patterns: I) -> Builder + where + S: AsRef<str>, + I: IntoIterator<Item = S>, + { + let mut b = Builder::default(); + b.pats.extend(patterns.into_iter().map(|p| p.as_ref().to_string())); + b + } + + fn build_one_string(&self) -> Result<crate::Regex, Error> { + assert_eq!(1, self.pats.len()); + let metac = self + .metac + .clone() + .match_kind(MatchKind::LeftmostFirst) + .utf8_empty(true); + let syntaxc = self.syntaxc.clone().utf8(true); + let pattern = Arc::from(self.pats[0].as_str()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build(&pattern) + .map(|meta| crate::Regex { meta, pattern }) + 
.map_err(Error::from_meta_build_error) + } + + fn build_one_bytes(&self) -> Result<crate::bytes::Regex, Error> { + assert_eq!(1, self.pats.len()); + let metac = self + .metac + .clone() + .match_kind(MatchKind::LeftmostFirst) + .utf8_empty(false); + let syntaxc = self.syntaxc.clone().utf8(false); + let pattern = Arc::from(self.pats[0].as_str()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build(&pattern) + .map(|meta| crate::bytes::Regex { meta, pattern }) + .map_err(Error::from_meta_build_error) + } + + fn build_many_string(&self) -> Result<crate::RegexSet, Error> { + let metac = self + .metac + .clone() + .match_kind(MatchKind::All) + .utf8_empty(true) + .which_captures(WhichCaptures::None); + let syntaxc = self.syntaxc.clone().utf8(true); + let patterns = Arc::from(self.pats.as_slice()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build_many(&patterns) + .map(|meta| crate::RegexSet { meta, patterns }) + .map_err(Error::from_meta_build_error) + } + + fn build_many_bytes(&self) -> Result<crate::bytes::RegexSet, Error> { + let metac = self + .metac + .clone() + .match_kind(MatchKind::All) + .utf8_empty(false) + .which_captures(WhichCaptures::None); + let syntaxc = self.syntaxc.clone().utf8(false); + let patterns = Arc::from(self.pats.as_slice()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build_many(&patterns) + .map(|meta| crate::bytes::RegexSet { meta, patterns }) + .map_err(Error::from_meta_build_error) + } + + fn case_insensitive(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.case_insensitive(yes); + self + } + + fn multi_line(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.multi_line(yes); + self + } + + fn dot_matches_new_line(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.dot_matches_new_line(yes); + self + } + + fn crlf(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.crlf(yes); + self + } + + fn 
line_terminator(&mut self, byte: u8) -> &mut Builder { + self.metac = self.metac.clone().line_terminator(byte); + self.syntaxc = self.syntaxc.line_terminator(byte); + self + } + + fn swap_greed(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.swap_greed(yes); + self + } + + fn ignore_whitespace(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.ignore_whitespace(yes); + self + } + + fn unicode(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.unicode(yes); + self + } + + fn octal(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.octal(yes); + self + } + + fn size_limit(&mut self, limit: usize) -> &mut Builder { + self.metac = self.metac.clone().nfa_size_limit(Some(limit)); + self + } + + fn dfa_size_limit(&mut self, limit: usize) -> &mut Builder { + self.metac = self.metac.clone().hybrid_cache_capacity(limit); + self + } + + fn nest_limit(&mut self, limit: u32) -> &mut Builder { + self.syntaxc = self.syntaxc.nest_limit(limit); + self + } +} + +pub(crate) mod string { + use crate::{error::Error, Regex, RegexSet}; + + use super::Builder; + + /// A configurable builder for a [`Regex`]. + /// + /// This builder can be used to programmatically set flags such as `i` + /// (case insensitive) and `x` (for verbose mode). This builder can also be + /// used to configure things like the line terminator and a size limit on + /// the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexBuilder { + builder: Builder, + } + + impl RegexBuilder { + /// Create a new builder with a default configuration for the given + /// pattern. + /// + /// If the pattern is invalid or exceeds the configured size limits, + /// then an error will be returned when [`RegexBuilder::build`] is + /// called. 
+ pub fn new(pattern: &str) -> RegexBuilder { + RegexBuilder { builder: Builder::new([pattern]) } + } + + /// Compiles the pattern given to `RegexBuilder::new` with the + /// configuration set on this builder. + /// + /// If the pattern isn't a valid regex or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result<Regex, Error> { + self.builder.build_one_string() + } + + /// This configures Unicode mode for the entire pattern. + /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, where as it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that if Unicode mode is disabled, then the regex will fail to + /// compile if it could match invalid UTF-8. For example, when Unicode + /// mode is disabled, then since `.` matches any byte (except for + /// `\n`), then it can match invalid UTF-8 and thus building a regex + /// from it will fail. Another example is `\w` and `\W`. Since `\w` can + /// only match ASCII bytes when Unicode mode is disabled, it's allowed. + /// But `\W` can match more than ASCII bytes, including invalid UTF-8, + /// and so it is not allowed. This restriction can be lifted only by + /// using a [`bytes::Regex`](crate::bytes::Regex). 
+ /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"\w") + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ")); + /// + /// let re = RegexBuilder::new(r"s") + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for the + /// entire pattern. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo(?-i:bar)quux") + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. 
+ /// assert!(!re.is_match("fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for the entire pattern. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexBuilder::line_terminator`] option changes `\n` above + /// to any ASCII byte. + /// * The [`RegexBuilder::crlf`] option changes the line terminator to + /// be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(1..4), re.find("\nfoo\n").map(|m| m.range())); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. 
+ /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo.bar") + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = "foo\nbar"; + /// assert_eq!(Some("foo\nbar"), re.find(hay).map(|m| m.as_str())); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for the entire pattern. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert_eq!(Some("foo"), re.find(hay).map(|m| m.as_str())); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. 
(`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\n\r\n"; + /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); + /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. 
+ /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = "\x00foo\x00"; + /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\n")); + /// assert!(!re.is_match("\x00")); + /// ``` + /// + /// This shows that building a regex will fail if the byte given + /// is not ASCII and the pattern could result in matching invalid + /// UTF-8. This is because any singular non-ASCII byte is not valid + /// UTF-8, and it is not permitted for a [`Regex`] to match invalid + /// UTF-8. (It is permissible to use a non-ASCII byte when building a + /// [`bytes::Regex`](crate::bytes::Regex).) + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// assert!(RegexBuilder::new(r".").line_terminator(0x80).build().is_err()); + /// // Note that using a non-ASCII byte isn't enough on its own to + /// // cause regex compilation to fail. You actually have to make use + /// // of it in the regex in a way that leads to matching invalid + /// // UTF-8. If you don't, then regex compilation will succeed! + /// assert!(RegexBuilder::new(r"a").line_terminator(0x80).build().is_ok()); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for the entire pattern. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. 
+ /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"a+") + /// .swap_greed(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some("a"), re.find("aaa").map(|m| m.as_str())); + /// ``` + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for the entire pattern. + /// + /// When enabled, whitespace will be treated as insignificant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. + /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let pat = r" + /// \b + /// (?<first>\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? 
+ /// (?<last>\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexBuilder::new(pat) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// + /// let caps = re.captures("Harry Potter").unwrap(); + /// assert_eq!("Harry", &caps["first"]); + /// assert_eq!("Potter", &caps["last"]); + /// + /// let caps = re.captures("Harry J. Potter").unwrap(); + /// assert_eq!("Harry", &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!(Some("J"), caps.name("initial").map(|m| m.as_str())); + /// assert_eq!(None, caps.name("middle").map(|m| m.as_str())); + /// assert_eq!("Potter", &caps["last"]); + /// + /// let caps = re.captures("Harry James Potter").unwrap(); + /// assert_eq!("Harry", &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!(None, caps.name("initial").map(|m| m.as_str())); + /// assert_eq!(Some("James"), caps.name("middle").map(|m| m.as_str())); + /// assert_eq!("Potter", &caps["last"]); + /// ``` + pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for the entire pattern. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported by this library. However, many users expect + /// backreferences to be supported. 
Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexBuilder::new(r"\141") + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the amount of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! 
Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. + /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err()); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, it tends to be the most + /// commonly used regex engine in default configurations. It tends to + /// adopt the performance profile of a fully built DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. + /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully built DFA that is exponential in size with respect to `N`. 
+ /// The lazy DFA will prevent exponential space blow-up, but its cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. + /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. + /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. 
+ /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok()); + /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { + self.builder.nest_limit(limit); + self + } + } + + /// A configurable builder for a [`RegexSet`]. + /// + /// This builder can be used to programmatically set flags such as + /// `i` (case insensitive) and `x` (for verbose mode). This builder + /// can also be used to configure things like the line terminator + /// and a size limit on the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexSetBuilder { + builder: Builder, + } + + impl RegexSetBuilder { + /// Create a new builder with a default configuration for the given + /// patterns. + /// + /// If the patterns are invalid or exceed the configured size limits, + /// then an error will be returned when [`RegexSetBuilder::build`] is + /// called. + pub fn new<I, S>(patterns: I) -> RegexSetBuilder + where + I: IntoIterator<Item = S>, + S: AsRef<str>, + { + RegexSetBuilder { builder: Builder::new(patterns) } + } + + /// Compiles the patterns given to `RegexSetBuilder::new` with the + /// configuration set on this builder. + /// + /// If the patterns aren't valid regexes or if a configured size limit + /// was exceeded, then an error is returned. 
+ pub fn build(&self) -> Result<RegexSet, Error> { + self.builder.build_many_string() + } + + /// This configures Unicode mode for all of the patterns. + /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, whereas it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that if Unicode mode is disabled, then the regex will fail to + /// compile if it could match invalid UTF-8. For example, when Unicode + /// mode is disabled, then since `.` matches any byte (except for + /// `\n`), then it can match invalid UTF-8 and thus building a regex + /// from it will fail. Another example is `\w` and `\W`. Since `\w` can + /// only match ASCII bytes when Unicode mode is disabled, it's allowed. + /// But `\W` can match more than ASCII bytes, including invalid UTF-8, + /// and so it is not allowed. This restriction can be lifted only by + /// using a [`bytes::RegexSet`](crate::bytes::RegexSet). + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"\w"]) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ")); + /// + /// let re = RegexSetBuilder::new([r"s"]) + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for all + /// of the patterns. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"]) + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match("fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for all of the patterns. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. 
Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexSetBuilder::line_terminator`] option changes `\n` + /// above to any ASCII byte. + /// * The [`RegexSetBuilder::crlf`] option changes the line terminator + /// to be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\nfoo\n")); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for all of the patterns. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo.bar"]) + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = "foo\nbar"; + /// assert!(re.is_match(hay)); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for all of the patterns. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) 
+ /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^\n"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// assert!(!re.is_match("\r\n")); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = "\x00foo\x00"; + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\n")); + /// assert!(!re.is_match("\x00")); + /// ``` + /// + /// This shows that building a regex will fail if the byte given + /// is not ASCII and the pattern could result in matching invalid + /// UTF-8. 
This is because any singular non-ASCII byte is not valid + /// UTF-8, and it is not permitted for a [`RegexSet`] to match invalid + /// UTF-8. (It is permissible to use a non-ASCII byte when building a + /// [`bytes::RegexSet`](crate::bytes::RegexSet).) + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// assert!( + /// RegexSetBuilder::new([r"."]) + /// .line_terminator(0x80) + /// .build() + /// .is_err() + /// ); + /// // Note that using a non-ASCII byte isn't enough on its own to + /// // cause regex compilation to fail. You actually have to make use + /// // of it in the regex in a way that leads to matching invalid + /// // UTF-8. If you don't, then regex compilation will succeed! + /// assert!( + /// RegexSetBuilder::new([r"a"]) + /// .line_terminator(0x80) + /// .build() + /// .is_ok() + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for all of the patterns. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// Note that this is generally not useful for a `RegexSet` since a + /// `RegexSet` can only report whether a pattern matches or not. Since + /// greediness never impacts whether a match is found or not (only the + /// offsets of the match), it follows that whether parts of a pattern + /// are greedy or not doesn't matter for a `RegexSet`. + /// + /// The default for this is `false`. + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for all of the patterns. 
+ /// + /// When enabled, whitespace will be treated as insignificant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. + /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let pat = r" + /// \b + /// (?<first>\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?<last>\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexSetBuilder::new([pat]) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("Harry Potter")); + /// assert!(re.is_match("Harry J. Potter")); + /// assert!(re.is_match("Harry James Potter")); + /// assert!(!re.is_match("harry J. Potter")); + /// ``` + pub fn ignore_whitespace( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for all of the patterns. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. 
+ /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported by this library. However, many users expect + /// backreferences to be supported. Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexSetBuilder::new([r"\141"]) + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the amount of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. 
+ /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. + /// assert!( + /// RegexSetBuilder::new([r"\w"]) + /// .size_limit(45_000) + /// .build() + /// .is_err() + /// ); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, it tends to be the most + /// commonly used regex engine in default configurations. It tends to + /// adopt the performance profile of a fully built DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. 
+ /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully built DFA that is exponential in size with respect to `N`. + /// The lazy DFA will prevent exponential space blow-up, but its cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. + /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit( + &mut self, + bytes: usize, + ) -> &mut RegexSetBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. 
+ /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. + /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok()); + /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder { + self.builder.nest_limit(limit); + self + } + } +} + +pub(crate) mod bytes { + use crate::{ + bytes::{Regex, RegexSet}, + error::Error, + }; + + use super::Builder; + + /// A configurable builder for a [`Regex`]. + /// + /// This builder can be used to programmatically set flags such as `i` + /// (case insensitive) and `x` (for verbose mode). This builder can also be + /// used to configure things like the line terminator and a size limit on + /// the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexBuilder { + builder: Builder, + } + + impl RegexBuilder { + /// Create a new builder with a default configuration for the given + /// pattern. 
+ /// + /// If the pattern is invalid or exceeds the configured size limits, + /// then an error will be returned when [`RegexBuilder::build`] is + /// called. + pub fn new(pattern: &str) -> RegexBuilder { + RegexBuilder { builder: Builder::new([pattern]) } + } + + /// Compiles the pattern given to `RegexBuilder::new` with the + /// configuration set on this builder. + /// + /// If the pattern isn't a valid regex or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result<Regex, Error> { + self.builder.build_one_bytes() + } + + /// This configures Unicode mode for the entire pattern. + /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, whereas it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that unlike the top-level `Regex` for searching `&str`, it + /// is permitted to disable Unicode mode even if the resulting pattern + /// could match invalid UTF-8. For example, `(?-u:.)` is not a valid + /// pattern for a top-level `Regex`, but is valid for a `bytes::Regex`. + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"\w") + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ".as_bytes())); + /// + /// let re = RegexBuilder::new(r"s") + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ".as_bytes())); + /// ``` + /// + /// Since this builder is for constructing a [`bytes::Regex`](Regex), + /// one can disable Unicode mode even if it would match invalid UTF-8: + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Since Unicode mode is disabled, `.` matches any single byte + /// // (except for `\n`), including bytes that are not valid UTF-8. + /// assert!(re.is_match(b"\xFF")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for the + /// entire pattern. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo(?-i:bar)quux") + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match(b"fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for the entire pattern. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexBuilder::line_terminator`] option changes `\n` above + /// to any ASCII byte. + /// * The [`RegexBuilder::crlf`] option changes the line terminator to + /// be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(1..4), re.find(b"\nfoo\n").map(|m| m.range())); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo.bar") + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = b"foo\nbar"; + /// assert_eq!(Some(&b"foo\nbar"[..]), re.find(hay).map(|m| m.as_bytes())); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for the entire pattern. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. 
Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert_eq!(Some(&b"foo"[..]), re.find(hay).map(|m| m.as_bytes())); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\n\r\n"; + /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); + /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. 
+ /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = b"\x00foo\x00"; + /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\n")); + /// assert!(!re.is_match(b"\x00")); + /// ``` + /// + /// This shows that building a regex will work even when the byte + /// given is not ASCII. This is unlike the top-level `Regex` API where + /// matching invalid UTF-8 is not allowed. + /// + /// Note though that you must disable Unicode mode. This is required + /// because Unicode mode requires matching one codepoint at a time, + /// and there is no way to match a non-ASCII byte as if it were a + /// codepoint. + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// assert!( + /// RegexBuilder::new(r".") + /// .unicode(false) + /// .line_terminator(0x80) + /// .build() + /// .is_ok(), + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for the entire pattern. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"a+") + /// .swap_greed(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(&b"a"[..]), re.find(b"aaa").map(|m| m.as_bytes())); + /// ``` + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for the entire pattern. + /// + /// When enabled, whitespace will be treated as insignificant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. + /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let pat = r" + /// \b + /// (?<first>\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?<last>\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexBuilder::new(pat) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// + /// let caps = re.captures(b"Harry Potter").unwrap(); + /// assert_eq!(&b"Harry"[..], &caps["first"]); + /// assert_eq!(&b"Potter"[..], &caps["last"]); + /// + /// let caps = re.captures(b"Harry J. 
Potter").unwrap(); + /// assert_eq!(&b"Harry"[..], &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!( + /// Some(&b"J"[..]), + /// caps.name("initial").map(|m| m.as_bytes()), + /// ); + /// assert_eq!(None, caps.name("middle").map(|m| m.as_bytes())); + /// assert_eq!(&b"Potter"[..], &caps["last"]); + /// + /// let caps = re.captures(b"Harry James Potter").unwrap(); + /// assert_eq!(&b"Harry"[..], &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!(None, caps.name("initial").map(|m| m.as_bytes())); + /// assert_eq!( + /// Some(&b"James"[..]), + /// caps.name("middle").map(|m| m.as_bytes()), + /// ); + /// assert_eq!(&b"Potter"[..], &caps["last"]); + /// ``` + pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for the entire pattern. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported by this library. However, many users expect + /// backreferences to be supported. Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexBuilder::new(r"\141") + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the amount of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. 
+ /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err()); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, it tends to be the most + /// commonly used regex engine in default configurations. It tends to + /// adopt the performance profile of a fully built DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. + /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully built DFA that is exponential in size with respect to `N`. + /// The lazy DFA will prevent exponential space blow-up, but its cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. 
+ /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. + /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. + /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. 
For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok()); + /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { + self.builder.nest_limit(limit); + self + } + } + + /// A configurable builder for a [`RegexSet`]. + /// + /// This builder can be used to programmatically set flags such as `i` + /// (case insensitive) and `x` (for verbose mode). This builder can also be + /// used to configure things like the line terminator and a size limit on + /// the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexSetBuilder { + builder: Builder, + } + + impl RegexSetBuilder { + /// Create a new builder with a default configuration for the given + /// patterns. + /// + /// If the patterns are invalid or exceed the configured size limits, + /// then an error will be returned when [`RegexSetBuilder::build`] is + /// called. + pub fn new<I, S>(patterns: I) -> RegexSetBuilder + where + I: IntoIterator<Item = S>, + S: AsRef<str>, + { + RegexSetBuilder { builder: Builder::new(patterns) } + } + + /// Compiles the patterns given to `RegexSetBuilder::new` with the + /// configuration set on this builder. + /// + /// If the patterns aren't valid regexes or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result<RegexSet, Error> { + self.builder.build_many_bytes() + } + + /// This configures Unicode mode for all of the patterns. 
+ /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, whereas it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that unlike the top-level `RegexSet` for searching `&str`, + /// it is permitted to disable Unicode mode even if the resulting + /// pattern could match invalid UTF-8. For example, `(?-u:.)` is not + /// a valid pattern for a top-level `RegexSet`, but is valid for a + /// `bytes::RegexSet`. + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"\w"]) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ".as_bytes())); + /// + /// let re = RegexSetBuilder::new([r"s"]) + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. 
But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ".as_bytes())); + /// ``` + /// + /// Since this builder is for constructing a + /// [`bytes::RegexSet`](RegexSet), one can disable Unicode mode even if + /// it would match invalid UTF-8: + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // With Unicode mode disabled, `.` matches any single byte except + /// // for `\n`, including bytes such as 0xFF that are not valid UTF-8. + /// assert!(re.is_match(b"\xFF")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for all + /// of the patterns. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"]) + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match(b"fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for all of the patterns. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. 
Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexSetBuilder::line_terminator`] option changes `\n` + /// above to any ASCII byte. + /// * The [`RegexSetBuilder::crlf`] option changes the line terminator + /// to be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\nfoo\n")); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for all of the patterns. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo.bar"]) + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = b"foo\nbar"; + /// assert!(re.is_match(hay)); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for all of the patterns. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) 
+ /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^\n"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// assert!(!re.is_match(b"\r\n")); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = b"\x00foo\x00"; + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\n")); + /// assert!(!re.is_match(b"\x00")); + /// ``` + /// + /// This shows that building a regex will work even when the byte given + /// is not ASCII. 
This is unlike the top-level `RegexSet` API where + /// matching invalid UTF-8 is not allowed. + /// + /// Note though that you must disable Unicode mode. This is required + /// because Unicode mode requires matching one codepoint at a time, + /// and there is no way to match a non-ASCII byte as if it were a + /// codepoint. + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// assert!( + /// RegexSetBuilder::new([r"."]) + /// .unicode(false) + /// .line_terminator(0x80) + /// .build() + /// .is_ok(), + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for all of the patterns. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// Note that this is generally not useful for a `RegexSet` since a + /// `RegexSet` can only report whether a pattern matches or not. Since + /// greediness never impacts whether a match is found or not (only the + /// offsets of the match), it follows that whether parts of a pattern + /// are greedy or not doesn't matter for a `RegexSet`. + /// + /// The default for this is `false`. + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for all of the patterns. + /// + /// When enabled, whitespace will be treated as insignificant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. 
+ /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let pat = r" + /// \b + /// (?<first>\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?<initial>\p{Uppercase})\.|(?<middle>\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?<last>\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexSetBuilder::new([pat]) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"Harry Potter")); + /// assert!(re.is_match(b"Harry J. Potter")); + /// assert!(re.is_match(b"Harry James Potter")); + /// assert!(!re.is_match(b"harry J. Potter")); + /// ``` + pub fn ignore_whitespace( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for all of the patterns. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported by this library. However, many users expect + /// backreferences to be supported. 
Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexSetBuilder::new([r"\141"]) + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the amount of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. + /// assert!( + /// RegexSetBuilder::new([r"\w"]) + /// .size_limit(45_000) + /// .build() + /// .is_err() + /// ); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, it tends to be the most + /// commonly used regex engine in default configurations. It tends to + /// adopt the performance profile of a fully built DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. + /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. 
+ /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully built DFA that is exponential in size with respect to `N`. + /// The lazy DFA will prevent exponential space blow-up, but its cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. + /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit( + &mut self, + bytes: usize, + ) -> &mut RegexSetBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. + /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. 
See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. + /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok()); + /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder { + self.builder.nest_limit(limit); + self + } + } +} diff --git a/third_party/rust/regex/src/bytes.rs b/third_party/rust/regex/src/bytes.rs new file mode 100644 index 0000000000..3f53a3ea55 --- /dev/null +++ b/third_party/rust/regex/src/bytes.rs @@ -0,0 +1,91 @@ +/*! +Search for regex matches in `&[u8]` haystacks. + +This module provides a nearly identical API via [`Regex`] to the one found in +the top-level of this crate. There are two important differences: + +1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec<u8>` +is used where `String` would have been used in the top-level API. +2. Unicode support can be disabled even when disabling it would result in +matching invalid UTF-8 bytes. + +# Example: match null terminated string + +This shows how to find all null-terminated strings in a slice of bytes. This +works even if a C string contains invalid UTF-8. + +```rust +use regex::bytes::Regex; + +let re = Regex::new(r"(?-u)(?<cstr>[^\x00]+)\x00").unwrap(); +let hay = b"foo\x00qu\xFFux\x00baz\x00"; + +// Extract all of the strings without the NUL terminator from each match. 
+// The unwrap is OK here since a match requires the `cstr` capture to match. +let cstrs: Vec<&[u8]> = + re.captures_iter(hay) + .map(|c| c.name("cstr").unwrap().as_bytes()) + .collect(); +assert_eq!(cstrs, vec![&b"foo"[..], &b"qu\xFFux"[..], &b"baz"[..]]); +``` + +# Example: selectively enable Unicode support + +This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded +string (e.g., to extract a title from a Matroska file): + +```rust +use regex::bytes::Regex; + +let re = Regex::new( + r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))" +).unwrap(); +let hay = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65"; + +// Notice that despite the `.*` at the end, it will only match valid UTF-8 +// because Unicode mode was enabled with the `u` flag. Without the `u` flag, +// the `.*` would match the rest of the bytes regardless of whether they were +// valid UTF-8. +let (_, [title]) = re.captures(hay).unwrap().extract(); +assert_eq!(title, b"\xE2\x98\x83"); +// We can UTF-8 decode the title now. And the unwrap here +// is correct because the existence of a match guarantees +// that `title` is valid UTF-8. +let title = std::str::from_utf8(title).unwrap(); +assert_eq!(title, "☃"); +``` + +In general, if the Unicode flag is enabled in a capture group and that capture +is part of the overall match, then the capture is *guaranteed* to be valid +UTF-8. + +# Syntax + +The supported syntax is pretty much the same as the syntax for Unicode +regular expressions with a few changes that make sense for matching arbitrary +bytes: + +1. The `u` flag can be disabled even when disabling it might cause the regex to +match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in +"ASCII compatible" mode. +2. In ASCII compatible mode, neither Unicode scalar values nor Unicode +character classes are allowed. +3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`) +revert to their typical ASCII definition. 
`\w` maps to `[[:word:]]`, `\d` maps +to `[[:digit:]]` and `\s` maps to `[[:space:]]`. +4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to +determine whether a byte is a word byte or not. +5. Hexadecimal notation can be used to specify arbitrary bytes instead of +Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the +literal byte `\xFF`, while in Unicode mode, `\xFF` is the Unicode codepoint +`U+00FF` that matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal +notation when enabled. +6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the +`s` flag is additionally enabled, `.` matches any byte. + +# Performance + +In general, one should expect performance on `&[u8]` to be roughly similar to +performance on `&str`. +*/ +pub use crate::{builders::bytes::*, regex::bytes::*, regexset::bytes::*}; diff --git a/third_party/rust/regex/src/error.rs b/third_party/rust/regex/src/error.rs new file mode 100644 index 0000000000..6026b3849d --- /dev/null +++ b/third_party/rust/regex/src/error.rs @@ -0,0 +1,102 @@ +use alloc::string::{String, ToString}; + +use regex_automata::meta; + +/// An error that occurred during parsing or compiling a regular expression. +#[non_exhaustive] +#[derive(Clone, PartialEq)] +pub enum Error { + /// A syntax error. + Syntax(String), + /// The compiled program exceeded the set size + /// limit. The argument is the size limit imposed by + /// [`RegexBuilder::size_limit`](crate::RegexBuilder::size_limit). Even + /// when not configured explicitly, it defaults to a reasonable limit. + /// + /// If you're getting this error, it occurred because your regex has been + /// compiled to an intermediate state that is too big. It is important to + /// note that exceeding this limit does _not_ mean the regex is too big to + /// _work_, but rather, the regex is big enough that it may wind up being + /// surprisingly slow when used in a search. 
In other words, this error is + /// meant to be a practical heuristic for avoiding a performance footgun, + /// and especially so for the case where the regex pattern is coming from + /// an untrusted source. + /// + /// There are generally two ways to move forward if you hit this error. + /// The first is to find some way to use a smaller regex. The second is to + /// increase the size limit via `RegexBuilder::size_limit`. However, if + /// your regex pattern is not from a trusted source, then neither of these + /// approaches may be appropriate. Instead, you'll have to determine just + /// how big of a regex you want to allow. + CompiledTooBig(usize), +} + +impl Error { + pub(crate) fn from_meta_build_error(err: meta::BuildError) -> Error { + if let Some(size_limit) = err.size_limit() { + Error::CompiledTooBig(size_limit) + } else if let Some(ref err) = err.syntax_error() { + Error::Syntax(err.to_string()) + } else { + // This is a little suspect. Technically there are more ways for + // a meta regex to fail to build other than "exceeded size limit" + // and "syntax error." For example, if there are too many states + // or even too many patterns. But in practice this is probably + // good enough. The worst thing that happens is that Error::Syntax + // represents an error that isn't technically a syntax error, but + // the actual message will still be shown. So... it's not too bad. + // + // We really should have made the Error type in the regex crate + // completely opaque. Rookie mistake. + Error::Syntax(err.to_string()) + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { + // TODO: Remove this method entirely on the next breaking semver release. 
+ #[allow(deprecated)] + fn description(&self) -> &str { + match *self { + Error::Syntax(ref err) => err, + Error::CompiledTooBig(_) => "compiled program too big", + } + } +} + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + Error::Syntax(ref err) => err.fmt(f), + Error::CompiledTooBig(limit) => write!( + f, + "Compiled regex exceeds size limit of {} bytes.", + limit + ), + } + } +} + +// We implement our own Debug implementation so that we show nicer syntax +// errors when people use `Regex::new(...).unwrap()`. It's a little weird, +// but the `Syntax` variant is already storing a `String` anyway, so we might +// as well format it nicely. +impl core::fmt::Debug for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + Error::Syntax(ref err) => { + let hr: String = core::iter::repeat('~').take(79).collect(); + writeln!(f, "Syntax(")?; + writeln!(f, "{}", hr)?; + writeln!(f, "{}", err)?; + writeln!(f, "{}", hr)?; + write!(f, ")")?; + Ok(()) + } + Error::CompiledTooBig(limit) => { + f.debug_tuple("CompiledTooBig").field(&limit).finish() + } + } + } +} diff --git a/third_party/rust/regex/src/find_byte.rs b/third_party/rust/regex/src/find_byte.rs new file mode 100644 index 0000000000..9c6915db40 --- /dev/null +++ b/third_party/rust/regex/src/find_byte.rs @@ -0,0 +1,17 @@ +/// Searches for the given needle in the given haystack. +/// +/// If the perf-literal feature is enabled, then this uses the super optimized +/// memchr crate. Otherwise, it uses the naive byte-at-a-time implementation. 
+pub(crate) fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> { + #[cfg(not(feature = "perf-literal"))] + fn imp(needle: u8, haystack: &[u8]) -> Option<usize> { + haystack.iter().position(|&b| b == needle) + } + + #[cfg(feature = "perf-literal")] + fn imp(needle: u8, haystack: &[u8]) -> Option<usize> { + memchr::memchr(needle, haystack) + } + + imp(needle, haystack) +} diff --git a/third_party/rust/regex/src/lib.rs b/third_party/rust/regex/src/lib.rs new file mode 100644 index 0000000000..1e191b6924 --- /dev/null +++ b/third_party/rust/regex/src/lib.rs @@ -0,0 +1,1336 @@ +/*! +This crate provides routines for searching strings for matches of a [regular +expression] (aka "regex"). The regex syntax supported by this crate is similar +to other regex engines, but it lacks several features that are not known how to +implement efficiently. This includes, but is not limited to, look-around and +backreferences. In exchange, all regex searches in this crate have worst case +`O(m * n)` time complexity, where `m` is proportional to the size of the regex +and `n` is proportional to the size of the string being searched. + +[regular expression]: https://en.wikipedia.org/wiki/Regular_expression + +If you just want API documentation, then skip to the [`Regex`] type. Otherwise, +here's a quick example showing one way of parsing the output of a grep-like +program: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?m)^([^:]+):([0-9]+):(.+)$").unwrap(); +let hay = "\ +path/to/foo:54:Blue Harvest +path/to/bar:90:Something, Something, Something, Dark Side +path/to/baz:3:It's a Trap! 
+"; + +let mut results = vec![]; +for (_, [path, lineno, line]) in re.captures_iter(hay).map(|c| c.extract()) { + results.push((path, lineno.parse::<u64>()?, line)); +} +assert_eq!(results, vec![ + ("path/to/foo", 54, "Blue Harvest"), + ("path/to/bar", 90, "Something, Something, Something, Dark Side"), + ("path/to/baz", 3, "It's a Trap!"), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +# Overview + +The primary type in this crate is a [`Regex`]. Its most important methods are +as follows: + +* [`Regex::new`] compiles a regex using the default configuration. A +[`RegexBuilder`] permits setting a non-default configuration. (For example, +case insensitive matching, verbose mode and others.) +* [`Regex::is_match`] reports whether a match exists in a particular haystack. +* [`Regex::find`] reports the byte offsets of a match in a haystack, if one +exists. [`Regex::find_iter`] returns an iterator over all such matches. +* [`Regex::captures`] returns a [`Captures`], which reports both the byte +offsets of a match in a haystack and the byte offsets of each matching capture +group from the regex in the haystack. +[`Regex::captures_iter`] returns an iterator over all such matches. + +There is also a [`RegexSet`], which permits searching for multiple regex +patterns simultaneously in a single search. However, it currently only reports +which patterns match and *not* the byte offsets of a match. + +Otherwise, this top-level crate documentation is organized as follows: + +* [Usage](#usage) shows how to add the `regex` crate to your Rust project. +* [Examples](#examples) provides a limited selection of regex search examples. +* [Performance](#performance) provides a brief summary of how to optimize regex +searching speed. +* [Unicode](#unicode) discusses support for non-ASCII patterns. +* [Syntax](#syntax) enumerates the specific regex syntax supported by this +crate. 
+* [Untrusted input](#untrusted-input) discusses how this crate deals with regex +patterns or haystacks that are untrusted. +* [Crate features](#crate-features) documents the Cargo features that can be +enabled or disabled for this crate. +* [Other crates](#other-crates) links to other crates in the `regex` family. + +# Usage + +The `regex` crate is [on crates.io](https://crates.io/crates/regex) and can be +used by adding `regex` to your dependencies in your project's `Cargo.toml`. +Or more simply, just run `cargo add regex`. + +Here is a complete example that creates a new Rust project, adds a dependency +on `regex`, creates the source code for a regex search and then runs the +program. + +First, create the project in a new directory: + +```text +$ mkdir regex-example +$ cd regex-example +$ cargo init +``` + +Second, add a dependency on `regex`: + +```text +$ cargo add regex +``` + +Third, edit `src/main.rs`. Delete what's there and replace it with this: + +``` +use regex::Regex; + +fn main() { + let re = Regex::new(r"Hello (?<name>\w+)!").unwrap(); + let Some(caps) = re.captures("Hello Murphy!") else { + println!("no match!"); + return; + }; + println!("The name is: {}", &caps["name"]); +} +``` + +Fourth, run it with `cargo run`: + +```text +$ cargo run + Compiling memchr v2.5.0 + Compiling regex-syntax v0.7.1 + Compiling aho-corasick v1.0.1 + Compiling regex v1.8.1 + Compiling regex-example v0.1.0 (/tmp/regex-example) + Finished dev [unoptimized + debuginfo] target(s) in 4.22s + Running `target/debug/regex-example` +The name is: Murphy +``` + +The first time you run the program will show more output like above. But +subsequent runs shouldn't have to re-compile the dependencies. + +# Examples + +This section provides a few examples, in tutorial style, showing how to +search a haystack with a regex. There are more examples throughout the API +documentation. 
+ +Before starting though, it's worth defining a few terms: + +* A **regex** is a Rust value whose type is `Regex`. We use `re` as a +variable name for a regex. +* A **pattern** is the string that is used to build a regex. We use `pat` as +a variable name for a pattern. +* A **haystack** is the string that is searched by a regex. We use `hay` as a +variable name for a haystack. + +Sometimes the words "regex" and "pattern" are used interchangeably. + +General use of regular expressions in this crate proceeds by compiling a +**pattern** into a **regex**, and then using that regex to search, split or +replace parts of a **haystack**. + +### Example: find a middle initial + +We'll start off with a very simple example: a regex that looks for a specific +name but uses a wildcard to match a middle initial. Our pattern serves as +something like a template that will match a particular name with *any* middle +initial. + +```rust +use regex::Regex; + +// We use 'unwrap()' here because it would be a bug in our program if the +// pattern failed to compile to a regex. Panicking in the presence of a bug +// is okay. +let re = Regex::new(r"Homer (.)\. Simpson").unwrap(); +let hay = "Homer J. Simpson"; +let Some(caps) = re.captures(hay) else { return }; +assert_eq!("J", &caps[1]); +``` + +There are a few things worth noticing here in our first example: + +* The `.` is a special pattern meta character that means "match any single +character except for new lines." (More precisely, in this crate, it means +"match any UTF-8 encoding of any Unicode scalar value other than `\n`.") +* We can match an actual `.` literally by escaping it, i.e., `\.`. +* We use Rust's [raw strings] to avoid needing to deal with escape sequences in +both the regex pattern syntax and in Rust's string literal syntax. If we didn't +use raw strings here, we would have had to use `\\.` to match a literal `.` +character. That is, `r"\."` and `"\\."` are equivalent patterns. 
+* We put our wildcard `.` instruction in parentheses. These parentheses have a +special meaning that says, "make whatever part of the haystack matches within +these parentheses available as a capturing group." After finding a match, we +access this capture group with `&caps[1]`. + +[raw strings]: https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals + +Otherwise, we execute a search using `re.captures(hay)` and return from our +function if no match occurred. We then reference the middle initial by asking +for the part of the haystack that matched the capture group indexed at `1`. +(The capture group at index 0 is implicit and always corresponds to the entire +match. In this case, that's `Homer J. Simpson`.) + +### Example: named capture groups + +Continuing from our middle initial example above, we can tweak the pattern +slightly to give a name to the group that matches the middle initial: + +```rust +use regex::Regex; + +// Note that (?P<middle>.) is a different way to spell the same thing. +let re = Regex::new(r"Homer (?<middle>.)\. Simpson").unwrap(); +let hay = "Homer J. Simpson"; +let Some(caps) = re.captures(hay) else { return }; +assert_eq!("J", &caps["middle"]); +``` + +Giving a name to a group can be useful when there are multiple groups in +a pattern. It makes the code referring to those groups a bit easier to +understand. + +### Example: validating a particular date format + +This example shows how to confirm whether a haystack, in its entirety, matches +a particular date format: + +```rust +use regex::Regex; + +let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); +assert!(re.is_match("2010-03-14")); +``` + +Notice the use of the `^` and `$` anchors. In this crate, every regex search is +run with an implicit `(?s:.)*?` at the beginning of its pattern, which allows +the regex to match anywhere in a haystack. Anchors, as above, can be used to +ensure that the full haystack matches a pattern. 
+ +This crate is also Unicode aware by default, which means that `\d` might match +more than you might expect it to. For example: + +```rust +use regex::Regex; + +let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); +assert!(re.is_match("𝟚𝟘𝟙𝟘-𝟘𝟛-𝟙𝟜")); +``` + +To only match an ASCII decimal digit, all of the following are equivalent: + +* `[0-9]` +* `(?-u:\d)` +* `[[:digit:]]` +* `[\d&&\p{ascii}]` + +### Example: finding dates in a haystack + +In the previous example, we showed how one might validate that a haystack, +in its entirety, corresponded to a particular date format. But what if we wanted +to extract all things that look like dates in a specific format from a haystack? +To do this, we can use an iterator API to find all matches (notice that we've +removed the anchors and switched to looking for ASCII-only digits): + +```rust +use regex::Regex; + +let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +// 'm' is a 'Match', and 'as_str()' returns the matching part of the haystack. +let dates: Vec<&str> = re.find_iter(hay).map(|m| m.as_str()).collect(); +assert_eq!(dates, vec![ + "1865-04-14", + "1881-07-02", + "1901-09-06", + "1963-11-22", +]); +``` + +We can also iterate over [`Captures`] values instead of [`Match`] values, and +that in turn permits accessing each component of the date via capturing groups: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?<y>[0-9]{4})-(?<m>[0-9]{2})-(?<d>[0-9]{2})").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +// 'm' is a 'Match', and 'as_str()' returns the matching part of the haystack. +let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| { + // The unwraps are okay because every capture group must match if the whole + // regex matches, and in this context, we know we have a match. 
+ // + // Note that we use `caps.name("y").unwrap().as_str()` instead of + // `&caps["y"]` because the lifetime of the former is the same as the + // lifetime of `hay` above, but the lifetime of the latter is tied to the + // lifetime of `caps` due to how the `Index` trait is defined. + let year = caps.name("y").unwrap().as_str(); + let month = caps.name("m").unwrap().as_str(); + let day = caps.name("d").unwrap().as_str(); + (year, month, day) +}).collect(); +assert_eq!(dates, vec![ + ("1865", "04", "14"), + ("1881", "07", "02"), + ("1901", "09", "06"), + ("1963", "11", "22"), +]); +``` + +### Example: simpler capture group extraction + +One can use [`Captures::extract`] to make the code from the previous example a +bit simpler in this case: + +```rust +use regex::Regex; + +let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| { + let (_, [year, month, day]) = caps.extract(); + (year, month, day) +}).collect(); +assert_eq!(dates, vec![ + ("1865", "04", "14"), + ("1881", "07", "02"), + ("1901", "09", "06"), + ("1963", "11", "22"), +]); +``` + +`Captures::extract` works by ensuring that the number of matching groups match +the number of groups requested via the `[year, month, day]` syntax. If they do, +then the substrings for each corresponding capture group are automatically +returned in an appropriately sized array. Rust's syntax for pattern matching +arrays does the rest. + +### Example: replacement with named capture groups + +Building on the previous example, perhaps we'd like to rearrange the date +formats. This can be done by finding each match and replacing it with +something different. 
The [`Regex::replace_all`] routine provides a convenient +way to do this, including by supporting references to named groups in the +replacement string: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?<y>\d{4})-(?<m>\d{2})-(?<d>\d{2})").unwrap(); +let before = "1973-01-05, 1975-08-25 and 1980-10-18"; +let after = re.replace_all(before, "$m/$d/$y"); +assert_eq!(after, "01/05/1973, 08/25/1975 and 10/18/1980"); +``` + +The replace methods are actually polymorphic in the replacement, which +provides more flexibility than is seen here. (See the documentation for +[`Regex::replace`] for more details.) + +### Example: verbose mode + +When your regex gets complicated, you might consider using something other +than regex. But if you stick with regex, you can use the `x` flag to enable +insignificant whitespace mode or "verbose mode." In this mode, whitespace +is treated as insignificant and one may write comments. This may make your +patterns easier to comprehend. + +```rust +use regex::Regex; + +let re = Regex::new(r"(?x) + (?P<y>\d{4}) # the year, including all Unicode digits + - + (?P<m>\d{2}) # the month, including all Unicode digits + - + (?P<d>\d{2}) # the day, including all Unicode digits +").unwrap(); + +let before = "1973-01-05, 1975-08-25 and 1980-10-18"; +let after = re.replace_all(before, "$m/$d/$y"); +assert_eq!(after, "01/05/1973, 08/25/1975 and 10/18/1980"); +``` + +If you wish to match against whitespace in this mode, you can still use `\s`, +`\n`, `\t`, etc. For escaping a single space character, you can escape it +directly with `\ `, use its hex character code `\x20` or temporarily disable +the `x` flag, e.g., `(?-x: )`. 
+ +### Example: match multiple regular expressions simultaneously + +This demonstrates how to use a [`RegexSet`] to match multiple (possibly +overlapping) regexes in a single scan of a haystack: + +```rust +use regex::RegexSet; + +let set = RegexSet::new(&[ + r"\w+", + r"\d+", + r"\pL+", + r"foo", + r"bar", + r"barfoo", + r"foobar", +]).unwrap(); + +// Iterate over and collect all of the matches. Each match corresponds to the +// ID of the matching pattern. +let matches: Vec<_> = set.matches("foobar").into_iter().collect(); +assert_eq!(matches, vec![0, 2, 3, 4, 6]); + +// You can also test whether a particular regex matched: +let matches = set.matches("foobar"); +assert!(!matches.matched(5)); +assert!(matches.matched(6)); +``` + +# Performance + +This section briefly discusses a few concerns regarding the speed and resource +usage of regexes. + +### Only ask for what you need + +When running a search with a regex, there are generally three different types +of information one can ask for: + +1. Does a regex match in a haystack? +2. Where does a regex match in a haystack? +3. Where do each of the capturing groups match in a haystack? + +Generally speaking, this crate could provide a function to answer only #3, +which would subsume #1 and #2 automatically. However, it can be significantly +more expensive to compute the location of capturing group matches, so it's best +not to do it if you don't need to. + +Therefore, only ask for what you need. For example, don't use [`Regex::find`] +if you only need to test if a regex matches a haystack. Use [`Regex::is_match`] +instead. + +### Unicode can impact memory usage and search speed + +This crate has first class support for Unicode and it is **enabled by default**. +In many cases, the extra memory required to support it will be negligible and +it typically won't impact search speed. But it can in some cases. 
+ +With respect to memory usage, the impact of Unicode principally manifests +through the use of Unicode character classes. Unicode character classes +tend to be quite large. For example, `\w` by default matches around 140,000 +distinct codepoints. This requires additional memory, and tends to slow down +regex compilation. While a `\w` here and there is unlikely to be noticed, +writing `\w{100}` will for example result in quite a large regex by default. +Indeed, `\w` is considerably larger than its ASCII-only version, so if your +requirements are satisfied by ASCII, it's probably a good idea to stick to +ASCII classes. The ASCII-only version of `\w` can be spelled in a number of +ways. All of the following are equivalent: + +* `[0-9A-Za-z_]` +* `(?-u:\w)` +* `[[:word:]]` +* `[\w&&\p{ascii}]` + +With respect to search speed, Unicode tends to be handled pretty well, even when +using large Unicode character classes. However, some of the faster internal +regex engines cannot handle a Unicode aware word boundary assertion. So if you +don't need Unicode-aware word boundary assertions, you might consider using +`(?-u:\b)` instead of `\b`, where the former uses an ASCII-only definition of +a word character. + +### Literals might accelerate searches + +This crate tends to be quite good at recognizing literals in a regex pattern +and using them to accelerate a search. If it is at all possible to include +some kind of literal in your pattern, then it might make search substantially +faster. For example, in the regex `\w+@\w+`, the engine will look for +occurrences of `@` and then try a reverse match for `\w+` to find the start +position. + +### Avoid re-compiling regexes, especially in a loop + +It is an anti-pattern to compile the same pattern in a loop since regex +compilation is typically expensive. (It takes anywhere from a few microseconds +to a few **milliseconds** depending on the size of the pattern.) 
Not only is +compilation itself expensive, but this also prevents optimizations that reuse +allocations internally to the regex engine. + +In Rust, it can sometimes be a pain to pass regexes around if they're used from +inside a helper function. Instead, we recommend using crates like [`once_cell`] +and [`lazy_static`] to ensure that patterns are compiled exactly once. + +[`once_cell`]: https://crates.io/crates/once_cell +[`lazy_static`]: https://crates.io/crates/lazy_static + +This example shows how to use `once_cell`: + +```rust +use { + once_cell::sync::Lazy, + regex::Regex, +}; + +fn some_helper_function(haystack: &str) -> bool { + static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"...").unwrap()); + RE.is_match(haystack) +} + +fn main() { + assert!(some_helper_function("abc")); + assert!(!some_helper_function("ac")); +} +``` + +Specifically, in this example, the regex will be compiled when it is used for +the first time. On subsequent uses, it will reuse the previously built `Regex`. +Notice how one can define the `Regex` locally to a specific function. + +### Sharing a regex across threads can result in contention + +While a single `Regex` can be freely used from multiple threads simultaneously, +there is a small synchronization cost that must be paid. Generally speaking, +one shouldn't expect to observe this unless the principal task in each thread +is searching with the regex *and* most searches are on short haystacks. In this +case, internal contention on shared resources can spike and increase latency, +which in turn may slow down each individual search. + +One can work around this by cloning each `Regex` before sending it to another +thread. The cloned regexes will still share the same internal read-only portion +of its compiled state (it's reference counted), but each thread will get +optimized access to the mutable space that is used to run a search. In general, +there is no additional cost in memory to doing this. 
The only cost is the added +code complexity required to explicitly clone the regex. (If you share the same +`Regex` across multiple threads, each thread still gets its own mutable space, +but accessing that space is slower.) + +# Unicode + +This section discusses what kind of Unicode support this regex library has. +Before showing some examples, we'll summarize the relevant points: + +* This crate almost fully implements "Basic Unicode Support" (Level 1) as +specified by the [Unicode Technical Standard #18][UTS18]. The full details +of what is supported are documented in [UNICODE.md] in the root of the regex +crate repository. There is virtually no support for "Extended Unicode Support" +(Level 2) from UTS#18. +* The top-level [`Regex`] runs searches *as if* iterating over each of the +codepoints in the haystack. That is, the fundamental atom of matching is a +single codepoint. +* [`bytes::Regex`], in contrast, permits disabling Unicode mode for part or all +of your pattern in all cases. When Unicode mode is disabled, then a search is +run *as if* iterating over each byte in the haystack. That is, the fundamental +atom of matching is a single byte. (A top-level `Regex` also permits disabling +Unicode and thus matching *as if* it were one byte at a time, but only when +doing so wouldn't permit matching invalid UTF-8.) +* When Unicode mode is enabled (the default), `.` will match an entire Unicode +scalar value, even when it is encoded using multiple bytes. When Unicode mode +is disabled (e.g., `(?-u:.)`), then `.` will match a single byte in all cases. +* The character classes `\w`, `\d` and `\s` are all Unicode-aware by default. +Use `(?-u:\w)`, `(?-u:\d)` and `(?-u:\s)` to get their ASCII-only definitions. +* Similarly, `\b` and `\B` use a Unicode definition of a "word" character. To +get ASCII-only word boundaries, use `(?-u:\b)` and `(?-u:\B)`. +* `^` and `$` are **not** Unicode-aware in multi-line mode. 
Namely, they only +recognize `\n` (assuming CRLF mode is not enabled) and not any of the other +forms of line terminators defined by Unicode. +* Case insensitive searching is Unicode-aware and uses simple case folding. +* Unicode general categories, scripts and many boolean properties are available +by default via the `\p{property name}` syntax. +* In all cases, matches are reported using byte offsets. Or more precisely, +UTF-8 code unit offsets. This permits constant time indexing and slicing of the +haystack. + +[UTS18]: https://unicode.org/reports/tr18/ +[UNICODE.md]: https://github.com/rust-lang/regex/blob/master/UNICODE.md + +Patterns themselves are **only** interpreted as a sequence of Unicode scalar +values. This means you can use Unicode characters directly in your pattern: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?i)Δ+").unwrap(); +let m = re.find("ΔδΔ").unwrap(); +assert_eq!((0, 6), (m.start(), m.end())); +// alternatively: +assert_eq!(0..6, m.range()); +``` + +As noted above, Unicode general categories, scripts, script extensions, ages +and a smattering of boolean properties are available as character classes. For +example, you can match a sequence of numerals, Greek or Cherokee letters: + +```rust +use regex::Regex; + +let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap(); +let m = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap(); +assert_eq!(3..23, m.range()); +``` + +While not specific to Unicode, this library also supports character class set +operations. Namely, one can nest character classes arbitrarily and perform set +operations on them. Those set operations are union (the default), intersection, +difference and symmetric difference. These set operations tend to be most +useful with Unicode character classes. 
For example, to match any codepoint +that is both in the `Greek` script and in the `Letter` general category: + +```rust +use regex::Regex; + +let re = Regex::new(r"[\p{Greek}&&\pL]+").unwrap(); +let subs: Vec<&str> = re.find_iter("ΔδΔ𐅌ΔδΔ").map(|m| m.as_str()).collect(); +assert_eq!(subs, vec!["ΔδΔ", "ΔδΔ"]); + +// If we just matched on Greek, then all codepoints would match! +let re = Regex::new(r"\p{Greek}+").unwrap(); +let subs: Vec<&str> = re.find_iter("ΔδΔ𐅌ΔδΔ").map(|m| m.as_str()).collect(); +assert_eq!(subs, vec!["ΔδΔ𐅌ΔδΔ"]); +``` + +### Opt out of Unicode support + +The [`bytes::Regex`] type can be used to search `&[u8]` haystacks. By +default, haystacks are conventionally treated as UTF-8 just like it is with the +main `Regex` type. However, this behavior can be disabled by turning off the +`u` flag, even if doing so could result in matching invalid UTF-8. For example, +when the `u` flag is disabled, `.` will match any byte instead of any Unicode +scalar value. + +Disabling the `u` flag is also possible with the standard `&str`-based `Regex` +type, but it is only allowed where the UTF-8 invariant is maintained. For +example, `(?-u:\w)` is an ASCII-only `\w` character class and is legal in an +`&str`-based `Regex`, but `(?-u:\W)` will attempt to match *any byte* that +isn't in `(?-u:\w)`, which in turn includes bytes that are invalid UTF-8. +Similarly, `(?-u:\xFF)` will attempt to match the raw byte `\xFF` (instead of +`U+00FF`), which is invalid UTF-8 and therefore is illegal in `&str`-based +regexes. + +Finally, since Unicode support requires bundling large Unicode data +tables, this crate exposes knobs to disable the compilation of those +data tables, which can be useful for shrinking binary size and reducing +compilation times. For details on how to do that, see the section on [crate +features](#crate-features). + +# Syntax + +The syntax supported in this crate is documented below. 
+ +Note that the regular expression parser and abstract syntax are exposed in +a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax). + +### Matching one character + +<pre class="rust"> +. any character except new line (includes new line with s flag) +[0-9] any ASCII digit +\d digit (\p{Nd}) +\D not digit +\pX Unicode character class identified by a one-letter name +\p{Greek} Unicode character class (general category or script) +\PX Negated Unicode character class identified by a one-letter name +\P{Greek} negated Unicode character class (general category or script) +</pre> + +### Character classes + +<pre class="rust"> +[xyz] A character class matching either x, y or z (union). +[^xyz] A character class matching any character except x, y and z. +[a-z] A character class matching any character in range a-z. +[[:alpha:]] ASCII character class ([A-Za-z]) +[[:^alpha:]] Negated ASCII character class ([^A-Za-z]) +[x[^xyz]] Nested/grouping character class (matching any character except y and z) +[a-y&&xyz] Intersection (matching x or y) +[0-9&&[^4]] Subtraction using intersection and negation (matching 0-9 except 4) +[0-9--4] Direct subtraction (matching 0-9 except 4) +[a-g~~b-h] Symmetric difference (matching `a` and `h` only) +[\[\]] Escaping in character classes (matching [ or ]) +[a&&b] An empty character class matching nothing +</pre> + +Any named character class may appear inside a bracketed `[...]` character +class. For example, `[\p{Greek}[:digit:]]` matches any ASCII digit or any +codepoint in the `Greek` script. `[\p{Greek}&&\pL]` matches Greek letters. + +Precedence in character classes, from most binding to least: + +1. Ranges: `[a-cd]` == `[[a-c]d]` +2. Union: `[ab&&bc]` == `[[ab]&&[bc]]` +3. Intersection, difference, symmetric difference. All three have equivalent +precedence, and are evaluated in left-to-right order. For example, +`[\pL--\p{Greek}&&\p{Uppercase}]` == `[[\pL--\p{Greek}]&&\p{Uppercase}]`. +4. Negation: `[^a-z&&b]` == `[^[a-z&&b]]`. 
+ +### Composites + +<pre class="rust"> +xy concatenation (x followed by y) +x|y alternation (x or y, prefer x) +</pre> + +This example shows how an alternation works, and what it means to prefer a +branch in the alternation over subsequent branches. + +``` +use regex::Regex; + +let haystack = "samwise"; +// If 'samwise' comes first in our alternation, then it is +// preferred as a match, even if the regex engine could +// technically detect that 'sam' led to a match earlier. +let re = Regex::new(r"samwise|sam").unwrap(); +assert_eq!("samwise", re.find(haystack).unwrap().as_str()); +// But if 'sam' comes first, then it will match instead. +// In this case, it is impossible for 'samwise' to match +// because 'sam' is a prefix of it. +let re = Regex::new(r"sam|samwise").unwrap(); +assert_eq!("sam", re.find(haystack).unwrap().as_str()); +``` + +### Repetitions + +<pre class="rust"> +x* zero or more of x (greedy) +x+ one or more of x (greedy) +x? zero or one of x (greedy) +x*? zero or more of x (ungreedy/lazy) +x+? one or more of x (ungreedy/lazy) +x?? zero or one of x (ungreedy/lazy) +x{n,m} at least n x and at most m x (greedy) +x{n,} at least n x (greedy) +x{n} exactly n x +x{n,m}? at least n x and at most m x (ungreedy/lazy) +x{n,}? at least n x (ungreedy/lazy) +x{n}? exactly n x +</pre> + +### Empty matches + +<pre class="rust"> +^ the beginning of a haystack (or start-of-line with multi-line mode) +$ the end of a haystack (or end-of-line with multi-line mode) +\A only the beginning of a haystack (even with multi-line mode enabled) +\z only the end of a haystack (even with multi-line mode enabled) +\b a Unicode word boundary (\w on one side and \W, \A, or \z on other) +\B not a Unicode word boundary +</pre> + +The empty regex is valid and matches the empty string. For example, the +empty regex matches `abc` at positions `0`, `1`, `2` and `3`. 
When using the +top-level [`Regex`] on `&str` haystacks, an empty match that splits a codepoint +is guaranteed to never be returned. However, such matches are permitted when +using a [`bytes::Regex`]. For example: + +```rust +let re = regex::Regex::new(r"").unwrap(); +let ranges: Vec<_> = re.find_iter("💩").map(|m| m.range()).collect(); +assert_eq!(ranges, vec![0..0, 4..4]); + +let re = regex::bytes::Regex::new(r"").unwrap(); +let ranges: Vec<_> = re.find_iter("💩".as_bytes()).map(|m| m.range()).collect(); +assert_eq!(ranges, vec![0..0, 1..1, 2..2, 3..3, 4..4]); +``` + +Note that an empty regex is distinct from a regex that can never match. +For example, the regex `[a&&b]` is a character class that represents the +intersection of `a` and `b`. That intersection is empty, which means the +character class is empty. Since nothing is in the empty set, `[a&&b]` matches +nothing, not even the empty string. + +### Grouping and flags + +<pre class="rust"> +(exp) numbered capture group (indexed by opening parenthesis) +(?P<name>exp) named (also numbered) capture group (names must be alpha-numeric) +(?<name>exp) named (also numbered) capture group (names must be alpha-numeric) +(?:exp) non-capturing group +(?flags) set flags within current group +(?flags:exp) set flags for exp (non-capturing) +</pre> + +Capture group names must be any sequence of alpha-numeric Unicode codepoints, +in addition to `.`, `_`, `[` and `]`. Names must start with either an `_` or +an alphabetic codepoint. Alphabetic codepoints correspond to the `Alphabetic` +Unicode property, while numeric codepoints correspond to the union of the +`Decimal_Number`, `Letter_Number` and `Other_Number` general categories. + +Flags are each a single character. For example, `(?x)` sets the flag `x` +and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at +the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets +the `x` flag and clears the `y` flag. 
+ +All flags are by default disabled unless stated otherwise. They are: + +<pre class="rust"> +i case-insensitive: letters match both upper and lower case +m multi-line mode: ^ and $ match begin/end of line +s allow . to match \n +R enables CRLF mode: when multi-line mode is enabled, \r\n is used +U swap the meaning of x* and x*? +u Unicode support (enabled by default) +x verbose mode, ignores whitespace and allows line comments (starting with `#`) +</pre> + +Note that in verbose mode, whitespace is ignored everywhere, including within +character classes. To insert whitespace, use its escaped form or a hex literal. +For example, `\ ` or `\x20` for an ASCII space. + +Flags can be toggled within a pattern. Here's an example that matches +case-insensitively for the first part but case-sensitively for the second part: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?i)a+(?-i)b+").unwrap(); +let m = re.find("AaAaAbbBBBb").unwrap(); +assert_eq!(m.as_str(), "AaAaAbb"); +``` + +Notice that the `a+` matches either `a` or `A`, but the `b+` only matches +`b`. 
+ +Multi-line mode means `^` and `$` no longer match just at the beginning/end of +the input, but also at the beginning/end of lines: + +``` +use regex::Regex; + +let re = Regex::new(r"(?m)^line \d+").unwrap(); +let m = re.find("line one\nline 2\n").unwrap(); +assert_eq!(m.as_str(), "line 2"); +``` + +Note that `^` matches after new lines, even at the end of input: + +``` +use regex::Regex; + +let re = Regex::new(r"(?m)^").unwrap(); +let m = re.find_iter("test\n").last().unwrap(); +assert_eq!((m.start(), m.end()), (5, 5)); +``` + +When both CRLF mode and multi-line mode are enabled, then `^` and `$` will +match either `\r` or `\n`, but never in the middle of a `\r\n`: + +``` +use regex::Regex; + +let re = Regex::new(r"(?mR)^foo$").unwrap(); +let m = re.find("\r\nfoo\r\n").unwrap(); +assert_eq!(m.as_str(), "foo"); +``` + +Unicode mode can also be selectively disabled, although only when the result +*would not* match invalid UTF-8. One good example of this is using an ASCII +word boundary instead of a Unicode word boundary, which might make some regex +searches run faster: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap(); +let m = re.find("$$abc$$").unwrap(); +assert_eq!(m.as_str(), "abc"); +``` + +### Escape sequences + +Note that this includes all possible escape sequences, even ones that are +documented elsewhere. 
+ +<pre class="rust"> +\* literal *, applies to all ASCII except [0-9A-Za-z<>] +\a bell (\x07) +\f form feed (\x0C) +\t horizontal tab +\n new line +\r carriage return +\v vertical tab (\x0B) +\A matches at the beginning of a haystack +\z matches at the end of a haystack +\b word boundary assertion +\B negated word boundary assertion +\123 octal character code, up to three digits (when enabled) +\x7F hex character code (exactly two digits) +\x{10FFFF} any hex character code corresponding to a Unicode code point +\u007F hex character code (exactly four digits) +\u{7F} any hex character code corresponding to a Unicode code point +\U0000007F hex character code (exactly eight digits) +\U{7F} any hex character code corresponding to a Unicode code point +\p{Letter} Unicode character class +\P{Letter} negated Unicode character class +\d, \s, \w Perl character class +\D, \S, \W negated Perl character class +</pre> + +### Perl character classes (Unicode friendly) + +These classes are based on the definitions provided in +[UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties): + +<pre class="rust"> +\d digit (\p{Nd}) +\D not digit +\s whitespace (\p{White_Space}) +\S not whitespace +\w word character (\p{Alphabetic} + \p{M} + \d + \p{Pc} + \p{Join_Control}) +\W not word character +</pre> + +### ASCII character classes + +These classes are based on the definitions provided in +[UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties): + +<pre class="rust"> +[[:alnum:]] alphanumeric ([0-9A-Za-z]) +[[:alpha:]] alphabetic ([A-Za-z]) +[[:ascii:]] ASCII ([\x00-\x7F]) +[[:blank:]] blank ([\t ]) +[[:cntrl:]] control ([\x00-\x1F\x7F]) +[[:digit:]] digits ([0-9]) +[[:graph:]] graphical ([!-~]) +[[:lower:]] lower case ([a-z]) +[[:print:]] printable ([ -~]) +[[:punct:]] punctuation ([!-/:-@\[-`{-~]) +[[:space:]] whitespace ([\t\n\v\f\r ]) +[[:upper:]] upper case ([A-Z]) +[[:word:]] word characters ([0-9A-Za-z_]) +[[:xdigit:]] hex digit ([0-9A-Fa-f]) 
+</pre> + +# Untrusted input + +This crate is meant to be able to run regex searches on untrusted haystacks +without fear of [ReDoS]. This crate also, to a certain extent, supports +untrusted patterns. + +[ReDoS]: https://en.wikipedia.org/wiki/ReDoS + +This crate differs from most (but not all) other regex engines in that it +doesn't use unbounded backtracking to run a regex search. In those cases, +one generally cannot use untrusted patterns *or* untrusted haystacks because +it can be very difficult to know whether a particular pattern will result in +catastrophic backtracking or not. + +We'll first discuss how this crate deals with untrusted inputs and then wrap +it up with a realistic discussion about what practice really looks like. + +### Panics + +Outside of clearly documented cases, most APIs in this crate are intended to +never panic regardless of the inputs given to them. For example, `Regex::new`, +`Regex::is_match`, `Regex::find` and `Regex::captures` should never panic. That +is, it is an API promise that those APIs will never panic no matter what inputs +are given to them. With that said, regex engines are complicated beasts, and +providing a rock solid guarantee that these APIs literally never panic is +essentially equivalent to saying, "there are no bugs in this library." That is +a bold claim, and not really one that can be feasibly made with a straight +face. + +Don't get the wrong impression here. This crate is extensively tested, not just +with unit and integration tests, but also via fuzz testing. For example, this +crate is part of the [OSS-fuzz project]. Panics should be incredibly rare, but +it is possible for bugs to exist, and thus possible for a panic to occur. If +you need a rock solid guarantee against panics, then you should wrap calls into +this library with [`std::panic::catch_unwind`]. + +It's also worth pointing out that this library will *generally* panic when +other regex engines would commit undefined behavior. 
When undefined behavior +occurs, your program might continue as if nothing bad has happened, but it also +might mean your program is open to the worst kinds of exploits. In contrast, +the worst thing a panic can do is a denial of service. + +[OSS-fuzz project]: https://android.googlesource.com/platform/external/oss-fuzz/+/refs/tags/android-t-preview-1/projects/rust-regex/ +[`std::panic::catch_unwind`]: https://doc.rust-lang.org/std/panic/fn.catch_unwind.html + +### Untrusted patterns + +The principal way this crate deals with them is by limiting their size by +default. The size limit can be configured via [`RegexBuilder::size_limit`]. The +idea of a size limit is that compiling a pattern into a `Regex` will fail if it +becomes "too big." Namely, while *most* resources consumed by compiling a regex +are approximately proportional (albeit with some high constant factors in some +cases, such as with Unicode character classes) to the length of the pattern +itself, there is one particular exception to this: counted repetitions. Namely, +this pattern: + +```text +a{5}{5}{5}{5}{5}{5} +``` + +Is equivalent to this pattern: + +```text +a{15625} +``` + +In both of these cases, the actual pattern string is quite small, but the +resulting `Regex` value is quite large. Indeed, as the first pattern shows, +it isn't enough to locally limit the size of each repetition because they can +be stacked in a way that results in exponential growth. + +To provide a bit more context, a simplified view of regex compilation looks +like this: + +* The pattern string is parsed into a structured representation called an AST. +Counted repetitions are not expanded and Unicode character classes are not +looked up in this stage. That is, the size of the AST is proportional to the +size of the pattern with "reasonable" constant factors. In other words, one +can reasonably limit the memory used by an AST by limiting the length of the +pattern string. +* The AST is translated into an HIR. 
Counted repetitions are still *not* +expanded at this stage, but Unicode character classes are embedded into the +HIR. The memory usage of a HIR is still proportional to the length of the +original pattern string, but the constant factors---mostly as a result of +Unicode character classes---can be quite high. Still though, the memory used by +an HIR can be reasonably limited by limiting the length of the pattern string. +* The HIR is compiled into a [Thompson NFA]. This is the stage at which +something like `\w{5}` is rewritten to `\w\w\w\w\w`. Thus, this is the stage +at which [`RegexBuilder::size_limit`] is enforced. If the NFA exceeds the +configured size, then this stage will fail. + +[Thompson NFA]: https://en.wikipedia.org/wiki/Thompson%27s_construction + +The size limit helps avoid two different kinds of exorbitant resource usage: + +* It avoids permitting exponential memory usage based on the size of the +pattern string. +* It avoids long search times. This will be discussed in more detail in the +next section, but worst case search time *is* dependent on the size of the +regex. So keeping regexes limited to a reasonable size is also a way of keeping +search times reasonable. + +Finally, it's worth pointing out that regex compilation is guaranteed to take +worst case `O(m)` time, where `m` is proportional to the size of regex. The +size of the regex here is *after* the counted repetitions have been expanded. + +**Advice for those using untrusted regexes**: limit the pattern length to +something small and expand it as needed. Configure [`RegexBuilder::size_limit`] +to something small and then expand it as needed. + +### Untrusted haystacks + +The main way this crate guards against searches from taking a long time is by +using algorithms that guarantee a `O(m * n)` worst case time and space bound. +Namely: + +* `m` is proportional to the size of the regex, where the size of the regex +includes the expansion of all counted repetitions. 
(See the previous section on +untrusted patterns.) +* `n` is proportional to the length, in bytes, of the haystack. + +In other words, if you consider `m` to be a constant (for example, the regex +pattern is a literal in the source code), then the search can be said to run +in "linear time." Or equivalently, "linear time with respect to the size of the +haystack." + +But the `m` factor here is important not to ignore. If a regex is +particularly big, the search times can get quite slow. This is why, in part, +[`RegexBuilder::size_limit`] exists. + +**Advice for those searching untrusted haystacks**: As long as your regexes +are not enormous, you should expect to be able to search untrusted haystacks +without fear. If you aren't sure, you should benchmark it. Unlike backtracking +engines, if your regex is so big that it's likely to result in slow searches, +this is probably something you'll be able to observe regardless of what the +haystack is made up of. + +### Iterating over matches + +One thing that is perhaps easy to miss is that the worst case time +complexity bound of `O(m * n)` applies to methods like [`Regex::is_match`], +[`Regex::find`] and [`Regex::captures`]. It does **not** apply to +[`Regex::find_iter`] or [`Regex::captures_iter`]. Namely, since iterating over +all matches can execute many searches, and each search can scan the entire +haystack, the worst case time complexity for iterators is `O(m * n^2)`. + +One example of where this occurs is when a pattern consists of an alternation, +where an earlier branch of the alternation requires scanning the entire +haystack only to discover that there is no match. It also requires a later +branch of the alternation to have matched at the beginning of the search. For +example, consider the pattern `.*[^A-Z]|[A-Z]` and the haystack `AAAAA`. 
The +first search will scan to the end looking for matches of `.*[^A-Z]` even though +a finite automata engine (as in this crate) knows that `[A-Z]` has already +matched the first character of the haystack. This is due to the greedy nature +of regex searching. That first search will report a match at the first `A` only +after scanning to the end to discover that no other match exists. The next +search then begins at the second `A` and the behavior repeats. + +There is no way to avoid this. This means that if both patterns and haystacks +are untrusted and you're iterating over all matches, you're susceptible to +worst case quadratic time complexity. One possible way to mitigate this +is to drop down to the lower level `regex-automata` crate and use its +`meta::Regex` iterator APIs. There, you can configure the search to operate +in "earliest" mode by passing a `Input::new(haystack).earliest(true)` to +`meta::Regex::find_iter` (for example). By enabling this mode, you give up +the normal greedy match semantics of regex searches and instead ask the regex +engine to immediately stop as soon as a match has been found. Enabling this +mode will thus restore the worst case `O(m * n)` time complexity bound, but at +the cost of different semantics. + +### Untrusted inputs in practice + +While providing a `O(m * n)` worst case time bound on all searches goes a long +way toward preventing [ReDoS], that doesn't mean every search you can possibly +run will complete without burning CPU time. In general, there are a few ways +for the `m * n` time bound to still bite you: + +* You are searching an exceptionally long haystack. No matter how you slice +it, a longer haystack will take more time to search. This crate may often make +very quick work of even long haystacks because of its literal optimizations, +but those aren't available for all regexes. +* Unicode character classes can cause searches to be quite slow in some cases. 
+This is especially true when they are combined with counted repetitions. While +the regex size limit above will protect you from the most egregious cases, +the default size limit still permits pretty big regexes that can execute more +slowly than one might expect. +* While routines like [`Regex::find`] and [`Regex::captures`] guarantee +worst case `O(m * n)` search time, routines like [`Regex::find_iter`] and +[`Regex::captures_iter`] actually have worst case `O(m * n^2)` search time. +This is because `find_iter` runs many searches, and each search takes worst +case `O(m * n)` time. Thus, iteration of all matches in a haystack has +worst case `O(m * n^2)`. A good example of a pattern that exhibits this is +`(?:A+){1000}|` or even `.*[^A-Z]|[A-Z]`. + +In general, untrusted haystacks are easier to stomach than untrusted patterns. +Untrusted patterns give a lot more control to the caller to impact the +performance of a search. In many cases, a regex search will actually execute in +average case `O(n)` time (i.e., not dependent on the size of the regex), but +this can't be guaranteed in general. Therefore, permitting untrusted patterns +means that your only line of defense is to put a limit on how big `m` (and +perhaps also `n`) can be in `O(m * n)`. `n` is limited by simply inspecting +the length of the haystack while `m` is limited by *both* applying a limit to +the length of the pattern *and* a limit on the compiled size of the regex via +[`RegexBuilder::size_limit`]. + +It bears repeating: if you're accepting untrusted patterns, it would be a good +idea to start with conservative limits on `m` and `n`, and then carefully +increase them as needed. + +# Crate features + +By default, this crate tries pretty hard to make regex matching both as fast +as possible and as correct as it can be. This means that there is a lot of +code dedicated to performance, the handling of Unicode data and the Unicode +data itself. 
Overall, this leads to more dependencies, larger binaries and +longer compile times. This trade off may not be appropriate in all cases, and +indeed, even when all Unicode and performance features are disabled, one is +still left with a perfectly serviceable regex engine that will work well in +many cases. (Note that code is not arbitrarily reducible, and for this reason, +the [`regex-lite`](https://docs.rs/regex-lite) crate exists to provide an even +more minimal experience by cutting out Unicode and performance, but still +maintaining the linear search time bound.) + +This crate exposes a number of features for controlling that trade off. Some +of these features are strictly performance oriented, such that disabling them +won't result in a loss of functionality, but may result in worse performance. +Other features, such as the ones controlling the presence or absence of Unicode +data, can result in a loss of functionality. For example, if one disables the +`unicode-case` feature (described below), then compiling the regex `(?i)a` +will fail since Unicode case insensitivity is enabled by default. Instead, +callers must use `(?i-u)a` to disable Unicode case folding. Stated differently, +enabling or disabling any of the features below can only add or subtract from +the total set of valid regular expressions. Enabling or disabling a feature +will never modify the match semantics of a regular expression. + +Most features below are enabled by default. Features that aren't enabled by +default are noted. + +### Ecosystem features + +* **std** - + When enabled, this will cause `regex` to use the standard library. In terms + of APIs, `std` causes error types to implement the `std::error::Error` + trait. Enabling `std` will also result in performance optimizations, + including SIMD and faster synchronization primitives. Notably, **disabling + the `std` feature will result in the use of spin locks**. 
To use a regex + engine without `std` and without spin locks, you'll need to drop down to + the [`regex-automata`](https://docs.rs/regex-automata) crate. +* **logging** - + When enabled, the `log` crate is used to emit messages about regex + compilation and search strategies. This is **disabled by default**. This is + typically only useful to someone working on this crate's internals, but might + be useful if you're doing some rabbit hole performance hacking. Or if you're + just interested in the kinds of decisions being made by the regex engine. + +### Performance features + +* **perf** - + Enables all performance related features except for `perf-dfa-full`. This + feature is enabled by default and is intended to cover all reasonable features + that improve performance, even if more are added in the future. +* **perf-dfa** - + Enables the use of a lazy DFA for matching. The lazy DFA is used to compile + portions of a regex to a very fast DFA on an as-needed basis. This can + result in substantial speedups, usually by an order of magnitude on large + haystacks. The lazy DFA does not bring in any new dependencies, but it can + make compile times longer. +* **perf-dfa-full** - + Enables the use of a full DFA for matching. Full DFAs are problematic because + they have worst case `O(2^n)` construction time. For this reason, when this + feature is enabled, full DFAs are only used for very small regexes and a + very small space bound is used during determinization to avoid the DFA + from blowing up. This feature is not enabled by default, even as part of + `perf`, because it results in fairly sizeable increases in binary size and + compilation time. It can result in faster search times, but they tend to be + more modest and limited to non-Unicode regexes. +* **perf-onepass** - + Enables the use of a one-pass DFA for extracting the positions of capture + groups. 
This optimization applies to a subset of certain types of NFAs and + represents the fastest engine in this crate for dealing with capture groups. +* **perf-backtrack** - + Enables the use of a bounded backtracking algorithm for extracting the + positions of capture groups. This usually sits between the slowest engine + (the PikeVM) and the fastest engine (one-pass DFA) for extracting capture + groups. It's used whenever the regex is not one-pass and is small enough. +* **perf-inline** - + Enables the use of aggressive inlining inside match routines. This reduces + the overhead of each match. The aggressive inlining, however, increases + compile times and binary size. +* **perf-literal** - + Enables the use of literal optimizations for speeding up matches. In some + cases, literal optimizations can result in speedups of _several_ orders of + magnitude. Disabling this drops the `aho-corasick` and `memchr` dependencies. +* **perf-cache** - + This feature used to enable a faster internal cache at the cost of using + additional dependencies, but this is no longer an option. A fast internal + cache is now used unconditionally with no additional dependencies. This may + change in the future. + +### Unicode features + +* **unicode** - + Enables all Unicode features. This feature is enabled by default, and will + always cover all Unicode features, even if more are added in the future. +* **unicode-age** - + Provide the data for the + [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age). + This makes it possible to use classes like `\p{Age:6.0}` to refer to all + codepoints first introduced in Unicode 6.0 +* **unicode-bool** - + Provide the data for numerous Unicode boolean properties. The full list + is not included here, but contains properties like `Alphabetic`, `Emoji`, + `Lowercase`, `Math`, `Uppercase` and `White_Space`. 
+* **unicode-case** - + Provide the data for case insensitive matching using + [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches). +* **unicode-gencat** - + Provide the data for + [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). + This includes, but is not limited to, `Decimal_Number`, `Letter`, + `Math_Symbol`, `Number` and `Punctuation`. +* **unicode-perl** - + Provide the data for supporting the Unicode-aware Perl character classes, + corresponding to `\w`, `\s` and `\d`. This is also necessary for using + Unicode-aware word boundary assertions. Note that if this feature is + disabled, the `\s` and `\d` character classes are still available if the + `unicode-bool` and `unicode-gencat` features are enabled, respectively. +* **unicode-script** - + Provide the data for + [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/). + This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`, + `Latin` and `Thai`. +* **unicode-segment** - + Provide the data necessary to provide the properties used to implement the + [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/). + This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and + `\p{sb=ATerm}`. + +# Other crates + +This crate has two required dependencies and several optional dependencies. +This section briefly describes them with the goal of raising awareness of how +different components of this crate may be used independently. + +It is somewhat unusual for a regex engine to have dependencies, as most regex +libraries are self contained units with no dependencies other than a particular +environment's standard library. Indeed, for other similarly optimized regex +engines, most or all of the code in the dependencies of this crate would +normally just be unseparable or coupled parts of the crate itself. 
But since +Rust and its tooling ecosystem make the use of dependencies so easy, it made +sense to spend some effort de-coupling parts of this crate and making them +independently useful. + +We only briefly describe each crate here. + +* [`regex-lite`](https://docs.rs/regex-lite) is not a dependency of `regex`, +but rather, a standalone zero-dependency simpler version of `regex` that +prioritizes compile times and binary size. In exchange, it eschews Unicode +support and performance. Its match semantics are as identical as possible to +the `regex` crate, and for the things it supports, its APIs are identical to +the APIs in this crate. In other words, for a lot of use cases, it is a drop-in +replacement. +* [`regex-syntax`](https://docs.rs/regex-syntax) provides a regular expression +parser via `Ast` and `Hir` types. It also provides routines for extracting +literals from a pattern. Folks can use this crate to do analysis, or even to +build their own regex engine without having to worry about writing a parser. +* [`regex-automata`](https://docs.rs/regex-automata) provides the regex engines +themselves. One of the downsides of finite automata based regex engines is that +they often need multiple internal engines in order to have similar or better +performance than an unbounded backtracking engine in practice. `regex-automata` +in particular provides public APIs for a PikeVM, a bounded backtracker, a +one-pass DFA, a lazy DFA, a fully compiled DFA and a meta regex engine that +combines them all together. It also has native multi-pattern support and +provides a way to compile and serialize full DFAs such that they can be loaded +and searched in a no-std no-alloc environment. `regex-automata` itself doesn't +even have a required dependency on `regex-syntax`! +* [`memchr`](https://docs.rs/memchr) provides low level SIMD vectorized +routines for quickly finding the location of single bytes or even substrings +in a haystack. 
In other words, it provides fast `memchr` and `memmem` routines. +These are used by this crate in literal optimizations. +* [`aho-corasick`](https://docs.rs/aho-corasick) provides multi-substring +search. It also provides SIMD vectorized routines in the case where the number +of substrings to search for is relatively small. The `regex` crate also uses +this for literal optimizations. +*/ + +#![no_std] +#![deny(missing_docs)] +#![cfg_attr(feature = "pattern", feature(pattern))] +#![warn(missing_debug_implementations)] + +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); + +extern crate alloc; +#[cfg(any(test, feature = "std"))] +extern crate std; + +pub use crate::error::Error; + +pub use crate::{builders::string::*, regex::string::*, regexset::string::*}; + +mod builders; +pub mod bytes; +mod error; +mod find_byte; +#[cfg(feature = "pattern")] +mod pattern; +mod regex; +mod regexset; + +/// Escapes all regular expression meta characters in `pattern`. +/// +/// The string returned may be safely used as a literal in a regular +/// expression. 
+pub fn escape(pattern: &str) -> alloc::string::String { + regex_syntax::escape(pattern) +} diff --git a/third_party/rust/regex/src/pattern.rs b/third_party/rust/regex/src/pattern.rs new file mode 100644 index 0000000000..2db04d8b35 --- /dev/null +++ b/third_party/rust/regex/src/pattern.rs @@ -0,0 +1,63 @@ +use core::str::pattern::{Pattern, SearchStep, Searcher}; + +use crate::{Matches, Regex}; + +#[derive(Debug)] +pub struct RegexSearcher<'r, 't> { + haystack: &'t str, + it: Matches<'r, 't>, + last_step_end: usize, + next_match: Option<(usize, usize)>, +} + +impl<'r, 't> Pattern<'t> for &'r Regex { + type Searcher = RegexSearcher<'r, 't>; + + fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> { + RegexSearcher { + haystack, + it: self.find_iter(haystack), + last_step_end: 0, + next_match: None, + } + } +} + +unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> { + #[inline] + fn haystack(&self) -> &'t str { + self.haystack + } + + #[inline] + fn next(&mut self) -> SearchStep { + if let Some((s, e)) = self.next_match { + self.next_match = None; + self.last_step_end = e; + return SearchStep::Match(s, e); + } + match self.it.next() { + None => { + if self.last_step_end < self.haystack().len() { + let last = self.last_step_end; + self.last_step_end = self.haystack().len(); + SearchStep::Reject(last, self.haystack().len()) + } else { + SearchStep::Done + } + } + Some(m) => { + let (s, e) = (m.start(), m.end()); + if s == self.last_step_end { + self.last_step_end = e; + SearchStep::Match(s, e) + } else { + self.next_match = Some((s, e)); + let last = self.last_step_end; + self.last_step_end = s; + SearchStep::Reject(last, s) + } + } + } + } +} diff --git a/third_party/rust/regex/src/regex/bytes.rs b/third_party/rust/regex/src/regex/bytes.rs new file mode 100644 index 0000000000..cc53482cbd --- /dev/null +++ b/third_party/rust/regex/src/regex/bytes.rs @@ -0,0 +1,2597 @@ +use alloc::{borrow::Cow, string::String, sync::Arc, vec::Vec}; + +use 
regex_automata::{meta, util::captures, Input, PatternID}; + +use crate::{bytes::RegexBuilder, error::Error}; + +/// A compiled regular expression for searching Unicode haystacks. +/// +/// A `Regex` can be used to search haystacks, split haystacks into substrings +/// or replace substrings in a haystack with a different substring. All +/// searching is done with an implicit `(?s:.)*?` at the beginning and end of +/// a pattern. To force an expression to match the whole string (or a prefix +/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`). +/// +/// Like the `Regex` type in the parent module, matches with this regex return +/// byte offsets into the haystack. **Unlike** the parent `Regex` type, these +/// byte offsets may not correspond to UTF-8 sequence boundaries since the +/// regexes in this module can match arbitrary bytes. +/// +/// The only methods that allocate new byte strings are the string replacement +/// methods. All other methods (searching and splitting) return borrowed +/// references into the haystack given. +/// +/// # Example +/// +/// Find the offsets of a US phone number: +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); +/// let m = re.find(b"phone: 111-222-3333").unwrap(); +/// assert_eq!(7..19, m.range()); +/// ``` +/// +/// # Example: extracting capture groups +/// +/// A common way to use regexes is with capture groups. That is, instead of +/// just looking for matches of an entire regex, parentheses are used to create +/// groups that represent part of the match. +/// +/// For example, consider a haystack with multiple lines, and each line has +/// three whitespace delimited fields where the second field is expected to be +/// a number and the third field a boolean. 
To make this convenient, we use +/// the [`Captures::extract`] API to put the strings that match each group +/// into a fixed size array: +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let hay = b" +/// rabbit 54 true +/// groundhog 2 true +/// does not match +/// fox 109 false +/// "; +/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap(); +/// let mut fields: Vec<(&[u8], i64, bool)> = vec![]; +/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) { +/// // These unwraps are OK because our pattern is written in a way where +/// // all matches for f2 and f3 will be valid UTF-8. +/// let f2 = std::str::from_utf8(f2).unwrap(); +/// let f3 = std::str::from_utf8(f3).unwrap(); +/// fields.push((f1, f2.parse()?, f3.parse()?)); +/// } +/// assert_eq!(fields, vec![ +/// (&b"rabbit"[..], 54, true), +/// (&b"groundhog"[..], 2, true), +/// (&b"fox"[..], 109, false), +/// ]); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: matching invalid UTF-8 +/// +/// One of the reasons for searching `&[u8]` haystacks is that the `&[u8]` +/// might not be valid UTF-8. Indeed, with a `bytes::Regex`, patterns that +/// match invalid UTF-8 are explicitly allowed. Here's one example that looks +/// for valid UTF-8 fields that might be separated by invalid UTF-8. In this +/// case, we use `(?s-u:.)`, which matches any byte. Attempting to use it in a +/// top-level `Regex` will result in the regex failing to compile. Notice also +/// that we use `.` with Unicode mode enabled, in which case, only valid UTF-8 +/// is matched. In this way, we can build one pattern where some parts only +/// match valid UTF-8 while other parts are more permissive. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// // F0 9F 92 A9 is the UTF-8 encoding for a Pile of Poo. +/// let hay = b"\xFF\xFFfoo\xFF\xFF\xFF\xF0\x9F\x92\xA9\xFF"; +/// // An equivalent to '(?s-u:.)' is '(?-u:[\x00-\xFF])'. 
+/// let re = Regex::new(r"(?s)(?-u:.)*?(?<f1>.+)(?-u:.)*?(?<f2>.+)").unwrap(); +/// let caps = re.captures(hay).unwrap(); +/// assert_eq!(&caps["f1"], &b"foo"[..]); +/// assert_eq!(&caps["f2"], "💩".as_bytes()); +/// ``` +#[derive(Clone)] +pub struct Regex { + pub(crate) meta: meta::Regex, + pub(crate) pattern: Arc<str>, +} + +impl core::fmt::Display for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl core::fmt::Debug for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Regex").field(&self.as_str()).finish() + } +} + +impl core::str::FromStr for Regex { + type Err = Error; + + /// Attempts to parse a string into a regular expression + fn from_str(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<&str> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<String> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: String) -> Result<Regex, Error> { + Regex::new(&s) + } +} + +/// Core regular expression methods. +impl Regex { + /// Compiles a regular expression. Once compiled, it can be used repeatedly + /// to search, split or replace substrings in a haystack. + /// + /// Note that regex compilation tends to be a somewhat expensive process, + /// and unlike higher level environments, compilation is not automatically + /// cached for you. One should endeavor to compile a regex once and then + /// reuse it. For example, it's a bad idea to compile the same regex + /// repeatedly in a loop. + /// + /// # Errors + /// + /// If an invalid pattern is given, then an error is returned. 
+ /// An error is also returned if the pattern is valid, but would + /// produce a regex that is bigger than the configured size limit via + /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by + /// default.) + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// // An invalid pattern because of an unclosed parenthesis + /// assert!(Regex::new(r"foo(bar").is_err()); + /// // An invalid pattern because the regex would be too big + /// // because Unicode tends to inflate things. + /// assert!(Regex::new(r"\w{1000}").is_err()); + /// // Disabling Unicode can make the regex much smaller, + /// // potentially by up to or more than an order of magnitude. + /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok()); + /// ``` + pub fn new(re: &str) -> Result<Regex, Error> { + RegexBuilder::new(re).build() + } + + /// Returns true if and only if there is a match for the regex anywhere + /// in the haystack given. + /// + /// It is recommended to use this method if all you need to do is test + /// whether a match exists, since the underlying matching engine may be + /// able to do less work. + /// + /// # Example + /// + /// Test if some haystack contains at least one word with exactly 13 + /// Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"I categorically deny having triskaidekaphobia."; + /// assert!(re.is_match(hay)); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &[u8]) -> bool { + self.is_match_at(haystack, 0) + } + + /// This routine searches for the first match of this regex in the + /// haystack given, and if found, returns a [`Match`]. The `Match` + /// provides access to both the byte offsets of the match and the actual + /// substring that matched. + /// + /// Note that this should only be used if you want to find the entire + /// match. 
If instead you just want to test the existence of a match, + /// it's potentially faster to use `Regex::is_match(hay)` instead of + /// `Regex::find(hay).is_some()`. + /// + /// # Example + /// + /// Find the first word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"I categorically deny having triskaidekaphobia."; + /// let mat = re.find(hay).unwrap(); + /// assert_eq!(2..15, mat.range()); + /// assert_eq!(b"categorically", mat.as_bytes()); + /// ``` + #[inline] + pub fn find<'h>(&self, haystack: &'h [u8]) -> Option<Match<'h>> { + self.find_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Match`]. + /// + /// # Time complexity + /// + /// Note that since `find_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// Find every word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"Retroactively relinquishing remunerations is reprehensible."; + /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_bytes()).collect(); + /// assert_eq!(matches, vec![ + /// &b"Retroactively"[..], + /// &b"relinquishing"[..], + /// &b"remunerations"[..], + /// &b"reprehensible"[..], + /// ]); + /// ``` + #[inline] + pub fn find_iter<'r, 'h>(&'r self, haystack: &'h [u8]) -> Matches<'r, 'h> { + Matches { haystack, it: self.meta.find_iter(haystack) } + } + + /// This routine searches for the first match of this regex in the haystack + /// given, and if found, returns not only the overall match but also the + /// matches of each capture group in the regex. 
If no match is found, then + /// `None` is returned. + /// + /// Capture group `0` always corresponds to an implicit unnamed group that + /// includes the entire match. If a match is found, this group is always + /// present. Subsequent groups may be named and are numbered, starting + /// at 1, by the order in which the opening parenthesis appears in the + /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`, + /// `b` and `c` correspond to capture group indices `1`, `2` and `3`, + /// respectively. + /// + /// You should only use `captures` if you need access to the capture group + /// matches. Otherwise, [`Regex::find`] is generally faster for discovering + /// just the overall match. + /// + /// # Example + /// + /// Say you have some haystack with movie names and their release years, + /// like "'Citizen Kane' (1941)". It'd be nice if we could search for + /// strings looking like that, while also extracting the movie name and its + /// release year separately. The example below shows how to do that. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_bytes(), b"'Citizen Kane' (1941)"); + /// assert_eq!(caps.get(1).unwrap().as_bytes(), b"Citizen Kane"); + /// assert_eq!(caps.get(2).unwrap().as_bytes(), b"1941"); + /// // You can also access the groups by index using the Index notation. + /// // Note that this will panic on an invalid index. In this case, these + /// // accesses are always correct because the overall regex will only + /// // match when these capture groups match. + /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); + /// assert_eq!(&caps[1], b"Citizen Kane"); + /// assert_eq!(&caps[2], b"1941"); + /// ``` + /// + /// Note that the full match is at capture group `0`. 
Each subsequent + /// capture group is indexed by the order of its opening `(`. + /// + /// We can make this example a bit clearer by using *named* capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_bytes(), b"'Citizen Kane' (1941)"); + /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); + /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); + /// // You can also access the groups by name using the Index notation. + /// // Note that this will panic on an invalid group name. In this case, + /// // these accesses are always correct because the overall regex will + /// // only match when these capture groups match. + /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); + /// assert_eq!(&caps["title"], b"Citizen Kane"); + /// assert_eq!(&caps["year"], b"1941"); + /// ``` + /// + /// Here we name the capture groups, which we can access with the `name` + /// method or the `Index` notation with a `&str`. Note that the named + /// capture groups are still accessible with `get` or the `Index` notation + /// with a `usize`. + /// + /// The `0`th capture group is always unnamed, so it must always be + /// accessed with `get(0)` or `[0]`. 
+ /// + /// Finally, one other way to get the matched substrings is with the + /// [`Captures::extract`] API: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let (full, [title, year]) = re.captures(hay).unwrap().extract(); + /// assert_eq!(full, b"'Citizen Kane' (1941)"); + /// assert_eq!(title, b"Citizen Kane"); + /// assert_eq!(year, b"1941"); + /// ``` + #[inline] + pub fn captures<'h>(&self, haystack: &'h [u8]) -> Option<Captures<'h>> { + self.captures_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Captures`]. + /// + /// This is the same as [`Regex::find_iter`], but instead of only providing + /// access to the overall match, each value yielded includes access to the + /// matches of all capture groups in the regex. Reporting this extra match + /// data is potentially costly, so callers should only use `captures_iter` + /// over `find_iter` when they actually need access to the capture group + /// matches. + /// + /// # Time complexity + /// + /// Note that since `captures_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// We can use this to find all movie titles and their release years in + /// some haystack, where the movie is formatted like "'Title' (xxxx)": + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap(); + /// let hay = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut movies = vec![]; + /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) { + /// // OK because [0-9]{4} can only match valid UTF-8. 
+ /// let year = std::str::from_utf8(year).unwrap(); + /// movies.push((title, year.parse::<i64>()?)); + /// } + /// assert_eq!(movies, vec![ + /// (&b"Citizen Kane"[..], 1941), + /// (&b"The Wizard of Oz"[..], 1939), + /// (&b"M"[..], 1931), + /// ]); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Or with named groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap(); + /// let hay = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut it = re.captures_iter(hay); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], b"Citizen Kane"); + /// assert_eq!(&caps["year"], b"1941"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], b"The Wizard of Oz"); + /// assert_eq!(&caps["year"], b"1939"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], b"M"); + /// assert_eq!(&caps["year"], b"1931"); + /// ``` + #[inline] + pub fn captures_iter<'r, 'h>( + &'r self, + haystack: &'h [u8], + ) -> CaptureMatches<'r, 'h> { + CaptureMatches { haystack, it: self.meta.captures_iter(haystack) } + } + + /// Returns an iterator of substrings of the haystack given, delimited by a + /// match of the regex. Namely, each element of the iterator corresponds to + /// a part of the haystack that *isn't* matched by the regular expression. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. 
+ /// + /// # Example + /// + /// To split a string delimited by arbitrary amounts of spaces or tabs: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[ \t]+").unwrap(); + /// let hay = b"a b \t c\td e"; + /// let fields: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(fields, vec![ + /// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..], + /// ]); + /// ``` + /// + /// # Example: more cases + /// + /// Basic usage: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b"Mary had a little lamb"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b"Mary"[..], &b"had"[..], &b"a"[..], &b"little"[..], &b"lamb"[..], + /// ]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b""; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![&b""[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"lionXXtigerXleopard"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b"lion"[..], &b""[..], &b"tiger"[..], &b"leopard"[..], + /// ]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = b"lion::tiger::leopard"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b"tiger"[..], &b"leopard"[..]]); + /// ``` + /// + /// If a haystack contains multiple contiguous matches, you will end up + /// with empty spans yielded by the iterator: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"XXXXaXXbXc"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b""[..], &b""[..], &b""[..], &b""[..], + /// &b"a"[..], &b""[..], &b"b"[..], &b"c"[..], + /// ]); + /// + /// let re = Regex::new(r"/").unwrap(); + /// let hay = b"(///)"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, 
vec![&b"("[..], &b""[..], &b""[..], &b")"[..]]); + /// ``` + /// + /// Separators at the start or end of a haystack are neighbored by empty + /// substrings. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"0").unwrap(); + /// let hay = b"010"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![&b""[..], &b"1"[..], &b""[..]]); + /// ``` + /// + /// When the regex can match the empty string, it splits at every byte + /// position in the haystack. This includes between all UTF-8 code units. + /// (The top-level [`Regex::split`](crate::Regex::split) will only split + /// at valid UTF-8 boundaries.) + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let hay = "☃".as_bytes(); + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..], + /// ]); + /// ``` + /// + /// Contiguous separators (which commonly show up with whitespace) can lead to + /// possibly surprising behavior. For example, this code is correct: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b" a b c"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b""[..], &b""[..], &b""[..], &b""[..], + /// &b"a"[..], &b""[..], &b"b"[..], &b"c"[..], + /// ]); + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want + /// to match contiguous space characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" +").unwrap(); + /// let hay = b" a b c"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// // N.B. This does still include a leading empty span because ' +' + /// // matches at the beginning of the haystack. 
+ /// assert_eq!(got, vec![&b""[..], &b"a"[..], &b"b"[..], &b"c"[..]]); + /// ``` + #[inline] + pub fn split<'r, 'h>(&'r self, haystack: &'h [u8]) -> Split<'r, 'h> { + Split { haystack, it: self.meta.split(haystack) } + } + + /// Returns an iterator of at most `limit` substrings of the haystack + /// given, delimited by a match of the regex. (A `limit` of `0` will return + /// no substrings.) Namely, each element of the iterator corresponds to a + /// part of the haystack that *isn't* matched by the regular expression. + /// The remainder of the haystack that is not split will be the last + /// element in the iterator. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Example + /// + /// Get the first two words in some haystack: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\W+").unwrap(); + /// let hay = b"Hey! 
How are you?"; + /// let fields: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b"Mary had a little lamb"; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b"Mary"[..], &b"had"[..], &b"a little lamb"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b""; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b""[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"lionXXtigerXleopard"; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b""[..], &b"tigerXleopard"[..]]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = b"lion::tiger::leopard"; + /// let got: Vec<&[u8]> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b"tiger::leopard"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcXdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 1).collect(); + /// assert_eq!(got, vec![&b"abcXdef"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec![&b"abcdef"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcXdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 0).collect(); + /// assert!(got.is_empty()); + /// ``` + #[inline] + pub fn splitn<'r, 'h>( + &'r self, + haystack: &'h [u8], + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { haystack, it: self.meta.splitn(haystack, limit) } + } + + /// Replaces the leftmost-first match in the given haystack with the + /// replacement provided. 
The replacement can be a regular string (where + /// `$N` and `$name` are expanded to match capture groups) or a function + /// that takes a [`Captures`] and returns the replaced string. + /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// # Replacement string syntax + /// + /// All instances of `$ref` in the replacement string are replaced with + /// the substring corresponding to the capture group identified by `ref`. + /// + /// `ref` may be an integer corresponding to the index of the capture group + /// (counted by order of opening parenthesis where `0` is the entire match) + /// or it can be a name (consisting of letters, digits or underscores) + /// corresponding to a named capture group. + /// + /// If `ref` isn't a valid capture group (whether the name doesn't exist or + /// isn't a valid index), then it is replaced with the empty string. + /// + /// The longest possible name is used. For example, `$1a` looks up the + /// capture group named `1a` and not the capture group at index `1`. To + /// exert more precise control over the name, use braces, e.g., `${1}a`. + /// + /// To write a literal `$` use `$$`. + /// + /// # Example + /// + /// Note that this function is polymorphic with respect to the replacement. + /// In typical usage, this can just be a normal string: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[^01]+").unwrap(); + /// assert_eq!(re.replace(b"1078910", b""), &b"1010"[..]); + /// ``` + /// + /// But anything satisfying the [`Replacer`] trait will work. For example, + /// a closure of type `|&Captures| -> String` provides direct access to the + /// captures corresponding to a match. 
This allows one to access capturing + /// group matches easily: + /// + /// ``` + /// use regex::bytes::{Captures, Regex}; + /// + /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { + /// let mut buf = vec![]; + /// buf.extend_from_slice(&caps[2]); + /// buf.push(b' '); + /// buf.extend_from_slice(&caps[1]); + /// buf + /// }); + /// assert_eq!(result, &b"Bruce Springsteen"[..]); + /// ``` + /// + /// But this is a bit cumbersome to use all the time. Instead, a simple + /// syntax is supported (as described above) that expands `$name` into the + /// corresponding capture group. Here's the last example, but using this + /// expansion technique with named capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); + /// let result = re.replace(b"Springsteen, Bruce", b"$first $last"); + /// assert_eq!(result, &b"Bruce Springsteen"[..]); + /// ``` + /// + /// Note that using `$2` instead of `$first` or `$1` instead of `$last` + /// would produce the same result. To write a literal `$` use `$$`. + /// + /// Sometimes the replacement string requires use of curly braces to + /// delineate a capture group replacement when it is adjacent to some other + /// literal text. For example, if we wanted to join two words together with + /// an underscore: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap(); + /// let result = re.replace(b"deep fried", b"${first}_$second"); + /// assert_eq!(result, &b"deep_fried"[..]); + /// ``` + /// + /// Without the curly braces, the capture group name `first_` would be + /// used, and since it doesn't exist, it would be replaced with the empty + /// string. + /// + /// Finally, sometimes you just want to replace a literal string with no + /// regard for capturing group expansion. 
This can be done by wrapping a + /// string with [`NoExpand`]: + /// + /// ``` + /// use regex::bytes::{NoExpand, Regex}; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); + /// assert_eq!(result, &b"$2 $last"[..]); + /// ``` + /// + /// Using `NoExpand` may also be faster, since the replacement string won't + /// need to be parsed for the `$` syntax. + #[inline] + pub fn replace<'h, R: Replacer>( + &self, + haystack: &'h [u8], + rep: R, + ) -> Cow<'h, [u8]> { + self.replacen(haystack, 1, rep) + } + + /// Replaces all non-overlapping matches in the haystack with the + /// replacement provided. This is the same as calling `replacen` with + /// `limit` set to `0`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Fallibility + /// + /// If you need to write a replacement routine where any individual + /// replacement might "fail," doing so with this API isn't really feasible + /// because there's no way to stop the search process if a replacement + /// fails. 
Instead, if you need this functionality, you should consider + /// implementing your own replacement routine: + /// + /// ``` + /// use regex::bytes::{Captures, Regex}; + /// + /// fn replace_all<E>( + /// re: &Regex, + /// haystack: &[u8], + /// replacement: impl Fn(&Captures) -> Result<Vec<u8>, E>, + /// ) -> Result<Vec<u8>, E> { + /// let mut new = Vec::with_capacity(haystack.len()); + /// let mut last_match = 0; + /// for caps in re.captures_iter(haystack) { + /// let m = caps.get(0).unwrap(); + /// new.extend_from_slice(&haystack[last_match..m.start()]); + /// new.extend_from_slice(&replacement(&caps)?); + /// last_match = m.end(); + /// } + /// new.extend_from_slice(&haystack[last_match..]); + /// Ok(new) + /// } + /// + /// // Let's replace each word with the number of bytes in that word. + /// // But if we see a word that is "too long," we'll give up. + /// let re = Regex::new(r"\w+").unwrap(); + /// let replacement = |caps: &Captures| -> Result<Vec<u8>, &'static str> { + /// if caps[0].len() >= 5 { + /// return Err("word too long"); + /// } + /// Ok(caps[0].len().to_string().into_bytes()) + /// }; + /// assert_eq!( + /// Ok(b"2 3 3 3?".to_vec()), + /// replace_all(&re, b"hi how are you?", &replacement), + /// ); + /// assert!(replace_all(&re, b"hi there", &replacement).is_err()); + /// ``` + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = b" + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replace_all(hay, b"$2 $1"); + /// assert_eq!(new, &b" + /// 1973 Greetings + /// 1973 Wild + /// 1975 BornToRun + /// 1978 Darkness + /// 1980 TheRiver + /// "[..]); + /// ``` + #[inline] + pub fn 
replace_all<'h, R: Replacer>( + &self, + haystack: &'h [u8], + rep: R, + ) -> Cow<'h, [u8]> { + self.replacen(haystack, 0, rep) + } + + /// Replaces at most `limit` non-overlapping matches in the haystack with + /// the replacement provided. If `limit` is `0`, then all non-overlapping + /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is + /// equivalent to `Regex::replacen(hay, 0, rep)`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Fallibility + /// + /// See the corresponding section in the docs for [`Regex::replace_all`] + /// for tips on how to deal with a replacement routine that can fail. + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields. But we only do it for the first two matches. 
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = b" + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replacen(hay, 2, b"$2 $1"); + /// assert_eq!(new, &b" + /// 1973 Greetings + /// 1973 Wild + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "[..]); + /// ``` + #[inline] + pub fn replacen<'h, R: Replacer>( + &self, + haystack: &'h [u8], + limit: usize, + mut rep: R, + ) -> Cow<'h, [u8]> { + // If we know that the replacement doesn't have any capture expansions, + // then we can use the fast path. The fast path can make a tremendous + // difference: + // + // 1) We use `find_iter` instead of `captures_iter`. Not asking for + // captures generally makes the regex engines faster. + // 2) We don't need to look up all of the capture groups and do + // replacements inside the replacement string. We just push it + // at each match and be done with it. + if let Some(rep) = rep.no_expansion() { + let mut it = self.find_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = Vec::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, m) in it { + new.extend_from_slice(&haystack[last_match..m.start()]); + new.extend_from_slice(&rep); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.extend_from_slice(&haystack[last_match..]); + return Cow::Owned(new); + } + + // The slower path, which we use if the replacement needs access to + // capture groups. 
+ let mut it = self.captures_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = Vec::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, cap) in it { + // unwrap on 0 is OK because captures only reports matches + let m = cap.get(0).unwrap(); + new.extend_from_slice(&haystack[last_match..m.start()]); + rep.replace_append(&cap, &mut new); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.extend_from_slice(&haystack[last_match..]); + Cow::Owned(new) + } +} + +/// A group of advanced or "lower level" search methods. Some methods permit +/// starting the search at a position greater than `0` in the haystack. Other +/// methods permit reusing allocations, for example, when extracting the +/// matches for capture groups. +impl Regex { + /// Returns the end byte offset of the first match in the haystack given. + /// + /// This method may have the same performance characteristics as + /// `is_match`. Behaviorally, it doesn't just report whether a match + /// occurs, but also the end offset for a match. In particular, the offset + /// returned *may be shorter* than the proper end of the leftmost-first + /// match that you would find via [`Regex::find`]. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change based on internal heuristics. + /// + /// # Example + /// + /// Typically, `a+` would match the entire first sequence of `a` in some + /// haystack, but `shortest_match` *may* give up as soon as it sees the + /// first `a`.
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"a+").unwrap(); + /// let offset = re.shortest_match(b"aaaaa").unwrap(); + /// assert_eq!(offset, 1); + /// ``` + #[inline] + pub fn shortest_match(&self, haystack: &[u8]) -> Option<usize> { + self.shortest_match_at(haystack, 0) + } + + /// Returns the same as `shortest_match`, but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. + /// + /// If a match is found, the offset returned is relative to the beginning + /// of the haystack, not the beginning of the search. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.shortest_match(&hay[2..]), Some(4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.shortest_match_at(hay, 2), None); + /// ``` + #[inline] + pub fn shortest_match_at( + &self, + haystack: &[u8], + start: usize, + ) -> Option<usize> { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).map(|hm| hm.offset()) + } + + /// Returns the same as [`Regex::is_match`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. 
+ /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert!(re.is_match(&hay[2..])); + /// // No match because the assertions take the context into account. + /// assert!(!re.is_match_at(hay, 2)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the same as [`Regex::find`], but starts the search at the given + /// offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4)); + /// // No match because the assertions take the context into account. 
+ /// assert_eq!(re.find_at(hay, 2), None); + /// ``` + #[inline] + pub fn find_at<'h>( + &self, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.find(input).map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], b"chew"); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_at(hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_at<'h>( + &self, + haystack: &'h [u8], + start: usize, + ) -> Option<Captures<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + let mut caps = self.meta.create_captures(); + self.meta.captures(input, &mut caps); + if caps.is_match() { + let static_captures_len = self.static_captures_len(); + Some(Captures { haystack, caps, static_captures_len }) + } else { + None + } + } + + /// This is like [`Regex::captures`], but writes the byte offsets of each + /// capture group match into the locations given. + /// + /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], + /// but does *not* store a reference to the haystack. 
This makes its API + /// a bit lower level and less convenient. But in exchange, callers + /// may allocate their own `CaptureLocations` and reuse it for multiple + /// searches. This may be helpful if allocating a `Captures` shows up in a + /// profile as too costly. + /// + /// To create a `CaptureLocations` value, use the + /// [`Regex::capture_locations`] method. + /// + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, b"id=foo123").is_some()); + /// assert_eq!(Some((0, 9)), locs.get(0)); + /// assert_eq!(Some((0, 2)), locs.get(1)); + /// assert_eq!(Some((3, 9)), locs.get(2)); + /// ``` + #[inline] + pub fn captures_read<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h [u8], + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, 0) + } + + /// Returns the same as [`Regex::captures_read`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// let mut locs = re.capture_locations(); + /// // We get a match here, but it's probably not intended. 
+ /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some()); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_read_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.search_captures(&input, &mut locs.0); + locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// An undocumented alias for `captures_read_at`. + /// + /// The `regex-capi` crate previously used this routine, so to avoid + /// breaking that crate, we continue to provide the name as an undocumented + /// alias. + #[doc(hidden)] + #[inline] + pub fn read_captures_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, start) + } +} + +/// Auxiliary methods. +impl Regex { + /// Returns the original string of this regex. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"foo\w+bar").unwrap(); + /// assert_eq!(re.as_str(), r"foo\w+bar"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.pattern + } + + /// Returns an iterator over the capture names in this regex. + /// + /// The iterator returned yields elements of type `Option<&str>`. That is, + /// the iterator yields values for all capture groups, even ones that are + /// unnamed. The order of the groups corresponds to the order of the group's + /// corresponding opening parenthesis. + /// + /// The first element of the iterator always yields the group corresponding + /// to the overall match, and this group is always unnamed. Therefore, the + /// iterator always yields at least one group. 
+ /// + /// # Example + /// + /// This shows basic usage with a mix of named and unnamed capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), Some(Some("a"))); + /// assert_eq!(names.next(), Some(Some("b"))); + /// assert_eq!(names.next(), Some(None)); + /// // the '(?:.)' group is non-capturing and so doesn't appear here! + /// assert_eq!(names.next(), Some(Some("c"))); + /// assert_eq!(names.next(), None); + /// ``` + /// + /// The iterator always yields at least one element, even for regexes with + /// no capture groups and even for regexes that can never match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// ``` + #[inline] + pub fn capture_names(&self) -> CaptureNames<'_> { + CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO)) + } + + /// Returns the number of capture groups in this regex. + /// + /// This includes all named and unnamed groups, including the implicit + /// unnamed group that is always present and corresponds to the entire + /// match. + /// + /// Since the implicit unnamed group is always included in this length, the + /// length returned is guaranteed to be greater than zero.
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"foo").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// + /// let re = Regex::new(r"(foo)").unwrap(); + /// assert_eq!(2, re.captures_len()); + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// assert_eq!(5, re.captures_len()); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// ``` + #[inline] + pub fn captures_len(&self) -> usize { + self.meta.group_info().group_len(PatternID::ZERO) + } + + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option<usize> { + self.meta.static_captures_len() + } + + /// Returns a fresh allocated set of capture locations that can + /// be reused in multiple calls to [`Regex::captures_read`] or + /// [`Regex::captures_read_at`]. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(.)(.)(\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, b"Padron").is_some()); + /// assert_eq!(locs.get(0), Some((0, 6))); + /// assert_eq!(locs.get(1), Some((0, 1))); + /// assert_eq!(locs.get(2), Some((1, 2))); + /// assert_eq!(locs.get(3), Some((2, 6))); + /// ``` + #[inline] + pub fn capture_locations(&self) -> CaptureLocations { + CaptureLocations(self.meta.create_captures()) + } + + /// An alias for `capture_locations` to preserve backward compatibility. + /// + /// The `regex-capi` crate uses this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn locations(&self) -> CaptureLocations { + self.capture_locations() + } +} + +/// Represents a single match of a regex in a haystack. +/// +/// A `Match` contains both the start and end byte offsets of the match and the +/// actual substring corresponding to the range of those byte offsets. It is +/// guaranteed that `start <= end`. 
When `start == end`, the match is empty. +/// +/// Unlike the top-level `Match` type, this `Match` type is produced by APIs +/// that search `&[u8]` haystacks. This means that the offsets in a `Match` can +/// point to anywhere in the haystack, including in a place that splits the +/// UTF-8 encoding of a Unicode scalar value. +/// +/// The lifetime parameter `'h` refers to the lifetime of the +/// haystack that this match was produced from. +/// +/// # Numbering +/// +/// The byte offsets in a `Match` form a half-open interval. That is, the +/// start of the range is inclusive and the end of the range is exclusive. +/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte +/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and +/// `6` corresponds to `x`, which is one past the end of the match. This +/// corresponds to the same kind of slicing that Rust uses. +/// +/// For more on why this was chosen over other schemes (aside from being +/// consistent with how Rust the language works), see [this discussion] and +/// [Dijkstra's note on a related topic][note]. +/// +/// [this discussion]: https://github.com/rust-lang/regex/discussions/866 +/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html +/// +/// # Example +/// +/// This example shows the value of each of the methods on `Match` for a +/// particular search.
+/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"\p{Greek}+").unwrap(); +/// let hay = "Greek: αβγδ".as_bytes(); +/// let m = re.find(hay).unwrap(); +/// assert_eq!(7, m.start()); +/// assert_eq!(15, m.end()); +/// assert!(!m.is_empty()); +/// assert_eq!(8, m.len()); +/// assert_eq!(7..15, m.range()); +/// assert_eq!("αβγδ".as_bytes(), m.as_bytes()); +/// ``` +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Match<'h> { + haystack: &'h [u8], + start: usize, + end: usize, +} + +impl<'h> Match<'h> { + /// Returns the byte offset of the start of the match in the haystack. The + /// start of the match corresponds to the position where the match begins + /// and includes the first byte in the match. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// Unlike the top-level `Match` type, the start offset may appear anywhere + /// in the haystack. This includes between the code units of a UTF-8 + /// encoded Unicode scalar value. + #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Returns the byte offset of the end of the match in the haystack. The + /// end of the match corresponds to the byte immediately following the last + /// byte in the match. This means that `&slice[start..end]` works as one + /// would expect. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// Unlike the top-level `Match` type, the end offset may appear anywhere + /// in the haystack. This includes between the code units of a UTF-8 + /// encoded Unicode scalar value. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Returns true if and only if this match has a length of zero. + /// + /// Note that an empty match can only occur when the regex itself can + /// match the empty string. Here are some examples of regexes that can + /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`, + /// `(foo|\d+|quux)?`.
+ #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns the range over the starting and ending byte offsets of the + /// match in the haystack. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.start..self.end + } + + /// Returns the substring of the haystack that matched. + #[inline] + pub fn as_bytes(&self) -> &'h [u8] { + &self.haystack[self.range()] + } + + /// Creates a new match from the given haystack and byte offsets. + #[inline] + fn new(haystack: &'h [u8], start: usize, end: usize) -> Match<'h> { + Match { haystack, start, end } + } +} + +impl<'h> core::fmt::Debug for Match<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut fmt = f.debug_struct("Match"); + fmt.field("start", &self.start).field("end", &self.end); + if let Ok(s) = core::str::from_utf8(self.as_bytes()) { + fmt.field("bytes", &s); + } else { + // FIXME: It would be nice if this could be printed as a string + // with invalid UTF-8 replaced with hex escapes. An alloc would + // probably be okay if that makes it easier, but regex-automata does + // (at time of writing) have internal routines that do this. So + // maybe we should expose them. + fmt.field("bytes", &self.as_bytes()); + } + fmt.finish() + } +} + +impl<'h> From<Match<'h>> for &'h [u8] { + fn from(m: Match<'h>) -> &'h [u8] { + m.as_bytes() + } +} + +impl<'h> From<Match<'h>> for core::ops::Range<usize> { + fn from(m: Match<'h>) -> core::ops::Range<usize> { + m.range() + } +} + +/// Represents the capture groups for a single match. +/// +/// Capture groups refer to parts of a regex enclosed in parentheses. They can +/// be optionally named. The purpose of capture groups is to be able to +/// reference different parts of a match based on the original pattern.
For +/// example, say you want to match the individual letters in a 5-letter word: +/// +/// ```text +/// (?<first>\w)(\w)(?:\w)\w(?<last>\w) +/// ``` +/// +/// This regex has 4 capture groups: +/// +/// * The group at index `0` corresponds to the overall match. It is always +/// present in every match and never has a name. +/// * The group at index `1` with name `first` corresponding to the first +/// letter. +/// * The group at index `2` with no name corresponding to the second letter. +/// * The group at index `3` with name `last` corresponding to the fifth and +/// last letter. +/// +/// Notice that `(?:\w)` was not listed above as a capture group despite it +/// being enclosed in parentheses. That's because `(?:pattern)` is a special +/// syntax that permits grouping but *without* capturing. The reason for not +/// treating it as a capture is that tracking and reporting capture groups +/// requires additional state that may lead to slower searches. So using as few +/// capture groups as possible can help performance. (Although the difference +/// in performance of a couple of capture groups is likely immaterial.) +/// +/// Values with this type are created by [`Regex::captures`] or +/// [`Regex::captures_iter`]. +/// +/// `'h` is the lifetime of the haystack that these captures were matched from. +/// +/// # Example +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap(); +/// let caps = re.captures(b"toady").unwrap(); +/// assert_eq!(b"toady", &caps[0]); +/// assert_eq!(b"t", &caps["first"]); +/// assert_eq!(b"o", &caps[2]); +/// assert_eq!(b"y", &caps["last"]); +/// ``` +pub struct Captures<'h> { + haystack: &'h [u8], + caps: captures::Captures, + static_captures_len: Option<usize>, +} + +impl<'h> Captures<'h> { + /// Returns the `Match` associated with the capture group at index `i`. 
If + /// `i` does not correspond to a capture group, or if the capture group did + /// not participate in the match, then `None` is returned. + /// + /// When `i == 0`, this is guaranteed to return a non-`None` value. + /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); + /// let caps = re.captures(b"abc123").unwrap(); + /// + /// let substr1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); + /// let substr2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); + /// assert_eq!(substr1, b"123"); + /// assert_eq!(substr2, b""); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<Match<'h>> { + self.caps + .get_group(i) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// Returns the `Match` associated with the capture group named `name`. If + /// `name` isn't a valid capture group or it refers to a group that didn't + /// match, then `None` is returned. + /// + /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime + /// matches the lifetime of the haystack in this `Captures` value. + /// Conversely, the substring returned by `caps["name"]` has a lifetime + /// of the `Captures` value, which is likely shorter than the lifetime of + /// the haystack. In some cases, it may be necessary to use this method to + /// access the matching substring instead of the `caps["name"]` notation. 
+ /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new( + /// r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))", + /// ).unwrap(); + /// let caps = re.captures(b"abc123").unwrap(); + /// + /// let numbers = caps.name("numbers").map_or(&b""[..], |m| m.as_bytes()); + /// let letters = caps.name("letters").map_or(&b""[..], |m| m.as_bytes()); + /// assert_eq!(numbers, b"123"); + /// assert_eq!(letters, b""); + /// ``` + #[inline] + pub fn name(&self, name: &str) -> Option<Match<'h>> { + self.caps + .get_group_by_name(name) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups. + /// + /// This returns a tuple where the first element corresponds to the full + /// substring of the haystack that matched the regex. The second element is + /// an array of substrings, with each corresponding to the substring + /// that matched for a particular capture group. + /// + /// # Panics + /// + /// This panics if the number of possible matching groups in this + /// `Captures` value is not fixed to `N` in all circumstances. + /// More precisely, this routine only works when `N` is equivalent to + /// [`Regex::static_captures_len`]. + /// + /// Stated more plainly, if the number of matching capture groups in a + /// regex can vary from match to match, then this function always panics. + /// + /// For example, `(a)(b)|(c)` could produce two matching capture groups + /// or one matching capture group for any given match. Therefore, one + /// cannot use `extract` with such a pattern.
+ /// + /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because + /// the number of capture groups in every match is always equivalent, + /// even if the capture _indices_ in each match are not. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// let Some((full, [year, month, day])) = + /// re.captures(hay).map(|caps| caps.extract()) else { return }; + /// assert_eq!(b"2010-03-14", full); + /// assert_eq!(b"2010", year); + /// assert_eq!(b"03", month); + /// assert_eq!(b"14", day); + /// ``` + /// + /// # Example: iteration + /// + /// This example shows how to use this method when iterating over all + /// `Captures` matches in a haystack. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = b"1973-01-05, 1975-08-25 and 1980-10-18"; + /// + /// let mut dates: Vec<(&[u8], &[u8], &[u8])> = vec![]; + /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) { + /// dates.push((y, m, d)); + /// } + /// assert_eq!(dates, vec![ + /// (&b"1973"[..], &b"01"[..], &b"05"[..]), + /// (&b"1975"[..], &b"08"[..], &b"25"[..]), + /// (&b"1980"[..], &b"10"[..], &b"18"[..]), + /// ]); + /// ``` + /// + /// # Example: parsing different formats + /// + /// This API is particularly useful when you need to extract a particular + /// value that might occur in a different format. 
Consider, for example, + /// an identifier that might be in double quotes or single quotes: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap(); + /// let hay = br#"The first is id:"foo" and the second is id:'bar'."#; + /// let mut ids = vec![]; + /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) { + /// ids.push(id); + /// } + /// assert_eq!(ids, vec![b"foo", b"bar"]); + /// ``` + pub fn extract<const N: usize>(&self) -> (&'h [u8], [&'h [u8]; N]) { + let len = self + .static_captures_len + .expect("number of capture groups can vary in a match") + .checked_sub(1) + .expect("number of groups is always greater than zero"); + assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len); + // The regex-automata variant of extract is a bit more permissive. + // It doesn't require the number of matching capturing groups to be + // static, and you can even request fewer groups than what's there. So + // this is guaranteed to never panic because we've asserted above that + // the user has requested precisely the number of groups that must be + // present in any match for this regex. + self.caps.extract_bytes(self.haystack) + } + + /// Expands all instances of `$ref` in `replacement` to the corresponding + /// capture group, and writes them to the `dst` buffer given. A `ref` can + /// be a capture group index or a name. If `ref` doesn't refer to a capture + /// group that participated in the match, then it is replaced with the + /// empty string. + /// + /// # Format + /// + /// The format of the replacement string supports two different kinds of + /// capture references: unbraced and braced. + /// + /// For the unbraced format, the format supported is `$ref` where `ref` + /// can be any character in the class `[0-9A-Za-z_]`. `ref` is always + /// the longest possible parse. 
So for example, `$1a` corresponds to the + /// capture group named `1a` and not the capture group at index `1`. If + /// `ref` matches `^[0-9]+$`, then it is treated as a capture group index + /// itself and not a name. + /// + /// For the braced format, the format supported is `${ref}` where `ref` can + /// be any sequence of bytes except for `}`. If no closing brace occurs, + /// then it is not considered a capture reference. As with the unbraced + /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture + /// group index and not a name. + /// + /// The braced format is useful for exerting precise control over the name + /// of the capture reference. For example, `${1}a` corresponds to the + /// capture group reference `1` followed by the letter `a`, where as `$1a` + /// (as mentioned above) corresponds to the capture group reference `1a`. + /// The braced format is also useful for expressing capture group names + /// that use characters not supported by the unbraced format. For example, + /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`. + /// + /// If a capture group reference is found and it does not refer to a valid + /// capture group, then it will be replaced with the empty string. + /// + /// To write a literal `$`, use `$$`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new( + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// ).unwrap(); + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// let caps = re.captures(hay).unwrap(); + /// + /// let mut dst = vec![]; + /// caps.expand(b"year=$year, month=$month, day=$day", &mut dst); + /// assert_eq!(dst, b"year=2010, month=03, day=14"); + /// ``` + #[inline] + pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { + self.caps.interpolate_bytes_into(self.haystack, replacement, dst); + } + + /// Returns an iterator over all capture groups. 
This includes both + /// matching and non-matching groups. + /// + /// The iterator always yields at least one matching group: the first group + /// (at index `0`) with no name. Subsequent groups are returned in the order + /// of their opening parenthesis in the regex. + /// + /// The elements yielded have type `Option<Match<'h>>`, where a non-`None` + /// value is present if the capture group matches. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures(b"AZ").unwrap(); + /// + /// let mut it = caps.iter(); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"AZ"[..])); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"A"[..])); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), None); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"Z"[..])); + /// assert_eq!(it.next(), None); + /// ``` + #[inline] + pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> { + SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() } + } + + /// Returns the total number of capture groups. This includes both + /// matching and non-matching groups. + /// + /// The length returned is always equivalent to the number of elements + /// yielded by [`Captures::iter`]. Consequently, the length is always + /// greater than zero since every `Captures` value always includes the + /// match for the entire regex. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures(b"AZ").unwrap(); + /// assert_eq!(caps.len(), 4); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.caps.group_len() + } +} + +impl<'h> core::fmt::Debug for Captures<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + /// A little helper type to provide a nice map-like debug + /// representation for our capturing group spans. + /// + /// regex-automata has something similar, but it includes the pattern + /// ID in its debug output, which is confusing. It also doesn't include + /// the strings that match because a regex-automata `Captures` doesn't + /// borrow the haystack. + struct CapturesDebugMap<'a> { + caps: &'a Captures<'a>, + } + + impl<'a> core::fmt::Debug for CapturesDebugMap<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut map = f.debug_map(); + let names = + self.caps.caps.group_info().pattern_names(PatternID::ZERO); + for (group_index, maybe_name) in names.enumerate() { + let key = Key(group_index, maybe_name); + match self.caps.get(group_index) { + None => map.entry(&key, &None::<()>), + Some(mat) => map.entry(&key, &Value(mat)), + }; + } + map.finish() + } + } + + struct Key<'a>(usize, Option<&'a str>); + + impl<'a> core::fmt::Debug for Key<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}", self.0)?; + if let Some(name) = self.1 { + write!(f, "/{:?}", name)?; + } + Ok(()) + } + } + + struct Value<'a>(Match<'a>); + + impl<'a> core::fmt::Debug for Value<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use regex_automata::util::escape::DebugHaystack; + + write!( + f, + "{}..{}/{:?}", + self.0.start(), + self.0.end(), + DebugHaystack(self.0.as_bytes()) + ) + } + } + + f.debug_tuple("Captures") + .field(&CapturesDebugMap { caps: self }) + .finish() + } +} + 
+/// Get a matching capture group's haystack substring by index. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, use +/// [`Captures::get`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&[u8]` returned by this implementation is the lifetime of the `Captures` +/// value itself. +/// +/// # Panics +/// +/// If there is no matching group at the given index. +impl<'h> core::ops::Index<usize> for Captures<'h> { + type Output = [u8]; + + // The lifetime is written out to make it clear that the &[u8] returned + // does NOT have a lifetime equivalent to 'h. + fn index<'a>(&'a self, i: usize) -> &'a [u8] { + self.get(i) + .map(|m| m.as_bytes()) + .unwrap_or_else(|| panic!("no group at index '{}'", i)) + } +} + +/// Get a matching capture group's haystack substring by name. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, use +/// [`Captures::name`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&[u8]` returned by this implementation is the lifetime of the `Captures` +/// value itself. +/// +/// `'n` is the lifetime of the group name used to index the `Captures` value. +/// +/// # Panics +/// +/// If there is no matching group at the given name. +impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { + type Output = [u8]; + + fn index<'a>(&'a self, name: &'n str) -> &'a [u8] { + self.name(name) + .map(|m| m.as_bytes()) + .unwrap_or_else(|| panic!("no group named '{}'", name)) + } +} + +/// A low level representation of the byte offsets of each capture group. 
+/// +/// You can think of this as a lower level [`Captures`], where this type does +/// not support named capturing groups directly and it does not borrow the +/// haystack that these offsets were matched on. +/// +/// Primarily, this type is useful when using the lower level `Regex` APIs such +/// as [`Regex::captures_read`], which permits amortizing the allocation in +/// which capture match offsets are stored. +/// +/// In order to build a value of this type, you'll need to call the +/// [`Regex::capture_locations`] method. The value returned can then be reused +/// in subsequent searches for that regex. Using it for other regexes may +/// result in a panic or otherwise incorrect results. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// assert_eq!(None, locs.get(34973498648)); +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` +#[derive(Clone, Debug)] +pub struct CaptureLocations(captures::Captures); + +/// A type alias for `CaptureLocations` for backwards compatibility. +/// +/// Previously, we exported `CaptureLocations` as `Locations` in an +/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), +/// we continue re-exporting the same undocumented API. +#[doc(hidden)] +pub type Locations = CaptureLocations; + +impl CaptureLocations { + /// Returns the start and end byte offsets of the capture group at index + /// `i`. 
This returns `None` if `i` is not a valid capture group or if the + /// capture group did not match. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); + /// assert_eq!(Some((0, 17)), locs.get(0)); + /// assert_eq!(Some((0, 5)), locs.get(1)); + /// assert_eq!(Some((6, 17)), locs.get(2)); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<(usize, usize)> { + self.0.get_group(i).map(|sp| (sp.start, sp.end)) + } + + /// Returns the total number of capture groups (even if they didn't match). + /// That is, the length returned is unaffected by the result of a search. + /// + /// This is always at least `1` since every regex has at least `1` + /// capturing group that corresponds to the entire match. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert_eq!(3, locs.len()); + /// re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); + /// assert_eq!(3, locs.len()); + /// ``` + /// + /// Notice that the length is always at least `1`, regardless of the regex: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// + /// // [a&&b] is a regex that never matches anything. + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + // self.0.group_len() returns 0 if the underlying captures doesn't + // represent a match, but the behavior guaranteed for this method is + // that the length doesn't change based on a match or not. 
+ self.0.group_info().group_len(PatternID::ZERO) + } + + /// An alias for the `get` method for backwards compatibility. + /// + /// Previously, we exported `get` as `pos` in an undocumented API. To + /// prevent breaking that code (e.g., in `regex-capi`), we continue + /// re-exporting the same undocumented API. + #[doc(hidden)] + #[inline] + pub fn pos(&self, i: usize) -> Option<(usize, usize)> { + self.get(i) + } +} + +/// An iterator over all non-overlapping matches in a haystack. +/// +/// This iterator yields [`Match`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the haystack. +/// +/// This iterator is created by [`Regex::find_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct Matches<'r, 'h> { + haystack: &'h [u8], + it: meta::FindMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for Matches<'r, 'h> { + type Item = Match<'h>; + + #[inline] + fn next(&mut self) -> Option<Match<'h>> { + self.it + .next() + .map(|sp| Match::new(self.haystack, sp.start(), sp.end())) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Match` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {} + +/// An iterator over all non-overlapping capture matches in a haystack. +/// +/// This iterator yields [`Captures`] values. The iterator stops when no more +/// matches can be found. 
+/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the matched haystack. +/// +/// This iterator is created by [`Regex::captures_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct CaptureMatches<'r, 'h> { + haystack: &'h [u8], + it: meta::CapturesMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> { + type Item = Captures<'h>; + + #[inline] + fn next(&mut self) -> Option<Captures<'h>> { + let static_captures_len = self.it.regex().static_captures_len(); + self.it.next().map(|caps| Captures { + haystack: self.haystack, + caps, + static_captures_len, + }) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Captures` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {} + +/// An iterator over all substrings delimited by a regex match. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the byte string being split. +/// +/// This iterator is created by [`Regex::split`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. 
+#[derive(Debug)] +pub struct Split<'r, 'h> { + haystack: &'h [u8], + it: meta::Split<'r, 'h>, +} + +impl<'r, 'h> Iterator for Split<'r, 'h> { + type Item = &'h [u8]; + + #[inline] + fn next(&mut self) -> Option<&'h [u8]> { + self.it.next().map(|span| &self.haystack[span]) + } +} + +impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {} + +/// An iterator over at most `N` substrings delimited by a regex match. +/// +/// The last substring yielded by this iterator will be whatever remains after +/// `N-1` splits. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the byte string being split. +/// +/// This iterator is created by [`Regex::splitn`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +/// +/// Although note that the worst case time here has an upper bound given +/// by the `limit` parameter to [`Regex::splitn`]. +#[derive(Debug)] +pub struct SplitN<'r, 'h> { + haystack: &'h [u8], + it: meta::SplitN<'r, 'h>, +} + +impl<'r, 'h> Iterator for SplitN<'r, 'h> { + type Item = &'h [u8]; + + #[inline] + fn next(&mut self) -> Option<&'h [u8]> { + self.it.next().map(|span| &self.haystack[span]) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} + +/// An iterator over the names of all capture groups in a regex. +/// +/// This iterator yields values of type `Option<&str>` in order of the opening +/// capture group parenthesis in the regex pattern. `None` is yielded for +/// groups with no name. The first element always corresponds to the implicit +/// and unnamed group for the overall match. +/// +/// `'r` is the lifetime of the compiled regular expression. 
+/// +/// This iterator is created by [`Regex::capture_names`]. +#[derive(Clone, Debug)] +pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>); + +impl<'r> Iterator for CaptureNames<'r> { + type Item = Option<&'r str>; + + #[inline] + fn next(&mut self) -> Option<Option<&'r str>> { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'r> ExactSizeIterator for CaptureNames<'r> {} + +impl<'r> core::iter::FusedIterator for CaptureNames<'r> {} + +/// An iterator over all group matches in a [`Captures`] value. +/// +/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the +/// lifetime of the haystack that the matches are for. The order of elements +/// yielded corresponds to the order of the opening parenthesis for the group +/// in the regex pattern. `None` is yielded for groups that did not participate +/// in the match. +/// +/// The first element always corresponds to the implicit group for the overall +/// match. Since this iterator is created by a [`Captures`] value, and a +/// `Captures` value is only created when a match occurs, it follows that the +/// first element yielded by this iterator is guaranteed to be non-`None`. +/// +/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that +/// created this iterator, and the lifetime `'h` corresponds to the originally +/// matched haystack. 
+#[derive(Clone, Debug)] +pub struct SubCaptureMatches<'c, 'h> { + haystack: &'h [u8], + it: captures::CapturesPatternIter<'c>, +} + +impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> { + type Item = Option<Match<'h>>; + + #[inline] + fn next(&mut self) -> Option<Option<Match<'h>>> { + self.it.next().map(|group| { + group.map(|sp| Match::new(self.haystack, sp.start, sp.end)) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.it.count() + } +} + +impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {} + +impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {} + +/// A trait for types that can be used to replace matches in a haystack. +/// +/// In general, users of this crate shouldn't need to implement this trait, +/// since implementations are already provided for `&[u8]` along with other +/// variants of byte string types, as well as `FnMut(&Captures) -> Vec<u8>` (or +/// any `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`). Those cover most use +/// cases, but callers can implement this trait directly if necessary. +/// +/// # Example +/// +/// This example shows a basic implementation of the `Replacer` trait. This can +/// be done much more simply using the replacement byte string interpolation +/// support (e.g., `$first $last`), but this approach avoids needing to parse +/// the replacement byte string at all. 
+/// +/// ``` +/// use regex::bytes::{Captures, Regex, Replacer}; +/// +/// struct NameSwapper; +/// +/// impl Replacer for NameSwapper { +/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { +/// dst.extend_from_slice(&caps["first"]); +/// dst.extend_from_slice(b" "); +/// dst.extend_from_slice(&caps["last"]); +/// } +/// } +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); +/// let result = re.replace(b"Springsteen, Bruce", NameSwapper); +/// assert_eq!(result, &b"Bruce Springsteen"[..]); +/// ``` +pub trait Replacer { + /// Appends possibly empty data to `dst` to replace the current match. + /// + /// The current match is represented by `caps`, which is guaranteed to have + /// a match at capture group `0`. + /// + /// For example, a no-op replacement would be + /// `dst.extend_from_slice(&caps[0])`. + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); + + /// Return a fixed unchanging replacement byte string. + /// + /// When doing replacements, if access to [`Captures`] is not needed (e.g., + /// the replacement byte string does not need `$` expansion), then it can + /// be beneficial to avoid finding sub-captures. + /// + /// In general, this is called once for every call to a replacement routine + /// such as [`Regex::replace_all`]. + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { + None + } + + /// Returns a type that implements `Replacer`, but that borrows and wraps + /// this `Replacer`. + /// + /// This is useful when you want to take a generic `Replacer` (which might + /// not be cloneable) and use it without consuming it, so it can be used + /// more than once. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::{Regex, Replacer}; + /// + /// fn replace_all_twice<R: Replacer>( + /// re: Regex, + /// src: &[u8], + /// mut rep: R, + /// ) -> Vec<u8> { + /// let dst = re.replace_all(src, rep.by_ref()); + /// let dst = re.replace_all(&dst, rep.by_ref()); + /// dst.into_owned() + /// } + /// ``` + fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { + ReplacerRef(self) + } +} + +impl<'a, const N: usize> Replacer for &'a [u8; N] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(&**self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<const N: usize> Replacer for [u8; N] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(&*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a [u8] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Vec<u8> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl Replacer for Vec<u8> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for Cow<'a, [u8]> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self.as_ref(), dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Cow<'a, [u8]> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self.as_ref(), dst); + } + + fn 
no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<F, T> Replacer for F +where + F: FnMut(&Captures<'_>) -> T, + T: AsRef<[u8]>, +{ + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + dst.extend_from_slice((*self)(caps).as_ref()); + } +} + +/// A by-reference adaptor for a [`Replacer`]. +/// +/// This permits reusing the same `Replacer` value in multiple calls to a +/// replacement routine like [`Regex::replace_all`]. +/// +/// This type is created by [`Replacer::by_ref`]. +#[derive(Debug)] +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); + +impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + self.0.replace_append(caps, dst) + } + + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { + self.0.no_expansion() + } +} + +/// A helper type for forcing literal string replacement. +/// +/// It can be used with routines like [`Regex::replace`] and +/// [`Regex::replace_all`] to do a literal string replacement without expanding +/// `$name` to their corresponding capture groups. This can be both convenient +/// (to avoid escaping `$`, for example) and faster (since capture groups +/// don't need to be found). +/// +/// `'s` is the lifetime of the literal string to use. 
+/// +/// # Example +/// +/// ``` +/// use regex::bytes::{NoExpand, Regex}; +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); +/// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); +/// assert_eq!(result, &b"$2 $last"[..]); +/// ``` +#[derive(Clone, Debug)] +pub struct NoExpand<'s>(pub &'s [u8]); + +impl<'s> Replacer for NoExpand<'s> { + fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { + dst.extend_from_slice(self.0); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + Some(Cow::Borrowed(self.0)) + } +} + +/// Quickly checks the given replacement string for whether interpolation +/// should be done on it. It returns `None` if a `$` was found anywhere in the +/// given string, which suggests interpolation needs to be done. But if there's +/// no `$` anywhere, then interpolation definitely does not need to be done. In +/// that case, the given string is returned as a borrowed `Cow`. +/// +/// This is meant to be used to implement the `Replacer::no_expansion` method +/// in its various trait impls. 
+fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> { + let replacement = replacement.as_ref(); + match crate::find_byte::find_byte(b'$', replacement) { + Some(_) => None, + None => Some(Cow::Borrowed(replacement)), + } +} diff --git a/third_party/rust/regex/src/regex/mod.rs b/third_party/rust/regex/src/regex/mod.rs new file mode 100644 index 0000000000..93fadec8bf --- /dev/null +++ b/third_party/rust/regex/src/regex/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod bytes; +pub(crate) mod string; diff --git a/third_party/rust/regex/src/regex/string.rs b/third_party/rust/regex/src/regex/string.rs new file mode 100644 index 0000000000..d5908ae0dd --- /dev/null +++ b/third_party/rust/regex/src/regex/string.rs @@ -0,0 +1,2579 @@ +use alloc::{borrow::Cow, string::String, sync::Arc}; + +use regex_automata::{meta, util::captures, Input, PatternID}; + +use crate::{error::Error, RegexBuilder}; + +/// A compiled regular expression for searching Unicode haystacks. +/// +/// A `Regex` can be used to search haystacks, split haystacks into substrings +/// or replace substrings in a haystack with a different substring. All +/// searching is done with an implicit `(?s:.)*?` at the beginning and end of +/// an pattern. To force an expression to match the whole string (or a prefix +/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`). +/// +/// While this crate will handle Unicode strings (whether in the regular +/// expression or in the haystack), all positions returned are **byte +/// offsets**. Every byte offset is guaranteed to be at a Unicode code point +/// boundary. That is, all offsets returned by the `Regex` API are guaranteed +/// to be ranges that can slice a `&str` without panicking. If you want to +/// relax this requirement, then you must search `&[u8]` haystacks with a +/// [`bytes::Regex`](crate::bytes::Regex). +/// +/// The only methods that allocate new strings are the string replacement +/// methods. 
All other methods (searching and splitting) return borrowed +/// references into the haystack given. +/// +/// # Example +/// +/// Find the offsets of a US phone number: +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); +/// let m = re.find("phone: 111-222-3333").unwrap(); +/// assert_eq!(7..19, m.range()); +/// ``` +/// +/// # Example: extracting capture groups +/// +/// A common way to use regexes is with capture groups. That is, instead of +/// just looking for matches of an entire regex, parentheses are used to create +/// groups that represent part of the match. +/// +/// For example, consider a haystack with multiple lines, and each line has +/// three whitespace delimited fields where the second field is expected to be +/// a number and the third field a boolean. To make this convenient, we use +/// the [`Captures::extract`] API to put the strings that match each group +/// into a fixed size array: +/// +/// ``` +/// use regex::Regex; +/// +/// let hay = " +/// rabbit 54 true +/// groundhog 2 true +/// does not match +/// fox 109 false +/// "; +/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap(); +/// let mut fields: Vec<(&str, i64, bool)> = vec![]; +/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) { +/// fields.push((f1, f2.parse()?, f3.parse()?)); +/// } +/// assert_eq!(fields, vec![ +/// ("rabbit", 54, true), +/// ("groundhog", 2, true), +/// ("fox", 109, false), +/// ]); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: searching with the `Pattern` trait +/// +/// **Note**: This section requires that this crate is compiled with the +/// `pattern` Cargo feature enabled, which **requires nightly Rust**. +/// +/// Since `Regex` implements `Pattern` from the standard library, one can +/// use regexes with methods defined on `&str`. 
For example, `is_match`, +/// `find`, `find_iter` and `split` can, in some cases, be replaced with +/// `str::contains`, `str::find`, `str::match_indices` and `str::split`. +/// +/// Here are some examples: +/// +/// ```ignore +/// use regex::Regex; +/// +/// let re = Regex::new(r"\d+").unwrap(); +/// let hay = "a111b222c"; +/// +/// assert!(hay.contains(&re)); +/// assert_eq!(hay.find(&re), Some(1)); +/// assert_eq!(hay.match_indices(&re).collect::<Vec<_>>(), vec![ +/// (1, "111"), +/// (5, "222"), +/// ]); +/// assert_eq!(hay.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); +/// ``` +#[derive(Clone)] +pub struct Regex { + pub(crate) meta: meta::Regex, + pub(crate) pattern: Arc<str>, +} + +impl core::fmt::Display for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl core::fmt::Debug for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Regex").field(&self.as_str()).finish() + } +} + +impl core::str::FromStr for Regex { + type Err = Error; + + /// Attempts to parse a string into a regular expression + fn from_str(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<&str> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<String> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: String) -> Result<Regex, Error> { + Regex::new(&s) + } +} + +/// Core regular expression methods. +impl Regex { + /// Compiles a regular expression. Once compiled, it can be used repeatedly + /// to search, split or replace substrings in a haystack. 
+ /// + /// Note that regex compilation tends to be a somewhat expensive process, + /// and unlike higher level environments, compilation is not automatically + /// cached for you. One should endeavor to compile a regex once and then + /// reuse it. For example, it's a bad idea to compile the same regex + /// repeatedly in a loop. + /// + /// # Errors + /// + /// If an invalid pattern is given, then an error is returned. + /// An error is also returned if the pattern is valid, but would + /// produce a regex that is bigger than the configured size limit via + /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by + /// default.) + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// // An Invalid pattern because of an unclosed parenthesis + /// assert!(Regex::new(r"foo(bar").is_err()); + /// // An invalid pattern because the regex would be too big + /// // because Unicode tends to inflate things. + /// assert!(Regex::new(r"\w{1000}").is_err()); + /// // Disabling Unicode can make the regex much smaller, + /// // potentially by up to or more than an order of magnitude. + /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok()); + /// ``` + pub fn new(re: &str) -> Result<Regex, Error> { + RegexBuilder::new(re).build() + } + + /// Returns true if and only if there is a match for the regex anywhere + /// in the haystack given. + /// + /// It is recommended to use this method if all you need to do is test + /// whether a match exists, since the underlying matching engine may be + /// able to do less work. 
+ /// + /// # Example + /// + /// Test if some haystack contains at least one word with exactly 13 + /// Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "I categorically deny having triskaidekaphobia."; + /// assert!(re.is_match(hay)); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &str) -> bool { + self.is_match_at(haystack, 0) + } + + /// This routine searches for the first match of this regex in the + /// haystack given, and if found, returns a [`Match`]. The `Match` + /// provides access to both the byte offsets of the match and the actual + /// substring that matched. + /// + /// Note that this should only be used if you want to find the entire + /// match. If instead you just want to test the existence of a match, + /// it's potentially faster to use `Regex::is_match(hay)` instead of + /// `Regex::find(hay).is_some()`. + /// + /// # Example + /// + /// Find the first word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "I categorically deny having triskaidekaphobia."; + /// let mat = re.find(hay).unwrap(); + /// assert_eq!(2..15, mat.range()); + /// assert_eq!("categorically", mat.as_str()); + /// ``` + #[inline] + pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> { + self.find_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Match`]. + /// + /// # Time complexity + /// + /// Note that since `find_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. 
+ /// + /// # Example + /// + /// Find every word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "Retroactively relinquishing remunerations is reprehensible."; + /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_str()).collect(); + /// assert_eq!(matches, vec![ + /// "Retroactively", + /// "relinquishing", + /// "remunerations", + /// "reprehensible", + /// ]); + /// ``` + #[inline] + pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> { + Matches { haystack, it: self.meta.find_iter(haystack) } + } + + /// This routine searches for the first match of this regex in the haystack + /// given, and if found, returns not only the overall match but also the + /// matches of each capture group in the regex. If no match is found, then + /// `None` is returned. + /// + /// Capture group `0` always corresponds to an implicit unnamed group that + /// includes the entire match. If a match is found, this group is always + /// present. Subsequent groups may be named and are numbered, starting + /// at 1, by the order in which the opening parenthesis appears in the + /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`, + /// `b` and `c` correspond to capture group indices `1`, `2` and `3`, + /// respectively. + /// + /// You should only use `captures` if you need access to the capture group + /// matches. Otherwise, [`Regex::find`] is generally faster for discovering + /// just the overall match. + /// + /// # Example + /// + /// Say you have some haystack with movie names and their release years, + /// like "'Citizen Kane' (1941)". It'd be nice if we could search for + /// substrings looking like that, while also extracting the movie name and + /// its release year separately. The example below shows how to do that. 
+ /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); + /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); + /// // You can also access the groups by index using the Index notation. + /// // Note that this will panic on an invalid index. In this case, these + /// // accesses are always correct because the overall regex will only + /// // match when these capture groups match. + /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); + /// assert_eq!(&caps[1], "Citizen Kane"); + /// assert_eq!(&caps[2], "1941"); + /// ``` + /// + /// Note that the full match is at capture group `0`. Each subsequent + /// capture group is indexed by the order of its opening `(`. + /// + /// We can make this example a bit clearer by using *named* capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane"); + /// assert_eq!(caps.name("year").unwrap().as_str(), "1941"); + /// // You can also access the groups by name using the Index notation. + /// // Note that this will panic on an invalid group name. In this case, + /// // these accesses are always correct because the overall regex will + /// // only match when these capture groups match. 
+ /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); + /// assert_eq!(&caps["title"], "Citizen Kane"); + /// assert_eq!(&caps["year"], "1941"); + /// ``` + /// + /// Here we name the capture groups, which we can access with the `name` + /// method or the `Index` notation with a `&str`. Note that the named + /// capture groups are still accessible with `get` or the `Index` notation + /// with a `usize`. + /// + /// The `0`th capture group is always unnamed, so it must always be + /// accessed with `get(0)` or `[0]`. + /// + /// Finally, one other way to get the matched substrings is with the + /// [`Captures::extract`] API: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let (full, [title, year]) = re.captures(hay).unwrap().extract(); + /// assert_eq!(full, "'Citizen Kane' (1941)"); + /// assert_eq!(title, "Citizen Kane"); + /// assert_eq!(year, "1941"); + /// ``` + #[inline] + pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> { + self.captures_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Captures`]. + /// + /// This is the same as [`Regex::find_iter`], but instead of only providing + /// access to the overall match, each value yielded includes access to the + /// matches of all capture groups in the regex. Reporting this extra match + /// data is potentially costly, so callers should only use `captures_iter` + /// over `find_iter` when they actually need access to the capture group + /// matches. + /// + /// # Time complexity + /// + /// Note that since `captures_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`.
+ /// + /// # Example + /// + /// We can use this to find all movie titles and their release years in + /// some haystack, where the movie is formatted like "'Title' (xxxx)": + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap(); + /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut movies = vec![]; + /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) { + /// movies.push((title, year.parse::<i64>()?)); + /// } + /// assert_eq!(movies, vec![ + /// ("Citizen Kane", 1941), + /// ("The Wizard of Oz", 1939), + /// ("M", 1931), + /// ]); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Or with named groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap(); + /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut it = re.captures_iter(hay); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "Citizen Kane"); + /// assert_eq!(&caps["year"], "1941"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "The Wizard of Oz"); + /// assert_eq!(&caps["year"], "1939"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "M"); + /// assert_eq!(&caps["year"], "1931"); + /// ``` + #[inline] + pub fn captures_iter<'r, 'h>( + &'r self, + haystack: &'h str, + ) -> CaptureMatches<'r, 'h> { + CaptureMatches { haystack, it: self.meta.captures_iter(haystack) } + } + + /// Returns an iterator of substrings of the haystack given, delimited by a + /// match of the regex. Namely, each element of the iterator corresponds to + /// a part of the haystack that *isn't* matched by the regular expression. 
+ /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Example + /// + /// To split a string delimited by arbitrary amounts of spaces or tabs: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[ \t]+").unwrap(); + /// let hay = "a b \t c\td e"; + /// let fields: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); + /// ``` + /// + /// # Example: more cases + /// + /// Basic usage: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = ""; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["lion", "tiger", "leopard"]); + /// ``` + /// + /// If a haystack contains multiple contiguous matches, you will end up + /// with empty spans yielded by the iterator: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "XXXXaXXbXc"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]); + /// + /// let re = Regex::new(r"/").unwrap(); + /// let hay = "(///)"; + /// let got: Vec<&str> = re.split(hay).collect(); + 
/// assert_eq!(got, vec!["(", "", "", ")"]); + /// ``` + /// + /// Separators at the start or end of a haystack are neighbored by an empty + /// substring. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"0").unwrap(); + /// let hay = "010"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "1", ""]); + /// ``` + /// + /// When the empty string is used as a regex, it splits at every valid + /// UTF-8 boundary by default (which includes the beginning and end of the + /// haystack): + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let hay = "rust"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]); + /// + /// // Splitting by an empty string is UTF-8 aware by default! + /// let re = Regex::new(r"").unwrap(); + /// let hay = "☃"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "☃", ""]); + /// ``` + /// + /// Contiguous separators (which commonly show up with whitespace) can lead to + /// possibly surprising behavior. For example, this code is correct: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = " a b c"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]); + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want + /// to match contiguous space characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" +").unwrap(); + /// let hay = " a b c"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// // N.B. This does still include a leading empty span because ' +' + /// // matches at the beginning of the haystack.
+ /// assert_eq!(got, vec!["", "a", "b", "c"]); + /// ``` + #[inline] + pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> { + Split { haystack, it: self.meta.split(haystack) } + } + + /// Returns an iterator of at most `limit` substrings of the haystack + /// given, delimited by a match of the regex. (A `limit` of `0` will return + /// no substrings.) Namely, each element of the iterator corresponds to a + /// part of the haystack that *isn't* matched by the regular expression. + /// The remainder of the haystack that is not split will be the last + /// element in the iterator. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Example + /// + /// Get the first two words in some haystack: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\W+").unwrap(); + /// let hay = "Hey! 
How are you?"; + /// let fields: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(fields, vec!["Hey", "How", "are you?"]); + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a little lamb"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = ""; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec!["lion", "", "tigerXleopard"]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec!["lion", "tiger::leopard"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 1).collect(); + /// assert_eq!(got, vec!["abcXdef"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcdef"; + /// let got: Vec<&str> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec!["abcdef"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 0).collect(); + /// assert!(got.is_empty()); + /// ``` + #[inline] + pub fn splitn<'r, 'h>( + &'r self, + haystack: &'h str, + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { haystack, it: self.meta.splitn(haystack, limit) } + } + + /// Replaces the leftmost-first match in the given haystack with the + /// replacement provided. The replacement can be a regular string (where + /// `$N` and `$name` are expanded to match capture groups) or a function + /// that takes a [`Captures`] and returns the replaced string. 
+ /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// # Replacement string syntax + /// + /// All instances of `$ref` in the replacement string are replaced with + /// the substring corresponding to the capture group identified by `ref`. + /// + /// `ref` may be an integer corresponding to the index of the capture group + /// (counted by order of opening parenthesis where `0` is the entire match) + /// or it can be a name (consisting of letters, digits or underscores) + /// corresponding to a named capture group. + /// + /// If `ref` isn't a valid capture group (whether the name doesn't exist or + /// isn't a valid index), then it is replaced with the empty string. + /// + /// The longest possible name is used. For example, `$1a` looks up the + /// capture group named `1a` and not the capture group at index `1`. To + /// exert more precise control over the name, use braces, e.g., `${1}a`. + /// + /// To write a literal `$` use `$$`. + /// + /// # Example + /// + /// Note that this function is polymorphic with respect to the replacement. + /// In typical usage, this can just be a normal string: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[^01]+").unwrap(); + /// assert_eq!(re.replace("1078910", ""), "1010"); + /// ``` + /// + /// But anything satisfying the [`Replacer`] trait will work. For example, + /// a closure of type `|&Captures| -> String` provides direct access to the + /// captures corresponding to a match. 
This allows one to access capturing + /// group matches easily: + /// + /// ``` + /// use regex::{Captures, Regex}; + /// + /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { + /// format!("{} {}", &caps[2], &caps[1]) + /// }); + /// assert_eq!(result, "Bruce Springsteen"); + /// ``` + /// + /// But this is a bit cumbersome to use all the time. Instead, a simple + /// syntax is supported (as described above) that expands `$name` into the + /// corresponding capture group. Here's the last example, but using this + /// expansion technique with named capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", "$first $last"); + /// assert_eq!(result, "Bruce Springsteen"); + /// ``` + /// + /// Note that using `$2` instead of `$first` or `$1` instead of `$last` + /// would produce the same result. To write a literal `$` use `$$`. + /// + /// Sometimes the replacement string requires use of curly braces to + /// delineate a capture group replacement when it is adjacent to some other + /// literal text. For example, if we wanted to join two words together with + /// an underscore: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap(); + /// let result = re.replace("deep fried", "${first}_$second"); + /// assert_eq!(result, "deep_fried"); + /// ``` + /// + /// Without the curly braces, the capture group name `first_` would be + /// used, and since it doesn't exist, it would be replaced with the empty + /// string. + /// + /// Finally, sometimes you just want to replace a literal string with no + /// regard for capturing group expansion. 
This can be done by wrapping a + /// string with [`NoExpand`]: + /// + /// ``` + /// use regex::{NoExpand, Regex}; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); + /// assert_eq!(result, "$2 $last"); + /// ``` + /// + /// Using `NoExpand` may also be faster, since the replacement string won't + /// need to be parsed for the `$` syntax. + #[inline] + pub fn replace<'h, R: Replacer>( + &self, + haystack: &'h str, + rep: R, + ) -> Cow<'h, str> { + self.replacen(haystack, 1, rep) + } + + /// Replaces all non-overlapping matches in the haystack with the + /// replacement provided. This is the same as calling `replacen` with + /// `limit` set to `0`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Fallibility + /// + /// If you need to write a replacement routine where any individual + /// replacement might "fail," doing so with this API isn't really feasible + /// because there's no way to stop the search process if a replacement + /// fails. 
Instead, if you need this functionality, you should consider + /// implementing your own replacement routine: + /// + /// ``` + /// use regex::{Captures, Regex}; + /// + /// fn replace_all<E>( + /// re: &Regex, + /// haystack: &str, + /// replacement: impl Fn(&Captures) -> Result<String, E>, + /// ) -> Result<String, E> { + /// let mut new = String::with_capacity(haystack.len()); + /// let mut last_match = 0; + /// for caps in re.captures_iter(haystack) { + /// let m = caps.get(0).unwrap(); + /// new.push_str(&haystack[last_match..m.start()]); + /// new.push_str(&replacement(&caps)?); + /// last_match = m.end(); + /// } + /// new.push_str(&haystack[last_match..]); + /// Ok(new) + /// } + /// + /// // Let's replace each word with the number of bytes in that word. + /// // But if we see a word that is "too long," we'll give up. + /// let re = Regex::new(r"\w+").unwrap(); + /// let replacement = |caps: &Captures| -> Result<String, &'static str> { + /// if caps[0].len() >= 5 { + /// return Err("word too long"); + /// } + /// Ok(caps[0].len().to_string()) + /// }; + /// assert_eq!( + /// Ok("2 3 3 3?".to_string()), + /// replace_all(&re, "hi how are you?", &replacement), + /// ); + /// assert!(replace_all(&re, "hi there", &replacement).is_err()); + /// ``` + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = " + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replace_all(hay, "$2 $1"); + /// assert_eq!(new, " + /// 1973 Greetings + /// 1973 Wild + /// 1975 BornToRun + /// 1978 Darkness + /// 1980 TheRiver + /// "); + /// ``` + #[inline] + pub fn replace_all<'h, R: Replacer>( + &self, + haystack: &'h str, + rep: 
R, + ) -> Cow<'h, str> { + self.replacen(haystack, 0, rep) + } + + /// Replaces at most `limit` non-overlapping matches in the haystack with + /// the replacement provided. If `limit` is `0`, then all non-overlapping + /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is + /// equivalent to `Regex::replacen(hay, 0, rep)`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Fallibility + /// + /// See the corresponding section in the docs for [`Regex::replace_all`] + /// for tips on how to deal with a replacement routine that can fail. + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields. But we only do it for the first two matches. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = " + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replacen(hay, 2, "$2 $1"); + /// assert_eq!(new, " + /// 1973 Greetings + /// 1973 Wild + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "); + /// ``` + #[inline] + pub fn replacen<'h, R: Replacer>( + &self, + haystack: &'h str, + limit: usize, + mut rep: R, + ) -> Cow<'h, str> { + // If we know that the replacement doesn't have any capture expansions, + // then we can use the fast path. 
The fast path can make a tremendous + // difference: + // + // 1) We use `find_iter` instead of `captures_iter`. Not asking for + // captures generally makes the regex engines faster. + // 2) We don't need to look up all of the capture groups and do + // replacements inside the replacement string. We just push it + // at each match and be done with it. + if let Some(rep) = rep.no_expansion() { + let mut it = self.find_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, m) in it { + new.push_str(&haystack[last_match..m.start()]); + new.push_str(&rep); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.push_str(&haystack[last_match..]); + return Cow::Owned(new); + } + + // The slower path, which we use if the replacement may need access to + // capture groups. + let mut it = self.captures_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, cap) in it { + // unwrap on 0 is OK because captures only reports matches + let m = cap.get(0).unwrap(); + new.push_str(&haystack[last_match..m.start()]); + rep.replace_append(&cap, &mut new); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.push_str(&haystack[last_match..]); + Cow::Owned(new) + } +} + +/// A group of advanced or "lower level" search methods. Some methods permit +/// starting the search at a position greater than `0` in the haystack. Other +/// methods permit reusing allocations, for example, when extracting the +/// matches for capture groups. +impl Regex { + /// Returns the end byte offset of the first match in the haystack given. + /// + /// This method may have the same performance characteristics as + /// `is_match`. 
Behaviorally, it doesn't just report whether a match + /// occurs, but also the end offset for a match. In particular, the offset + /// returned *may be shorter* than the proper end of the leftmost-first + /// match that you would find via [`Regex::find`]. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change based on internal heuristics. + /// + /// # Example + /// + /// Typically, `a+` would match the entire first sequence of `a` in some + /// haystack, but `shortest_match` *may* give up as soon as it sees the + /// first `a`. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"a+").unwrap(); + /// let offset = re.shortest_match("aaaaa").unwrap(); + /// assert_eq!(offset, 1); + /// ``` + #[inline] + pub fn shortest_match(&self, haystack: &str) -> Option<usize> { + self.shortest_match_at(haystack, 0) + } + + /// Returns the same as [`Regex::shortest_match`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. + /// + /// If a match is found, the offset returned is relative to the beginning + /// of the haystack, not the beginning of the search. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account.
+ /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.shortest_match(&hay[2..]), Some(4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.shortest_match_at(hay, 2), None); + /// ``` + #[inline] + pub fn shortest_match_at( + &self, + haystack: &str, + start: usize, + ) -> Option<usize> { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).map(|hm| hm.offset()) + } + + /// Returns the same as [`Regex::is_match`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert!(re.is_match(&hay[2..])); + /// // No match because the assertions take the context into account. + /// assert!(!re.is_match_at(hay, 2)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &str, start: usize) -> bool { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).is_some() + } + + /// Returns the same as [`Regex::find`], but starts the search at the given + /// offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. 
For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.find_at(hay, 2), None); + /// ``` + #[inline] + pub fn find_at<'h>( + &self, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta + .search(&input) + .map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], "chew"); + /// // No match because the assertions take the context into account. 
+ /// assert!(re.captures_at(hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_at<'h>( + &self, + haystack: &'h str, + start: usize, + ) -> Option<Captures<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + let mut caps = self.meta.create_captures(); + self.meta.search_captures(&input, &mut caps); + if caps.is_match() { + let static_captures_len = self.static_captures_len(); + Some(Captures { haystack, caps, static_captures_len }) + } else { + None + } + } + + /// This is like [`Regex::captures`], but writes the byte offsets of each + /// capture group match into the locations given. + /// + /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], + /// but does *not* store a reference to the haystack. This makes its API + /// a bit lower level and less convenient. But in exchange, callers + /// may allocate their own `CaptureLocations` and reuse it for multiple + /// searches. This may be helpful if allocating a `Captures` shows up in a + /// profile as too costly. + /// + /// To create a `CaptureLocations` value, use the + /// [`Regex::capture_locations`] method. + /// + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. + /// + /// # Panics + /// + /// This routine may panic if the given `CaptureLocations` was not created + /// by this regex. 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, "id=foo123").is_some()); + /// assert_eq!(Some((0, 9)), locs.get(0)); + /// assert_eq!(Some((0, 2)), locs.get(1)); + /// assert_eq!(Some((3, 9)), locs.get(2)); + /// ``` + #[inline] + pub fn captures_read<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, 0) + } + + /// Returns the same as [`Regex::captures_read`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// This routine may also panic if the given `CaptureLocations` was not + /// created by this regex. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// let mut locs = re.capture_locations(); + /// // We get a match here, but it's probably not intended. + /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some()); + /// // No match because the assertions take the context into account. 
+ /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_read_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.search_captures(&input, &mut locs.0); + locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// An undocumented alias for `captures_read_at`. + /// + /// The `regex-capi` crate previously used this routine, so to avoid + /// breaking that crate, we continue to provide the name as an undocumented + /// alias. + #[doc(hidden)] + #[inline] + pub fn read_captures_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, start) + } +} + +/// Auxiliary methods. +impl Regex { + /// Returns the original string of this regex. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"foo\w+bar").unwrap(); + /// assert_eq!(re.as_str(), r"foo\w+bar"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.pattern + } + + /// Returns an iterator over the capture names in this regex. + /// + /// The iterator returned yields elements of type `Option<&str>`. That is, + /// the iterator yields values for all capture groups, even ones that are + /// unnamed. The order of the groups corresponds to the order of the group's + /// corresponding opening parenthesis. + /// + /// The first element of the iterator always yields the group corresponding + /// to the overall match, and this group is always unnamed. Therefore, the + /// iterator always yields at least one group. 
+ /// + /// # Example + /// + /// This shows basic usage with a mix of named and unnamed capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), Some(Some("a"))); + /// assert_eq!(names.next(), Some(Some("b"))); + /// assert_eq!(names.next(), Some(None)); + /// // the '(?:.)' group is non-capturing and so doesn't appear here! + /// assert_eq!(names.next(), Some(Some("c"))); + /// assert_eq!(names.next(), None); + /// ``` + /// + /// The iterator always yields at least one element, even for regexes with + /// no capture groups and even for regexes that can never match: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// ``` + #[inline] + pub fn capture_names(&self) -> CaptureNames<'_> { + CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO)) + } + + /// Returns the number of capture groups in this regex. + /// + /// This includes all named and unnamed groups, including the implicit + /// unnamed group that is always present and corresponds to the entire + /// match. + /// + /// Since the implicit unnamed group is always included in this length, the + /// length returned is guaranteed to be greater than zero.
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"foo").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// + /// let re = Regex::new(r"(foo)").unwrap(); + /// assert_eq!(2, re.captures_len()); + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// assert_eq!(5, re.captures_len()); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// ``` + #[inline] + pub fn captures_len(&self) -> usize { + self.meta.group_info().group_len(PatternID::ZERO) + } + + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option<usize> { + self.meta.static_captures_len() + } + + /// Returns a freshly allocated set of capture locations that can + /// be reused in multiple calls to [`Regex::captures_read`] or + /// [`Regex::captures_read_at`]. + /// + /// The returned locations can be used for any subsequent search for this + /// particular regex. There is no guarantee that it is correct to use for + /// other regexes, even if they have the same number of capture groups. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(.)(.)(\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, "Padron").is_some()); + /// assert_eq!(locs.get(0), Some((0, 6))); + /// assert_eq!(locs.get(1), Some((0, 1))); + /// assert_eq!(locs.get(2), Some((1, 2))); + /// assert_eq!(locs.get(3), Some((2, 6))); + /// ``` + #[inline] + pub fn capture_locations(&self) -> CaptureLocations { + CaptureLocations(self.meta.create_captures()) + } + + /// An alias for `capture_locations` to preserve backward compatibility. + /// + /// The `regex-capi` crate used this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn locations(&self) -> CaptureLocations { + self.capture_locations() + } +} + +/// Represents a single match of a regex in a haystack.
+/// +/// A `Match` contains both the start and end byte offsets of the match and the +/// actual substring corresponding to the range of those byte offsets. It is +/// guaranteed that `start <= end`. When `start == end`, the match is empty. +/// +/// Since this `Match` can only be produced by the top-level `Regex` APIs +/// that only support searching UTF-8 encoded strings, the byte offsets for a +/// `Match` are guaranteed to fall on valid UTF-8 codepoint boundaries. That +/// is, slicing a `&str` with [`Match::range`] is guaranteed to never panic. +/// +/// Values with this type are created by [`Regex::find`] or +/// [`Regex::find_iter`]. Other APIs can create `Match` values too. For +/// example, [`Captures::get`]. +/// +/// The lifetime parameter `'h` refers to the lifetime of the +/// haystack that this match was produced from. +/// +/// # Numbering +/// +/// The byte offsets in a `Match` form a half-open interval. That is, the +/// start of the range is inclusive and the end of the range is exclusive. +/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte +/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and +/// `6` corresponds to `x`, which is one past the end of the match. This +/// corresponds to the same kind of slicing that Rust uses. +/// +/// For more on why this was chosen over other schemes (aside from being +/// consistent with how Rust the language works), see [this discussion] and +/// [Dijkstra's note on a related topic][note]. +/// +/// [this discussion]: https://github.com/rust-lang/regex/discussions/866 +/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html +/// +/// # Example +/// +/// This example shows the value of each of the methods on `Match` for a +/// particular search.
+/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"\p{Greek}+").unwrap(); +/// let hay = "Greek: αβγδ"; +/// let m = re.find(hay).unwrap(); +/// assert_eq!(7, m.start()); +/// assert_eq!(15, m.end()); +/// assert!(!m.is_empty()); +/// assert_eq!(8, m.len()); +/// assert_eq!(7..15, m.range()); +/// assert_eq!("αβγδ", m.as_str()); +/// ``` +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Match<'h> { + haystack: &'h str, + start: usize, + end: usize, +} + +impl<'h> Match<'h> { + /// Returns the byte offset of the start of the match in the haystack. The + /// start of the match corresponds to the position where the match begins + /// and includes the first byte in the match. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That + /// is, it will never be an offset that appears between the UTF-8 code + /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is + /// always safe to slice the corresponding haystack using this offset. + #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Returns the byte offset of the end of the match in the haystack. The + /// end of the match corresponds to the byte immediately following the last + /// byte in the match. This means that `&slice[start..end]` works as one + /// would expect. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That + /// is, it will never be an offset that appears between the UTF-8 code + /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is + /// always safe to slice the corresponding haystack using this offset. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Returns true if and only if this match has a length of zero. + /// + /// Note that an empty match can only occur when the regex itself can + /// match the empty string. 
Here are some examples of regexes that can + /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`, + /// `(foo|\d+|quux)?`. + #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns the range over the starting and ending byte offsets of the + /// match in the haystack. + /// + /// It is always correct to slice the original haystack searched with this + /// range. That is, because the offsets are guaranteed to fall on valid + /// UTF-8 boundaries, the range returned is always valid. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.start..self.end + } + + /// Returns the substring of the haystack that matched. + #[inline] + pub fn as_str(&self) -> &'h str { + &self.haystack[self.range()] + } + + /// Creates a new match from the given haystack and byte offsets. + #[inline] + fn new(haystack: &'h str, start: usize, end: usize) -> Match<'h> { + Match { haystack, start, end } + } +} + +impl<'h> core::fmt::Debug for Match<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Match") + .field("start", &self.start) + .field("end", &self.end) + .field("string", &self.as_str()) + .finish() + } +} + +impl<'h> From<Match<'h>> for &'h str { + fn from(m: Match<'h>) -> &'h str { + m.as_str() + } +} + +impl<'h> From<Match<'h>> for core::ops::Range<usize> { + fn from(m: Match<'h>) -> core::ops::Range<usize> { + m.range() + } +} + +/// Represents the capture groups for a single match. +/// +/// Capture groups refer to parts of a regex enclosed in parentheses. They can +/// be optionally named. The purpose of capture groups is to be able to +/// reference different parts of a match based on the original pattern. 
For +/// example, say you want to match the individual letters in a 5-letter word: +/// +/// ```text +/// (?<first>\w)(\w)(?:\w)\w(?<last>\w) +/// ``` +/// +/// This regex has 4 capture groups: +/// +/// * The group at index `0` corresponds to the overall match. It is always +/// present in every match and never has a name. +/// * The group at index `1` with name `first` corresponding to the first +/// letter. +/// * The group at index `2` with no name corresponding to the second letter. +/// * The group at index `3` with name `last` corresponding to the fifth and +/// last letter. +/// +/// Notice that `(?:\w)` was not listed above as a capture group despite it +/// being enclosed in parentheses. That's because `(?:pattern)` is a special +/// syntax that permits grouping but *without* capturing. The reason for not +/// treating it as a capture is that tracking and reporting capture groups +/// requires additional state that may lead to slower searches. So using as few +/// capture groups as possible can help performance. (Although the difference +/// in performance of a couple of capture groups is likely immaterial.) +/// +/// Values with this type are created by [`Regex::captures`] or +/// [`Regex::captures_iter`]. +/// +/// `'h` is the lifetime of the haystack that these captures were matched from. +/// +/// # Example +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap(); +/// let caps = re.captures("toady").unwrap(); +/// assert_eq!("toady", &caps[0]); +/// assert_eq!("t", &caps["first"]); +/// assert_eq!("o", &caps[2]); +/// assert_eq!("y", &caps["last"]); +/// ``` +pub struct Captures<'h> { + haystack: &'h str, + caps: captures::Captures, + static_captures_len: Option<usize>, +} + +impl<'h> Captures<'h> { + /// Returns the `Match` associated with the capture group at index `i`. 
If + /// `i` does not correspond to a capture group, or if the capture group did + /// not participate in the match, then `None` is returned. + /// + /// When `i == 0`, this is guaranteed to return a non-`None` value. + /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); + /// let caps = re.captures("abc123").unwrap(); + /// + /// let substr1 = caps.get(1).map_or("", |m| m.as_str()); + /// let substr2 = caps.get(2).map_or("", |m| m.as_str()); + /// assert_eq!(substr1, "123"); + /// assert_eq!(substr2, ""); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<Match<'h>> { + self.caps + .get_group(i) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// Returns the `Match` associated with the capture group named `name`. If + /// `name` isn't a valid capture group or it refers to a group that didn't + /// match, then `None` is returned. + /// + /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime + /// matches the lifetime of the haystack in this `Captures` value. + /// Conversely, the substring returned by `caps["name"]` has a lifetime + /// of the `Captures` value, which is likely shorter than the lifetime of + /// the haystack. In some cases, it may be necessary to use this method to + /// access the matching substring instead of the `caps["name"]` notation. 
+ /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new( + /// r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))", + /// ).unwrap(); + /// let caps = re.captures("abc123").unwrap(); + /// + /// let numbers = caps.name("numbers").map_or("", |m| m.as_str()); + /// let letters = caps.name("letters").map_or("", |m| m.as_str()); + /// assert_eq!(numbers, "123"); + /// assert_eq!(letters, ""); + /// ``` + #[inline] + pub fn name(&self, name: &str) -> Option<Match<'h>> { + self.caps + .get_group_by_name(name) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups. + /// + /// This returns a tuple where the first element corresponds to the full + /// substring of the haystack that matched the regex. The second element is + /// an array of substrings, with each corresponding to the substring + /// that matched for a particular capture group. + /// + /// # Panics + /// + /// This panics if the number of possible matching groups in this + /// `Captures` value is not fixed to `N` in all circumstances. + /// More precisely, this routine only works when `N` is exactly one less + /// than [`Regex::static_captures_len`], since `N` does not count the + /// implicit group for the overall match. + /// + /// Stated more plainly, if the number of matching capture groups in a + /// regex can vary from match to match, then this function always panics. + /// + /// For example, `(a)(b)|(c)` could produce two matching capture groups + /// or one matching capture group for any given match. Therefore, one + /// cannot use `extract` with such a pattern. + /// + /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because + /// the number of capture groups in every match is always equivalent, + /// even if the capture _indices_ in each match are not.
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// let Some((full, [year, month, day])) = + /// re.captures(hay).map(|caps| caps.extract()) else { return }; + /// assert_eq!("2010-03-14", full); + /// assert_eq!("2010", year); + /// assert_eq!("03", month); + /// assert_eq!("14", day); + /// ``` + /// + /// # Example: iteration + /// + /// This example shows how to use this method when iterating over all + /// `Captures` matches in a haystack. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = "1973-01-05, 1975-08-25 and 1980-10-18"; + /// + /// let mut dates: Vec<(&str, &str, &str)> = vec![]; + /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) { + /// dates.push((y, m, d)); + /// } + /// assert_eq!(dates, vec![ + /// ("1973", "01", "05"), + /// ("1975", "08", "25"), + /// ("1980", "10", "18"), + /// ]); + /// ``` + /// + /// # Example: parsing different formats + /// + /// This API is particularly useful when you need to extract a particular + /// value that might occur in a different format. 
Consider, for example, + /// an identifier that might be in double quotes or single quotes: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap(); + /// let hay = r#"The first is id:"foo" and the second is id:'bar'."#; + /// let mut ids = vec![]; + /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) { + /// ids.push(id); + /// } + /// assert_eq!(ids, vec!["foo", "bar"]); + /// ``` + pub fn extract<const N: usize>(&self) -> (&'h str, [&'h str; N]) { + let len = self + .static_captures_len + .expect("number of capture groups can vary in a match") + .checked_sub(1) + .expect("number of groups is always greater than zero"); + assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len); + // The regex-automata variant of extract is a bit more permissive. + // It doesn't require the number of matching capturing groups to be + // static, and you can even request fewer groups than what's there. So + // this is guaranteed to never panic because we've asserted above that + // the user has requested precisely the number of groups that must be + // present in any match for this regex. + self.caps.extract(self.haystack) + } + + /// Expands all instances of `$ref` in `replacement` to the corresponding + /// capture group, and writes them to the `dst` buffer given. A `ref` can + /// be a capture group index or a name. If `ref` doesn't refer to a capture + /// group that participated in the match, then it is replaced with the + /// empty string. + /// + /// # Format + /// + /// The format of the replacement string supports two different kinds of + /// capture references: unbraced and braced. + /// + /// For the unbraced format, the format supported is `$ref` where `ref` + /// can be any sequence of characters in the class `[0-9A-Za-z_]`. `ref` is always + /// the longest possible parse. So for example, `$1a` corresponds to the + /// capture group named `1a` and not the capture group at index `1`.
If + /// `ref` matches `^[0-9]+$`, then it is treated as a capture group index + /// itself and not a name. + /// + /// For the braced format, the format supported is `${ref}` where `ref` can + /// be any sequence of bytes except for `}`. If no closing brace occurs, + /// then it is not considered a capture reference. As with the unbraced + /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture + /// group index and not a name. + /// + /// The braced format is useful for exerting precise control over the name + /// of the capture reference. For example, `${1}a` corresponds to the + /// capture group reference `1` followed by the letter `a`, where as `$1a` + /// (as mentioned above) corresponds to the capture group reference `1a`. + /// The braced format is also useful for expressing capture group names + /// that use characters not supported by the unbraced format. For example, + /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`. + /// + /// If a capture group reference is found and it does not refer to a valid + /// capture group, then it will be replaced with the empty string. + /// + /// To write a literal `$`, use `$$`. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new( + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// ).unwrap(); + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// let caps = re.captures(hay).unwrap(); + /// + /// let mut dst = String::new(); + /// caps.expand("year=$year, month=$month, day=$day", &mut dst); + /// assert_eq!(dst, "year=2010, month=03, day=14"); + /// ``` + #[inline] + pub fn expand(&self, replacement: &str, dst: &mut String) { + self.caps.interpolate_string_into(self.haystack, replacement, dst); + } + + /// Returns an iterator over all capture groups. This includes both + /// matching and non-matching groups. 
+ /// + /// The iterator always yields at least one matching group: the first group + /// (at index `0`) with no name. Subsequent groups are returned in the order + /// of their opening parenthesis in the regex. + /// + /// The elements yielded have type `Option<Match<'h>>`, where a non-`None` + /// value is present if the capture group matches. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures("AZ").unwrap(); + /// + /// let mut it = caps.iter(); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("AZ")); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("A")); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), None); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("Z")); + /// assert_eq!(it.next(), None); + /// ``` + #[inline] + pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> { + SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() } + } + + /// Returns the total number of capture groups. This includes both + /// matching and non-matching groups. + /// + /// The length returned is always equivalent to the number of elements + /// yielded by [`Captures::iter`]. Consequently, the length is always + /// greater than zero since every `Captures` value always includes the + /// match for the entire regex. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures("AZ").unwrap(); + /// assert_eq!(caps.len(), 4); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.caps.group_len() + } +} + +impl<'h> core::fmt::Debug for Captures<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + /// A little helper type to provide a nice map-like debug + /// representation for our capturing group spans. 
+ /// + /// regex-automata has something similar, but it includes the pattern + /// ID in its debug output, which is confusing. It also doesn't include + /// the strings that match because a regex-automata `Captures` doesn't + /// borrow the haystack. + struct CapturesDebugMap<'a> { + caps: &'a Captures<'a>, + } + + impl<'a> core::fmt::Debug for CapturesDebugMap<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut map = f.debug_map(); + let names = + self.caps.caps.group_info().pattern_names(PatternID::ZERO); + for (group_index, maybe_name) in names.enumerate() { + let key = Key(group_index, maybe_name); + match self.caps.get(group_index) { + None => map.entry(&key, &None::<()>), + Some(mat) => map.entry(&key, &Value(mat)), + }; + } + map.finish() + } + } + + struct Key<'a>(usize, Option<&'a str>); + + impl<'a> core::fmt::Debug for Key<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}", self.0)?; + if let Some(name) = self.1 { + write!(f, "/{:?}", name)?; + } + Ok(()) + } + } + + struct Value<'a>(Match<'a>); + + impl<'a> core::fmt::Debug for Value<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "{}..{}/{:?}", + self.0.start(), + self.0.end(), + self.0.as_str() + ) + } + } + + f.debug_tuple("Captures") + .field(&CapturesDebugMap { caps: self }) + .finish() + } +} + +/// Get a matching capture group's haystack substring by index. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, use +/// [`Captures::get`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&str` returned by this implementation is the lifetime of the `Captures` +/// value itself.
+/// +/// # Panics +/// +/// If there is no matching group at the given index. +impl<'h> core::ops::Index<usize> for Captures<'h> { + type Output = str; + + // The lifetime is written out to make it clear that the &str returned + // does NOT have a lifetime equivalent to 'h. + fn index<'a>(&'a self, i: usize) -> &'a str { + self.get(i) + .map(|m| m.as_str()) + .unwrap_or_else(|| panic!("no group at index '{}'", i)) + } +} + +/// Get a matching capture group's haystack substring by name. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, use +/// [`Captures::name`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&str` returned by this implementation is the lifetime of the `Captures` +/// value itself. +/// +/// `'n` is the lifetime of the group name used to index the `Captures` value. +/// +/// # Panics +/// +/// If there is no matching group at the given name. +impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { + type Output = str; + + fn index<'a>(&'a self, name: &'n str) -> &'a str { + self.name(name) + .map(|m| m.as_str()) + .unwrap_or_else(|| panic!("no group named '{}'", name)) + } +} + +/// A low level representation of the byte offsets of each capture group. +/// +/// You can think of this as a lower level [`Captures`], where this type does +/// not support named capturing groups directly and it does not borrow the +/// haystack that these offsets were matched on. +/// +/// Primarily, this type is useful when using the lower level `Regex` APIs such +/// as [`Regex::captures_read`], which permits amortizing the allocation in +/// which capture match offsets are stored. +/// +/// In order to build a value of this type, you'll need to call the +/// [`Regex::capture_locations`] method.
The value returned can then be reused +/// in subsequent searches for that regex. Using it for other regexes may +/// result in a panic or otherwise incorrect results. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// assert_eq!(None, locs.get(34973498648)); +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` +#[derive(Clone, Debug)] +pub struct CaptureLocations(captures::Captures); + +/// A type alias for `CaptureLocations` for backwards compatibility. +/// +/// Previously, we exported `CaptureLocations` as `Locations` in an +/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), +/// we continue re-exporting the same undocumented API. +#[doc(hidden)] +pub type Locations = CaptureLocations; + +impl CaptureLocations { + /// Returns the start and end byte offsets of the capture group at index + /// `i`. This returns `None` if `i` is not a valid capture group or if the + /// capture group did not match. 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); + /// assert_eq!(Some((0, 17)), locs.get(0)); + /// assert_eq!(Some((0, 5)), locs.get(1)); + /// assert_eq!(Some((6, 17)), locs.get(2)); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<(usize, usize)> { + self.0.get_group(i).map(|sp| (sp.start, sp.end)) + } + + /// Returns the total number of capture groups (even if they didn't match). + /// That is, the length returned is unaffected by the result of a search. + /// + /// This is always at least `1` since every regex has at least `1` + /// capturing group that corresponds to the entire match. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert_eq!(3, locs.len()); + /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); + /// assert_eq!(3, locs.len()); + /// ``` + /// + /// Notice that the length is always at least `1`, regardless of the regex: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// + /// // [a&&b] is a regex that never matches anything. + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + // self.0.group_len() returns 0 if the underlying captures doesn't + // represent a match, but the behavior guaranteed for this method is + // that the length doesn't change based on a match or not. + self.0.group_info().group_len(PatternID::ZERO) + } + + /// An alias for the `get` method for backwards compatibility. 
+ /// + /// Previously, we exported `get` as `pos` in an undocumented API. To + /// prevent breaking that code (e.g., in `regex-capi`), we continue + /// re-exporting the same undocumented API. + #[doc(hidden)] + #[inline] + pub fn pos(&self, i: usize) -> Option<(usize, usize)> { + self.get(i) + } +} + +/// An iterator over all non-overlapping matches in a haystack. +/// +/// This iterator yields [`Match`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the haystack. +/// +/// This iterator is created by [`Regex::find_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct Matches<'r, 'h> { + haystack: &'h str, + it: meta::FindMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for Matches<'r, 'h> { + type Item = Match<'h>; + + #[inline] + fn next(&mut self) -> Option<Match<'h>> { + self.it + .next() + .map(|sp| Match::new(self.haystack, sp.start(), sp.end())) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Match` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {} + +/// An iterator over all non-overlapping capture matches in a haystack. +/// +/// This iterator yields [`Captures`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the matched string.
+/// +/// This iterator is created by [`Regex::captures_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct CaptureMatches<'r, 'h> { + haystack: &'h str, + it: meta::CapturesMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> { + type Item = Captures<'h>; + + #[inline] + fn next(&mut self) -> Option<Captures<'h>> { + let static_captures_len = self.it.regex().static_captures_len(); + self.it.next().map(|caps| Captures { + haystack: self.haystack, + caps, + static_captures_len, + }) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Captures` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {} + +/// An iterator over all substrings delimited by a regex match. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the string being split. +/// +/// This iterator is created by [`Regex::split`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`.
+#[derive(Debug)] +pub struct Split<'r, 'h> { + haystack: &'h str, + it: meta::Split<'r, 'h>, +} + +impl<'r, 'h> Iterator for Split<'r, 'h> { + type Item = &'h str; + + #[inline] + fn next(&mut self) -> Option<&'h str> { + self.it.next().map(|span| &self.haystack[span]) + } +} + +impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {} + +/// An iterator over at most `N` substrings delimited by a regex match. +/// +/// The last substring yielded by this iterator will be whatever remains after +/// `N-1` splits. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the string being split. +/// +/// This iterator is created by [`Regex::splitn`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +/// +/// Although note that the worst case time here has an upper bound given +/// by the `limit` parameter to [`Regex::splitn`]. +#[derive(Debug)] +pub struct SplitN<'r, 'h> { + haystack: &'h str, + it: meta::SplitN<'r, 'h>, +} + +impl<'r, 'h> Iterator for SplitN<'r, 'h> { + type Item = &'h str; + + #[inline] + fn next(&mut self) -> Option<&'h str> { + self.it.next().map(|span| &self.haystack[span]) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} + +/// An iterator over the names of all capture groups in a regex. +/// +/// This iterator yields values of type `Option<&str>` in order of the opening +/// capture group parenthesis in the regex pattern. `None` is yielded for +/// groups with no name. The first element always corresponds to the implicit +/// and unnamed group for the overall match. +/// +/// `'r` is the lifetime of the compiled regular expression.
+/// +/// This iterator is created by [`Regex::capture_names`]. +#[derive(Clone, Debug)] +pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>); + +impl<'r> Iterator for CaptureNames<'r> { + type Item = Option<&'r str>; + + #[inline] + fn next(&mut self) -> Option<Option<&'r str>> { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'r> ExactSizeIterator for CaptureNames<'r> {} + +impl<'r> core::iter::FusedIterator for CaptureNames<'r> {} + +/// An iterator over all group matches in a [`Captures`] value. +/// +/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the +/// lifetime of the haystack that the matches are for. The order of elements +/// yielded corresponds to the order of the opening parenthesis for the group +/// in the regex pattern. `None` is yielded for groups that did not participate +/// in the match. +/// +/// The first element always corresponds to the implicit group for the overall +/// match. Since this iterator is created by a [`Captures`] value, and a +/// `Captures` value is only created when a match occurs, it follows that the +/// first element yielded by this iterator is guaranteed to be non-`None`. +/// +/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that +/// created this iterator, and the lifetime `'h` corresponds to the originally +/// matched haystack. 
+#[derive(Clone, Debug)] +pub struct SubCaptureMatches<'c, 'h> { + haystack: &'h str, + it: captures::CapturesPatternIter<'c>, +} + +impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> { + type Item = Option<Match<'h>>; + + #[inline] + fn next(&mut self) -> Option<Option<Match<'h>>> { + self.it.next().map(|group| { + group.map(|sp| Match::new(self.haystack, sp.start, sp.end)) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.it.count() + } +} + +impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {} + +impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {} + +/// A trait for types that can be used to replace matches in a haystack. +/// +/// In general, users of this crate shouldn't need to implement this trait, +/// since implementations are already provided for `&str` along with other +/// variants of string types, as well as `FnMut(&Captures) -> String` (or any +/// `FnMut(&Captures) -> T` where `T: AsRef<str>`). Those cover most use cases, +/// but callers can implement this trait directly if necessary. +/// +/// # Example +/// +/// This example shows a basic implementation of the `Replacer` trait. This +/// can be done much more simply using the replacement string interpolation +/// support (e.g., `$first $last`), but this approach avoids needing to parse +/// the replacement string at all. 
+/// +/// ``` +/// use regex::{Captures, Regex, Replacer}; +/// +/// struct NameSwapper; +/// +/// impl Replacer for NameSwapper { +/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { +/// dst.push_str(&caps["first"]); +/// dst.push_str(" "); +/// dst.push_str(&caps["last"]); +/// } +/// } +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); +/// let result = re.replace("Springsteen, Bruce", NameSwapper); +/// assert_eq!(result, "Bruce Springsteen"); +/// ``` +pub trait Replacer { + /// Appends possibly empty data to `dst` to replace the current match. + /// + /// The current match is represented by `caps`, which is guaranteed to + /// have a match at capture group `0`. + /// + /// For example, a no-op replacement would be `dst.push_str(&caps[0])`. + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); + + /// Return a fixed unchanging replacement string. + /// + /// When doing replacements, if access to [`Captures`] is not needed (e.g., + /// the replacement string does not need `$` expansion), then it can be + /// beneficial to avoid finding sub-captures. + /// + /// In general, this is called once for every call to a replacement routine + /// such as [`Regex::replace_all`]. + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { + None + } + + /// Returns a type that implements `Replacer`, but that borrows and wraps + /// this `Replacer`. + /// + /// This is useful when you want to take a generic `Replacer` (which might + /// not be cloneable) and use it without consuming it, so it can be used + /// more than once. 
+ /// + /// # Example + /// + /// ``` + /// use regex::{Regex, Replacer}; + /// + /// fn replace_all_twice<R: Replacer>( + /// re: Regex, + /// src: &str, + /// mut rep: R, + /// ) -> String { + /// let dst = re.replace_all(src, rep.by_ref()); + /// let dst = re.replace_all(&dst, rep.by_ref()); + /// dst.into_owned() + /// } + /// ``` + fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { + ReplacerRef(self) + } +} + +impl<'a> Replacer for &'a str { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a String { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_str().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl Replacer for String { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_str().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for Cow<'a, str> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_ref().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Cow<'a, str> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_ref().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<F, T> Replacer for F +where + F: FnMut(&Captures<'_>) -> T, + T: AsRef<str>, +{ + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + dst.push_str((*self)(caps).as_ref()); + } +} + +/// A by-reference adaptor for a [`Replacer`]. +/// +/// This permits reusing the same `Replacer` value in multiple calls to a +/// replacement routine like [`Regex::replace_all`]. 
+/// +/// This type is created by [`Replacer::by_ref`]. +#[derive(Debug)] +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); + +impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.0.replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + self.0.no_expansion() + } +} + +/// A helper type for forcing literal string replacement. +/// +/// It can be used with routines like [`Regex::replace`] and +/// [`Regex::replace_all`] to do a literal string replacement without expanding +/// `$name` to their corresponding capture groups. This can be both convenient +/// (to avoid escaping `$`, for example) and faster (since capture groups +/// don't need to be found). +/// +/// `'s` is the lifetime of the literal string to use. +/// +/// # Example +/// +/// ``` +/// use regex::{NoExpand, Regex}; +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); +/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); +/// assert_eq!(result, "$2 $last"); +/// ``` +#[derive(Clone, Debug)] +pub struct NoExpand<'s>(pub &'s str); + +impl<'s> Replacer for NoExpand<'s> { + fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { + dst.push_str(self.0); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + Some(Cow::Borrowed(self.0)) + } +} + +/// Quickly checks the given replacement string for whether interpolation +/// should be done on it. It returns `None` if a `$` was found anywhere in the +/// given string, which suggests interpolation needs to be done. But if there's +/// no `$` anywhere, then interpolation definitely does not need to be done. In +/// that case, the given string is returned as a borrowed `Cow`. +/// +/// This is meant to be used to implement the `Replacer::no_expansion` method +/// in its various trait impls.
+fn no_expansion<T: AsRef<str>>(replacement: &T) -> Option<Cow<'_, str>> { + let replacement = replacement.as_ref(); + match crate::find_byte::find_byte(b'$', replacement.as_bytes()) { + Some(_) => None, + None => Some(Cow::Borrowed(replacement)), + } +} diff --git a/third_party/rust/regex/src/regexset/bytes.rs b/third_party/rust/regex/src/regexset/bytes.rs new file mode 100644 index 0000000000..1220a14662 --- /dev/null +++ b/third_party/rust/regex/src/regexset/bytes.rs @@ -0,0 +1,710 @@ +use alloc::string::String; + +use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; + +use crate::{bytes::RegexSetBuilder, Error}; + +/// Match multiple, possibly overlapping, regexes in a single search. +/// +/// A regex set corresponds to the union of zero or more regular expressions. +/// That is, a regex set will match a haystack when at least one of its +/// constituent regexes matches. A regex set as its formulated here provides a +/// touch more power: it will also report *which* regular expressions in the +/// set match. Indeed, this is the key difference between regex sets and a +/// single `Regex` with many alternates, since only one alternate can match at +/// a time. +/// +/// For example, consider regular expressions to match email addresses and +/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a +/// regex set is constructed from those regexes, then searching the haystack +/// `foo@example.com` will report both regexes as matching. Of course, one +/// could accomplish this by compiling each regex on its own and doing two +/// searches over the haystack. The key advantage of using a regex set is +/// that it will report the matching regexes using a *single pass through the +/// haystack*. If one has hundreds or thousands of regexes to match repeatedly +/// (like a URL router for a complex web application or a user agent matcher), +/// then a regex set *can* realize huge performance gains. 
+/// +/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet` +/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this +/// `RegexSet` is permitted to match invalid UTF-8. +/// +/// # Limitations +/// +/// Regex sets are limited to answering the following two questions: +/// +/// 1. Does any regex in the set match? +/// 2. If so, which regexes in the set match? +/// +/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask +/// (1) instead of (2) since the matching engines can stop after the first +/// match is found. +/// +/// You cannot directly extract [`Match`][crate::bytes::Match] or +/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need +/// these operations, the recommended approach is to compile each pattern in +/// the set independently and scan the exact same haystack a second time with +/// those independently compiled patterns: +/// +/// ``` +/// use regex::bytes::{Regex, RegexSet}; +/// +/// let patterns = ["foo", "bar"]; +/// // Both patterns will match different ranges of this string. +/// let hay = b"barfoo"; +/// +/// // Compile a set matching any of our patterns. +/// let set = RegexSet::new(patterns).unwrap(); +/// // Compile each pattern independently. +/// let regexes: Vec<_> = set +/// .patterns() +/// .iter() +/// .map(|pat| Regex::new(pat).unwrap()) +/// .collect(); +/// +/// // Match against the whole set first and identify the individual +/// // matching patterns. +/// let matches: Vec<&[u8]> = set +/// .matches(hay) +/// .into_iter() +/// // Dereference the match index to get the corresponding +/// // compiled pattern. +/// .map(|index| &regexes[index]) +/// // To get match locations or any other info, we then have to search the +/// // exact same haystack again, using our separately-compiled pattern.
+/// .map(|re| re.find(hay).unwrap().as_bytes()) +/// .collect(); +/// +/// // Matches arrive in the order the constituent patterns were declared, +/// // not the order they appear in the haystack. +/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches); +/// ``` +/// +/// # Performance +/// +/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, +/// search takes `O(m * n)` time, where `m` is proportional to the size of the +/// regex set and `n` is proportional to the length of the haystack. +/// +/// # Trait implementations +/// +/// The `Default` trait is implemented for `RegexSet`. The default value +/// is an empty set. An empty set can also be explicitly constructed via +/// [`RegexSet::empty`]. +/// +/// # Example +/// +/// This shows how the above two regexes (for matching email addresses and +/// domains) might work: +/// +/// ``` +/// use regex::bytes::RegexSet; +/// +/// let set = RegexSet::new(&[ +/// r"[a-z]+@[a-z]+\.(com|org|net)", +/// r"[a-z]+\.(com|org|net)", +/// ]).unwrap(); +/// +/// // Ask whether any regexes in the set match. +/// assert!(set.is_match(b"foo@example.com")); +/// +/// // Identify which regexes in the set match. +/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect(); +/// assert_eq!(vec![0, 1], matches); +/// +/// // Try again, but with a haystack that only matches one of the regexes. +/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); +/// assert_eq!(vec![1], matches); +/// +/// // Try again, but with a haystack that doesn't match any regex in the set.
+/// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); +/// assert!(matches.is_empty()); +/// ``` +/// +/// Note that it would be possible to adapt the above example to using `Regex` +/// with an expression like: +/// +/// ```text +/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) +/// ``` +/// +/// After a match, one could then inspect the capture groups to figure out +/// which alternates matched. The problem is that it is hard to make this +/// approach scale when there are many regexes since the overlap between each +/// alternate isn't always obvious to reason about. +#[derive(Clone)] +pub struct RegexSet { + pub(crate) meta: meta::Regex, + pub(crate) patterns: alloc::sync::Arc<[String]>, +} + +impl RegexSet { + /// Create a new regex set with the given regular expressions. + /// + /// This takes an iterator of `S`, where `S` is something that can produce + /// a `&str`. If any of the strings in the iterator are not valid regular + /// expressions, then an error is returned. + /// + /// # Example + /// + /// Create a new regex set from an iterator of strings: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match(b"foo")); + /// ``` + pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> + where + S: AsRef<str>, + I: IntoIterator<Item = S>, + { + RegexSetBuilder::new(exprs).build() + } + + /// Create a new empty regex set. + /// + /// An empty regex set never matches anything. + /// + /// This is a convenience function for `RegexSet::new([])`, but doesn't + /// require one to specify the type of the input.
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::empty(); + /// assert!(set.is_empty()); + /// // an empty set matches nothing + /// assert!(!set.is_match(b"")); + /// ``` + pub fn empty() -> RegexSet { + let empty: [&str; 0] = []; + RegexSetBuilder::new(empty).build().unwrap() + } + + /// Returns true if and only if one of the regexes in this set matches + /// the haystack given. + /// + /// This method should be preferred if you only need to test whether any + /// of the regexes in the set should match, but don't care about *which* + /// regexes matched. This is because the underlying matching engine will + /// quit immediately after seeing the first match instead of continuing to + /// find all matches. + /// + /// Note that as with searches using [`Regex`](crate::bytes::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. + /// + /// # Example + /// + /// Tests whether a set matches somewhere in a haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match(b"foo")); + /// assert!(!set.is_match("☃".as_bytes())); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &[u8]) -> bool { + self.is_match_at(haystack, 0) + } + + /// Returns true if and only if one of the regexes in this set matches the + /// haystack given, with the search starting at the offset given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start`. 
Namely, consider a + /// haystack `foobar` and a desire to execute a search starting at offset + /// `3`. You could search a substring explicitly, but then the look-around + /// assertions won't work correctly. Instead, you can use this method to + /// specify the start position of a search. + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = b"foobar"; + /// // We get a match here, but it's probably not intended. + /// assert!(set.is_match(&hay[3..])); + /// // No match because the assertions take the context into account. + /// assert!(!set.is_match_at(hay, 3)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// Note that as with searches using [`Regex`](crate::bytes::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. 
+ /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches(b"foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + #[inline] + pub fn matches(&self, haystack: &[u8]) -> SetMatches { + self.matches_at(haystack, 0) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = b"foobar"; + /// // We get matches here, but it's probably not intended. + /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); + /// assert_eq!(matches, vec![0, 1]); + /// // No matches because the assertions take the context into account. 
+ /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); + /// assert_eq!(matches, vec![]); + /// ``` + #[inline] + pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches { + let input = Input::new(haystack).span(start..haystack.len()); + let mut patset = PatternSet::new(self.meta.pattern_len()); + self.meta.which_overlapping_matches(&input, &mut patset); + SetMatches(patset) + } + + /// Returns the same as matches, but starts the search at the given + /// offset and stores the matches into the slice given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// `matches` must have a length that is at least the number of regexes + /// in this set. + /// + /// This method returns true if and only if at least one member of + /// `matches` is true after executing the set against `haystack`. + #[doc(hidden)] + #[inline] + pub fn matches_read_at( + &self, + matches: &mut [bool], + haystack: &[u8], + start: usize, + ) -> bool { + // This is pretty dumb. We should try to fix this, but the + // regex-automata API doesn't provide a way to store matches in an + // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and + // thus not public... But regex-capi currently uses it. We should + // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet + // is in regex-automata, not regex. So maybe we should just accept a + // 'SetMatches', which is basically just a newtype around PatternSet. + let mut patset = PatternSet::new(self.meta.pattern_len()); + let mut input = Input::new(haystack); + input.set_start(start); + self.meta.which_overlapping_matches(&input, &mut patset); + for pid in patset.iter() { + matches[pid] = true; + } + !patset.is_empty() + } + + /// An alias for `matches_read_at` to preserve backward compatibility. 
+ /// + /// The `regex-capi` crate used this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn read_matches_at( + &self, + matches: &mut [bool], + haystack: &[u8], + start: usize, + ) -> bool { + self.matches_read_at(matches, haystack, start) + } + + /// Returns the total number of regexes in this set. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// assert_eq!(0, RegexSet::empty().len()); + /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); + /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.meta.pattern_len() + } + + /// Returns `true` if this set contains no regexes. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// assert!(RegexSet::empty().is_empty()); + /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.meta.pattern_len() == 0 + } + + /// Returns the regex patterns that this regex set was constructed from. + /// + /// This function can be used to determine the pattern for a match. The + /// slice returned has exactly as many patterns givens to this regex set, + /// and the order of the slice is the same as the order of the patterns + /// provided to the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set + /// .matches(b"foobar") + /// .into_iter() + /// .map(|index| &set.patterns()[index]) + /// .collect(); + /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); + /// ``` + #[inline] + pub fn patterns(&self) -> &[String] { + &self.patterns + } +} + +impl Default for RegexSet { + fn default() -> Self { + RegexSet::empty() + } +} + +/// A set of matches returned by a regex set. +/// +/// Values of this type are constructed by [`RegexSet::matches`]. +#[derive(Clone, Debug)] +pub struct SetMatches(PatternSet); + +impl SetMatches { + /// Whether this set contains any matches. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches(b"foo@example.com"); + /// assert!(matches.matched_any()); + /// ``` + #[inline] + pub fn matched_any(&self) -> bool { + !self.0.is_empty() + } + + /// Whether the regex at the given index matched. + /// + /// The index for a regex is determined by its insertion order upon the + /// initial construction of a `RegexSet`, starting at `0`. + /// + /// # Panics + /// + /// If `index` is greater than or equal to the number of regexes in the + /// original set that produced these matches. Equivalently, when `index` + /// is greater than or equal to [`SetMatches::len`]. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches(b"example.com"); + /// assert!(!matches.matched(0)); + /// assert!(matches.matched(1)); + /// ``` + #[inline] + pub fn matched(&self, index: usize) -> bool { + self.0.contains(PatternID::new_unchecked(index)) + } + + /// The total number of regexes in the set that created these matches. + /// + /// **WARNING:** This always returns the same value as [`RegexSet::len`]. + /// In particular, it does *not* return the number of elements yielded by + /// [`SetMatches::iter`]. The only way to determine the total number of + /// matched regexes is to iterate over them. + /// + /// # Example + /// + /// Notice that this method returns the total number of regexes in the + /// original set, and *not* the total number of regexes that matched. + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches(b"example.com"); + /// // Total number of patterns that matched. + /// assert_eq!(1, matches.iter().count()); + /// // Total number of patterns in the set. + /// assert_eq!(2, matches.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.0.capacity() + } + + /// Returns an iterator over the indices of the regexes that matched. + /// + /// This will always produces matches in ascending order, where the index + /// yielded corresponds to the index of the regex that matched with respect + /// to its position when initially building the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1".as_bytes(); + /// let matches: Vec<_> = set.matches(hay).iter().collect(); + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + /// + /// Note that `SetMatches` also implements the `IntoIterator` trait, so + /// this method is not always needed. For example: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1".as_bytes(); + /// let mut matches = vec![]; + /// for index in set.matches(hay) { + /// matches.push(index); + /// } + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + #[inline] + pub fn iter(&self) -> SetMatchesIter<'_> { + SetMatchesIter(self.0.iter()) + } +} + +impl IntoIterator for SetMatches { + type IntoIter = SetMatchesIntoIter; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + let it = 0..self.0.capacity(); + SetMatchesIntoIter { patset: self.0, it } + } +} + +impl<'a> IntoIterator for &'a SetMatches { + type IntoIter = SetMatchesIter<'a>; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An owned iterator over the set of matches from a regex set. +/// +/// This will always produce matches in ascending order of index, where the +/// index corresponds to the index of the regex that matched with respect to +/// its position when initially building the set. +/// +/// This iterator is created by calling `SetMatches::into_iter` via the +/// `IntoIterator` trait. This is automatically done in `for` loops. 
+/// +/// # Example +/// +/// ``` +/// use regex::bytes::RegexSet; +/// +/// let set = RegexSet::new([ +/// r"[0-9]", +/// r"[a-z]", +/// r"[A-Z]", +/// r"\p{Greek}", +/// ]).unwrap(); +/// let hay = "βa1".as_bytes(); +/// let mut matches = vec![]; +/// for index in set.matches(hay) { +/// matches.push(index); +/// } +/// assert_eq!(matches, vec![0, 1, 3]); +/// ``` +#[derive(Debug)] +pub struct SetMatchesIntoIter { + // The set of pattern IDs that matched. + patset: PatternSet, + // Candidate indices not yet examined from either end. + it: core::ops::Range<usize>, +} + +impl Iterator for SetMatchesIntoIter { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + loop { + let id = self.it.next()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // Candidate indices that are not in `patset` are skipped by `next`, + // so the number of remaining candidates is only an upper bound. + // Reporting it as the lower bound too would violate the + // `Iterator::size_hint` contract whenever some pattern did not match. + (0, self.it.size_hint().1) + } +} + +impl DoubleEndedIterator for SetMatchesIntoIter { + fn next_back(&mut self) -> Option<usize> { + loop { + let id = self.it.next_back()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } +} + +impl core::iter::FusedIterator for SetMatchesIntoIter {} + +/// A borrowed iterator over the set of matches from a regex set. +/// +/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that +/// created this iterator. +/// +/// This will always produce matches in ascending order, where the index +/// corresponds to the index of the regex that matched with respect to its +/// position when initially building the set. +/// +/// This iterator is created by the [`SetMatches::iter`] method. 
+#[derive(Clone, Debug)] +pub struct SetMatchesIter<'a>(PatternSetIter<'a>); + +impl<'a> Iterator for SetMatchesIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + self.0.next().map(|pid| pid.as_usize()) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } +} + +impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { + fn next_back(&mut self) -> Option<usize> { + self.0.next_back().map(|pid| pid.as_usize()) + } +} + +impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} + +impl core::fmt::Debug for RegexSet { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "RegexSet({:?})", self.patterns()) + } +} diff --git a/third_party/rust/regex/src/regexset/mod.rs b/third_party/rust/regex/src/regexset/mod.rs new file mode 100644 index 0000000000..93fadec8bf --- /dev/null +++ b/third_party/rust/regex/src/regexset/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod bytes; +pub(crate) mod string; diff --git a/third_party/rust/regex/src/regexset/string.rs b/third_party/rust/regex/src/regexset/string.rs new file mode 100644 index 0000000000..2a3e7b8027 --- /dev/null +++ b/third_party/rust/regex/src/regexset/string.rs @@ -0,0 +1,706 @@ +use alloc::string::String; + +use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; + +use crate::{Error, RegexSetBuilder}; + +/// Match multiple, possibly overlapping, regexes in a single search. +/// +/// A regex set corresponds to the union of zero or more regular expressions. +/// That is, a regex set will match a haystack when at least one of its +/// constituent regexes matches. A regex set as its formulated here provides a +/// touch more power: it will also report *which* regular expressions in the +/// set match. Indeed, this is the key difference between regex sets and a +/// single `Regex` with many alternates, since only one alternate can match at +/// a time. 
+/// +/// For example, consider regular expressions to match email addresses and +/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a +/// regex set is constructed from those regexes, then searching the haystack +/// `foo@example.com` will report both regexes as matching. Of course, one +/// could accomplish this by compiling each regex on its own and doing two +/// searches over the haystack. The key advantage of using a regex set is +/// that it will report the matching regexes using a *single pass through the +/// haystack*. If one has hundreds or thousands of regexes to match repeatedly +/// (like a URL router for a complex web application or a user agent matcher), +/// then a regex set *can* realize huge performance gains. +/// +/// # Limitations +/// +/// Regex sets are limited to answering the following two questions: +/// +/// 1. Does any regex in the set match? +/// 2. If so, which regexes in the set match? +/// +/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1) +/// instead of (2) since the matching engines can stop after the first match +/// is found. +/// +/// You cannot directly extract [`Match`][crate::Match] or +/// [`Captures`][crate::Captures] objects from a regex set. If you need these +/// operations, the recommended approach is to compile each pattern in the set +/// independently and scan the exact same haystack a second time with those +/// independently compiled patterns: +/// +/// ``` +/// use regex::{Regex, RegexSet}; +/// +/// let patterns = ["foo", "bar"]; +/// // Both patterns will match different ranges of this string. +/// let hay = "barfoo"; +/// +/// // Compile a set matching any of our patterns. +/// let set = RegexSet::new(patterns).unwrap(); +/// // Compile each pattern independently. 
+/// let regexes: Vec<_> = set +/// .patterns() +/// .iter() +/// .map(|pat| Regex::new(pat).unwrap()) +/// .collect(); +/// +/// // Match against the whole set first and identify the individual +/// // matching patterns. +/// let matches: Vec<&str> = set +/// .matches(hay) +/// .into_iter() +/// // Dereference the match index to get the corresponding +/// // compiled pattern. +/// .map(|index| &regexes[index]) +/// // To get match locations or any other info, we then have to search the +/// // exact same haystack again, using our separately-compiled pattern. +/// .map(|re| re.find(hay).unwrap().as_str()) +/// .collect(); +/// +/// // Matches arrive in the order the constituent patterns were declared, +/// // not the order they appear in the haystack. +/// assert_eq!(vec!["foo", "bar"], matches); +/// ``` +/// +/// # Performance +/// +/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, +/// search takes `O(m * n)` time, where `m` is proportional to the size of the +/// regex set and `n` is proportional to the length of the haystack. +/// +/// # Trait implementations +/// +/// The `Default` trait is implemented for `RegexSet`. The default value +/// is an empty set. An empty set can also be explicitly constructed via +/// [`RegexSet::empty`]. +/// +/// # Example +/// +/// This shows how the above two regexes (for matching email addresses and +/// domains) might work: +/// +/// ``` +/// use regex::RegexSet; +/// +/// let set = RegexSet::new(&[ +/// r"[a-z]+@[a-z]+\.(com|org|net)", +/// r"[a-z]+\.(com|org|net)", +/// ]).unwrap(); +/// +/// // Ask whether any regexes in the set match. +/// assert!(set.is_match("foo@example.com")); +/// +/// // Identify which regexes in the set match. +/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect(); +/// assert_eq!(vec![0, 1], matches); +/// +/// // Try again, but with a haystack that only matches one of the regexes. 
+/// let matches: Vec<_> = set.matches("example.com").into_iter().collect(); +/// assert_eq!(vec![1], matches); +/// +/// // Try again, but with a haystack that doesn't match any regex in the set. +/// let matches: Vec<_> = set.matches("example").into_iter().collect(); +/// assert!(matches.is_empty()); +/// ``` +/// +/// Note that it would be possible to adapt the above example to using `Regex` +/// with an expression like: +/// +/// ```text +/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) +/// ``` +/// +/// After a match, one could then inspect the capture groups to figure out +/// which alternates matched. The problem is that it is hard to make this +/// approach scale when there are many regexes since the overlap between each +/// alternate isn't always obvious to reason about. +#[derive(Clone)] +pub struct RegexSet { + pub(crate) meta: meta::Regex, + pub(crate) patterns: alloc::sync::Arc<[String]>, +} + +impl RegexSet { + /// Create a new regex set with the given regular expressions. + /// + /// This takes an iterator of `S`, where `S` is something that can produce + /// a `&str`. If any of the strings in the iterator are not valid regular + /// expressions, then an error is returned. + /// + /// # Example + /// + /// Create a new regex set from an iterator of strings: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match("foo")); + /// ``` + pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> + where + S: AsRef<str>, + I: IntoIterator<Item = S>, + { + RegexSetBuilder::new(exprs).build() + } + + /// Create a new empty regex set. + /// + /// An empty regex never matches anything. + /// + /// This is a convenience function for `RegexSet::new([])`, but doesn't + /// require one to specify the type of the input. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::empty(); + /// assert!(set.is_empty()); + /// // an empty set matches nothing + /// assert!(!set.is_match("")); + /// ``` + pub fn empty() -> RegexSet { + let empty: [&str; 0] = []; + RegexSetBuilder::new(empty).build().unwrap() + } + + /// Returns true if and only if one of the regexes in this set matches + /// the haystack given. + /// + /// This method should be preferred if you only need to test whether any + /// of the regexes in the set should match, but don't care about *which* + /// regexes matched. This is because the underlying matching engine will + /// quit immediately after seeing the first match instead of continuing to + /// find all matches. + /// + /// Note that as with searches using [`Regex`](crate::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. + /// + /// # Example + /// + /// Tests whether a set matches somewhere in a haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match("foo")); + /// assert!(!set.is_match("☃")); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &str) -> bool { + self.is_match_at(haystack, 0) + } + + /// Returns true if and only if one of the regexes in this set matches the + /// haystack given, with the search starting at the offset given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start`. 
Namely, consider a + /// haystack `foobar` and a desire to execute a search starting at offset + /// `3`. You could search a substring explicitly, but then the look-around + /// assertions won't work correctly. Instead, you can use this method to + /// specify the start position of a search. + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = "foobar"; + /// // We get a match here, but it's probably not intended. + /// assert!(set.is_match(&hay[3..])); + /// // No match because the assertions take the context into account. + /// assert!(!set.is_match_at(hay, 3)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &str, start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// Note that as with searches using [`Regex`](crate::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. 
+ /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches("foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + #[inline] + pub fn matches(&self, haystack: &str) -> SetMatches { + self.matches_at(haystack, 0) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = "foobar"; + /// // We get matches here, but it's probably not intended. + /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); + /// assert_eq!(matches, vec![0, 1]); + /// // No matches because the assertions take the context into account. 
+ /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); + /// assert_eq!(matches, vec![]); + /// ``` + #[inline] + pub fn matches_at(&self, haystack: &str, start: usize) -> SetMatches { + let input = Input::new(haystack).span(start..haystack.len()); + let mut patset = PatternSet::new(self.meta.pattern_len()); + self.meta.which_overlapping_matches(&input, &mut patset); + SetMatches(patset) + } + + /// Returns the same as matches, but starts the search at the given + /// offset and stores the matches into the slice given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// `matches` must have a length that is at least the number of regexes + /// in this set. + /// + /// This method returns true if and only if at least one member of + /// `matches` is true after executing the set against `haystack`. + #[doc(hidden)] + #[inline] + pub fn matches_read_at( + &self, + matches: &mut [bool], + haystack: &str, + start: usize, + ) -> bool { + // This is pretty dumb. We should try to fix this, but the + // regex-automata API doesn't provide a way to store matches in an + // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and + // thus not public... But regex-capi currently uses it. We should + // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet + // is in regex-automata, not regex. So maybe we should just accept a + // 'SetMatches', which is basically just a newtype around PatternSet. + let mut patset = PatternSet::new(self.meta.pattern_len()); + let mut input = Input::new(haystack); + input.set_start(start); + self.meta.which_overlapping_matches(&input, &mut patset); + for pid in patset.iter() { + matches[pid] = true; + } + !patset.is_empty() + } + + /// An alias for `matches_read_at` to preserve backward compatibility. 
+ /// + /// The `regex-capi` crate used this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn read_matches_at( + &self, + matches: &mut [bool], + haystack: &str, + start: usize, + ) -> bool { + self.matches_read_at(matches, haystack, start) + } + + /// Returns the total number of regexes in this set. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// assert_eq!(0, RegexSet::empty().len()); + /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); + /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.meta.pattern_len() + } + + /// Returns `true` if this set contains no regexes. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// assert!(RegexSet::empty().is_empty()); + /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.meta.pattern_len() == 0 + } + + /// Returns the regex patterns that this regex set was constructed from. + /// + /// This function can be used to determine the pattern for a match. The + /// slice returned has exactly as many patterns givens to this regex set, + /// and the order of the slice is the same as the order of the patterns + /// provided to the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set + /// .matches("foobar") + /// .into_iter() + /// .map(|index| &set.patterns()[index]) + /// .collect(); + /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); + /// ``` + #[inline] + pub fn patterns(&self) -> &[String] { + &self.patterns + } +} + +impl Default for RegexSet { + fn default() -> Self { + RegexSet::empty() + } +} + +/// A set of matches returned by a regex set. +/// +/// Values of this type are constructed by [`RegexSet::matches`]. +#[derive(Clone, Debug)] +pub struct SetMatches(PatternSet); + +impl SetMatches { + /// Whether this set contains any matches. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches("foo@example.com"); + /// assert!(matches.matched_any()); + /// ``` + #[inline] + pub fn matched_any(&self) -> bool { + !self.0.is_empty() + } + + /// Whether the regex at the given index matched. + /// + /// The index for a regex is determined by its insertion order upon the + /// initial construction of a `RegexSet`, starting at `0`. + /// + /// # Panics + /// + /// If `index` is greater than or equal to the number of regexes in the + /// original set that produced these matches. Equivalently, when `index` + /// is greater than or equal to [`SetMatches::len`]. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches("example.com"); + /// assert!(!matches.matched(0)); + /// assert!(matches.matched(1)); + /// ``` + #[inline] + pub fn matched(&self, index: usize) -> bool { + self.0.contains(PatternID::new_unchecked(index)) + } + + /// The total number of regexes in the set that created these matches. + /// + /// **WARNING:** This always returns the same value as [`RegexSet::len`]. + /// In particular, it does *not* return the number of elements yielded by + /// [`SetMatches::iter`]. The only way to determine the total number of + /// matched regexes is to iterate over them. + /// + /// # Example + /// + /// Notice that this method returns the total number of regexes in the + /// original set, and *not* the total number of regexes that matched. + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches("example.com"); + /// // Total number of patterns that matched. + /// assert_eq!(1, matches.iter().count()); + /// // Total number of patterns in the set. + /// assert_eq!(2, matches.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.0.capacity() + } + + /// Returns an iterator over the indices of the regexes that matched. + /// + /// This will always produces matches in ascending order, where the index + /// yielded corresponds to the index of the regex that matched with respect + /// to its position when initially building the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1"; + /// let matches: Vec<_> = set.matches(hay).iter().collect(); + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + /// + /// Note that `SetMatches` also implements the `IntoIterator` trait, so + /// this method is not always needed. For example: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1"; + /// let mut matches = vec![]; + /// for index in set.matches(hay) { + /// matches.push(index); + /// } + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + #[inline] + pub fn iter(&self) -> SetMatchesIter<'_> { + SetMatchesIter(self.0.iter()) + } +} + +impl IntoIterator for SetMatches { + type IntoIter = SetMatchesIntoIter; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + let it = 0..self.0.capacity(); + SetMatchesIntoIter { patset: self.0, it } + } +} + +impl<'a> IntoIterator for &'a SetMatches { + type IntoIter = SetMatchesIter<'a>; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An owned iterator over the set of matches from a regex set. +/// +/// This will always produce matches in ascending order of index, where the +/// index corresponds to the index of the regex that matched with respect to +/// its position when initially building the set. +/// +/// This iterator is created by calling `SetMatches::into_iter` via the +/// `IntoIterator` trait. This is automatically done in `for` loops. 
+/// +/// # Example +/// +/// ``` +/// use regex::RegexSet; +/// +/// let set = RegexSet::new([ +/// r"[0-9]", +/// r"[a-z]", +/// r"[A-Z]", +/// r"\p{Greek}", +/// ]).unwrap(); +/// let hay = "βa1"; +/// let mut matches = vec![]; +/// for index in set.matches(hay) { +/// matches.push(index); +/// } +/// assert_eq!(matches, vec![0, 1, 3]); +/// ``` +#[derive(Debug)] +pub struct SetMatchesIntoIter { + // The set of pattern IDs that matched. + patset: PatternSet, + // Candidate indices not yet examined from either end. + it: core::ops::Range<usize>, +} + +impl Iterator for SetMatchesIntoIter { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + loop { + let id = self.it.next()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + // Candidate indices that are not in `patset` are skipped by `next`, + // so the number of remaining candidates is only an upper bound. + // Reporting it as the lower bound too would violate the + // `Iterator::size_hint` contract whenever some pattern did not match. + (0, self.it.size_hint().1) + } +} + +impl DoubleEndedIterator for SetMatchesIntoIter { + fn next_back(&mut self) -> Option<usize> { + loop { + let id = self.it.next_back()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } +} + +impl core::iter::FusedIterator for SetMatchesIntoIter {} + +/// A borrowed iterator over the set of matches from a regex set. +/// +/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that +/// created this iterator. +/// +/// This will always produce matches in ascending order, where the index +/// corresponds to the index of the regex that matched with respect to its +/// position when initially building the set. +/// +/// This iterator is created by the [`SetMatches::iter`] method. 
+#[derive(Clone, Debug)] +pub struct SetMatchesIter<'a>(PatternSetIter<'a>); + +impl<'a> Iterator for SetMatchesIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + self.0.next().map(|pid| pid.as_usize()) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } +} + +impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { + fn next_back(&mut self) -> Option<usize> { + self.0.next_back().map(|pid| pid.as_usize()) + } +} + +impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} + +impl core::fmt::Debug for RegexSet { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "RegexSet({:?})", self.patterns()) + } +} diff --git a/third_party/rust/regex/test b/third_party/rust/regex/test new file mode 100755 index 0000000000..48224c6d11 --- /dev/null +++ b/third_party/rust/regex/test @@ -0,0 +1,46 @@ +#!/bin/bash + +set -e + +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass --manifest-path to every `cargo` command. +cd "$(dirname "$0")" + +# This is a convenience script for running a broad swath of tests across +# features. We don't test the complete space, since the complete space is quite +# large. Hopefully once we migrate the test suite to better infrastructure +# (like regex-automata), we'll be able to test more of the space. +echo "===== DEFAULT FEATURES =====" +cargo test + +# no-std mode is annoyingly difficult to test. Currently, the integration tests +# don't run. So for now, we just test that library tests run. (There aren't +# many because `regex` is just a wrapper crate.) 
+cargo test --no-default-features --lib + +echo "===== DOC TESTS =====" +cargo test --doc + +features=( + "std" + "std unicode" + "std unicode-perl" + "std perf" + "std perf-cache" + "std perf-dfa" + "std perf-inline" + "std perf-literal" + "std perf-dfa-full" + "std perf-onepass" + "std perf-backtrack" +) +for f in "${features[@]}"; do + echo "===== FEATURE: $f =====" + cargo test --test integration --no-default-features --features "$f" +done + +# And test the probably-forever-nightly-only 'pattern' feature... +if rustc --version | grep -q nightly; then + echo "===== FEATURE: std,pattern,unicode-perl =====" + cargo test --test integration --no-default-features --features std,pattern,unicode-perl +fi diff --git a/third_party/rust/regex/testdata/README.md b/third_party/rust/regex/testdata/README.md new file mode 100644 index 0000000000..c3bc1acb5d --- /dev/null +++ b/third_party/rust/regex/testdata/README.md @@ -0,0 +1,22 @@ +This directory contains a large suite of regex tests defined in a TOML format. +They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs` +and `regex-lite/tests/lib.rs`. + +See the [`regex-test`][regex-test] crate documentation for an explanation of +the format and how it generates tests. + +The basic idea here is that we have many different regex engines but generally +one set of tests. We want to be able to run those tests (or most of them) on +every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup +of macros and a different test executable for each engine. It overall took a +longer time to compile, was harder to maintain and it made the test definitions +themselves less clear. + +In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot +worse because of an increase in the number of engines. So I devised an engine +independent format for testing regex patterns and their semantics. + +Note: the naming scheme used in these tests isn't terribly consistent. 
It would +be great to fix that. + +[regex-test]: https://docs.rs/regex-test diff --git a/third_party/rust/regex/testdata/anchored.toml b/third_party/rust/regex/testdata/anchored.toml new file mode 100644 index 0000000000..0f2248d098 --- /dev/null +++ b/third_party/rust/regex/testdata/anchored.toml @@ -0,0 +1,127 @@ +# These tests are specifically geared toward searches with 'anchored = true'. +# While they are interesting in their own right, they are particularly +# important for testing the one-pass DFA since the one-pass DFA can't work in +# unanchored contexts. +# +# Note that "anchored" in this context does not mean "^". Anchored searches are +# searches whose matches must begin at the start of the search, which may not +# be at the start of the haystack. That's why anchored searches---and there are +# some examples below---can still report multiple matches. This occurs when the +# matches are adjacent to one another. + +[[test]] +name = "greedy" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true + +# When a "earliest" search is used, greediness doesn't really exist because +# matches are reported as soon as they are known. +[[test]] +name = "greedy-earliest" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true +search-kind = "earliest" + +[[test]] +name = "nongreedy" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true + +# When "all" semantics are used, non-greediness doesn't exist since the longest +# possible match is always taken. +[[test]] +name = "nongreedy-all" +regex = '(abc)+?' 
+haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true +match-kind = "all" + +[[test]] +name = "word-boundary-unicode-01" +regex = '\b\w+\b' +haystack = 'βββ☃' +matches = [[0, 6]] +anchored = true + +[[test]] +name = "word-boundary-nounicode-01" +regex = '\b\w+\b' +haystack = 'abcβ' +matches = [[0, 3]] +anchored = true +unicode = false + +# Tests that '.c' doesn't match 'abc' when performing an anchored search from +# the beginning of the haystack. This test found two different bugs in the +# PikeVM and the meta engine. +[[test]] +name = "no-match-at-start" +regex = '.c' +haystack = 'abc' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-bounds" +regex = '.c' +haystack = 'aabc' +bounds = [1, 4] +matches = [] +anchored = true + +# This is like no-match-at-start, but hits the "reverse inner" optimization +# inside the meta engine. (no-match-at-start hits the "reverse suffix" +# optimization.) +[[test]] +name = "no-match-at-start-reverse-inner" +regex = '.c[a-z]' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-inner-bounds" +regex = '.c[a-z]' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true + +# Same as no-match-at-start, but applies to the meta engine's "reverse +# anchored" optimization. +[[test]] +name = "no-match-at-start-reverse-anchored" +regex = '.c[a-z]$' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. 
+[[test]] +name = "no-match-at-start-reverse-anchored-bounds" +regex = '.c[a-z]$' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true diff --git a/third_party/rust/regex/testdata/bytes.toml b/third_party/rust/regex/testdata/bytes.toml new file mode 100644 index 0000000000..346e36971d --- /dev/null +++ b/third_party/rust/regex/testdata/bytes.toml @@ -0,0 +1,235 @@ +# These are tests specifically crafted for regexes that can match arbitrary +# bytes. In some cases, we also test the Unicode variant as well, just because +# it's good sense to do so. But also, these tests aren't really about Unicode, +# but whether matches are only reported at valid UTF-8 boundaries. For most +# tests in this entire collection, utf8 = true. But for these tests, we use +# utf8 = false. + +[[test]] +name = "word-boundary-ascii" +regex = ' \b' +haystack = " δ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-unicode" +regex = ' \b' +haystack = " δ" +matches = [[0, 1]] +unicode = true +utf8 = false + +[[test]] +name = "word-boundary-ascii-not" +regex = ' \B' +haystack = " δ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-unicode-not" +regex = ' \B' +haystack = " δ" +matches = [] +unicode = true +utf8 = false + +[[test]] +name = "perl-word-ascii" +regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "perl-word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] +unicode = true +utf8 = false + +[[test]] +name = "perl-decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false +utf8 = false + +[[test]] +name = "perl-decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] +unicode = true +utf8 = false + +[[test]] +name = "perl-whitespace-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "perl-whitespace-unicode" +regex = '\s+' +haystack = " 
\u1680" +matches = [[0, 4]] +unicode = true +utf8 = false + +# The first `(.+)` matches two Unicode codepoints, but can't match the 5th +# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and +# matches. +[[test]] +name = "mixed-dot" +regex = '(.+)(?-u)(.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [ + [[0, 5], [0, 4], [4, 5]], +] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "case-one-ascii" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-one-unicode" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "case-class-simple-ascii" +regex = '[a-z]+' +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-ascii" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-unicode" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "negate-ascii" +regex = '[^a]' +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "negate-unicode" +regex = '[^a]' +haystack = "δ" +matches = [[0, 2]] +unicode = true +utf8 = false + +# When utf8=true, this won't match, because the implicit '.*?' prefix is +# Unicode aware and will refuse to match through invalid UTF-8 bytes. 
+[[test]] +name = "dotstar-prefix-ascii" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "dotstar-prefix-unicode" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "null-bytes" +regex = '(?P<cstr>[^\x00]+)\x00' +haystack = 'foo\x00' +matches = [ + [[0, 4], [0, 3]], +] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-100" +regex = '\xCC?^' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-200" +regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[22, 22]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-300" +regex = '^|ddp\xff\xffdddddlQd@\x80' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-100" +regex = '\Bx\B' +haystack = "áxβ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-200" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false diff --git a/third_party/rust/regex/testdata/crazy.toml b/third_party/rust/regex/testdata/crazy.toml new file mode 100644 index 0000000000..aed46ea157 --- /dev/null +++ b/third_party/rust/regex/testdata/crazy.toml @@ -0,0 +1,315 @@ +[[test]] +name = "nothing-empty" +regex = [] +haystack = "" +matches = [] + +[[test]] +name = "nothing-something" +regex = [] +haystack = "wat" +matches = [] + +[[test]] +name = "ranges" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' 
+haystack = "num: 255" +matches = [[5, 8]] + +[[test]] +name = "ranges-not" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 256" +matches = [] + +[[test]] +name = "float1" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1" +matches = [[0, 3]] + +[[test]] +name = "float2" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1.2" +matches = [[0, 3]] +match-limit = 1 + +[[test]] +name = "float3" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "a1.2" +matches = [[1, 4]] + +[[test]] +name = "float4" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "1.a" +matches = [[0, 1]] + +[[test]] +name = "float5" +regex = '^[-+]?[0-9]*\.?[0-9]+$' +haystack = "1.a" +matches = [] + +[[test]] +name = "email" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "email-not" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail " +matches = [] + +[[test]] +name = "email-big" +regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "date1" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-01-01" +matches = [[0, 10]] +unicode = false + +[[test]] +name = "date2" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-00-01" +matches = [] +unicode = false + +[[test]] +name = "date3" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-13-01" +matches = [] +unicode = false + +[[test]] +name = "start-end-empty" +regex = '^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rev" +regex = '$^' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-1" +regex = '^$^$^$' +haystack = "" 
+matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-2" +regex = '^^^$$$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rep" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "start-end-empty-rep-rev" +regex = '(?:$^)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "neg-class-letter" +regex = '[^ac]' +haystack = "acx" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-comma" +regex = '[^a,]' +haystack = "a,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-space" +regex = '[^a[:space:]]' +haystack = "a x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma" +regex = '[^,]' +haystack = ",,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-space" +regex = '[^[:space:]]' +haystack = " a" +matches = [[1, 2]] + +[[test]] +name = "neg-class-space-comma" +regex = '[^,[:space:]]' +haystack = ", a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma-space" +regex = '[^[:space:],]' +haystack = " ,a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-ascii" +regex = '[^[:alpha:]Z]' +haystack = "A1" +matches = [[1, 2]] + +[[test]] +name = "lazy-many-many" +regex = '(?:(?:.*)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-many-optional" +regex = '(?:(?:.?)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-many" +regex = '(?:(?:.*)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-optional" +regex = '(?:(?:.?)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-min-many" +regex = '(?:(?:.*){1,}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-many" +regex = '(?:(?:.*){1,2}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-many" +regex = '(?:(?:.*)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-optional" +regex = 
'(?:(?:.?)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-many" +regex = '(?:(?:.*)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-optional" +regex = '(?:(?:.?)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-min-many" +regex = '(?:(?:.*){1,})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-many" +regex = '(?:(?:.*){1,2})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "empty1" +regex = '' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] diff --git a/third_party/rust/regex/testdata/crlf.toml b/third_party/rust/regex/testdata/crlf.toml new file mode 100644 index 0000000000..9e2d3761af --- /dev/null +++ b/third_party/rust/regex/testdata/crlf.toml @@ -0,0 +1,117 @@ +# This is a basic test that checks ^ and $ treat \r\n as a single line +# terminator. 
If ^ and $ only treated \n as a line terminator, then this would +# only match 'xyz' at the end of the haystack. +[[test]] +name = "basic" +regex = '(?mR)^[a-z]+$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 3], [5, 8], [10, 13]] + +# Tests that a CRLF-aware '^$' assertion does not match between CR and LF. +[[test]] +name = "start-end-non-empty" +regex = '(?mR)^$' +haystack = "abc\r\ndef\r\nxyz" +matches = [] + +# Tests that a CRLF-aware '^$' assertion matches the empty string, just like +# a non-CRLF-aware '^$' assertion. +[[test]] +name = "start-end-empty" +regex = '(?mR)^$' +haystack = "" +matches = [[0, 0]] + +# Tests that a CRLF-aware '^$' assertion matches the empty string preceding +# and following a line terminator. +[[test]] +name = "start-end-before-after" +regex = '(?mR)^$' +haystack = "\r\n" +matches = [[0, 0], [2, 2]] + +# Tests that a CRLF-aware '^' assertion does not split a line terminator. +[[test]] +name = "start-no-split" +regex = '(?mR)^' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 0], [5, 5], [10, 10]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "start-no-split-adjacent" +regex = '(?mR)^' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. +[[test]] +name = "start-no-split-adjacent-cr" +regex = '(?mR)^' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "start-no-split-adjacent-lf" +regex = '(?mR)^' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that a CRLF-aware '$' assertion does not split a line terminator. +[[test]] +name = "end-no-split" +regex = '(?mR)$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[3, 3], [8, 8], [13, 13]] + +# Same as above, but with adjacent runs of line terminators. 
+[[test]] +name = "end-no-split-adjacent" +regex = '(?mR)$' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. +[[test]] +name = "end-no-split-adjacent-cr" +regex = '(?mR)$' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "end-no-split-adjacent-lf" +regex = '(?mR)$' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note +# that this doesn't require multi-line mode to be enabled. +[[test]] +name = "dot-no-crlf" +regex = '(?R).' +haystack = "\r\n\r\n\r\n" +matches = [] + +# This is a test that caught a bug in the one-pass DFA where it (amazingly) was +# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy & +# paste bug. We insert an empty capture group here because it provokes the meta +# regex engine to first find a match and then trip over a panic because the +# one-pass DFA erroneously says there is no match. +[[test]] +name = "onepass-wrong-crlf-with-capture" +regex = '(?Rm:().$)' +haystack = "ZZ\r" +matches = [[[1, 2], [1, 1]]] + +# This is like onepass-wrong-crlf-with-capture above, except it sets up the +# test so that it can be run by the one-pass DFA directly. (i.e., Make it +# anchored and start the search at the right place.) 
+[[test]] +name = "onepass-wrong-crlf-anchored" +regex = '(?Rm:.$)' +haystack = "ZZ\r" +matches = [[1, 2]] +anchored = true +bounds = [1, 3] diff --git a/third_party/rust/regex/testdata/earliest.toml b/third_party/rust/regex/testdata/earliest.toml new file mode 100644 index 0000000000..951689358e --- /dev/null +++ b/third_party/rust/regex/testdata/earliest.toml @@ -0,0 +1,52 @@ +[[test]] +name = "no-greedy-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "no-greedy-200" +regex = 'abc+' +haystack = "zzzabccc" +matches = [[3, 6]] +search-kind = "earliest" + +[[test]] +name = "is-ungreedy" +regex = 'a+?' +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "look-start-test" +regex = '^(abc|a)' +haystack = "abc" +matches = [ + [[0, 1], [0, 1]], +] +search-kind = "earliest" + +[[test]] +name = "look-end-test" +regex = '(abc|a)$' +haystack = "abc" +matches = [ + [[0, 3], [0, 3]], +] +search-kind = "earliest" + +[[test]] +name = "no-leftmost-first-100" +regex = 'abc|a' +haystack = "abc" +matches = [[0, 1]] +search-kind = "earliest" + +[[test]] +name = "no-leftmost-first-200" +regex = 'aba|a' +haystack = "aba" +matches = [[0, 1], [2, 3]] +search-kind = "earliest" diff --git a/third_party/rust/regex/testdata/empty.toml b/third_party/rust/regex/testdata/empty.toml new file mode 100644 index 0000000000..7dfd8027a4 --- /dev/null +++ b/third_party/rust/regex/testdata/empty.toml @@ -0,0 +1,113 @@ +[[test]] +name = "100" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "110" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "120" +regex = "|z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "130" +regex = "z|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "200" +regex = "|" +haystack = "abc" +matches = [[0, 0], 
[1, 1], [2, 2], [3, 3]] + +[[test]] +name = "210" +regex = "||" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "220" +regex = "||b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "230" +regex = "b||" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "240" +regex = "||z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "300" +regex = "(?:)|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "310" +regex = "b|(?:)" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "320" +regex = "(?:|)" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "330" +regex = "(?:|)|z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "400" +regex = "a(?:)|b" +haystack = "abc" +matches = [[0, 1], [1, 2]] + +[[test]] +name = "500" +regex = "" +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "510" +regex = "" +haystack = "a" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "520" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "600" +regex = '(?:|a)*' +haystack = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "610" +regex = '(?:|a)+' +haystack = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] diff --git a/third_party/rust/regex/testdata/expensive.toml b/third_party/rust/regex/testdata/expensive.toml new file mode 100644 index 0000000000..b70e42f9bb --- /dev/null +++ b/third_party/rust/regex/testdata/expensive.toml @@ -0,0 +1,23 @@ +# This file represent tests that may be expensive to run on some regex engines. +# For example, tests that build a full DFA ahead of time and minimize it can +# take a horrendously long time on regexes that are large (or result in an +# explosion in the number of states). 
We group these tests together so that +# such engines can simply skip these tests. + +# See: https://github.com/rust-lang/regex/issues/98 +[[test]] +name = "regression-many-repeat-no-stack-overflow" +regex = '^.{1,2500}' +haystack = "a" +matches = [[0, 1]] + +# This test is meant to blow the bounded backtracker's visited capacity. In +# order to do that, we need a somewhat sizeable regex. The purpose of this +# is to make sure there's at least one test that exercises this path in the +# backtracker. All other tests (at time of writing) are small enough that the +# backtracker can handle them fine. +[[test]] +name = "backtrack-blow-visited-capacity" +regex = '\pL{50}' +haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ" +matches = [[0, 50], [50, 100], [100, 150]] diff --git a/third_party/rust/regex/testdata/flags.toml b/third_party/rust/regex/testdata/flags.toml new file mode 100644 index 0000000000..30b412ca65 --- /dev/null +++ b/third_party/rust/regex/testdata/flags.toml @@ -0,0 +1,68 @@ +[[test]] +name = "1" +regex = "(?i)abc" +haystack = "ABC" +matches = [[0, 3]] + +[[test]] +name = "2" +regex = "(?i)a(?-i)bc" +haystack = "Abc" +matches = [[0, 3]] + +[[test]] +name = "3" +regex = "(?i)a(?-i)bc" +haystack = "ABC" +matches = [] + +[[test]] +name = "4" +regex = "(?is)a." +haystack = "A\n" +matches = [[0, 2]] + +[[test]] +name = "5" +regex = "(?is)a.(?-is)a." +haystack = "A\nab" +matches = [[0, 4]] + +[[test]] +name = "6" +regex = "(?is)a.(?-is)a." +haystack = "A\na\n" +matches = [] + +[[test]] +name = "7" +regex = "(?is)a.(?-is:a.)?" +haystack = "A\na\n" +matches = [[0, 2]] +match-limit = 1 + +[[test]] +name = "8" +regex = "(?U)a+" +haystack = "aa" +matches = [[0, 1]] +match-limit = 1 + +[[test]] +name = "9" +regex = "(?U)a+?" 
+haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "10" +regex = "(?U)(?-U)a+" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "11" +regex = '(?m)(?:^\d+$\n?)+' +haystack = "123\n456\n789" +matches = [[0, 11]] +unicode = false diff --git a/third_party/rust/regex/testdata/fowler/basic.toml b/third_party/rust/regex/testdata/fowler/basic.toml new file mode 100644 index 0000000000..92b4e4cf72 --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/basic.toml @@ -0,0 +1,1611 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. + +[[test]] +name = "basic3" +regex = '''abracadabra$''' +haystack = '''abracadabracadabra''' +matches = [[[7, 18]]] +match-limit = 1 + +[[test]] +name = "basic4" +regex = '''a...b''' +haystack = '''abababbb''' +matches = [[[2, 7]]] +match-limit = 1 + +[[test]] +name = "basic5" +regex = '''XXXXXX''' +haystack = '''..XXXXXX''' +matches = [[[2, 8]]] +match-limit = 1 + +[[test]] +name = "basic6" +regex = '''\)''' +haystack = '''()''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic7" +regex = '''a]''' +haystack = '''a]a''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic9" +regex = '''\}''' +haystack = '''}''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic10" +regex = '''\]''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic12" +regex = ''']''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic15" +regex = '''^a''' +haystack = '''ax''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic16" +regex = '''\^a''' +haystack = '''a^a''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic17" +regex = '''a\^''' +haystack = '''a^''' +matches = [[[0, 2]]] +match-limit = 1 
+anchored = true + +[[test]] +name = "basic18" +regex = '''a$''' +haystack = '''aa''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic19" +regex = '''a\$''' +haystack = '''a$''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic20" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic21" +regex = '''$^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic22" +regex = '''a($)''' +haystack = '''aa''' +matches = [[[1, 2], [2, 2]]] +match-limit = 1 + +[[test]] +name = "basic23" +regex = '''a*(^a)''' +haystack = '''aa''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic24" +regex = '''(..)*(...)*''' +haystack = '''a''' +matches = [[[0, 0], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic25" +regex = '''(..)*(...)*''' +haystack = '''abcd''' +matches = [[[0, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic26" +regex = '''(ab|a)(bc|c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic27" +regex = '''(ab)c|abc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic28" +regex = '''a{0}b''' +haystack = '''ab''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic29" +regex = '''(a*)(b?)(b+)b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic30" +regex = '''(a*)(b{0,1})(b{1,})b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic32" +regex = '''((a|a)|a)''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic33" +regex = '''(a*)(a|aa)''' 
+haystack = '''aaaa''' +matches = [[[0, 4], [0, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic34" +regex = '''a*(a.|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic35" +regex = '''a(b)|c(d)|a(e)f''' +haystack = '''aef''' +matches = [[[0, 3], [], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic36" +regex = '''(a|b)?.*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic37" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ac''' +matches = [[[0, 2], [0, 1], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic38" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ab''' +matches = [[[0, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic39" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''abc''' +matches = [[[0, 3], [1, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic40" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''xc''' +matches = [[[1, 2], [], []]] +match-limit = 1 + +[[test]] +name = "basic41" +regex = '''(.a|.b).*|.*(.a|.b)''' +haystack = '''xa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic42" +regex = '''a?(ab|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic43" +regex = '''a?(ac{0}b|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic44" +regex = '''ab|abab''' +haystack = '''abbabab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic45" +regex = '''aba|bab|bba''' +haystack = '''baaabbbaba''' +matches = [[[5, 8]]] +match-limit = 1 + +[[test]] +name = "basic46" +regex = '''aba|bab''' +haystack = '''baaabbbaba''' +matches = [[[6, 9]]] +match-limit = 1 + +[[test]] +name = "basic47" +regex = '''(aa|aaa)*|(a|aaaaa)''' 
+haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic48" +regex = '''(a.|.a.)*|(a|.a...)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic49" +regex = '''ab|a''' +haystack = '''xabc''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic50" +regex = '''ab|a''' +haystack = '''xxabc''' +matches = [[[2, 4]]] +match-limit = 1 + +[[test]] +name = "basic51" +regex = '''(Ab|cD)*''' +haystack = '''aBcD''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true +case-insensitive = true + +[[test]] +name = "basic52" +regex = '''[^-]''' +haystack = '''--a''' +matches = [[[2, 3]]] +match-limit = 1 + +[[test]] +name = "basic53" +regex = '''[a-]*''' +haystack = '''--a''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic54" +regex = '''[a-m-]*''' +haystack = '''--amoma--''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic55" +regex = ''':::1:::0:|:::1:1:0:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic56" +regex = ''':::1:::0:|:::1:1:1:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic57" +regex = '''[[:upper:]]''' +haystack = '''A''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic58" +regex = '''[[:lower:]]+''' +haystack = '''`az{''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic59" +regex = '''[[:upper:]]+''' +haystack = '''@AZ[''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic65" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic66" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic67" +regex = '''[^a]''' 
+haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic68" +regex = '''\na''' +haystack = '''\na''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic69" +regex = '''(a)(b)(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [1, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic70" +regex = '''xxx''' +haystack = '''xxx''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic72" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 6,''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic74" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''2/7''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic76" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 1,Feb 6''' +matches = [[[5, 11]]] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "basic78" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))''' +haystack = '''x''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "basic80" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))*''' +haystack = '''xx''' +matches = [[[0, 2], [1, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic81" +regex = '''a?(ab|ba)*''' +haystack = '''ababababababababababababababababababababababababababababababababababababababababa''' +matches = [[[0, 81], [79, 81]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic82" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabbbbaa''' +matches = [[[18, 25]]] +match-limit = 1 + +[[test]] +name = "basic83" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabaa''' +matches = [[[18, 22]]] +match-limit = 1 + +[[test]] +name = "basic84" +regex = '''aaac|aabc|abac|abbc|baac|babc|bbac|bbbc''' +haystack = '''baaabbbabac''' +matches = [[[7, 11]]] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "basic86" +regex = '''.*''' +haystack = '''\x01\x7f''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic87" +regex = '''aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll''' +haystack = '''XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa''' +matches = [[[53, 57]]] +match-limit = 1 + +[[test]] +name = "basic89" +regex = '''a*a*a*a*a*b''' +haystack = '''aaaaaaaaab''' +matches = [[[0, 10]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic90" +regex = '''^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic91" +regex = '''$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic92" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic93" +regex = '''^a$''' +haystack = '''a''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + 
+[[test]] +name = "basic94" +regex = '''abc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic95" +regex = '''abc''' +haystack = '''xabcy''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic96" +regex = '''abc''' +haystack = '''ababc''' +matches = [[[2, 5]]] +match-limit = 1 + +[[test]] +name = "basic97" +regex = '''ab*c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic98" +regex = '''ab*bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic99" +regex = '''ab*bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic100" +regex = '''ab*bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic101" +regex = '''ab+bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic102" +regex = '''ab+bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic103" +regex = '''ab?bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic104" +regex = '''ab?bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic105" +regex = '''ab?c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic106" +regex = '''^abc$''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic107" +regex = '''^abc''' +haystack = '''abcc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic108" +regex = '''abc$''' +haystack = '''aabc''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic109" +regex = '''^''' +haystack = '''abc''' +matches = [[[0, 0]]] 
+match-limit = 1 +anchored = true + +[[test]] +name = "basic110" +regex = '''$''' +haystack = '''abc''' +matches = [[[3, 3]]] +match-limit = 1 + +[[test]] +name = "basic111" +regex = '''a.c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic112" +regex = '''a.c''' +haystack = '''axc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic113" +regex = '''a.*c''' +haystack = '''axyzc''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic114" +regex = '''a[bc]d''' +haystack = '''abd''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic115" +regex = '''a[b-d]e''' +haystack = '''ace''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic116" +regex = '''a[b-d]''' +haystack = '''aac''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic117" +regex = '''a[-b]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic118" +regex = '''a[b-]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic119" +regex = '''a]''' +haystack = '''a]''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic120" +regex = '''a[]]b''' +haystack = '''a]b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic121" +regex = '''a[^bc]d''' +haystack = '''aed''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic122" +regex = '''a[^-b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic123" +regex = '''a[^]b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic124" +regex = '''ab|cd''' +haystack = '''abc''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic125" +regex = '''ab|cd''' 
+haystack = '''abcd''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic126" +regex = '''a\(b''' +haystack = '''a(b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic127" +regex = '''a\(*b''' +haystack = '''ab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic128" +regex = '''a\(*b''' +haystack = '''a((b''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic129" +regex = '''((a))''' +haystack = '''abc''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic130" +regex = '''(a)b(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic131" +regex = '''a+b+c''' +haystack = '''aabbabc''' +matches = [[[4, 7]]] +match-limit = 1 + +[[test]] +name = "basic132" +regex = '''a*''' +haystack = '''aaa''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic133" +regex = '''(a*)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic134" +regex = '''(a*)+''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic135" +regex = '''(a*|b)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic136" +regex = '''(a+|b)*''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic137" +regex = '''(a+|b)+''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic138" +regex = '''(a+|b)?''' +haystack = '''ab''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic139" +regex = '''[^ab]*''' +haystack = '''cde''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic140" 
+regex = '''(^)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic141" +regex = '''a*''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic142" +regex = '''([abc])*d''' +haystack = '''abbbcd''' +matches = [[[0, 6], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic143" +regex = '''([abc])*bcd''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic144" +regex = '''a|b|c|d|e''' +haystack = '''e''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic145" +regex = '''(a|b|c|d|e)f''' +haystack = '''ef''' +matches = [[[0, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic146" +regex = '''((a*|b))*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic147" +regex = '''abcd*efg''' +haystack = '''abcdefg''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic148" +regex = '''ab*''' +haystack = '''xabyabbbz''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic149" +regex = '''ab*''' +haystack = '''xayabbbz''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic150" +regex = '''(ab|cd)e''' +haystack = '''abcde''' +matches = [[[2, 5], [2, 4]]] +match-limit = 1 + +[[test]] +name = "basic151" +regex = '''[abhgefdc]ij''' +haystack = '''hij''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic152" +regex = '''(a|b)c*d''' +haystack = '''abcd''' +matches = [[[1, 4], [1, 2]]] +match-limit = 1 + +[[test]] +name = "basic153" +regex = '''(ab|ab*)bc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic154" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] 
+name = "basic155" +regex = '''a([bc]*)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic156" +regex = '''a([bc]+)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic157" +regex = '''a([bc]*)(c+d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 2], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic158" +regex = '''a[bcd]*dcdcde''' +haystack = '''adcdcde''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic159" +regex = '''(ab|a)b*c''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic160" +regex = '''((a)(b)c)(d)''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 3], [0, 1], [1, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic161" +regex = '''[A-Za-z_][A-Za-z0-9_]*''' +haystack = '''alpha''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic162" +regex = '''^a(bc+|b[eh])g|.h$''' +haystack = '''abh''' +matches = [[[1, 3], []]] +match-limit = 1 + +[[test]] +name = "basic163" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''effgz''' +matches = [[[0, 5], [0, 5], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic164" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''ij''' +matches = [[[0, 2], [0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic165" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''reffgz''' +matches = [[[1, 6], [1, 6], []]] +match-limit = 1 + +[[test]] +name = "basic166" +regex = '''(((((((((a)))))))))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic167" +regex = '''multiple words''' +haystack = '''multiple words yeah''' +matches = [[[0, 14]]] 
+match-limit = 1 +anchored = true + +[[test]] +name = "basic168" +regex = '''(.*)c(.*)''' +haystack = '''abcde''' +matches = [[[0, 5], [0, 2], [3, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic169" +regex = '''abcd''' +haystack = '''abcd''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic170" +regex = '''a(bc)d''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic171" +regex = '''a[\x01-\x03]?c''' +haystack = '''a\x02c''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic172" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic173" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mo'ammar Gadhafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic174" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Kaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic175" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qadhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic176" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gadafi''' +matches = [[[0, 14], [], [10, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic177" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic178" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- 
])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moamar Gaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic179" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadhdhafi''' +matches = [[[0, 18], [], [13, 15]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic180" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Khaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic181" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafy''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic182" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic183" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic184" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muamar Kaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic185" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Quathafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic186" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gheddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic187" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' 
+haystack = '''Moammar Khadafy''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic188" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Qudhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic189" +regex = '''a+(b|c)*d+''' +haystack = '''aabcdd''' +matches = [[[0, 6], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic190" +regex = '''^.+$''' +haystack = '''vivi''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic191" +regex = '''^(.+)$''' +haystack = '''vivi''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic192" +regex = '''^([^!.]+).att.com!(.+)$''' +haystack = '''gryphon.att.com!eby''' +matches = [[[0, 19], [0, 7], [16, 19]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic193" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic194" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic195" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic196" +regex = '''^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic197" +regex = '''((foo)|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic198" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic199" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 
7], [0, 3], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic200" +regex = '''((foo)|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic201" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], []]] +match-limit = 1 + +[[test]] +name = "basic202" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic203" +regex = '''(foo|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic204" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic205" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic206" +regex = '''(foo|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic207" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic208" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic209" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic210" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic211" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = 
true + +[[test]] +name = "basic212" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic213" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic214" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bas''' +matches = [[[0, 3], [0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic215" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic216" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic217" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic218" +regex = '''.*(/XXX).*''' +haystack = '''/XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic219" +regex = '''.*(\\XXX).*''' +haystack = '''\XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic220" +regex = '''\\XXX''' +haystack = '''\XXX''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic221" +regex = '''.*(/000).*''' +haystack = '''/000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic222" +regex = '''.*(\\000).*''' +haystack = '''\000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic223" +regex = '''\\000''' +haystack = '''\000''' +matches = [[[0, 4]]] 
+match-limit = 1 +anchored = true + diff --git a/third_party/rust/regex/testdata/fowler/dat/README b/third_party/rust/regex/testdata/fowler/dat/README new file mode 100644 index 0000000000..242a0e6c3a --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/dat/README @@ -0,0 +1,25 @@ +Test data was taken from the Go distribution, which was in turn taken from the +testregex test suite: + + http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html + +Unfortunately, the original web site now appears dead, but the test data lives +on. + +The LICENSE in this directory corresponds to the LICENSE that the data was +originally released under. + +The tests themselves were modified for RE2/Go (and marked as such). A +couple were modified further by me (Andrew Gallant) and marked with 'Rust'. + +After some number of years, these tests were transformed into a TOML format +using the 'regex-cli generate fowler' command. To re-generate the +TOML files, run the following from the root of this repository: + + regex-cli generate fowler tests/data/fowler tests/data/fowler/dat/*.dat + +This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md' +from the root of the repository for more information. + +This brings the Fowler tests into a more "sensible" structured format in which +other tests can be written such that they aren't write-only. 
diff --git a/third_party/rust/regex/testdata/fowler/dat/basic.dat b/third_party/rust/regex/testdata/fowler/dat/basic.dat new file mode 100644 index 0000000000..654a72b39b --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/dat/basic.dat @@ -0,0 +1,223 @@ +NOTE all standard compliant implementations should pass these : 2002-05-31 + +BE abracadabra$ abracadabracadabra (7,18) +BE a...b abababbb (2,7) +BE XXXXXX ..XXXXXX (2,8) +E \) () (1,2) +BE a] a]a (0,2) +B } } (0,1) +E \} } (0,1) +BE \] ] (0,1) +B ] ] (0,1) +E ] ] (0,1) +B { { (0,1) +B } } (0,1) +BE ^a ax (0,1) +BE \^a a^a (1,3) +BE a\^ a^ (0,2) +BE a$ aa (1,2) +BE a\$ a$ (0,2) +BE ^$ NULL (0,0) +E $^ NULL (0,0) +E a($) aa (1,2)(2,2) +E a*(^a) aa (0,1)(0,1) +E (..)*(...)* a (0,0) +E (..)*(...)* abcd (0,4)(2,4) +E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) +E (ab)c|abc abc (0,3)(0,2) +E a{0}b ab (1,2) +E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) +E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) +E a{9876543210} NULL BADBR +E ((a|a)|a) a (0,1)(0,1)(0,1) +E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) +E a*(a.|aa) aaaa (0,4)(2,4) +E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) +E (a|b)?.* b (0,1)(0,1) +E (a|b)c|a(b|c) ac (0,2)(0,1) +E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) +E (a|b)*c|(a|ab)*c abc (0,3)(1,2) +E (a|b)*c|(a|ab)*c xc (1,2) +E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) +E a?(ab|ba)ab abab (0,4)(0,2) +E a?(ac{0}b|ba)ab abab (0,4)(0,2) +E ab|abab abbabab (0,2) +E aba|bab|bba baaabbbaba (5,8) +E aba|bab baaabbbaba (6,9) +E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) +E (a.|.a.)*|(a|.a...) 
aa (0,2)(0,2) +E ab|a xabc (1,3) +E ab|a xxabc (2,4) +Ei (Ab|cD)* aBcD (0,4)(2,4) +BE [^-] --a (2,3) +BE [a-]* --a (0,3) +BE [a-m-]* --amoma-- (0,4) +E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) +E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) +{E [[:upper:]] A (0,1) [[<element>]] not supported +E [[:lower:]]+ `az{ (1,3) +E [[:upper:]]+ @AZ[ (1,3) +# No collation in Go +#BE [[-]] [[-]] (2,4) +#BE [[.NIL.]] NULL ECOLLATE +#BE [[=aleph=]] NULL ECOLLATE +} +BE$ \n \n (0,1) +BEn$ \n \n (0,1) +BE$ [^a] \n (0,1) +BE$ \na \na (0,2) +E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) +BE xxx xxx (0,3) +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust +#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust +#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust +E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) +E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) +E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) +E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) +#BE$ .* \x01\xff (0,2) +BE$ .* \x01\x7f (0,2) Rust +E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll 
XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) +L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH +E a*a*a*a*a*b aaaaaaaaab (0,10) +BE ^ NULL (0,0) +BE $ NULL (0,0) +BE ^$ NULL (0,0) +BE ^a$ a (0,1) +BE abc abc (0,3) +BE abc xabcy (1,4) +BE abc ababc (2,5) +BE ab*c abc (0,3) +BE ab*bc abc (0,3) +BE ab*bc abbc (0,4) +BE ab*bc abbbbc (0,6) +E ab+bc abbc (0,4) +E ab+bc abbbbc (0,6) +E ab?bc abbc (0,4) +E ab?bc abc (0,3) +E ab?c abc (0,3) +BE ^abc$ abc (0,3) +BE ^abc abcc (0,3) +BE abc$ aabc (1,4) +BE ^ abc (0,0) +BE $ abc (3,3) +BE a.c abc (0,3) +BE a.c axc (0,3) +BE a.*c axyzc (0,5) +BE a[bc]d abd (0,3) +BE a[b-d]e ace (0,3) +BE a[b-d] aac (1,3) +BE a[-b] a- (0,2) +BE a[b-] a- (0,2) +BE a] a] (0,2) +BE a[]]b a]b (0,3) +BE a[^bc]d aed (0,3) +BE a[^-b]c adc (0,3) +BE a[^]b]c adc (0,3) +E ab|cd abc (0,2) +E ab|cd abcd (0,2) +E a\(b a(b (0,3) +E a\(*b ab (0,2) +E a\(*b a((b (0,4) +E ((a)) abc (0,1)(0,1)(0,1) +E (a)b(c) abc (0,3)(0,1)(2,3) +E a+b+c aabbabc (4,7) +E a* aaa (0,3) +E (a*)* - (0,0)(0,0) +E (a*)+ - (0,0)(0,0) +E (a*|b)* - (0,0)(0,0) +E (a+|b)* ab (0,2)(1,2) +E (a+|b)+ ab (0,2)(1,2) +E (a+|b)? 
ab (0,1)(0,1) +BE [^ab]* cde (0,3) +E (^)* - (0,0)(0,0) +BE a* NULL (0,0) +E ([abc])*d abbbcd (0,6)(4,5) +E ([abc])*bcd abcd (0,4)(0,1) +E a|b|c|d|e e (0,1) +E (a|b|c|d|e)f ef (0,2)(0,1) +E ((a*|b))* - (0,0)(0,0)(0,0) +BE abcd*efg abcdefg (0,7) +BE ab* xabyabbbz (1,3) +BE ab* xayabbbz (1,2) +E (ab|cd)e abcde (2,5)(2,4) +BE [abhgefdc]ij hij (0,3) +E (a|b)c*d abcd (1,4)(1,2) +E (ab|ab*)bc abc (0,3)(0,1) +E a([bc]*)c* abc (0,3)(1,3) +E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) +E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) +E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) +E a[bcd]*dcdcde adcdcde (0,7) +E (ab|a)b*c abc (0,3)(0,2) +E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) +BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) +E ^a(bc+|b[eh])g|.h$ abh (1,3) +E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) +E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) +E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) +E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) +BE multiple words multiple words yeah (0,14) +E (.*)c(.*) abcde (0,5)(0,2)(3,5) +BE abcd abcd (0,4) +E a(bc)d abcd (0,4)(1,3) +E a[-]?c ac (0,3) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi 
(0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) +E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) +E a+(b|c)*d+ aabcdd (0,6)(3,4) +E ^.+$ vivi (0,4) +E ^(.+)$ vivi (0,4)(0,4) +E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) +E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) +E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) +E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) +E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) +E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) +E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) +E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) +E ((foo)|bar)!bas bar!bas (0,7)(0,3) +E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) +E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) +E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) +E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) +E (foo|(bar))!bas foo!bas (0,7)(0,3) +E (foo|bar)!bas bar!bas (0,7)(0,3) +E (foo|bar)!bas foo!bar!bas (4,11)(4,7) +E (foo|bar)!bas foo!bas (0,7)(0,3) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas 
(0,11)(?,?)(?,?)(4,8)(8,11) +E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) +E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) +E .*(/XXX).* /XXX (0,4)(0,4) +E .*(\\XXX).* \XXX (0,4)(0,4) +E \\XXX \XXX (0,4) +E .*(/000).* /000 (0,4)(0,4) +E .*(\\000).* \000 (0,4)(0,4) +E \\000 \000 (0,4) diff --git a/third_party/rust/regex/testdata/fowler/dat/nullsubexpr.dat b/third_party/rust/regex/testdata/fowler/dat/nullsubexpr.dat new file mode 100644 index 0000000000..a94430649e --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/dat/nullsubexpr.dat @@ -0,0 +1,74 @@ +NOTE null subexpression matches : 2002-06-06 + +E (a*)* a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a*)+ a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a+)* a (0,1)(0,1) +E SAME x (0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E (a+)+ a (0,1)(0,1) +E SAME x NOMATCH +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) + +E ([a]*)* a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E ([a]*)+ a (0,1)(0,1) +E SAME x (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaax (0,6)(0,6) +E ([^b]*)* a (0,1)(0,1) +E SAME b (0,0)(0,0) +E SAME aaaaaa (0,6)(0,6) +E SAME aaaaaab (0,6)(0,6) +E ([ab]*)* a (0,1)(0,1) +E SAME aaaaaa (0,6)(0,6) +E SAME ababab (0,6)(0,6) +E SAME bababa (0,6)(0,6) +E SAME b (0,1)(0,1) +E SAME bbbbbb (0,6)(0,6) +E SAME aaaabcde (0,5)(0,5) +E ([^a]*)* b (0,1)(0,1) +E SAME bbbbbb (0,6)(0,6) +E SAME aaaaaa (0,0)(0,0) +E ([^ab]*)* ccccxx (0,6)(0,6) +E SAME ababab (0,0)(0,0) + +#E ((z)+|a)* zabcde (0,2)(1,2) +E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust + +#{E a+? aaaaaa (0,1) no *? +? 
mimimal match ops +#E (a) aaa (0,1)(0,1) +#E (a*?) aaa (0,0)(0,0) +#E (a)*? aaa (0,0) +#E (a*?)*? aaa (0,0) +#} + +B \(a*\)*\(x\) x (0,1)(0,0)(0,1) +B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) +B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) +B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) +B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) +B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) +B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) +B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) + +E (a*)*(x) x (0,1)(0,0)(0,1) +E (a*)*(x) ax (0,2)(0,1)(1,2) +E (a*)*(x) axa (0,2)(0,1)(1,2) + +E (a*)+(x) x (0,1)(0,0)(0,1) +E (a*)+(x) ax (0,2)(0,1)(1,2) +E (a*)+(x) axa (0,2)(0,1)(1,2) + +E (a*){2}(x) x (0,1)(0,0)(0,1) +E (a*){2}(x) ax (0,2)(1,1)(1,2) +E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/third_party/rust/regex/testdata/fowler/dat/repetition.dat b/third_party/rust/regex/testdata/fowler/dat/repetition.dat new file mode 100644 index 0000000000..cf0d8382f8 --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/dat/repetition.dat @@ -0,0 +1,169 @@ +NOTE implicit vs. explicit repetitions : 2009-02-02 + +# Glenn Fowler <gsf@research.att.com> +# conforming matches (column 4) must match one of the following BREs +# NOMATCH +# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* +# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* +# i.e., each 3-tuple has two identical elements and one (?,?) + +E ((..)|(.)) NULL NOMATCH +E ((..)|(.))((..)|(.)) NULL NOMATCH +E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH + +E ((..)|(.)){1} NULL NOMATCH +E ((..)|(.)){2} NULL NOMATCH +E ((..)|(.)){3} NULL NOMATCH + +E ((..)|(.))* NULL (0,0) + +E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) +E ((..)|(.))((..)|(.)) a NOMATCH +E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH + +E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) +E ((..)|(.)){2} a NOMATCH +E ((..)|(.)){3} a NOMATCH + +E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) + +E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) 
+E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) +E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH + +E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) +E ((..)|(.)){3} aa NOMATCH + +E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) + +E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) +E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) + +E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) +#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) +E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go +E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) + +#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) +E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go + +E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) + +E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) +#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) +E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go + +E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) + +E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) + +E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) +#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) +E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go + +#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) +E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go + +E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) +E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) + +E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) +E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) +E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) 
+ +E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) + +NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 + +# These test a bug in OS X / FreeBSD / NetBSD, and libtree. +# Linux/GLIBC gets the {8,} and {8,8} wrong. + +:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) +:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) +:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) +:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) +:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) +:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) +:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) +:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) +:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) +#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) +:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) +:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) +:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) +:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) +:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) +:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) +:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go +#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) +:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go +:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) + +# These test a fixed bug in my regex-tdfa that did not keep the expanded +# form properly grouped, so right association did the wrong thing with +# these ambiguous patterns (crafted just to test my code when I became +# suspicious of my implementation). The first subexpression should use +# "ab" then "a" then "bcd". + +# OS X / FreeBSD / NetBSD badly fail many of these, with impossible +# results like (0,6)(4,5)(6,6). 
+ +#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust +:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH +#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust +:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH +#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust + +# The above worked on Linux/GLIBC but the following often fail. 
+# They also trip up OS X / FreeBSD / NetBSD: + +#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH +#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH +#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go +#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/third_party/rust/regex/testdata/fowler/nullsubexpr.toml b/third_party/rust/regex/testdata/fowler/nullsubexpr.toml new file mode 100644 index 0000000000..2f1f0183ed --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/nullsubexpr.toml @@ -0,0 +1,405 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[test]] +name = "nullsubexpr3" +regex = '''(a*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr4" +regex = '''(a*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr5" +regex = '''(a*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr6" +regex = '''(a*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr7" +regex = '''(a*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr8" +regex = '''(a*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr9" +regex = '''(a*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr10" +regex = '''(a*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr11" +regex = '''(a+)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr12" +regex = '''(a+)*''' +haystack = '''x''' +matches = [[[0, 0], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr13" +regex = '''(a+)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr14" +regex = '''(a+)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr15" +regex = '''(a+)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr16" +regex = '''(a+)+''' +haystack = '''x''' +matches = [] +match-limit = 1 + +[[test]] +name = "nullsubexpr17" 
+regex = '''(a+)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr18" +regex = '''(a+)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr20" +regex = '''([a]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr21" +regex = '''([a]*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr22" +regex = '''([a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr23" +regex = '''([a]*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr24" +regex = '''([a]*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr25" +regex = '''([a]*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr26" +regex = '''([a]*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr27" +regex = '''([a]*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr28" +regex = '''([^b]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr29" +regex = '''([^b]*)*''' +haystack = '''b''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr30" +regex = '''([^b]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr31" +regex = '''([^b]*)*''' +haystack = '''aaaaaab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 
+anchored = true + +[[test]] +name = "nullsubexpr32" +regex = '''([ab]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr33" +regex = '''([ab]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr34" +regex = '''([ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr35" +regex = '''([ab]*)*''' +haystack = '''bababa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr36" +regex = '''([ab]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr37" +regex = '''([ab]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr38" +regex = '''([ab]*)*''' +haystack = '''aaaabcde''' +matches = [[[0, 5], [0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr39" +regex = '''([^a]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr40" +regex = '''([^a]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr41" +regex = '''([^a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr42" +regex = '''([^ab]*)*''' +haystack = '''ccccxx''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr43" +regex = '''([^ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "nullsubexpr46" +regex = '''((z)+|a)*''' +haystack = '''zabcde''' +matches = [[[0, 2], [1, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr64" +regex = '''(a*)*(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr65" +regex = '''(a*)*(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr66" +regex = '''(a*)*(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr68" +regex = '''(a*)+(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr69" +regex = '''(a*)+(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr70" +regex = '''(a*)+(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr72" +regex = '''(a*){2}(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr73" +regex = '''(a*){2}(x)''' +haystack = '''ax''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr74" +regex = '''(a*){2}(x)''' +haystack = '''axa''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + diff --git a/third_party/rust/regex/testdata/fowler/repetition.toml b/third_party/rust/regex/testdata/fowler/repetition.toml new file mode 100644 index 0000000000..d6a7112022 --- /dev/null +++ b/third_party/rust/regex/testdata/fowler/repetition.toml @@ -0,0 +1,746 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[test]] +name = "repetition10" +regex = '''((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition11" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition12" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition14" +regex = '''((..)|(.)){1}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition15" +regex = '''((..)|(.)){2}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition16" +regex = '''((..)|(.)){3}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition18" +regex = '''((..)|(.))*''' +haystack = '''''' +matches = [[[0, 0], [], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition20" +regex = '''((..)|(.))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition21" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition22" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition24" +regex = '''((..)|(.)){1}''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition25" +regex = '''((..)|(.)){2}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition26" +regex = '''((..)|(.)){3}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition28" +regex = '''((..)|(.))*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition30" +regex = '''((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = 
"repetition31" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition32" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition34" +regex = '''((..)|(.)){1}''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition35" +regex = '''((..)|(.)){2}''' +haystack = '''aa''' +matches = [[[0, 2], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition36" +regex = '''((..)|(.)){3}''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition38" +regex = '''((..)|(.))*''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition40" +regex = '''((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition41" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition42" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition44" +regex = '''((..)|(.)){1}''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition46" +regex = '''((..)|(.)){2}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition47" +regex = '''((..)|(.)){3}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition50" +regex = '''((..)|(.))*''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition52" +regex = '''((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition53" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition54" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition56" +regex = '''((..)|(.)){1}''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition57" +regex = '''((..)|(.)){2}''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition59" +regex = '''((..)|(.)){3}''' +haystack = '''aaaa''' +matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition61" +regex = '''((..)|(.))*''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition63" +regex = '''((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition64" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition65" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition67" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition68" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition70" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition73" +regex = '''((..)|(.))*''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition75" +regex = '''((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition76" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition77" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition79" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition80" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition81" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition83" +regex = '''((..)|(.))*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive90" +regex = '''X(.?){0,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive91" +regex = '''X(.?){1,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive92" +regex = '''X(.?){2,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive93" +regex = '''X(.?){3,}Y''' +haystack = '''X1234567Y''' +matches = 
[[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive94" +regex = '''X(.?){4,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive95" +regex = '''X(.?){5,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive96" +regex = '''X(.?){6,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive97" +regex = '''X(.?){7,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive98" +regex = '''X(.?){8,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive100" +regex = '''X(.?){0,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive102" +regex = '''X(.?){1,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive104" +regex = '''X(.?){2,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive106" +regex = '''X(.?){3,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive108" +regex = '''X(.?){4,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive110" +regex = '''X(.?){5,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive112" +regex = '''X(.?){6,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive114" +regex = '''X(.?){7,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive115" +regex = '''X(.?){8,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive127" +regex = '''(a|ab|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive129" +regex = '''(a|ab|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive130" +regex = '''(a|ab|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive131" +regex = '''(a|ab|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive132" +regex = '''(a|ab|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive134" +regex = '''(a|ab|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "repetition-expensive136" +regex = '''(a|ab|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive137" +regex = '''(a|ab|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive138" +regex = '''(a|ab|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive139" +regex = '''(a|ab|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive141" +regex = '''(a|ab|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive143" +regex = '''(a|ab|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive149" +regex = '''(ab|a|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive151" +regex = '''(ab|a|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive153" +regex = '''(ab|a|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive155" +regex = '''(ab|a|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive156" +regex = '''(ab|a|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive158" +regex = '''(ab|a|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive160" +regex = '''(ab|a|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive162" +regex = '''(ab|a|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive164" +regex = '''(ab|a|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive165" +regex = '''(ab|a|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive167" +regex = '''(ab|a|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive169" +regex = '''(ab|a|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + diff --git a/third_party/rust/regex/testdata/iter.toml b/third_party/rust/regex/testdata/iter.toml new file mode 100644 index 0000000000..329b9f031b --- /dev/null +++ b/third_party/rust/regex/testdata/iter.toml @@ -0,0 +1,143 @@ +[[test]] +name = "1" +regex = "a" +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] + +[[test]] +name = "2" +regex = "a" +haystack = "aba" +matches = [[0, 1], [2, 3]] + +[[test]] +name = "empty1" +regex = '' +haystack = '' +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' 
+haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "start1" +regex = "^a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "start2" +regex = "^a" +haystack = "aa" +matches = [[0, 1]] + +[[test]] +name = "anchored1" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +# This test is pretty subtle. It demonstrates the crucial difference between +# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively +# matches at the start of a haystack and nowhere else. The latter regex has +# no such restriction, but its automaton is constructed such that it lacks a +# `.*?` prefix. So it can actually produce matches at multiple locations. +# The anchored3 test drives this point home. +[[test]] +name = "anchored2" +regex = "a" +haystack = "aa" +matches = [[0, 1], [1, 2]] +anchored = true + +# Unlikely anchored2, this test stops matching anything after it sees `b` +# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it +# determines that there are no remaining matches. +[[test]] +name = "anchored3" +regex = "a" +haystack = "aaba" +matches = [[0, 1], [1, 2]] +anchored = true + +[[test]] +name = "nonempty-followedby-empty" +regex = 'abc|.*?' +haystack = "abczzz" +matches = [[0, 3], [4, 4], [5, 5], [6, 6]] + +[[test]] +name = "nonempty-followedby-oneempty" +regex = 'abc|.*?' 
+haystack = "abcz" +matches = [[0, 3], [4, 4]] + +[[test]] +name = "nonempty-followedby-onemixed" +regex = 'abc|.*?' +haystack = "abczabc" +matches = [[0, 3], [4, 7]] + +[[test]] +name = "nonempty-followedby-twomixed" +regex = 'abc|.*?' +haystack = "abczzabc" +matches = [[0, 3], [4, 4], [5, 8]] diff --git a/third_party/rust/regex/testdata/leftmost-all.toml b/third_party/rust/regex/testdata/leftmost-all.toml new file mode 100644 index 0000000000..e3fd950b6b --- /dev/null +++ b/third_party/rust/regex/testdata/leftmost-all.toml @@ -0,0 +1,25 @@ +[[test]] +name = "alt" +regex = 'foo|foobar' +haystack = "foobar" +matches = [[0, 6]] +match-kind = "all" +search-kind = "leftmost" + +[[test]] +name = "multi" +regex = ['foo', 'foobar'] +haystack = "foobar" +matches = [ + { id = 1, span = [0, 6] }, +] +match-kind = "all" +search-kind = "leftmost" + +[[test]] +name = "dotall" +regex = '(?s:.)' +haystack = "foobar" +matches = [[5, 6]] +match-kind = "all" +search-kind = "leftmost" diff --git a/third_party/rust/regex/testdata/line-terminator.toml b/third_party/rust/regex/testdata/line-terminator.toml new file mode 100644 index 0000000000..4de72de31e --- /dev/null +++ b/third_party/rust/regex/testdata/line-terminator.toml @@ -0,0 +1,97 @@ +# This tests that we can switch the line terminator to the NUL byte. +[[test]] +name = "nul" +regex = '(?m)^[a-z]+$' +haystack = '\x00abc\x00' +matches = [[1, 4]] +unescape = true +line-terminator = '\x00' + +# This tests that '.' will not match the configured line terminator, but will +# match \n. +[[test]] +name = "dot-changes-with-line-terminator" +regex = '.' +haystack = '\x00\n' +matches = [[1, 2]] +unescape = true +line-terminator = '\x00' + +# This tests that when we switch the line terminator, \n is no longer +# recognized as the terminator. 
+[[test]] +name = "not-line-feed" +regex = '(?m)^[a-z]+$' +haystack = '\nabc\n' +matches = [] +unescape = true +line-terminator = '\x00' + +# This tests that we can set the line terminator to a non-ASCII byte and have +# it behave as expected. +[[test]] +name = "non-ascii" +regex = '(?m)^[a-z]+$' +haystack = '\xFFabc\xFF' +matches = [[1, 4]] +unescape = true +line-terminator = '\xFF' +utf8 = false + +# This tests that we can set the line terminator to a byte corresponding to a +# word character, and things work as expected. +[[test]] +name = "word-byte" +regex = '(?m)^[a-z]+$' +haystack = 'ZabcZ' +matches = [[1, 4]] +unescape = true +line-terminator = 'Z' + +# This tests that we can set the line terminator to a byte corresponding to a +# non-word character, and things work as expected. +[[test]] +name = "non-word-byte" +regex = '(?m)^[a-z]+$' +haystack = '%abc%' +matches = [[1, 4]] +unescape = true +line-terminator = '%' + +# This combines "set line terminator to a word byte" with a word boundary +# assertion, which should result in no match even though ^/$ matches. +[[test]] +name = "word-boundary" +regex = '(?m)^\b[a-z]+\b$' +haystack = 'ZabcZ' +matches = [] +unescape = true +line-terminator = 'Z' + +# Like 'word-boundary', but does an anchored search at the point where ^ +# matches, but where \b should not. +[[test]] +name = "word-boundary-at" +regex = '(?m)^\b[a-z]+\b$' +haystack = 'ZabcZ' +matches = [] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' + +# Like 'word-boundary-at', but flips the word boundary to a negation. This +# in particular tests a tricky case in DFA engines, where they must consider +# explicitly that a starting configuration from a custom line terminator may +# also required setting the "is from word byte" flag on a state. Otherwise, +# it's treated as "not from a word byte," which would result in \B not matching +# here when it should. 
+[[test]] +name = "not-word-boundary-at" +regex = '(?m)^\B[a-z]+\B$' +haystack = 'ZabcZ' +matches = [[1, 4]] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' diff --git a/third_party/rust/regex/testdata/misc.toml b/third_party/rust/regex/testdata/misc.toml new file mode 100644 index 0000000000..c65531f5d9 --- /dev/null +++ b/third_party/rust/regex/testdata/misc.toml @@ -0,0 +1,99 @@ +[[test]] +name = "ascii-literal" +regex = "a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "ascii-literal-not" +regex = "a" +haystack = "z" +matches = [] + +[[test]] +name = "ascii-literal-anchored" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +[[test]] +name = "ascii-literal-anchored-not" +regex = "a" +haystack = "z" +matches = [] +anchored = true + +[[test]] +name = "anchor-start-end-line" +regex = '(?m)^bar$' +haystack = "foo\nbar\nbaz" +matches = [[4, 7]] + +[[test]] +name = "prefix-literal-match" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] + +[[test]] +name = "prefix-literal-match-ascii" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "prefix-literal-no-match" +regex = '^abc' +haystack = "zabc" +matches = [] + +[[test]] +name = "one-literal-edge" +regex = 'abc' +haystack = "xxxxxab" +matches = [] + +[[test]] +name = "terminates" +regex = 'a$' +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "suffix-100" +regex = '.*abcd' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-200" +regex = '.*(?:abcd)+' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-300" +regex = '.*(?:abcd)+' +haystack = "abcdabcd" +matches = [[0, 8]] + +[[test]] +name = "suffix-400" +regex = '.*(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-500" +regex = '.*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-600" +regex = '[^abcd]*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[4, 
9]] diff --git a/third_party/rust/regex/testdata/multiline.toml b/third_party/rust/regex/testdata/multiline.toml new file mode 100644 index 0000000000..3acc901d50 --- /dev/null +++ b/third_party/rust/regex/testdata/multiline.toml @@ -0,0 +1,845 @@ +[[test]] +name = "basic1" +regex = '(?m)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf-cr" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\rdef\rxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic2" +regex = '(?m)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf" +regex = '(?Rm)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf-cr" +regex = '(?Rm)^$' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic3" +regex = '(?m)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf" +regex = '(?Rm)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf-cr" +regex = '(?Rm)^' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic4" +regex = '(?m)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf" +regex = '(?Rm)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf-cr" +regex = '(?Rm)$' +haystack = "abc\rdef\rxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic5" +regex = '(?m)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf" +regex = '(?Rm)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf-cr" +regex = '(?Rm)^[a-z]' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic6" +regex = 
'(?m)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf" +regex = '(?Rm)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf-cr" +regex = '(?Rm)[a-z]^' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic7" +regex = '(?m)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf" +regex = '(?Rm)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf-cr" +regex = '(?Rm)[a-z]$' +haystack = "abc\rdef\rxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic8" +regex = '(?m)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf" +regex = '(?Rm)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf-cr" +regex = '(?Rm)$[a-z]' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic9" +regex = '(?m)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "basic9-crlf" +regex = '(?Rm)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "repeat1" +regex = '(?m)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf" +regex = '(?Rm)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf-cr" +regex = '(?Rm)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf" +regex = '(?R)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf-cr" +regex = '(?R)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat2" +regex = 
'(?m)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf" +regex = '(?Rm)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf-cr" +regex = '(?Rm)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-no-multi" +regex = '(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf" +regex = '(?R)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf-cr" +regex = '(?R)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat3" +regex = '(?m)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf" +regex = '(?Rm)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf-cr" +regex = '(?Rm)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi" +regex = '(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf" +regex = '(?R)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf-cr" +regex = '(?R)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat4" +regex = '(?m)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf" +regex = '(?Rm)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf-cr" +regex = '(?Rm)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-no-multi" +regex = '(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = 
"repeat4-no-multi-crlf" +regex = '(?R)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf-cr" +regex = '(?R)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat5" +regex = '(?m)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf" +regex = '(?Rm)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf-cr" +regex = '(?Rm)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi" +regex = '(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf" +regex = '(?R)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf-cr" +regex = '(?R)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat6" +regex = '(?m)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf-cr" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-no-multi" +regex = '(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat7" +regex = '(?m)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name 
= "repeat7-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-no-multi" +regex = '(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat8" +regex = '(?m)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-no-multi" +regex = '(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat9" +regex = '(?m)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-no-multi" +regex = '(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = 
"repeat9-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat10" +regex = '(?m)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-no-multi" +regex = '(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat11" +regex = '(?m)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf" +regex = '(?Rm)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf-cr" +regex = '(?Rm)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi" +regex = '^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf" +regex = '(?R)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf-cr" +regex = '(?R)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat12" +regex = '(?m)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf" +regex = '(?Rm)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], 
[4, 4]] + +[[test]] +name = "repeat12-crlf-cr" +regex = '(?Rm)^+' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-no-multi" +regex = '^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf" +regex = '(?R)^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf-cr" +regex = '(?R)^+' +haystack = "\raa\r" +matches = [[0, 0]] + +[[test]] +name = "repeat13" +regex = '(?m)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf" +regex = '(?Rm)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf-cr" +regex = '(?Rm)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi" +regex = '$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf" +regex = '(?R)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf-cr" +regex = '(?R)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat14" +regex = '(?m)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf" +regex = '(?Rm)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf-cr" +regex = '(?Rm)$+' +haystack = "\raa\r" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-no-multi" +regex = '$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf" +regex = '(?R)$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf-cr" +regex = '(?R)$+' +haystack = "\raa\r" +matches = [[4, 4]] + +[[test]] +name = "repeat15" +regex = '(?m)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf" 
+regex = '(?Rm)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf-cr" +regex = '(?Rm)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-no-multi" +regex = '(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf" +regex = '(?R)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf-cr" +regex = '(?R)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat16" +regex = '(?m)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf" +regex = '(?Rm)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf-cr" +regex = '(?Rm)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-no-multi" +regex = '(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf" +regex = '(?R)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf-cr" +regex = '(?R)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat17" +regex = '(?m)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf" +regex = '(?Rm)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf-cr" +regex = '(?Rm)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-no-multi" +regex = '(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat17-no-multi-crlf" +regex = '(?R)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = 
"repeat17-no-multi-crlf-cr" +regex = '(?R)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat18" +regex = '(?m)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf" +regex = '(?Rm)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf-cr" +regex = '(?Rm)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-no-multi" +regex = '(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf" +regex = '(?R)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf-cr" +regex = '(?R)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "match-line-100" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-200" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false diff --git a/third_party/rust/regex/testdata/no-unicode.toml b/third_party/rust/regex/testdata/no-unicode.toml new file mode 100644 index 0000000000..0ddac4c96d --- /dev/null +++ 
b/third_party/rust/regex/testdata/no-unicode.toml @@ -0,0 +1,222 @@ +[[test]] +name = "invalid-utf8-literal1" +regex = '\xFF' +haystack = '\xFF' +matches = [[0, 1]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "mixed" +regex = '(?:.+)(?-u)(?:.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [[0, 5]] +utf8 = false +unescape = true + + +[[test]] +name = "case1" +regex = "a" +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false + +[[test]] +name = "case2" +regex = "[a-z]+" +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false + +[[test]] +name = "case3" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true + +[[test]] +name = "case4" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false + + +[[test]] +name = "negate1" +regex = "[^a]" +haystack = "δ" +matches = [[0, 2]] + +[[test]] +name = "negate2" +regex = "[^a]" +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + + +[[test]] +name = "dotstar-prefix1" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +unicode = false +utf8 = false +unescape = true + +[[test]] +name = "dotstar-prefix2" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +utf8 = false +unescape = true + + +[[test]] +name = "null-bytes1" +regex = '[^\x00]+\x00' +haystack = 'foo\x00' +matches = [[0, 4]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "word-ascii" +regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] + +[[test]] +name = "decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false + +[[test]] +name = "decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "space-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false + +[[test]] 
+name = "space-unicode" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 4]] + + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8. +name = "iter2-bytes" +regex = '' +haystack = 'b\xFFr' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unescape = true +utf8 = false + + +# These test that unanchored prefixes can munch through invalid UTF-8 even when +# utf8 is enabled. +# +# This test actually reflects an interesting simplification in how the Thompson +# NFA is constructed. It used to be that the NFA could be built with an +# unanchored prefix that either matched any byte or _only_ matched valid UTF-8. +# But the latter turns out to be pretty precarious when it comes to prefilters, +# because if you search a haystack that contains invalid UTF-8 but have an +# unanchored prefix that requires UTF-8, then prefilters are no longer a valid +# optimization because you actually have to check that everything is valid +# UTF-8. +# +# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in +# order to guarantee that we only match at valid UTF-8 boundaries. But this +# isn't actually true! There are really only two things to consider here: +# +# 1) Will a regex match split an encoded codepoint? No. Because by construction, +# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming +# all of the UTF-8 modes are enabled). +# +# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no, +# assuming all of the UTF-8 modes are enabled. 
+[[test]] +name = "unanchored-invalid-utf8-match-100" +regex = '[a-z]' +haystack = '\xFFa\xFF' +matches = [[1, 2]] +unescape = true +utf8 = false + +# This test shows that we can still prevent a match from occurring by requiring +# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the +# behavior of not munching through invalid UTF-8 anywhere is needed, then it +# can be achieved thusly. +[[test]] +name = "unanchored-invalid-utf8-nomatch" +regex = '^(?s:.)*?[a-z]' +haystack = '\xFFa\xFF' +matches = [] +unescape = true +utf8 = false + +# This is a tricky test that makes sure we don't accidentally do a kind of +# unanchored search when we've requested that a regex engine not report +# empty matches that split a codepoint. This test caught a regression during +# development where the code for skipping over bad empty matches would do so +# even if the search should have been anchored. This is ultimately what led to +# making 'anchored' an 'Input' option, so that it was always clear what kind +# of search was being performed. (Before that, whether a search was anchored +# or not was a config knob on the regex engine.) This did wind up making DFAs +# a little more complex to configure (with their 'StartKind' knob), but it +# generally smoothed out everything else. +# +# Great example of a test whose failure motivated a sweeping API refactoring. +[[test]] +name = "anchored-iter-empty-utf8" +regex = '' +haystack = 'a☃z' +matches = [[0, 0], [1, 1]] +unescape = false +utf8 = true +anchored = true diff --git a/third_party/rust/regex/testdata/overlapping.toml b/third_party/rust/regex/testdata/overlapping.toml new file mode 100644 index 0000000000..7bcd45a2f7 --- /dev/null +++ b/third_party/rust/regex/testdata/overlapping.toml @@ -0,0 +1,280 @@ +# NOTE: We define a number of tests where the *match* kind is 'leftmost-first' +# but the *search* kind is 'overlapping'. This is a somewhat nonsensical +# combination and can produce odd results. 
Nevertheless, those results should +# be consistent so we test them here. (At the time of writing this note, I +# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result +# in unspecified behavior.) + +# This demonstrates how a full overlapping search is obviously quadratic. This +# regex reports a match for every substring in the haystack. +[[test]] +name = "ungreedy-dotstar-matches-everything-100" +regex = [".*?"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "greedy-dotstar-matches-everything-100" +regex = [".*"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-110" +regex = '☃+' +haystack = "☃☃☃" +matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-110" +regex = '☃+' +haystack = "☃☃☃" +matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], 
[0, 9]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-200" +regex = '(abc)+' +haystack = "zzabcabczzabc" +matches = [ + [[2, 5], [2, 5]], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-200" +regex = '(abc)+' +haystack = "zzabcabczzabc" +matches = [ + [[2, 5], [2, 5]], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], + [[4, 4], []], + [[5, 5], []], + [[2, 5], [2, 5]], + [[6, 6], []], + [[7, 7], []], + [[8, 8], []], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[9, 9], []], + [[10, 10], []], + [[11, 11], []], + [[12, 12], []], + [[13, 13], []], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-leftmost-first" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-all" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], + 
[[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "alt-leftmost-first-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "alt-all-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-000" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-000" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-010" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "iter1-incomplete-utf8" +regex = '' +haystack = '\xE2\x98' # incomplete snowman +matches = [[0, 0], [1, 1], [2, 2]] +match-kind = "all" +search-kind = "overlapping" +unescape = true +utf8 = false + +[[test]] +name = "scratch" +regex = ['sam', 'samwise'] +haystack = "samwise" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "overlapping" diff --git a/third_party/rust/regex/testdata/regex-lite.toml b/third_party/rust/regex/testdata/regex-lite.toml new file mode 100644 index 0000000000..1769d803d4 --- /dev/null +++ 
b/third_party/rust/regex/testdata/regex-lite.toml @@ -0,0 +1,98 @@ +# These tests are specifically written to test the regex-lite crate. While it +# largely has the same semantics as the regex crate, there are some differences +# around Unicode support and UTF-8. +# +# To be clear, regex-lite supports far fewer patterns because of its lack of +# Unicode support, nested character classes and character class set operations. +# What we're talking about here are the patterns that both crates support but +# where the semantics might differ. + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-decimal" +regex = '\d' +haystack = '᠕' +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-space" +regex = '\s' +haystack = "\u2000" +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-word" +regex = '\w' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for word boundary assertions. +[[test]] +name = "word-boundary" +regex = '\b' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for negated word boundary +# assertions. But note that it should still not split codepoints! +[[test]] +name = "word-boundary-negated" +regex = '\B' +haystack = 'δ' +matches = [[0, 0], [2, 2]] +unicode = true + +# While we're here, the empty regex---which matches at every +# position---shouldn't split a codepoint either. +[[test]] +name = "empty-no-split-codepoint" +regex = '' +haystack = '💩' +matches = [[0, 0], [4, 4]] +unicode = true + +# A dot always matches a full codepoint. +[[test]] +name = "dot-always-matches-codepoint" +regex = '.' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# A negated character class also always matches a full codepoint. 
+[[test]] +name = "negated-class-always-matches-codepoint" +regex = '[^a]' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# regex-lite only supports ASCII-aware case insensitive matching. +[[test]] +name = "case-insensitive-is-ascii-only" +regex = 's' +haystack = 'ſ' +matches = [] +unicode = true +case-insensitive = true + +# Negated word boundaries shouldn't split a codepoint, but they will match +# between invalid UTF-8. +# +# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in +# regex-lite. This can't happen in the main API because &str can't contain +# invalid UTF-8. +# [[test]] +# name = "word-boundary-invalid-utf8" +# regex = '\B' +# haystack = '\xFF\xFF\xFF\xFF' +# unescape = true +# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +# unicode = true +# utf8 = false diff --git a/third_party/rust/regex/testdata/regression.toml b/third_party/rust/regex/testdata/regression.toml new file mode 100644 index 0000000000..03b15d6d54 --- /dev/null +++ b/third_party/rust/regex/testdata/regression.toml @@ -0,0 +1,784 @@ +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-100" +regex = '(*)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-200" +regex = '(?:?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-300" +regex = '(?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-400" +regex = '*' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-100" +regex = '(?i-u)[a_]+' +haystack = "A_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-200" +regex = 
'(?i-u)[A_]+' +haystack = "a_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/76 +[[test]] +name = "unicode-case-lower-nocase-flag" +regex = '(?i)\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-100" +regex = '(?i)[^x]' +haystack = "x" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-200" +regex = '(?i)[^x]' +haystack = "X" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/101 +[[test]] +name = "ascii-word-underscore" +regex = '[[:word:]]' +haystack = "_" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/129 +[[test]] +name = "captures-repeat" +regex = '([a-f]){2}(?P<foo>[x-z])' +haystack = "abx" +matches = [ + [[0, 3], [1, 2], [2, 3]], +] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-100" +regex = 'ab?|$' +haystack = "az" +matches = [[0, 1], [2, 2]] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-200" +regex = '^(?:.*?)(?:\n|\r\n?|$)' +haystack = "ab\rcd" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/169 +[[test]] +name = "leftmost-first-prefix" +regex = 'z*azb' +haystack = "azb" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/191 +[[test]] +name = "many-alternates" +regex = '1|2|3|4|5|6|7|8|9|10|int' +haystack = "int" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-100" +regex = '\b' +haystack = "Should this (work?)" +matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-200" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = 
"word-boundary-ascii-no-capture" +regex = '\B' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = "word-boundary-ascii-capture" +regex = '(?:\B)' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/268 +[[test]] +name = "partial-anchor" +regex = '^a|b' +haystack = "ba" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "endl-or-word-boundary" +regex = '(?m:$)|(?-u:\b)' +haystack = "\U0006084E" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "zero-or-end" +regex = '(?i-u:\x00)|$' +haystack = "\U000E682F" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "y-or-endl" +regex = '(?i-u:y)|(?m:$)' +haystack = "\U000B4331" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-start-x" +regex = '(?u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-ascii-start-x" +regex = '(?-u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "end-not-word-boundary" +regex = '$\B' +haystack = "\U0005C124\U000B576C" +matches = [[8, 8]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-begin" +regex = '^a|z' +haystack = "yyyyya" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-end" +regex = 'a$|z' +haystack = "ayyyyy" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/289 +[[test]] +name = "lits-unambiguous-100" +regex = '(?:ABC|CDA|BC)X' +haystack = "CDAX" 
+matches = [[0, 4]] + +# See: https://github.com/rust-lang/regex/issues/291 +[[test]] +name = "lits-unambiguous-200" +regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$' +haystack = "CIMG2341" +matches = [ + [[0, 8], [0, 4], [], [0, 4], [4, 8]], +] + +# See: https://github.com/rust-lang/regex/issues/303 +# +# 2022-09-19: This has now been "properly" fixed in that empty character +# classes are fully supported as something that can never match. This test +# used to be marked as 'compiles = false', but now it works. +[[test]] +name = "negated-full-byte-range" +regex = '[^\x00-\xFF]' +haystack = "" +matches = [] +compiles = true +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-prefix" +regex = 'a^{2}' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-suffix" +regex = '${2}a' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-100" +regex = 'a(b*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-200" +regex = 'a(bc*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-300" +regex = '(aa$)?' +haystack = "aaz" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] + +# Plucked from "Why aren’t regular expressions a lingua franca? 
an empirical +# study on the re-use and portability of regular expressions", The ACM Joint +# European Software Engineering Conference and Symposium on the Foundations of +# Software Engineering (ESEC/FSE), 2019. +# +# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909 +[[test]] +name = "captures-after-dfa-premature-end-400" +regex = '(a)\d*\.?\d+\b' +haystack = "a0.0c" +matches = [ + [[0, 2], [0, 1]], +] + +# See: https://github.com/rust-lang/regex/issues/437 +[[test]] +name = "literal-panic" +regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+' +haystack = "test" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/527 +[[test]] +name = "empty-flag-expr" +regex = '(?:(?:(?x)))' +haystack = "" +matches = [[0, 0]] + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab" +#regex = '[[:blank:]]' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = false +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab-inverted" +#regex = '^[[:^blank:]]+$' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = true +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/555 +[[test]] +name = "invalid-repetition" +regex = '(?m){1,1}' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/640 +[[test]] +name = "flags-are-unset" +regex = '(?:(?i)foo)|Bar' +haystack = "foo Foo bar Bar" +matches = [[0, 3], [4, 7], [12, 15]] + +# Note that 'Ј' is not 'j', but cyrillic Je +# https://en.wikipedia.org/wiki/Je_(Cyrillic) +# +# See: https://github.com/rust-lang/regex/issues/659 +[[test]] +name = "empty-group-with-unicode" +regex = '(?:)Ј01' +haystack = 'zЈ01' +matches = [[1, 5]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = 
"word-boundary-weird" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-ascii" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-minimal-ascii" +regex = '\b..\b' +haystack = "az,,b" +matches = [[0, 2], [2, 4]] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-100" +regex = '[0-4][0-4][0-4]000' +haystack = "153.230000" +matches = [[4, 10]] + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-200" +regex = '[0-9][0-9][0-9]000' +haystack = "153.230000\n" +matches = [[4, 10]] + +# This is a tricky case for the reverse suffix optimization, because it +# finds the 'foobar' match but the reverse scan must fail to find a match by +# correctly dealing with the word boundary following the 'foobar' literal when +# computing the start state. +# +# This test exists because I tried to break the following assumption that +# is currently in the code: that if a suffix is found and the reverse scan +# succeeds, then it's guaranteed that there is an overall match. Namely, the +# 'is_match' routine does *not* do another forward scan in this case because of +# this assumption. 
+[[test]] +name = "reverse-suffix-300" +regex = '\w+foobar\b' +haystack = "xyzfoobarZ" +matches = [] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops" +regex = '\bs(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops-ascii" +regex = '(?-u:\b)s(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/850 +[[test]] +name = "adjacent-line-boundary-100" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "line1\nline2" +matches = [[0, 5], [6, 11]] + +# Continued. +[[test]] +name = "adjacent-line-boundary-200" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "A\nB" +matches = [[0, 1], [2, 3]] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-100" +regex = '^a[[:^space:]]' +haystack = "a " +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-200" +regex = '^a[[:^space:]]' +haystack = "foo boo a" +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-300" +regex = '^-[a-z]' +haystack = "r-f" +matches = [] + +# Tests that a possible Aho-Corasick optimization works correctly. It only +# kicks in when we have a lot of literals. By "works correctly," we mean that +# leftmost-first match semantics are properly respected. That is, samwise +# should match, not sam. +# +# There is no issue for this bug. +[[test]] +name = "aho-corasick-100" +regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z' +haystack = "samwise" +matches = [[0, 7]] + +# See: https://github.com/rust-lang/regex/issues/921 +[[test]] +name = "interior-anchor-capture" +regex = '(a$)b$' +haystack = 'ab' +matches = [] + +# I found this bug in the course of adding some of the regexes that Ruff uses +# to rebar. 
It turns out that the lazy DFA was finding a match that was being +# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack. +# +# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52 +[[test]] +name = "ruff-whitespace-around-keywords" +regex = '^(a|ab)$' +haystack = "ab" +anchored = true +unicode = false +utf8 = true +matches = [[[0, 2], [0, 2]]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-0" +regex = '(?:(?-u:\b)|(?u:h))+' +haystack = "h" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-1" +regex = '(?u:\B)' +haystack = "鋸" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-2" +regex = '(?:(?u:\b)|(?s-u:.))+' +haystack = "oB" +unicode = true +utf8 = false +matches = [[0, 0], [1, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3-utf8" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = true +matches = [[0, 0], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-4" +regex = '(?m:$)(?m:^)(?su:.)' +haystack = "\n‣" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-5" +regex = '(?m:$)^(?m:^)' +haystack = "\n" +unicode = true +utf8 = false +matches = [[0, 0]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-6" +regex = '(?P<kp>(?iu:do)(?m:$))*' +haystack = "dodo" +unicode = true +utf8 = false +matches = [ + 
[[0, 0], []], + [[1, 1], []], + [[2, 4], [2, 4]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-7" +regex = '(?u:\B)' +haystack = "䡁" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-8" +regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-9" +regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)' +haystack = "\n\n" +unicode = true +utf8 = false +matches = [ + [[1, 2], [1, 2]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-10" +regex = '(?m:$)(?m:$)^(?su:.)' +haystack = "\n\u0081¨\u200a" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-11" +regex = '(?-u:\B)(?m:^)' +haystack = "0\n" +unicode = true +utf8 = false +matches = [[2, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-12" +regex = '(?:(?u:\b)|(?-u:.))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/969 +[[test]] +name = "i969" +regex = 'c.*d\z' +haystack = "ababcd" +bounds = [4, 6] +search-kind = "earliest" +matches = [[4, 6]] + +# I found this during the regex-automata migration. This is the fowler basic +# 154 test, but without anchored = true and without a match limit. +# +# This test caught a subtle bug in the hybrid reverse DFA search, where it +# would skip over the termination condition if it entered a start state. This +# was a double bug. Firstly, the reverse DFA shouldn't have had start states +# specialized in the first place, and thus it shouldn't have been possible to detect +# that the DFA had entered a start state. The second bug was that the start +# state handling was incorrect by jumping over the termination condition. 
+[[test]] +name = "fowler-basic154-unanchored" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] + +# From: https://github.com/rust-lang/regex/issues/981 +# +# This was never really a problem in the new architecture because the +# regex-automata engines are far more principled about how they deal with +# look-around. (This was one of the many reasons I wanted to re-work the +# original regex crate engines.) +[[test]] +name = "word-boundary-interact-poorly-with-literal-optimizations" +regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))' +haystack = 'ubi-Darwin-x86_64.tar.gz' +matches = [] + +# This was found during fuzz testing of regex. It provoked a panic in the meta +# engine as a result of the reverse suffix optimization. Namely, it hit a case +# where a suffix match was found, a corresponding reverse match was found, but +# the forward search turned up no match. The forward search should always match +# if the suffix and reverse search match. +# +# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy +# and fully compiled) engines. It was caused by a mishandling of the collection +# of NFA state IDs in the generic determinization code (which is why both types +# of DFA were impacted). Namely, when a fail state was encountered (that's the +# `[^\s\S]` in the pattern below), then it would just stop collecting states. +# But that's not correct since a later state could lead to a match. +[[test]] +name = "impossible-branch" +regex = '.*[^\s\S]A|B' +haystack = "B" +matches = [[0, 1]] + +# This was found during fuzz testing in regex-lite. The regex crate never +# suffered from this bug, but it causes regex-lite to incorrectly compile +# captures. +[[test]] +name = "captures-wrong-order" +regex = '(a){0}(a)' +haystack = 'a' +matches = [[[0, 1], [], [0, 1]]] + +# This tests a bug in how quit states are handled in the DFA. 
At some point +# during development, the DFAs were tweaked slightly such that if they hit +# a quit state (which means, they hit a byte that the caller configured should +# stop the search), then it might not return an error necessarily. Namely, if a +# match had already been found, then it would be returned instead of an error. +# +# But this is actually wrong! Why? Because even though a match had been found, +# it wouldn't be fully correct to return it once a quit state has been seen +# because you can't determine whether the match offset returned is the correct +# greedy/leftmost-first match. Since you can't complete the search as requested +# by the caller, the DFA should just stop and return an error. +# +# Interestingly, this does seem to produce an unavoidable difference between +# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs. +# The former will stop immediately once a match is known to occur and return +# 'Ok(true)', whereas the latter could find the match but quit with an +# 'Err(..)' first. +# +# Thankfully, I believe this inconsistency between 'is_match()' and 'find()' +# cannot be observed in the higher level meta regex API because it specifically +# will try another engine that won't fail in the case of a DFA failing. +# +# This regression happened in the regex crate rewrite, but before anything got +# released. +[[test]] +name = "negated-unicode-word-boundary-dfa-fail" +regex = '\B.*' +haystack = "!\u02D7" +matches = [[0, 3]] + +# This failure was found in the *old* regex crate (prior to regex 1.9), but +# I didn't investigate why. My best guess is that it's a literal optimization +# bug. It didn't occur in the rewrite. +[[test]] +name = "missed-match" +regex = 'e..+e.ee>' +haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>' +matches = [[1, 26]] + +# This test came from the 'ignore' crate and tripped a bug in how accelerated +# DFA states were handled in an overlapping search. 
+[[test]] +name = "regex-to-glob" +regex = ['(?-u)^path1/[^/]*$'] +haystack = "path1/foo" +matches = [[0, 9]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-plus-shorter-than-expected" +regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex +# to demonstrate the extent of the rot. Sigh. +# +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-short" +regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# This regression test was found via the RegexSet APIs. It triggered a +# particular code path where a regex was compiled with 'All' match semantics +# (to support overlapping search), but got funneled down into a standard +# leftmost search when calling 'is_match'. This is fine on its own, but the +# leftmost search will use a prefilter and that's where this went awry. +# +# Namely, since 'All' semantics were used, the aho-corasick prefilter was +# incorrectly compiled with 'Standard' semantics. This was wrong because +# 'Standard' immediately attempts to report a match at every position, even if +# that would mean reporting a match past the leftmost match before reporting +# the leftmost match. This breaks the prefilter contract of never having false +# negatives and leads overall to the engine not finding a match. 
+# +# See: https://github.com/rust-lang/regex/issues/1070 +[[test]] +name = "prefilter-with-aho-corasick-standard-semantics" +regex = '(?m)^ *v [0-9]' +haystack = 'v 0' +matches = [ + { id = 0, spans = [[0, 3]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = true +utf8 = true diff --git a/third_party/rust/regex/testdata/set.toml b/third_party/rust/regex/testdata/set.toml new file mode 100644 index 0000000000..049e8a89d1 --- /dev/null +++ b/third_party/rust/regex/testdata/set.toml @@ -0,0 +1,641 @@ +# Basic multi-regex tests. + +[[test]] +name = "basic10" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic10-leftmost-first" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic20" +regex = ["a", "a"] +haystack = "ba" +matches = [ + { id = 0, span = [1, 2] }, + { id = 1, span = [1, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic30" +regex = ["a", "b"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic40" +regex = ["a", "b"] +haystack = "b" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic50" +regex = ["a|b", "b|a"] +haystack = "b" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60-leftmost-first" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = 
"leftmost" + +[[test]] +name = "basic61" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, + { id = 0, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic61-leftmost-first" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic70" +regex = ["abcd", "bcd", "cd", "d"] +haystack = "abcd" +matches = [ + { id = 0, span = [0, 4] }, + { id = 1, span = [1, 4] }, + { id = 2, span = [2, 4] }, + { id = 3, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic71" +regex = ["bcd", "cd", "d", "abcd"] +haystack = "abcd" +matches = [ + { id = 3, span = [0, 4] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic80" +regex = ["^foo", "bar$"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic81" +regex = ["^foo", "bar$"] +haystack = "foo bar" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [4, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic82" +regex = ["^foo", "bar$"] +haystack = "bar" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic90" +regex = ["[a-z]+$", "foo"] +haystack = "01234 foo" +matches = [ + { id = 0, span = [8, 9] }, + { id = 0, span = [7, 9] }, + { id = 0, span = [6, 9] }, + { id = 1, span = [6, 9] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic91" +regex = ["[a-z]+$", "foo"] +haystack = "foo 01234" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic100" +regex = [".*?", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { 
id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic101" +regex = [".*", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic102" +regex = [".*", "a"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic110" +regex = ['\ba\b'] +haystack = "hello a bye" +matches = [ + { id = 0, span = [6, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic111" +regex = ['\ba\b', '\be\b'] +haystack = "hello a bye e" +matches = [ + { id = 0, span = [6, 7] }, + { id = 1, span = [12, 13] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic120" +regex = ["a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] 
+name = "basic121" +regex = [".*a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic122" +regex = [".*a", "β"] +haystack = "β" +matches = [ + { id = 1, span = [0, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic130" +regex = ["ab", "b"] +haystack = "ba" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +# These test cases where one of the regexes matches the empty string. + +[[test]] +name = "empty10" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 1, span = [0, 1] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty10-leftmost-first" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty11" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [0, 1] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty11-leftmost-first" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty20" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty20-leftmost-first" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, 
span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty21" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty21-leftmost-first" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty22" +regex = ["(?:)", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty23" +regex = ["b", "(?:)"] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30-leftmost-first" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty31" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = 
"overlapping" + +[[test]] +name = "empty31-leftmost-first" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty40" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty40-leftmost-first" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +# These test cases where there are no matches. + +[[test]] +name = "nomatch10" +regex = ["a", "a"] +haystack = "b" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch20" +regex = ["^foo", "bar$"] +haystack = "bar foo" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch30" +regex = [] +haystack = "a" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch40" +regex = ["^rooted$", '\.log$'] +haystack = "notrooted" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +# These test multi-regex searches with capture groups. +# +# NOTE: I wrote these tests in the course of developing a first class API for +# overlapping capturing group matches, but ultimately removed that API because +# the semantics for overlapping matches aren't totally clear. However, I've +# left the tests because I believe the semantics for these patterns are clear +# and because we can still test our "which patterns matched" APIs with them. 
+ +[[test]] +name = "caps-010" +regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-020" +regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [1, 5], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-030" +regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-110" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-120" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-121" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen Foo Bar" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 0, spans = [[18, 25], [18, 21], [22, 25]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false diff --git a/third_party/rust/regex/testdata/substring.toml b/third_party/rust/regex/testdata/substring.toml new file mode 100644 index 0000000000..69595ce851 --- /dev/null +++ 
b/third_party/rust/regex/testdata/substring.toml @@ -0,0 +1,36 @@ +# These tests check that regex engines perform as expected when the search is +# instructed to only search a substring of a haystack instead of the entire +# haystack. This tends to exercise interesting edge cases that are otherwise +# difficult to provoke. (But not necessarily impossible. Regex search iterators +# for example, make use of the "search just a substring" APIs by changing the +# starting position of a search to the end position of the previous match.) + +[[test]] +name = "unicode-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [] + +[[test]] +name = "unicode-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [] + +[[test]] +name = "ascii-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [[2, 5]] +unicode = false + +[[test]] +name = "ascii-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [[0, 3]] +unicode = false diff --git a/third_party/rust/regex/testdata/unicode.toml b/third_party/rust/regex/testdata/unicode.toml new file mode 100644 index 0000000000..f4ac76bae6 --- /dev/null +++ b/third_party/rust/regex/testdata/unicode.toml @@ -0,0 +1,517 @@ +# Basic Unicode literal support. +[[test]] +name = "literal1" +regex = '☃' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal2" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal3" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] +case-insensitive = true + +[[test]] +name = "literal4" +regex = 'Δ' +haystack = "δ" +matches = [[0, 2]] +case-insensitive = true + +# Unicode word boundaries. 
+[[test]] +name = "wb-100" +regex = '\d\b' +haystack = "6δ" +matches = [] + +[[test]] +name = "wb-200" +regex = '\d\b' +haystack = "6 " +matches = [[0, 1]] + +[[test]] +name = "wb-300" +regex = '\d\B' +haystack = "6δ" +matches = [[0, 1]] + +[[test]] +name = "wb-400" +regex = '\d\B' +haystack = "6 " +matches = [] + +# Unicode character class support. +[[test]] +name = "class1" +regex = '[☃Ⅰ]+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "class2" +regex = '\pN' +haystack = "Ⅰ" +matches = [[0, 3]] + +[[test]] +name = "class3" +regex = '\pN+' +haystack = "Ⅰ1Ⅱ2" +matches = [[0, 8]] + +[[test]] +name = "class4" +regex = '\PN+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class5" +regex = '[\PN]+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class6" +regex = '[^\PN]+' +haystack = "abⅠ" +matches = [[2, 5]] + +[[test]] +name = "class7" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 8]] + +[[test]] +name = "class8" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] +case-insensitive = true + +[[test]] +name = "class9" +regex = '\pL+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +[[test]] +name = "class10" +regex = '\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[8, 10]] + +# Unicode aware "Perl" character classes. 
+[[test]] +name = "perl1" +regex = '\w+' +haystack = "dδd" +matches = [[0, 4]] + +[[test]] +name = "perl2" +regex = '\w+' +haystack = "⥡" +matches = [] + +[[test]] +name = "perl3" +regex = '\W+' +haystack = "⥡" +matches = [[0, 3]] + +[[test]] +name = "perl4" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "perl5" +regex = '\d+' +haystack = "Ⅱ" +matches = [] + +[[test]] +name = "perl6" +regex = '\D+' +haystack = "Ⅱ" +matches = [[0, 3]] + +[[test]] +name = "perl7" +regex = '\s+' +haystack = " " +matches = [[0, 3]] + +[[test]] +name = "perl8" +regex = '\s+' +haystack = "☃" +matches = [] + +[[test]] +name = "perl9" +regex = '\S+' +haystack = "☃" +matches = [[0, 3]] + +# Specific tests for Unicode general category classes. +[[test]] +name = "class-gencat1" +regex = '\p{Cased_Letter}' +haystack = "A" +matches = [[0, 3]] + +[[test]] +name = "class-gencat2" +regex = '\p{Close_Punctuation}' +haystack = "❯" +matches = [[0, 3]] + +[[test]] +name = "class-gencat3" +regex = '\p{Connector_Punctuation}' +haystack = "⁀" +matches = [[0, 3]] + +[[test]] +name = "class-gencat4" +regex = '\p{Control}' +haystack = "\u009F" +matches = [[0, 2]] + +[[test]] +name = "class-gencat5" +regex = '\p{Currency_Symbol}' +haystack = "£" +matches = [[0, 3]] + +[[test]] +name = "class-gencat6" +regex = '\p{Dash_Punctuation}' +haystack = "〰" +matches = [[0, 3]] + +[[test]] +name = "class-gencat7" +regex = '\p{Decimal_Number}' +haystack = "𑓙" +matches = [[0, 4]] + +[[test]] +name = "class-gencat8" +regex = '\p{Enclosing_Mark}' +haystack = "\uA672" +matches = [[0, 3]] + +[[test]] +name = "class-gencat9" +regex = '\p{Final_Punctuation}' +haystack = "⸡" +matches = [[0, 3]] + +[[test]] +name = "class-gencat10" +regex = '\p{Format}' +haystack = "\U000E007F" +matches = [[0, 4]] + +[[test]] +name = "class-gencat11" +regex = '\p{Initial_Punctuation}' +haystack = "⸜" +matches = [[0, 3]] + +[[test]] +name = "class-gencat12" +regex = '\p{Letter}' +haystack = "Έ" +matches = [[0, 2]] + 
+[[test]] +name = "class-gencat13" +regex = '\p{Letter_Number}' +haystack = "ↂ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat14" +regex = '\p{Line_Separator}' +haystack = "\u2028" +matches = [[0, 3]] + +[[test]] +name = "class-gencat15" +regex = '\p{Lowercase_Letter}' +haystack = "ϛ" +matches = [[0, 2]] + +[[test]] +name = "class-gencat16" +regex = '\p{Mark}' +haystack = "\U000E01EF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat17" +regex = '\p{Math}' +haystack = "⋿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat18" +regex = '\p{Modifier_Letter}' +haystack = "𖭃" +matches = [[0, 4]] + +[[test]] +name = "class-gencat19" +regex = '\p{Modifier_Symbol}' +haystack = "🏿" +matches = [[0, 4]] + +[[test]] +name = "class-gencat20" +regex = '\p{Nonspacing_Mark}' +haystack = "\U0001E94A" +matches = [[0, 4]] + +[[test]] +name = "class-gencat21" +regex = '\p{Number}' +haystack = "⓿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat22" +regex = '\p{Open_Punctuation}' +haystack = "⦅" +matches = [[0, 3]] + +[[test]] +name = "class-gencat23" +regex = '\p{Other}' +haystack = "\u0BC9" +matches = [[0, 3]] + +[[test]] +name = "class-gencat24" +regex = '\p{Other_Letter}' +haystack = "ꓷ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat25" +regex = '\p{Other_Number}' +haystack = "㉏" +matches = [[0, 3]] + +[[test]] +name = "class-gencat26" +regex = '\p{Other_Punctuation}' +haystack = "𞥞" +matches = [[0, 4]] + +[[test]] +name = "class-gencat27" +regex = '\p{Other_Symbol}' +haystack = "⅌" +matches = [[0, 3]] + +[[test]] +name = "class-gencat28" +regex = '\p{Paragraph_Separator}' +haystack = "\u2029" +matches = [[0, 3]] + +[[test]] +name = "class-gencat29" +regex = '\p{Private_Use}' +haystack = "\U0010FFFD" +matches = [[0, 4]] + +[[test]] +name = "class-gencat30" +regex = '\p{Punctuation}' +haystack = "𑁍" +matches = [[0, 4]] + +[[test]] +name = "class-gencat31" +regex = '\p{Separator}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-gencat32" 
+regex = '\p{Space_Separator}' +haystack = "\u205F" +matches = [[0, 3]] + +[[test]] +name = "class-gencat33" +regex = '\p{Spacing_Mark}' +haystack = "\U00016F7E" +matches = [[0, 4]] + +[[test]] +name = "class-gencat34" +regex = '\p{Symbol}' +haystack = "⯈" +matches = [[0, 3]] + +[[test]] +name = "class-gencat35" +regex = '\p{Titlecase_Letter}' +haystack = "ῼ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat36" +regex = '\p{Unassigned}' +haystack = "\U0010FFFF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat37" +regex = '\p{Uppercase_Letter}' +haystack = "Ꝋ" +matches = [[0, 3]] + + +# Tests for Unicode emoji properties. +[[test]] +name = "class-emoji1" +regex = '\p{Emoji}' +haystack = "\u23E9" +matches = [[0, 3]] + +[[test]] +name = "class-emoji2" +regex = '\p{emoji}' +haystack = "\U0001F21A" +matches = [[0, 4]] + +[[test]] +name = "class-emoji3" +regex = '\p{extendedpictographic}' +haystack = "\U0001FA6E" +matches = [[0, 4]] + +[[test]] +name = "class-emoji4" +regex = '\p{extendedpictographic}' +haystack = "\U0001FFFD" +matches = [[0, 4]] + + +# Tests for Unicode grapheme cluster properties. +[[test]] +name = "class-gcb1" +regex = '\p{grapheme_cluster_break=prepend}' +haystack = "\U00011D46" +matches = [[0, 4]] + +[[test]] +name = "class-gcb2" +regex = '\p{gcb=regional_indicator}' +haystack = "\U0001F1E6" +matches = [[0, 4]] + +[[test]] +name = "class-gcb3" +regex = '\p{gcb=ri}' +haystack = "\U0001F1E7" +matches = [[0, 4]] + +[[test]] +name = "class-gcb4" +regex = '\p{regionalindicator}' +haystack = "\U0001F1FF" +matches = [[0, 4]] + +[[test]] +name = "class-gcb5" +regex = '\p{gcb=lvt}' +haystack = "\uC989" +matches = [[0, 3]] + +[[test]] +name = "class-gcb6" +regex = '\p{gcb=zwj}' +haystack = "\u200D" +matches = [[0, 3]] + +# Tests for Unicode word boundary properties. 
+[[test]] +name = "class-word-break1" +regex = '\p{word_break=Hebrew_Letter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break2" +regex = '\p{wb=hebrewletter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break3" +regex = '\p{wb=ExtendNumLet}' +haystack = "\uFF3F" +matches = [[0, 3]] + +[[test]] +name = "class-word-break4" +regex = '\p{wb=WSegSpace}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-word-break5" +regex = '\p{wb=numeric}' +haystack = "\U0001E950" +matches = [[0, 4]] + +# Tests for Unicode sentence boundary properties. +[[test]] +name = "class-sentence-break1" +regex = '\p{sentence_break=Lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break2" +regex = '\p{sb=lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break3" +regex = '\p{sb=Close}' +haystack = "\uFF60" +matches = [[0, 3]] + +[[test]] +name = "class-sentence-break4" +regex = '\p{sb=Close}' +haystack = "\U0001F677" +matches = [[0, 4]] + +[[test]] +name = "class-sentence-break5" +regex = '\p{sb=SContinue}' +haystack = "\uFF64" +matches = [[0, 3]] diff --git a/third_party/rust/regex/testdata/utf8.toml b/third_party/rust/regex/testdata/utf8.toml new file mode 100644 index 0000000000..39e284b382 --- /dev/null +++ b/third_party/rust/regex/testdata/utf8.toml @@ -0,0 +1,399 @@ +# These test the UTF-8 modes expose by regex-automata. Namely, when utf8 is +# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior +# is unspecified.) This also corresponds to building the regex engine with the +# following two guarantees: +# +# 1) For any non-empty match reported, its span is guaranteed to correspond to +# valid UTF-8. +# 2) All empty or zero-width matches reported must never split a UTF-8 +# encoded codepoint. If the haystack has invalid UTF-8, then this results in +# unspecified behavior. 
+# +# The (2) is in particular what we focus our testing on since (1) is generally +# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there. +# The thing with (2) is that it can't be described in the HIR, so the regex +# engines have to handle that case. Thus, we test it here. +# +# Note that it is possible to build a regex that has property (1) but not +# (2), and vice versa. This is done by building the HIR with 'utf8=true' but +# building the Thompson NFA with 'utf8=false'. We don't test that here because +# the harness doesn't expose a way to enable or disable UTF-8 mode with that +# granularity. Instead, those combinations are lightly tested via doc examples. +# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it +# because it cannot guarantee that its haystack is valid UTF-8. + +# This tests that an empty regex doesn't split a codepoint. +[[test]] +name = "empty-utf8yes" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex DOES split a codepoint when utf=false. +[[test]] +name = "empty-utf8no" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex doesn't split a codepoint, even if we give +# it bounds entirely within the codepoint. +# +# This is one of the trickier cases and is what motivated the current UTF-8 +# mode design. 
In particular, at one point, this test failed the 'is_match' +# variant of the test but not 'find'. This is because the 'is_match' code path +# is specifically optimized for "was a match found" rather than "where is the +# match." In the former case, you don't really care about the empty-vs-non-empty +# matches, and thus, the codepoint splitting filtering logic wasn't getting +# applied. (In multiple ways across multiple regex engines.) In this way, you +# can wind up with a situation where 'is_match' says "yes," but 'find' says, +# "I didn't find anything." Which is... not great. +# +# I could have decided to say that providing boundaries that themselves split +# a codepoint would have unspecified behavior. But I couldn't quite convince +# myself that such boundaries were the only way to get an inconsistency between +# 'is_match' and 'find'. +# +# Note that I also tried to come up with a test like this that fails without +# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree. +# But I couldn't do it, and I'm tempted to conclude it is impossible. The +# fundamental problem is that you need to simultaneously produce an empty match +# that splits a codepoint while *not* matching before or after the codepoint. +[[test]] +name = "empty-utf8yes-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex splits a codepoint when the bounds are +# entirely within the codepoint. +[[test]] +name = "empty-utf8no-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. 
+[[test]] +name = "empty-utf8no-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search. Since the start position is also a UTF-8 +# boundary, we get a match. +[[test]] +name = "empty-utf8yes-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. It almost doesn't change the +# result, except for the fact that since this is an anchored search and we +# always find all matches, the test harness will keep reporting matches until +# none are found. Because it's anchored, matches will be reported so long as +# they are directly adjacent. Since with UTF-8 mode the next anchored search +# after the match at [0, 0] fails, iteration stops (and doesn't find the last +# match at [4, 4]). +[[test]] +name = "empty-utf8no-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search, but also set bounds. 
The bounds start the +# search in the middle of a codepoint, so there should never be a match. +[[test]] +name = "empty-utf8yes-anchored-bounds" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled, +# matching within a codepoint is allowed. And remember, as in the anchored test +# above with UTF-8 mode disabled, iteration will report all adjacent matches. +# The matches at [0, 0] and [4, 4] are not included because of the bounds of +# the search. +[[test]] +name = "empty-utf8no-anchored-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the string when the bounds +# exclude the first match. +[[test]] +name = "empty-utf8yes-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. 
+[[test]] +name = "empty-utf8yes-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# in between that split the codepoint. +[[test]] +name = "empty-utf8no-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we don't find any matches in an anchored search, even when +# the bounds include a match (at the end). +[[test]] +name = "empty-utf8yes-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# in between that split the codepoint. Even though this is an anchored search, +# since the matches are adjacent, we find all of them. +[[test]] +name = "empty-utf8no-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search.
(This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the haystack in UTF-8 mode +# when our bounds only include the empty string at the end of the haystack. +[[test]] +name = "empty-utf8yes-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, but with UTF-8 mode disabled. Results remain the same since +# the only possible match does not split a codepoint. +[[test]] +name = "empty-utf8no-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" diff --git a/third_party/rust/regex/testdata/word-boundary.toml b/third_party/rust/regex/testdata/word-boundary.toml new file mode 100644 index 0000000000..1d86fc9bb3 --- /dev/null +++ b/third_party/rust/regex/testdata/word-boundary.toml @@ -0,0 +1,781 @@ +# Some of these are cribbed from RE2's test suite. + +# These test \b. Below are tests for \B. 
+[[test]] +name = "wb1" +regex = '\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb2" +regex = '\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb3" +regex = '\b' +haystack = "ab" +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "wb4" +regex = '^\b' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb5" +regex = '\b$' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "wb6" +regex = '^\b$' +haystack = "ab" +matches = [] +unicode = false + +[[test]] +name = "wb7" +regex = '\bbar\b' +haystack = "nobar bar foo bar" +matches = [[6, 9], [14, 17]] +unicode = false + +[[test]] +name = "wb8" +regex = 'a\b' +haystack = "faoa x" +matches = [[3, 4]] +unicode = false + +[[test]] +name = "wb9" +regex = '\bbar' +haystack = "bar x" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb10" +regex = '\bbar' +haystack = "foo\nbar x" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb11" +regex = 'bar\b' +haystack = "foobar" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb12" +regex = 'bar\b' +haystack = "foobar\nxxx" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb13" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb14" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb15" +regex = '\b(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb16" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "X" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "wb17" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "XY" +matches = [] +unicode = false + +[[test]] +name = "wb18" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "bar" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb19" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + 
+[[test]] +name = "wb20" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb21" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "ffoo bbar N x" +matches = [[10, 11]] +unicode = false + +[[test]] +name = "wb22" +regex = '\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb23" +regex = '\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb24" +regex = '\b\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb25" +regex = '\b\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb26" +regex = '\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb27" +regex = '\b$' +haystack = "x" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "wb28" +regex = '\b$' +haystack = "y x" +matches = [[3, 3]] +unicode = false + +[[test]] +name = "wb29" +regex = '(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb30" +regex = '^\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb31" +regex = '^\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb32" +regex = '^\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb33" +regex = '^\b$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb34" +regex = '^(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb35" +regex = '^(?-u:\b).(?-u:\b)$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb36" +regex = '^^^^^\b$$$$$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb37" +regex = '^^^^^(?-u:\b).$$$$$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb38" +regex = '^^^^^\b$$$$$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb39" +regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' +haystack = "x" +matches = [[0, 1]] + 
+[[test]] +name = "wb40" +regex = '(?-u:\b).+(?-u:\b)' +haystack = "$$abc$$" +matches = [[2, 5]] + +[[test]] +name = "wb41" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false + +[[test]] +name = "wb42" +regex = '\bfoo\b' +haystack = "zzz foo zzz" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb43" +regex = '\b^' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb44" +regex = '$\b' +haystack = "ab" +matches = [[2, 2]] +unicode = false + + +# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we +# have to disable it for most of these tests. This is because \B can match at +# non-UTF-8 boundaries. +[[test]] +name = "nb1" +regex = '\Bfoo\B' +haystack = "n foo xfoox that" +matches = [[7, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb2" +regex = 'a\B' +haystack = "faoa x" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb3" +regex = '\Bbar' +haystack = "bar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb4" +regex = '\Bbar' +haystack = "foo\nbar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb5" +regex = 'bar\B' +haystack = "foobar" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb6" +regex = 'bar\B' +haystack = "foobar\nxxx" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb7" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foox" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb8" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb9" +regex = '\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb10" +regex = '\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb11" +regex = '\B(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name 
= "nb12" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xXy" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb13" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XY" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb14" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XYZ" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb15" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "abara" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb16" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo_" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb17" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb18" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "foo bar vNX" +matches = [[9, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb19" +regex = '\B(?:fo|foo)\B' +haystack = "xfoo" +matches = [[1, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb20" +regex = '\B(?:foo|fo)\B' +haystack = "xfooo" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb21" +regex = '\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb22" +regex = '\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb23" +regex = '\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb24" +regex = '\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb25" +regex = '\B$' +haystack = "y x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb26" +regex = '\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb27" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb28" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false 
+ +[[test]] +name = "nb29" +regex = '^\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb30" +regex = '^\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb31" +regex = '^\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb32" +regex = '^\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb33" +regex = '^\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb34" +regex = '^\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb35" +regex = '^\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb36" +regex = '^\B.\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb37" +regex = '^^^^^\B$$$$$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb38" +regex = '^^^^^\B.$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb39" +regex = '^^^^^\B$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + + +# unicode1* and unicode2* work for both Unicode and ASCII because all matches +# are reported as byte offsets, and « and » do not correspond to word +# boundaries at either the character or byte level. +[[test]] +name = "unicode1" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] + +[[test]] +name = "unicode1-only-ascii" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode2" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] + +[[test]] +name = "unicode2-only-ascii" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] +unicode = false + +# ASCII word boundaries are completely oblivious to Unicode characters, so +# even though β is a character, an ASCII \b treats it as a word boundary +# when it is adjacent to another ASCII character. 
(The ASCII \b only looks +# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. +[[test]] +name = "unicode3" +regex = '\bx\b' +haystack = 'áxβ' +matches = [] + +[[test]] +name = "unicode3-only-ascii" +regex = '\bx\b' +haystack = 'áxβ' +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode4" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [[2, 3]] + +[[test]] +name = "unicode4-only-ascii" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [] +unicode = false +utf8 = false + +# The same as above, but with \b instead of \B as a sanity check. +[[test]] +name = "unicode5" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "unicode5-only-ascii" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] +unicode = false +utf8 = false + +[[test]] +name = "unicode5-noutf8" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +utf8 = false + +[[test]] +name = "unicode5-noutf8-only-ascii" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +unicode = false +utf8 = false + +# Weird special case to ensure that ASCII \B treats each individual code unit +# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary +# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the +# \w character class.) +[[test]] +name = "unicode5-not" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[5, 5]] + +[[test]] +name = "unicode5-not-only-ascii" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false + +# This gets no matches since \B only matches in the presence of valid UTF-8 +# when Unicode is enabled, even when UTF-8 mode is disabled. 
+[[test]] +name = "unicode5-not-noutf8" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [] +unescape = true +utf8 = false + +# But this DOES get matches since \B in ASCII mode only looks at individual +# bytes. +[[test]] +name = "unicode5-not-noutf8-only-ascii" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unescape = true +unicode = false +utf8 = false + +# Some tests of no particular significance. +[[test]] +name = "unicode6" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456 quux 789" +matches = [[4, 7], [12, 15], [21, 24]] + +[[test]] +name = "unicode7" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar a456 quux 789" +matches = [[4, 7], [22, 25]] + +[[test]] +name = "unicode8" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456a quux 789" +matches = [[4, 7], [22, 25]] + +# A variant of the problem described here: +# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667 +[[test]] +name = "alt-with-assertion-repetition" +regex = '(?:\b|%)+' +haystack = "z%" +bounds = [1, 2] +anchored = true +matches = [[1, 1]] diff --git a/third_party/rust/regex/tests/fuzz/mod.rs b/third_party/rust/regex/tests/fuzz/mod.rs new file mode 100644 index 0000000000..88c196ae67 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/mod.rs @@ -0,0 +1,166 @@ +// This set of tests is different from regression_fuzz in that the tests start +// from the fuzzer data directly. The test essentially duplicates the fuzz +// target. I wonder if there's a better way to set this up... Hmmm. I bet +// `cargo fuzz` has something where it can run a target against crash files and +// verify that they pass. + +// This case found by the fuzzer causes the meta engine to use the "reverse +// inner" literal strategy. That in turn uses a specialized search routine +// for the lazy DFA in order to avoid worst case quadratic behavior. 
That +// specialized search routine had a bug where it assumed that start state +// specialization was disabled. But this is indeed not the case, since it +// reuses the "general" lazy DFA for the full regex created as part of the core +// strategy, which might very well have start states specialized due to the +// existence of a prefilter. +// +// This is a somewhat weird case because if the core engine has a prefilter, +// then it's usually the case that the "reverse inner" optimization won't be +// pursued in that case. But there are some heuristics that try to detect +// whether a prefilter is "fast" or not. If it's not, then the meta engine will +// attempt the reverse inner optimization. And indeed, that's what happens +// here. So the reverse inner optimization ends up with a lazy DFA that has +// start states specialized. Ideally this wouldn't happen because specializing +// start states without a prefilter inside the DFA can be disastrous for +// performance by causing the DFA to ping-pong in and out of the special state +// handling. In this case, it's probably not a huge deal because the lazy +// DFA is only used for part of the matching whereas the workhorse is the +// prefilter found by the reverse inner optimization. +// +// We could maybe fix this by refactoring the meta engine to be a little more +// careful. For example, by attempting the optimizations before building the +// core engine. But this is perhaps a little tricky. +#[test] +fn meta_stopat_specialize_start_states() { + let data = include_bytes!( + "testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9", + ); + let _ = run(data); +} + +// Same bug as meta_stopat_specialize_start_states, but minimized by the +// fuzzer.
+#[test] +fn meta_stopat_specialize_start_states_min() { + let data = include_bytes!( + "testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9", + ); + let _ = run(data); +} + +// This input generated a pattern with a fail state (e.g., \P{any}, [^\s\S] +// or [a&&b]). But the fail state was in a branch, where a subsequent branch +// should have led to an overall match, but handling of the fail state +// prevented it from doing so. A hand-minimized version of this is '[^\s\S]A|B' +// on the haystack 'B'. That should yield a match of 'B'. +// +// The underlying cause was an issue in how DFA determinization handled fail +// states. The bug didn't impact the PikeVM or the bounded backtracker. +#[test] +fn fail_branch_prevents_match() { + let data = include_bytes!( + "testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04", + ); + let _ = run(data); +} + +// This input generated a pattern that contained a sub-expression like this: +// +// a{0}{50000} +// +// This turned out to provoke quadratic behavior in the NFA compiler. +// Basically, the NFA compiler works in two phases. The first phase builds +// a more complicated-but-simpler-to-construct sequence of NFA states that +// includes unconditional epsilon transitions. As part of converting this +// sequence to the "final" NFA, we remove those unconditional epsilon +// transitions. The code responsible for doing this follows every chain of +// these transitions and remaps the state IDs. The way we were doing this +// before resulted in re-following every subsequent part of the chain for each +// state in the chain, which ended up being quadratic behavior. We effectively +// memoized this, which fixed the performance bug. +#[test] +fn slow_big_empty_chain() { + let data = include_bytes!( + "testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain.
+#[test] +fn slow_big_empty_chain2() { + let data = include_bytes!( + "testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain3() { + let data = include_bytes!( + "testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain4() { + let data = include_bytes!( + "testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain5() { + let data = include_bytes!( + "testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain6() { + let data = include_bytes!( + "testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085", + ); + let _ = run(data); +} + +// This fuzz input generated a pattern with a large repetition that would fail +// NFA compilation, but its HIR was small. (HIR doesn't expand repetitions.) +// But, the bounds were high enough that the minimum length calculation +// overflowed. We fixed this by using saturating arithmetic (and also checked +// arithmetic for the maximum length calculation). +// +// Incidentally, this was the only unguarded arithmetic operation performed in +// the HIR smart constructors. And the fuzzer found it. Hah. Nice. +#[test] +fn minimum_len_overflow() { + let data = include_bytes!( + "testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff", + ); + let _ = run(data); +} + +// This is the fuzz target function. We duplicate it here since this is the +// thing we use to interpret the data. It is ultimately what we want to +// succeed. 
+fn run(data: &[u8]) -> Option<()> { + if data.len() < 2 { + return None; + } + let mut split_at = usize::from(data[0]); + let data = std::str::from_utf8(&data[1..]).ok()?; + // Split data into a regex and haystack to search. + let len = usize::try_from(data.chars().count()).ok()?; + split_at = std::cmp::max(split_at, 1) % len; + let char_index = data.char_indices().nth(split_at)?.0; + let (pattern, input) = data.split_at(char_index); + let re = regex::Regex::new(pattern).ok()?; + re.is_match(input); + Some(()) +} diff --git a/third_party/rust/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff b/third_party/rust/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff Binary files differnew file mode 100644 index 0000000000..f7ffbc9740 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff diff --git a/third_party/rust/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/third_party/rust/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 Binary files differnew file mode 100644 index 0000000000..86748199b3 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 diff --git a/third_party/rust/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 b/third_party/rust/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 Binary files differnew file mode 100644 index 0000000000..152769d2da --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 diff --git a/third_party/rust/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/third_party/rust/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 Binary files differnew file mode 100644 index 0000000000..69663d5c73 --- /dev/null +++ 
b/third_party/rust/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 diff --git a/third_party/rust/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e Binary files differnew file mode 100644 index 0000000000..6c22803538 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e diff --git a/third_party/rust/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c Binary files differnew file mode 100644 index 0000000000..0570f328c3 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c diff --git a/third_party/rust/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 Binary files differnew file mode 100644 index 0000000000..182bc7fa13 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 diff --git a/third_party/rust/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 Binary files differnew file mode 100644 index 0000000000..f939c33ab7 --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 diff --git a/third_party/rust/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a Binary files differnew file mode 100644 index 0000000000..a87de230ff --- /dev/null +++ 
b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a diff --git a/third_party/rust/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 Binary files differnew file mode 100644 index 0000000000..dc3329339e --- /dev/null +++ b/third_party/rust/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 diff --git a/third_party/rust/regex/tests/lib.rs b/third_party/rust/regex/tests/lib.rs new file mode 100644 index 0000000000..badd57455d --- /dev/null +++ b/third_party/rust/regex/tests/lib.rs @@ -0,0 +1,57 @@ +#![cfg_attr(feature = "pattern", feature(pattern))] + +mod fuzz; +mod misc; +mod regression; +mod regression_fuzz; +mod replace; +#[cfg(feature = "pattern")] +mod searcher; +mod suite_bytes; +mod suite_bytes_set; +mod suite_string; +mod suite_string_set; + +const BLACKLIST: &[&str] = &[ + // Nothing to blacklist yet! +]; + +fn suite() -> anyhow::Result<regex_test::RegexTests> { + let _ = env_logger::try_init(); + + let mut tests = regex_test::RegexTests::new(); + macro_rules! 
load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("../testdata/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("anchored"); + load!("bytes"); + load!("crazy"); + load!("crlf"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("leftmost-all"); + load!("line-terminator"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("substring"); + load!("unicode"); + load!("utf8"); + load!("word-boundary"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + + Ok(tests) +} diff --git a/third_party/rust/regex/tests/misc.rs b/third_party/rust/regex/tests/misc.rs new file mode 100644 index 0000000000..91e7d28980 --- /dev/null +++ b/third_party/rust/regex/tests/misc.rs @@ -0,0 +1,143 @@ +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + +#[test] +fn unclosed_group_error() { + let err = Regex::new(r"(").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("unclosed group"), "error message: {:?}", msg); +} + +#[test] +fn regex_string() { + assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); + assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); + assert_eq!( + r#"Regex("[a-zA-Z0-9]+")"#, + &format!("{:?}", regex!(r"[a-zA-Z0-9]+")) + ); +} + +#[test] +fn capture_names() { + let re = regex!(r"(.)(?P<a>.)"); + assert_eq!(3, re.captures_len()); + assert_eq!((3, Some(3)), re.capture_names().size_hint()); + assert_eq!( + vec![None, None, Some("a")], + re.capture_names().collect::<Vec<_>>() + ); +} + +#[test] +fn capture_index() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + assert_eq!(&cap[0], "abc"); + assert_eq!(&cap[1], "abc"); + assert_eq!(&cap["name"], "abc"); +} + +#[test] +#[should_panic] +fn capture_index_panic_usize() { 
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures("abc").unwrap();
+    let _ = cap[2];
+}
+
+#[test]
+#[should_panic]
+fn capture_index_panic_name() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures("abc").unwrap();
+    let _ = cap["bad name"];
+}
+
+#[test]
+fn capture_index_lifetime() {
+    // This is a test of whether the types on `caps["..."]` are general
+    // enough. If not, this will fail to typecheck.
+    fn inner(s: &str) -> usize {
+        let re = regex!(r"(?P<number>[0-9]+)");
+        let caps = re.captures(s).unwrap();
+        caps["number"].len()
+    }
+    assert_eq!(3, inner("123"));
+}
+
+#[test]
+fn capture_misc() {
+    let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
+    let cap = re.captures("abc").unwrap();
+
+    assert_eq!(5, cap.len());
+
+    assert_eq!((0, 3), {
+        let m = cap.get(0).unwrap();
+        (m.start(), m.end())
+    });
+    assert_eq!(None, cap.get(2));
+    assert_eq!((2, 3), {
+        let m = cap.get(4).unwrap();
+        (m.start(), m.end())
+    });
+
+    assert_eq!("abc", cap.get(0).unwrap().as_str());
+    assert_eq!(None, cap.get(2));
+    assert_eq!("c", cap.get(4).unwrap().as_str());
+
+    assert_eq!(None, cap.name("a"));
+    assert_eq!("c", cap.name("b").unwrap().as_str());
+}
+
+#[test]
+fn sub_capture_matches() {
+    let re = regex!(r"([a-z])(([a-z])|([0-9]))");
+    let cap = re.captures("a5").unwrap();
+    let subs: Vec<_> = cap.iter().collect();
+
+    assert_eq!(5, subs.len());
+    assert!(subs[0].is_some());
+    assert!(subs[1].is_some());
+    assert!(subs[2].is_some());
+    assert!(subs[3].is_none());
+    assert!(subs[4].is_some());
+
+    assert_eq!("a5", subs[0].unwrap().as_str());
+    assert_eq!("a", subs[1].unwrap().as_str());
+    assert_eq!("5", subs[2].unwrap().as_str());
+    assert_eq!("5", subs[4].unwrap().as_str());
+}
+
+// Test that the DFA can handle pathological cases. (This should result in the
+// DFA's cache being flushed too frequently, which should cause it to quit and
+// fall back to the NFA algorithm.)
+#[test]
+fn dfa_handles_pathological_case() {
+    fn ones_and_zeroes(count: usize) -> String {
+        let mut s = String::new();
+        for i in 0..count {
+            if i % 3 == 0 {
+                s.push('1');
+            } else {
+                s.push('0');
+            }
+        }
+        s
+    }
+
+    let re = regex!(r"[01]*1[01]{20}$");
+    let text = {
+        let mut pieces = ones_and_zeroes(100_000);
+        pieces.push('1');
+        pieces.push_str(&ones_and_zeroes(20));
+        pieces
+    };
+    assert!(re.is_match(&text));
+}
diff --git a/third_party/rust/regex/tests/regression.rs b/third_party/rust/regex/tests/regression.rs
new file mode 100644
index 0000000000..a5867016b2
--- /dev/null
+++ b/third_party/rust/regex/tests/regression.rs
@@ -0,0 +1,94 @@
+use regex::Regex;
+
+macro_rules! regex {
+    ($pattern:expr) => {
+        regex::Regex::new($pattern).unwrap()
+    };
+}
+
+// See: https://github.com/rust-lang/regex/issues/48
+#[test]
+fn invalid_regexes_no_crash() {
+    assert!(Regex::new("(*)").is_err());
+    assert!(Regex::new("(?:?)").is_err());
+    assert!(Regex::new("(?)").is_err());
+    assert!(Regex::new("*").is_err());
+}
+
+// See: https://github.com/rust-lang/regex/issues/98
+#[test]
+fn regression_many_repeat_stack_overflow() {
+    let re = regex!("^.{1,2500}");
+    assert_eq!(
+        vec![0..1],
+        re.find_iter("a").map(|m| m.range()).collect::<Vec<_>>()
+    );
+}
+
+// See: https://github.com/rust-lang/regex/issues/555
+#[test]
+fn regression_invalid_repetition_expr() {
+    assert!(Regex::new("(?m){1,1}").is_err());
+}
+
+// See: https://github.com/rust-lang/regex/issues/527
+#[test]
+fn regression_invalid_flags_expression() {
+    assert!(Regex::new("(((?x)))").is_ok());
+}
+
+// See: https://github.com/rust-lang/regex/issues/129
+#[test]
+fn regression_captures_rep() {
+    let re = regex!(r"([a-f]){2}(?P<foo>[x-z])");
+    let caps = re.captures("abx").unwrap();
+    assert_eq!(&caps["foo"], "x");
+}
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1247
+#[cfg(feature = "unicode-perl")]
+#[test]
+fn regression_nfa_stops1() {
+    let re = regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
+    assert_eq!(0, re.find_iter(b"s\xE4").count());
+}
+
+// See: https://github.com/rust-lang/regex/issues/981
+#[cfg(feature = "unicode")]
+#[test]
+fn regression_bad_word_boundary() {
+    let re = regex!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#);
+    let hay = "ubi-Darwin-x86_64.tar.gz";
+    assert!(!re.is_match(hay));
+    let hay = "ubi-Windows-x86_64.zip";
+    assert!(re.is_match(hay));
+}
+
+// See: https://github.com/rust-lang/regex/issues/982
+#[cfg(feature = "unicode-perl")]
+#[test]
+fn regression_unicode_perl_not_enabled() {
+    let pat = r"(\d+\s?(years|year|y))?\s?(\d+\s?(months|month|m))?\s?(\d+\s?(weeks|week|w))?\s?(\d+\s?(days|day|d))?\s?(\d+\s?(hours|hour|h))?";
+    assert!(Regex::new(pat).is_ok());
+}
+
+// See: https://github.com/rust-lang/regex/issues/995
+#[test]
+fn regression_big_regex_overflow() {
+    let pat = r" {2147483516}{2147483416}{5}";
+    assert!(Regex::new(pat).is_err());
+}
+
+// See: https://github.com/rust-lang/regex/issues/999
+#[test]
+fn regression_complete_literals_suffix_incorrect() {
+    let needles = vec![
+        "aA", "bA", "cA", "dA", "eA", "fA", "gA", "hA", "iA", "jA", "kA",
+        "lA", "mA", "nA", "oA", "pA", "qA", "rA", "sA", "tA", "uA", "vA",
+        "wA", "xA", "yA", "zA",
+    ];
+    let pattern = needles.join("|");
+    let re = regex!(&pattern);
+    let hay = "FUBAR";
+    assert_eq!(0, re.find_iter(hay).count());
+}
diff --git a/third_party/rust/regex/tests/regression_fuzz.rs b/third_party/rust/regex/tests/regression_fuzz.rs
new file mode 100644
index 0000000000..f90ad4cb20
--- /dev/null
+++ b/third_party/rust/regex/tests/regression_fuzz.rs
@@ -0,0 +1,61 @@
+// These tests are only run for the "default" test target because some of them
+// can take quite a long time. Some of them take long enough that it's not
+// practical to run them in debug mode. :-/
+
+use regex::Regex;
+
+macro_rules! regex {
+    ($pattern:expr) => {
+        regex::Regex::new($pattern).unwrap()
+    };
+}
+
+// See: https://oss-fuzz.com/testcase-detail/5673225499181056
+//
+// Ignored by default since it takes too long in debug mode (almost a minute).
+#[test]
+#[ignore]
+fn fuzz1() {
+    regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**");
+}
+
+// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505
+// See: https://github.com/rust-lang/regex/issues/722
+#[test]
+#[cfg(feature = "unicode")]
+fn empty_any_errors_no_panic() {
+    assert!(Regex::new(r"\P{any}").is_ok());
+}
+
+// This tests that a very large regex errors during compilation instead of
+// using gratuitous amounts of memory. The specific problem is that the
+// compiler wasn't accounting for the memory used by Unicode character classes
+// correctly.
+//
+// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579
+#[test]
+fn big_regex_fails_to_compile() {
+    let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}";
+    assert!(Regex::new(pat).is_err());
+}
+
+// This was caught while on master but before a release went out(!).
+//
+// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58173
+#[test]
+fn todo() {
+    let pat = "(?:z|xx)@|xx";
+    assert!(Regex::new(pat).is_ok());
+}
+
+// This was found by the fuzzer, and then minimized by hand.
+//
+// This was caused by a bug in DFA determinization that mishandled NFA fail
+// states.
+#[test]
+fn fail_branch_prevents_match() {
+    let pat = r".*[a&&b]A|B";
+    let hay = "B";
+    let re = Regex::new(pat).unwrap();
+    assert!(re.is_match(hay));
+}
diff --git a/third_party/rust/regex/tests/replace.rs b/third_party/rust/regex/tests/replace.rs
new file mode 100644
index 0000000000..f26ae46030
--- /dev/null
+++ b/third_party/rust/regex/tests/replace.rs
@@ -0,0 +1,183 @@
+macro_rules! replace(
+    ($name:ident, $which:ident, $re:expr,
+     $search:expr, $replace:expr, $result:expr) => (
+        #[test]
+        fn $name() {
+            let re = regex::Regex::new($re).unwrap();
+            assert_eq!(re.$which($search, $replace), $result);
+        }
+    );
+);
+
+replace!(first, replace, r"[0-9]", "age: 26", "Z", "age: Z6");
+replace!(plus, replace, r"[0-9]+", "age: 26", "Z", "age: Z");
+replace!(all, replace_all, r"[0-9]", "age: 26", "Z", "age: ZZ");
+replace!(groups, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", "$2 $1", "w2 w1");
+replace!(
+    double_dollar,
+    replace,
+    r"([^ ]+)[ ]+([^ ]+)",
+    "w1 w2",
+    "$2 $$1",
+    "w2 $1"
+);
+// replace!(adjacent_index, replace,
+// r"([^aeiouy])ies$", "skies", "$1y", "sky");
+replace!(
+    named,
+    replace_all,
+    r"(?P<first>[^ ]+)[ ]+(?P<last>[^ ]+)(?P<space>[ ]*)",
+    "w1 w2 w3 w4",
+    "$last $first$space",
+    "w2 w1 w4 w3"
+);
+replace!(
+    trim,
+    replace_all,
+    "^[ \t]+|[ \t]+$",
+    " \t trim me\t \t",
+    "",
+    "trim me"
+);
+replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b");
+// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b");
+replace!(
+    simple_expand,
+    replace_all,
+    r"([a-z]) ([a-z])",
+    "a b",
+    "$2 $1",
+    "b a"
+);
+replace!(
+    literal_dollar1,
+    replace_all,
+    r"([a-z]+) ([a-z]+)",
+    "a b",
+    "$$1",
+    "$1"
+);
+replace!(
+    literal_dollar2,
+    replace_all,
+    r"([a-z]+) ([a-z]+)",
+    "a b",
+    "$2 $$c $1",
+    "b $c a"
+);
+replace!(
+    no_expand1,
+    replace,
+    r"([^ ]+)[ ]+([^ ]+)",
+    "w1 w2",
+    regex::NoExpand("$2 $1"),
+    "$2 $1"
+);
+replace!(
+    no_expand2,
+    replace,
+    r"([^ ]+)[ ]+([^ ]+)",
+    "w1 w2",
+    regex::NoExpand("$$1"),
+    "$$1"
+);
+replace!(
+    closure_returning_reference,
+    replace,
+    r"([0-9]+)",
+    "age: 26",
+    |captures: &regex::Captures<'_>| { captures[1][0..1].to_owned() },
+    "age: 2"
+);
+replace!(
+    closure_returning_value,
+    replace,
+    r"[0-9]+",
+    "age: 26",
+    |_captures: &regex::Captures<'_>| "Z".to_owned(),
+    "age: Z"
+);
+
+// See https://github.com/rust-lang/regex/issues/314
+replace!(
+    match_at_start_replace_with_empty,
+    replace_all,
+    r"foo",
+    "foobar",
+    "",
+    "bar"
+);
+
+// See https://github.com/rust-lang/regex/issues/393
+replace!(single_empty_match, replace, r"^", "bar", "foo", "foobar");
+
+// See https://github.com/rust-lang/regex/issues/399
+replace!(
+    capture_longest_possible_name,
+    replace_all,
+    r"(.)",
+    "b",
+    "${1}a $1a",
+    "ba "
+);
+
+replace!(
+    impl_string,
+    replace,
+    r"[0-9]",
+    "age: 26",
+    "Z".to_string(),
+    "age: Z6"
+);
+replace!(
+    impl_string_ref,
+    replace,
+    r"[0-9]",
+    "age: 26",
+    &"Z".to_string(),
+    "age: Z6"
+);
+replace!(
+    impl_cow_str_borrowed,
+    replace,
+    r"[0-9]",
+    "age: 26",
+    std::borrow::Cow::<'_, str>::Borrowed("Z"),
+    "age: Z6"
+);
+replace!(
+    impl_cow_str_borrowed_ref,
+    replace,
+    r"[0-9]",
+    "age: 26",
+    &std::borrow::Cow::<'_, str>::Borrowed("Z"),
+    "age: Z6"
+);
+replace!(
+    impl_cow_str_owned,
+    replace,
+    r"[0-9]",
+    "age: 26",
+    std::borrow::Cow::<'_, str>::Owned("Z".to_string()),
+    "age: Z6"
+);
+replace!(
+    impl_cow_str_owned_ref,
+    replace,
+    r"[0-9]",
+    "age: 26",
+    &std::borrow::Cow::<'_, str>::Owned("Z".to_string()),
+    "age: Z6"
+);
+
+#[test]
+fn replacen_no_captures() {
+    let re = regex::Regex::new(r"[0-9]").unwrap();
+    assert_eq!(re.replacen("age: 1234", 2, "Z"), "age: ZZ34");
+}
+
+#[test]
+fn replacen_with_captures() {
+    let re = regex::Regex::new(r"([0-9])").unwrap();
+    assert_eq!(re.replacen("age: 1234", 2, "${1}Z"), "age: 1Z2Z34");
+}
diff --git a/third_party/rust/regex/tests/searcher.rs b/third_party/rust/regex/tests/searcher.rs
new file mode 100644
index 0000000000..f6dae13105
--- /dev/null
+++ b/third_party/rust/regex/tests/searcher.rs
@@ -0,0 +1,93 @@
+macro_rules! searcher {
+    ($name:ident, $re:expr, $haystack:expr) => (
+        searcher!($name, $re, $haystack, vec vec![]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => (
+        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => (
+        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => (
+        #[test]
+        #[allow(unused_imports)]
+        fn $name() {
+            use std::str::pattern::{Pattern, Searcher};
+            use std::str::pattern::SearchStep::{Match, Reject, Done};
+            let re = regex::Regex::new($re).unwrap();
+            let mut se = re.into_searcher($haystack);
+            let mut got_steps = vec![];
+            loop {
+                match se.next() {
+                    Done => break,
+                    step => { got_steps.push(step); }
+                }
+            }
+            assert_eq!(got_steps, $expect_steps);
+        }
+    );
+}
+
+searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0));
+searcher!(
+    searcher_empty_regex,
+    r"",
+    "ab",
+    Match(0, 0),
+    Reject(0, 1),
+    Match(1, 1),
+    Reject(1, 2),
+    Match(2, 2)
+);
+searcher!(searcher_empty_haystack, r"\d", "");
+searcher!(searcher_one_match, r"\d", "5", Match(0, 1));
+searcher!(searcher_no_match, r"\d", "a", Reject(0, 1));
+searcher!(
+    searcher_two_adjacent_matches,
+    r"\d",
+    "56",
+    Match(0, 1),
+    Match(1, 2)
+);
+searcher!(
+    searcher_two_non_adjacent_matches,
+    r"\d",
+    "5a6",
+    Match(0, 1),
+    Reject(1, 2),
+    Match(2, 3)
+);
+searcher!(searcher_reject_first, r"\d", "a6", Reject(0, 1), Match(1, 2));
+searcher!(
+    searcher_one_zero_length_matches,
+    r"\d*",
+    "a1b2",
+    Match(0, 0), // ^
+    Reject(0, 1), // a
+    Match(1, 2), // a1
+    Reject(2, 3), // a1b
+    Match(3, 4), // a1b2
+);
+searcher!(
+    searcher_many_zero_length_matches,
+    r"\d*",
+    "a1bbb2",
+    Match(0, 0), // ^
+    Reject(0, 1), // a
+    Match(1, 2), // a1
+    Reject(2, 3), // a1b
+    Match(3, 3), // a1bb
+    Reject(3, 4), // a1bb
+    Match(4, 4), // a1bbb
+    Reject(4, 5), // a1bbb
+    Match(5, 6), // a1bbb2
+);
+searcher!(
+    searcher_unicode,
+    r".+?",
+    "Ⅰ1Ⅱ2",
+    Match(0, 3),
+    Match(3, 4),
+    Match(4, 7),
+    Match(7, 8)
+);
diff --git a/third_party/rust/regex/tests/suite_bytes.rs b/third_party/rust/regex/tests/suite_bytes.rs
new file mode 100644
index 0000000000..106d998085
--- /dev/null
+++ b/third_party/rust/regex/tests/suite_bytes.rs
@@ -0,0 +1,108 @@
+use {
+    anyhow::Result,
+    regex::bytes::{Regex, RegexBuilder},
+    regex_test::{
+        CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
+    },
+};
+
+/// Tests the default configuration of the top-level `bytes::Regex` API.
+#[test]
+fn default() -> Result<()> {
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(super::BLACKLIST)
+        .test_iter(crate::suite()?.iter(), compiler)
+        .assert();
+    Ok(())
+}
+
+fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(test.haystack())),
+        "find" => TestResult::matches(
+            re.find_iter(test.haystack())
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|m| Match {
+                    id: 0,
+                    span: Span { start: m.start(), end: m.end() },
+                }),
+        ),
+        "captures" => {
+            let it = re
+                .captures_iter(test.haystack())
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|caps| testify_captures(&caps));
+            TestResult::captures(it)
+        }
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Converts the given regex test to a closure that searches with a
+/// `bytes::Regex`. If the test configuration is unsupported, then a
+/// `CompiledRegex` that skips the test is returned.
+fn compiler(
+    test: &RegexTest,
+    _patterns: &[String],
+) -> anyhow::Result<CompiledRegex> {
+    let skip = Ok(CompiledRegex::skip());
+
+    // We're only testing bytes::Regex here, which supports one pattern only.
+    let pattern = match test.regexes().len() {
+        1 => &test.regexes()[0],
+        _ => return skip,
+    };
+    // We only test is_match, find_iter and captures_iter. All of those are
+    // leftmost searches.
+    if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) {
+        return skip;
+    }
+    // The top-level single-pattern regex API always uses leftmost-first.
+    if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) {
+        return skip;
+    }
+    // The top-level regex API always runs unanchored searches. ... But we can
+    // handle tests that are anchored but have only one match.
+    if test.anchored() && test.match_limit() != Some(1) {
+        return skip;
+    }
+    // We don't support tests with explicit search bounds. We could probably
+    // support this by using the 'find_at' (and such) APIs.
+    let bounds = test.bounds();
+    if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
+        return skip;
+    }
+    // The bytes::Regex API specifically does not support enabling UTF-8 mode.
+    // It could I suppose, but currently it does not. That is, it permits
+    // matches to have offsets that split codepoints.
+    if test.utf8() {
+        return skip;
+    }
+    // If the test requires Unicode but the Unicode feature isn't enabled,
+    // skip it. This is a little aggressive, but the test suite doesn't
+    // have any easy way of communicating which Unicode features are needed.
+    if test.unicode() && !cfg!(feature = "unicode") {
+        return skip;
+    }
+    let re = RegexBuilder::new(pattern)
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .line_terminator(test.line_terminator())
+        .build()?;
+    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
+}
+
+/// Convert `Captures` into the test suite's capture values.
+fn testify_captures(
+    caps: &regex::bytes::Captures<'_>,
+) -> regex_test::Captures {
+    let spans = caps.iter().map(|group| {
+        group.map(|m| regex_test::Span { start: m.start(), end: m.end() })
+    });
+    // This unwrap is OK because we assume our 'caps' represents a match, and
+    // a match always gives a non-zero number of groups with the first group
+    // being non-None.
+    regex_test::Captures::new(0, spans).unwrap()
+}
diff --git a/third_party/rust/regex/tests/suite_bytes_set.rs b/third_party/rust/regex/tests/suite_bytes_set.rs
new file mode 100644
index 0000000000..899d24c17e
--- /dev/null
+++ b/third_party/rust/regex/tests/suite_bytes_set.rs
@@ -0,0 +1,71 @@
+use {
+    anyhow::Result,
+    regex::bytes::{RegexSet, RegexSetBuilder},
+    regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner},
+};
+
+/// Tests the default configuration of the top-level `bytes::RegexSet` API.
+#[test]
+fn default() -> Result<()> {
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "which"], |test| test.compiles())
+        .blacklist_iter(super::BLACKLIST)
+        .test_iter(crate::suite()?.iter(), compiler)
+        .assert();
+    Ok(())
+}
+
+fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult {
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(test.haystack())),
+        "which" => TestResult::which(re.matches(test.haystack()).iter()),
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Converts the given regex test to a closure that searches with a
+/// `bytes::RegexSet`. If the test configuration is unsupported, then a
+/// `CompiledRegex` that skips the test is returned.
+fn compiler(
+    test: &RegexTest,
+    _patterns: &[String],
+) -> anyhow::Result<CompiledRegex> {
+    let skip = Ok(CompiledRegex::skip());
+
+    // The top-level RegexSet API only supports "overlapping" semantics.
+    if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) {
+        return skip;
+    }
+    // The top-level RegexSet API only supports "all" semantics.
+    if !matches!(test.match_kind(), regex_test::MatchKind::All) {
+        return skip;
+    }
+    // The top-level RegexSet API always runs unanchored searches.
+    if test.anchored() {
+        return skip;
+    }
+    // We don't support tests with explicit search bounds.
+    let bounds = test.bounds();
+    if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
+        return skip;
+    }
+    // The bytes::RegexSet API specifically does not support enabling UTF-8
+    // mode. It could I suppose, but currently it does not. That is, it permits
+    // matches to have offsets that split codepoints.
+    if test.utf8() {
+        return skip;
+    }
+    // If the test requires Unicode but the Unicode feature isn't enabled,
+    // skip it. This is a little aggressive, but the test suite doesn't
+    // have any easy way of communicating which Unicode features are needed.
+    if test.unicode() && !cfg!(feature = "unicode") {
+        return skip;
+    }
+    let re = RegexSetBuilder::new(test.regexes())
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .line_terminator(test.line_terminator())
+        .build()?;
+    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
+}
diff --git a/third_party/rust/regex/tests/suite_string.rs b/third_party/rust/regex/tests/suite_string.rs
new file mode 100644
index 0000000000..1e5bf0bb3b
--- /dev/null
+++ b/third_party/rust/regex/tests/suite_string.rs
@@ -0,0 +1,114 @@
+use {
+    anyhow::Result,
+    regex::{Regex, RegexBuilder},
+    regex_test::{
+        CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
+    },
+};
+
+/// Tests the default configuration of the top-level `Regex` API.
+#[test]
+fn default() -> Result<()> {
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "find", "captures"], |test| test.compiles())
+        .blacklist_iter(super::BLACKLIST)
+        .test_iter(crate::suite()?.iter(), compiler)
+        .assert();
+    Ok(())
+}
+
+fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
+    let hay = match std::str::from_utf8(test.haystack()) {
+        Ok(hay) => hay,
+        Err(err) => {
+            return TestResult::fail(&format!(
+                "haystack is not valid UTF-8: {}",
+                err
+            ));
+        }
+    };
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(hay)),
+        "find" => TestResult::matches(
+            re.find_iter(hay)
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|m| Match {
+                    id: 0,
+                    span: Span { start: m.start(), end: m.end() },
+                }),
+        ),
+        "captures" => {
+            let it = re
+                .captures_iter(hay)
+                .take(test.match_limit().unwrap_or(std::usize::MAX))
+                .map(|caps| testify_captures(&caps));
+            TestResult::captures(it)
+        }
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Converts the given regex test to a closure that searches with a
+/// `Regex`. If the test configuration is unsupported, then a
+/// `CompiledRegex` that skips the test is returned.
+fn compiler(
+    test: &RegexTest,
+    _patterns: &[String],
+) -> anyhow::Result<CompiledRegex> {
+    let skip = Ok(CompiledRegex::skip());
+
+    // We're only testing Regex here, which supports one pattern only.
+    let pattern = match test.regexes().len() {
+        1 => &test.regexes()[0],
+        _ => return skip,
+    };
+    // We only test is_match, find_iter and captures_iter. All of those are
+    // leftmost searches.
+    if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) {
+        return skip;
+    }
+    // The top-level single-pattern regex API always uses leftmost-first.
+    if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) {
+        return skip;
+    }
+    // The top-level regex API always runs unanchored searches. ... But we can
+    // handle tests that are anchored but have only one match.
+    if test.anchored() && test.match_limit() != Some(1) {
+        return skip;
+    }
+    // We don't support tests with explicit search bounds. We could probably
+    // support this by using the 'find_at' (and such) APIs.
+    let bounds = test.bounds();
+    if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
+        return skip;
+    }
+    // The Regex API specifically does not support disabling UTF-8 mode because
+    // it can only search &str which is always valid UTF-8.
+    if !test.utf8() {
+        return skip;
+    }
+    // If the test requires Unicode but the Unicode feature isn't enabled,
+    // skip it. This is a little aggressive, but the test suite doesn't
+    // have any easy way of communicating which Unicode features are needed.
+    if test.unicode() && !cfg!(feature = "unicode") {
+        return skip;
+    }
+    let re = RegexBuilder::new(pattern)
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .line_terminator(test.line_terminator())
+        .build()?;
+    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
+}
+
+/// Convert `Captures` into the test suite's capture values.
+fn testify_captures(caps: &regex::Captures<'_>) -> regex_test::Captures {
+    let spans = caps.iter().map(|group| {
+        group.map(|m| regex_test::Span { start: m.start(), end: m.end() })
+    });
+    // This unwrap is OK because we assume our 'caps' represents a match, and
+    // a match always gives a non-zero number of groups with the first group
+    // being non-None.
+    regex_test::Captures::new(0, spans).unwrap()
+}
diff --git a/third_party/rust/regex/tests/suite_string_set.rs b/third_party/rust/regex/tests/suite_string_set.rs
new file mode 100644
index 0000000000..dffdc70810
--- /dev/null
+++ b/third_party/rust/regex/tests/suite_string_set.rs
@@ -0,0 +1,79 @@
+use {
+    anyhow::Result,
+    regex::{RegexSet, RegexSetBuilder},
+    regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner},
+};
+
+/// Tests the default configuration of the top-level `RegexSet` API.
+#[test]
+fn default() -> Result<()> {
+    let mut runner = TestRunner::new()?;
+    runner
+        .expand(&["is_match", "which"], |test| test.compiles())
+        .blacklist_iter(super::BLACKLIST)
+        .test_iter(crate::suite()?.iter(), compiler)
+        .assert();
+    Ok(())
+}
+
+fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult {
+    let hay = match std::str::from_utf8(test.haystack()) {
+        Ok(hay) => hay,
+        Err(err) => {
+            return TestResult::fail(&format!(
+                "haystack is not valid UTF-8: {}",
+                err
+            ));
+        }
+    };
+    match test.additional_name() {
+        "is_match" => TestResult::matched(re.is_match(hay)),
+        "which" => TestResult::which(re.matches(hay).iter()),
+        name => TestResult::fail(&format!("unrecognized test name: {}", name)),
+    }
+}
+
+/// Converts the given regex test to a closure that searches with a
+/// `RegexSet`. If the test configuration is unsupported, then a
+/// `CompiledRegex` that skips the test is returned.
+fn compiler(
+    test: &RegexTest,
+    _patterns: &[String],
+) -> anyhow::Result<CompiledRegex> {
+    let skip = Ok(CompiledRegex::skip());
+
+    // The top-level RegexSet API only supports "overlapping" semantics.
+    if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) {
+        return skip;
+    }
+    // The top-level RegexSet API only supports "all" semantics.
+    if !matches!(test.match_kind(), regex_test::MatchKind::All) {
+        return skip;
+    }
+    // The top-level RegexSet API always runs unanchored searches.
+    if test.anchored() {
+        return skip;
+    }
+    // We don't support tests with explicit search bounds.
+    let bounds = test.bounds();
+    if !(bounds.start == 0 && bounds.end == test.haystack().len()) {
+        return skip;
+    }
+    // The RegexSet API specifically does not support disabling UTF-8 mode
+    // because it can only search &str which is always valid UTF-8.
+    if !test.utf8() {
+        return skip;
+    }
+    // If the test requires Unicode but the Unicode feature isn't enabled,
+    // skip it. This is a little aggressive, but the test suite doesn't
+    // have any easy way of communicating which Unicode features are needed.
+    if test.unicode() && !cfg!(feature = "unicode") {
+        return skip;
+    }
+    let re = RegexSetBuilder::new(test.regexes())
+        .case_insensitive(test.case_insensitive())
+        .unicode(test.unicode())
+        .line_terminator(test.line_terminator())
+        .build()?;
+    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
+}
|