diff options
Diffstat (limited to 'third_party/rust/glean-core')
84 files changed, 18717 insertions, 0 deletions
diff --git a/third_party/rust/glean-core/.cargo-checksum.json b/third_party/rust/glean-core/.cargo-checksum.json new file mode 100644 index 0000000000..ed76b5bfc1 --- /dev/null +++ b/third_party/rust/glean-core/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"0e93431e841aa4809e732ecc273268ae6d45801a7496e0c14177d86ddb3de8d0","Cargo.toml":"765550648c271e260ed5d6f05b586def8c2436a99800cd46e547ea597d76717b","LICENSE":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","README.md":"026495898699b54608eb4ec16074ffafc57920d80ccb59961c501a1ea28c9985","examples/sample.rs":"ea5b72b702cb6bfb4ad38041a006c40960f40664865a73a904a2d033c7f9f7f0","src/common_metric_data.rs":"d95d20dc9637e9b39255cd6b1d2d13072735d63b69324579662d8613f28c7124","src/database/mod.rs":"9dd5abe7e0a70c0c9c0f2d6c79415967c1f76ad0185d1c5c1eeb2b218571fb82","src/debug.rs":"b5d8117ef1ad043637093cc9d12a6557aa9b85ce90ecde8d88a161d48cedc625","src/error.rs":"97172a791efdf8b791a04ae7961085fbef4391b313ff3ffb24e9381f2901fccc","src/error_recording.rs":"c1bf82011a149277b78b3d6a5f6293520ef3b38571649560b8adfe8fc19174e9","src/event_database/mod.rs":"1add194e555b17a0dd33c09e607e991fb86668f028c87197e036b0deebf345d2","src/histogram/exponential.rs":"389d48bab03f7229445ee4d4c8a4c49f6b43303f658dc954da75142f3c040dc6","src/histogram/functional.rs":"1a63a305b48bcef7bc38136b40d916df4bb8f098dc602514ada54a9b091f6951","src/histogram/linear.rs":"5642c8983c3c1ce9b69c6ac99133c8ed5e67abe2398c5dbcf67e69d66880f4ca","src/histogram/mod.rs":"11e5d3b6440a33a3e5544d0e03ffd69fa2fdcabe603ad75e0327f3ed548e8ce3","src/internal_metrics.rs":"146018386ed5fd3d1e813a2a31b0a226381df32fff85c325b04e7f336ece24a8","src/internal_pings.rs":"f9f589f30510b207845a3b80e219b976e719db1266f7cffd8a589bfdc75b096e","src/lib.rs":"47dac20b235f1edc90ce1cc39e5ba6af9e99678a56229fb5f7c4ad76907f5261","src/lib_unit_tests.rs":"60c9ea2572161feb6a66b7bd667efc469e87e1c58e167795fdc62f6f50c4dc7b","src/macros.rs":"3f3fc8c36dd05b7528b66b27e95b6494505a7a1f48abd013218de2
3ada6b5a43","src/metrics/boolean.rs":"6c2495ce63539fad2c2dfab4a4a2c80179f2a0144436a3bceb4995c93ebe5d73","src/metrics/counter.rs":"c23dd9d9df51f16426c6f97fdf46cc7c03ea4b2a083e408aee4b524ef94d57c3","src/metrics/custom_distribution.rs":"05bffc95cb3290c6f9288c22fd8e3611e4c35eed5cfd5fc2cbb6e8645f002c8a","src/metrics/datetime.rs":"5fdfa7fe44f2c0d43cc5cc0149a4da2322b40423469fcdc0c05298d000565c02","src/metrics/event.rs":"d6e4a52dbde7fbd44c21df567508c35acf046e08c6ea593bbb65fa25223a2b40","src/metrics/experiment.rs":"ec903e983a092c3f862e11d417db7f357a85c0ea24e75bf7c4e31658a76f7e73","src/metrics/jwe.rs":"27c12bfddf955039bee3b46a37226dc58b1e5e9001221ef18be50fd23f27dfb6","src/metrics/labeled.rs":"b731e306b64ee0d0b554e5af04e0098664572e5b054448c488f9823366b183e8","src/metrics/memory_distribution.rs":"8966a6db71428e2152738ac3edeaaf3929cd7f27ecd84ca880b1dc30a73e35bf","src/metrics/memory_unit.rs":"d7a678e5242febd021283b30c0099a9e62729944816a3f17d2d91e2808bc0570","src/metrics/mod.rs":"76bb599cdb0fba77181eb8f02aa990d608232c3c049fe86d08db2882ef58ee50","src/metrics/ping.rs":"dcabf0f8fd8b38911d32348f6d518273c7b3dfafa43502bf2fc3e7fc27e2d0a2","src/metrics/quantity.rs":"ac94c57851c76d49ed6fffd7428bb8966fbfb4f8ea5476cce34233fa6d5cee6b","src/metrics/string.rs":"60b62084f5202751a86c5c516e808589db644844e999e7c6d3204b0ee561421a","src/metrics/string_list.rs":"615d0cb26abcbd0b4dbae501cdb0a0ff2ba006447db0e3fa3ce0e76676f28d80","src/metrics/time_unit.rs":"9bb3f6c03b1d385008b254f36ed35daf22f9a6fa2d18e007c9b70aac767cde50","src/metrics/timespan.rs":"0ddb48ca9087bddf151110694390bea429317cb2f4a998640201aced3c6380c5","src/metrics/timing_distribution.rs":"eefdeaef0659f132a2c46057ee83165797b33812143a6174db1b0fad8db75234","src/metrics/uuid.rs":"e908d89860b90d2880f003ab5829184a8a3a546b51c14ed49456a730ca970669","src/ping/mod.rs":"87de8f88d1a4eb9a37b7891c2efdcb22843437d32a0920f7ded1590c3cd147a1","src/storage/mod.rs":"2f0a1be2801649be65c61aa17ce55f20e96b0bf5be0113b68e5d9472a7fc24b8","src/system.rs":"cb33444d393bf49
60ebe716a01de3465f2d5a28969ca1034d9cd1a5b31ed2a7f","src/traits/boolean.rs":"f17f16f379b777e435e38e810e25336a919f4aeeefb1b4e7cf7749c0ef173fd9","src/traits/counter.rs":"631208036059d498a4485c74c2a80c24a5f13bffeb1053bdf5cce06af0e9d361","src/traits/custom_distribution.rs":"1b3ffda6e857bfa8ad5be412e88f3d9706ededf5a86407364ae19a81f06ef798","src/traits/datetime.rs":"2f4b9e64161c15c912b6a915045dd9840f76229d144a4d1542c54eb4f3b3406b","src/traits/event.rs":"c91901e589232f981c5cf4457eaa09cedff4af5cc050fd13db610146811000e9","src/traits/jwe.rs":"80cdc61e6d99d7c05139c9d05288290c731b1be61afcbd4a4c415973665ea194","src/traits/labeled.rs":"7f3281fc0a1a745238e7e260463f397dfa61717408fe9d260dc9767d9c659e52","src/traits/memory_distribution.rs":"0b72ffde68eb97f01a57afbc5c6109565ec9435561984d34696622bf5a57d559","src/traits/mod.rs":"e50dd9e2fa3abe93bfb943e046cb00240be770372c03c915dda63f79ab7f0346","src/traits/ping.rs":"8831c106c03afeb458b0b028fa1ce61f056ebf8e82bc0a171a1bff255d920748","src/traits/quantity.rs":"087d7a4b58f0eb7c766a0202dcf38a52d172ce4edf626211ffe5d94ed02ae754","src/traits/string.rs":"48e5623c7db59b8e6454fbc7fb6e2395f808c9a0fa721d7c07b72abb171eb1ad","src/traits/string_list.rs":"5d5773d5a130323dd82ca95f4ecdccdd6b237afe2eadf8655363bd6b7e8508c1","src/traits/timespan.rs":"f53281a668ec62b4a68af8fb3413ace6ffe14bcceb65831df6e7c0c5f77830ed","src/traits/timing_distribution.rs":"ccd6c3737150a1e23ff75dbac14373741bc30bff51002964b4b50591aa1eed2b","src/traits/uuid.rs":"bd78515611ba406d8b1bda35a9fe30c317512a9afcea7c5dece8fced7d410a92","src/upload/directory.rs":"939106ec37bf66064b70d11d0c99968bbe445cdd979e39214b4952843ef6444f","src/upload/mod.rs":"e92194a7f8bc91d16904d18d276ff4493f1ceb6c3b7822453f7a5a02ee33cc23","src/upload/policy.rs":"c250957a37783e74af8002cd80ba06ef9780a389fb0f61b8b665b79688f0a360","src/upload/request.rs":"7939e77bde4b55263446c3660b6831bafa27308bc983a51e1679321e962f372f","src/upload/result.rs":"d73454e81f185e4e28c887f7b496bffad7baac74c8f1b719885f940f909a530d","src/util.rs":"4
fb47b56b3ddb5c368e6b8a290b05fdb77ddf90bd745c326276c403eee7c77c3","tests/boolean.rs":"9f4830f3b5f5ab6768060917deb1f546324a4744b8ed893197e31a0aed092947","tests/common/mod.rs":"f4856ee4fdf0b29cfea7b251ad7048ec34e5d3d24b75befc649ac13a91cea2df","tests/counter.rs":"39a2cd40335c164b3e7407f5505bbf571d7e4a71f032d476a7939f7dd1af90d0","tests/custom_distribution.rs":"596575504ad4eab2765b5b88245dcc92dc68e83d04c93deb4b27692c142c0b94","tests/datetime.rs":"230698c7101176730483289e90c320ef559197111a814cf2b8db185ab68f82fb","tests/event.rs":"cb78dfc0a2315253da8ef6663b81979dd5b0638bb6c31456f16ce4e9bb50c67a","tests/jwe.rs":"dc8a73d1e1925ac49061e8bb5c8de01230aa364942a22470e37aa88965b0eaba","tests/labeled.rs":"4ac1a36223b5218232c6d83c5a2bfebbf547abe78db4b0fa3609abacc70c39bc","tests/memory_distribution.rs":"0c03d0a491150f508dc12ecac7e5db70043f3a7de2f90cc6b3509aba90549821","tests/metrics.rs":"0560e0645321628015610bbf1a90c0adbb12ad42e5a068399073572120377cbb","tests/ping.rs":"8b80e7d1cdffd6afc513780e9704e83de338697db8a459f0d34830f87aaa5be8","tests/ping_maker.rs":"f65ba2797c0d3acacd8a8b8cc6326d2d60a86284c48b15e707457644ecc680a6","tests/quantity.rs":"2543f0f57085d27b0bc0c25c24c17a7e376ac673cf9265a3a9dec202920679c5","tests/storage.rs":"d8d50aba92e17a3053fd80788f5fbbcc8f82293c11c19b58e2de6c4ee45f71ff","tests/string.rs":"9c3e21bfa3ad0b75373f8f5addb8ac023ac5336ac9f969e92b89ea6fda675fdd","tests/string_list.rs":"2b83710b949bea4bd0f6b2029069887f3ea87312b00091e5aa3c7bda0fb9073c","tests/timespan.rs":"bb61e4cfb843b3f387378409ee1a88ef4b8f32b2181af686baacee262b8bca2b","tests/timing_distribution.rs":"577f5ff0059509f6005014604097f31c7d01536d7f05571c76564af4b88edcbd","tests/uuid.rs":"21f79bc0d456caefc1e7e7931f803da6144eb6af6e1d41aa687a6c9c32175f80"},"package":"c1d724cc540f6ee8b8f4a7e2cdf357dd409fe585e453e2e6750c10bd92d5b64f"}
\ No newline at end of file diff --git a/third_party/rust/glean-core/Cargo.lock b/third_party/rust/glean-core/Cargo.lock new file mode 100644 index 0000000000..4c44875aa6 --- /dev/null +++ b/third_party/rust/glean-core/Cargo.lock @@ -0,0 +1,703 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "adler" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" + +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bincode" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30d3a39baa26f9651f17b375061f3233dde33424a8b72b0dbe93a68a0bc896d" +dependencies = [ + "byteorder", + "serde", +] + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "byteorder" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" + +[[package]] +name = "cc" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "serde", + "time", + "winapi", +] + +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "ctor" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10bcb9d7dcbf7002aaffbb53eac22906b64cdcc127971dcc387d8eb7c95d5560" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "atty", + "humantime", + "log", + "termcolor", +] + +[[package]] +name = "failure" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" +dependencies = [ + "failure_derive", +] + +[[package]] +name = "failure_derive" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" +dependencies = [ + "proc-macro2", + "quote", + "syn", + 
"synstructure", +] + +[[package]] +name = "ffi-support" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f85d4d1be103c0b2d86968f0b0690dc09ac0ba205b90adb0389b552869e5000e" +dependencies = [ + "lazy_static", + "log", +] + +[[package]] +name = "flate2" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7411863d55df97a419aa64cb4d2f167103ea9d767e2c54a1868b7ac3f6b47129" +dependencies = [ + "cfg-if 1.0.0", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "getrandom" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4060f4657be78b8e766215b02b18a2e862d83745545de804638e2b545e81aee6" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + +[[package]] +name = "glean-core" +version = "33.10.2" +dependencies = [ + "bincode", + "chrono", + "ctor", + "env_logger", + "ffi-support", + "flate2", + "iso8601", + "log", + "once_cell", + "rkv", + "serde", + "serde_json", + "tempfile", + "uuid", +] + +[[package]] +name = "hermit-abi" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + +[[package]] +name = "id-arena" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" + 
+[[package]] +name = "idna" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2673c30ee86b5b96a9cb52ad15718aa1f966f5ab9ad54a8b95d5ca33120a9" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "iso8601" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee08a007a59a8adfc96f69738ddf59e374888dfd84b49c4b916543067644d58" +dependencies = [ + "nom", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89203f3fba0a3795506acaad8ebce3c80c0af93f994d5a1d7a0b1eeb23271929" + +[[package]] +name = "lmdb-rkv" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447a296f7aca299cfbb50f4e4f3d49451549af655fb7215d7f8c0c3d64bad42b" +dependencies = [ + "bitflags", + "byteorder", + "libc", + "lmdb-rkv-sys", +] + +[[package]] +name = "lmdb-rkv-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b27470ac25167b3afdfb6af8fcd3bc1be67de50ffbdaf4073378cfded6ae24a5" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "log" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf3805d4480bb5b86070dcfeb9e2cb2ebc148adb753c5cca5f884d1d65a42b2" +dependencies = [ + "cfg-if 0.1.10", +] + +[[package]] +name = "matches" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "miniz_oxide" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f2d26ec3309788e423cfbf68ad1800f061638098d76a83681af979dc4eda19d" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "nom" +version = "5.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af" +dependencies = [ + "memchr", + "version_check", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "paste" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" +dependencies = [ + "paste-impl", + "proc-macro-hack", +] + +[[package]] +name = "paste-impl" +version = "0.1.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + +[[package]] +name = "ppv-lite86" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18519b42a40024d661e1714153e9ad0c3de27cd495760ceb09710920f1098b1e" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.3.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c026d7df8b298d90ccbbc5190bd04d85e159eaf5576caeacf8741da93ccbd2e5" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ec8ca9416c5ea37062b502703cd7fcb207736bc294f6e0cf367ac6fc234570" +dependencies = [ + "bitflags", +] + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rkv" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "917d7a01f8c1ae46226e9d8dd24314279be7b04dfd0b24340d420e6927c2e687" +dependencies = [ + "arrayref", + "bincode", + "bitflags", + "byteorder", + "failure", + "id-arena", + "lazy_static", + "lmdb-rkv", + "log", + "ordered-float", + "paste", + "serde", + "serde_derive", + "url", + "uuid", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "serde" +version = "1.0.119" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bdd36f49e35b61d49efd8aa7fc068fd295961fd2286d0b2ee9a4c7a14e99cc3" +dependencies = [ + "serde_derive", +] + +[[package]] +name = 
"serde_derive" +version = "1.0.119" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552954ce79a059ddd5fd68c271592374bd15cab2274970380c000118aeffe1cd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc60a3d73ea6594cd712d830cc1f0390fd71542d8c8cd24e70cc54cdfd5e05d5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "synstructure" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "unicode-xid", +] + +[[package]] +name = "tempfile" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "rand", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "tinyvec" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf8dbc19eb42fba10e8feaaec282fb50e2c14b2726d6301dbfeed0f73306a6f" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "unicode-bidi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" +dependencies = [ + "matches", +] + +[[package]] +name = "unicode-normalization" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13e63ab62dbe32aeee58d1c5408d35c36c392bba5d9d3142287219721afe606" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "url" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "version_check" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" + +[[package]] +name = "wasi" +version = "0.10.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93c6c3420963c5c64bca373b25e77acb562081b9bb4dd5bb864187742186cea9" + +[[package]] +name = "winapi" +version = "0.3.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/glean-core/Cargo.toml b/third_party/rust/glean-core/Cargo.toml new file mode 100644 index 0000000000..e08b06d25c --- /dev/null +++ b/third_party/rust/glean-core/Cargo.toml @@ -0,0 +1,80 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. 
If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "glean-core" +version = "33.10.2" +authors = ["Jan-Erik Rediger <jrediger@mozilla.com>", "The Glean Team <glean-team@mozilla.com>"] +include = ["/README.md", "/LICENSE", "/src", "/examples", "/tests", "/Cargo.toml"] +description = "A modern Telemetry library" +readme = "README.md" +keywords = ["telemetry"] +license = "MPL-2.0" +repository = "https://github.com/mozilla/glean" +[package.metadata.glean] +glean-parser = "1.29.0" +[dependencies.bincode] +version = "1.2.1" + +[dependencies.chrono] +version = "0.4.10" +features = ["serde"] + +[dependencies.ffi-support] +version = "0.4.0" + +[dependencies.flate2] +version = "1.0.19" + +[dependencies.log] +version = "0.4.8" + +[dependencies.once_cell] +version = "1.4.1" + +[dependencies.rkv] +version = "0.16.0" +default-features = false + +[dependencies.serde] +version = "1.0.104" +features = ["derive"] + +[dependencies.serde_json] +version = "1.0.44" + +[dependencies.uuid] +version = "0.8.1" +features = ["v4"] +[dev-dependencies.ctor] +version = "0.1.12" + +[dev-dependencies.env_logger] +version = "0.7.1" +features = ["termcolor", "atty", "humantime"] +default-features = false + +[dev-dependencies.iso8601] +version = "0.4" + +[dev-dependencies.tempfile] +version = "3.1.0" + +[features] +rkv-safe-mode = [] +[badges.circle-ci] +branch = "main" +repository = "mozilla/glean" + +[badges.maintenance] +status = "actively-developed" diff --git a/third_party/rust/glean-core/LICENSE b/third_party/rust/glean-core/LICENSE new file mode 100644 index 0000000000..a612ad9813 --- /dev/null +++ b/third_party/rust/glean-core/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. 
"Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. 
"Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. 
Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. 
Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. 
Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. 
* +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. 
+Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/third_party/rust/glean-core/README.md b/third_party/rust/glean-core/README.md new file mode 100644 index 0000000000..68868f2565 --- /dev/null +++ b/third_party/rust/glean-core/README.md @@ -0,0 +1,54 @@ +# Glean SDK + +The `Glean SDK` is a modern approach for a Telemetry library and is part of the [Glean project](https://docs.telemetry.mozilla.org/concepts/glean/glean.html). + +## `glean-core` + +This library provides the core functionality of the Glean SDK, including implementations of all metric types, the ping serializer and the storage layer. +It's used in all platform-specific wrappers. + +It's not intended to be used by users directly. +Each supported platform has a specific Glean package with a nicer API. +A nice Rust API will be provided by the [Glean](https://crates.io/crates/glean) crate. 
+ +## Documentation + +All documentation is available online: + +* [The Glean SDK Book][book] +* [API documentation][apidocs] + +[book]: https://mozilla.github.io/glean/ +[apidocs]: https://mozilla.github.io/glean/docs/glean_core/index.html + +## Usage + +```rust +use glean_core::{Glean, Configuration, CommonMetricData, metrics::*}; +let cfg = Configuration { + data_path: "/tmp/glean".into(), + application_id: "glean.sample.app".into(), + language_binding_name: "Rust".into(), + upload_enabled: true, + max_events: None, + delay_ping_lifetime_io: false, +}; +let mut glean = Glean::new(cfg).unwrap(); +let ping = PingType::new("sample", true, true, vec![]); +glean.register_ping_type(&ping); + +let call_counter: CounterMetric = CounterMetric::new(CommonMetricData { + name: "calls".into(), + category: "local".into(), + send_in_pings: vec!["sample".into()], + ..Default::default() +}); + +call_counter.add(&glean, 1); + +glean.submit_ping(&ping, None).unwrap(); +``` + +## License + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. 
If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/ diff --git a/third_party/rust/glean-core/examples/sample.rs b/third_party/rust/glean-core/examples/sample.rs new file mode 100644 index 0000000000..7f292dde14 --- /dev/null +++ b/third_party/rust/glean-core/examples/sample.rs @@ -0,0 +1,77 @@ +use std::env; + +use glean_core::metrics::*; +use glean_core::ping::PingMaker; +use glean_core::{CommonMetricData, Glean}; +use tempfile::Builder; + +/// Records a few sample metrics and prints the resulting store snapshots and pings. +/// +/// The data directory is taken from the first command-line argument; +/// without one a temporary directory is used. +fn main() { + env_logger::init(); + + let mut args = env::args().skip(1); + + // Hold the temporary directory guard until `main` returns: dropping it + // earlier deletes the directory before Glean gets to use it. + let tmp_root; + let data_path = if let Some(path) = args.next() { + path + } else { + tmp_root = Builder::new().prefix("simple-db").tempdir().unwrap(); + tmp_root.path().display().to_string() + }; + + let cfg = glean_core::Configuration { + data_path, + application_id: "org.mozilla.glean_core.example".into(), + language_binding_name: "Rust".into(), + upload_enabled: true, + max_events: None, + delay_ping_lifetime_io: false, + }; + let mut glean = Glean::new(cfg).unwrap(); + glean.register_ping_type(&PingType::new("baseline", true, false, vec![])); + glean.register_ping_type(&PingType::new("metrics", true, false, vec![])); + + let local_metric: StringMetric = StringMetric::new(CommonMetricData { + name: "local_metric".into(), + category: "local".into(), + send_in_pings: vec!["baseline".into()], + ..Default::default() + }); + + let call_counter: CounterMetric = CounterMetric::new(CommonMetricData { + name: "calls".into(), + category: "local".into(), + send_in_pings: vec!["baseline".into(), "metrics".into()], + ..Default::default() + }); + + local_metric.set(&glean, "I can set this"); + call_counter.add(&glean, 1); + + println!("Baseline Data:\n{}", glean.snapshot("baseline", true)); + + call_counter.add(&glean, 2); + println!("Metrics Data:\n{}", glean.snapshot("metrics", true)); + + call_counter.add(&glean, 3); + + println!(); + println!("Baseline Data 2:\n{}", glean.snapshot("baseline", false)); + println!("Metrics 
Data 2:\n{}", glean.snapshot("metrics", true)); + + let list: StringListMetric = StringListMetric::new(CommonMetricData { + name: "list".into(), + category: "local".into(), + send_in_pings: vec!["baseline".into()], + ..Default::default() + }); + list.add(&glean, "once"); + list.add(&glean, "upon"); + + let ping_maker = PingMaker::new(); + let ping = ping_maker + .collect_string(&glean, glean.get_ping_by_name("baseline").unwrap(), None) + .unwrap(); + println!("Baseline Ping:\n{}", ping); + + let ping = ping_maker.collect_string(&glean, glean.get_ping_by_name("metrics").unwrap(), None); + println!("Metrics Ping: {:?}", ping); +} diff --git a/third_party/rust/glean-core/src/common_metric_data.rs b/third_party/rust/glean-core/src/common_metric_data.rs new file mode 100644 index 0000000000..8113e1efc4 --- /dev/null +++ b/third_party/rust/glean-core/src/common_metric_data.rs @@ -0,0 +1,130 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::convert::TryFrom; + +use crate::error::{Error, ErrorKind}; +#[allow(unused_imports)] +use crate::metrics::{dynamic_label, LabeledMetric}; +use crate::Glean; + +/// The supported metrics' lifetimes. +/// +/// A metric's lifetime determines when its stored data gets reset. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[repr(i32)] // Use i32 to be compatible with our JNA definition +pub enum Lifetime { + /// The metric is reset with each sent ping + Ping, + /// The metric is reset on application restart + Application, + /// The metric is reset with each user profile + User, +} + +impl Default for Lifetime { + fn default() -> Self { + Lifetime::Ping + } +} + +impl Lifetime { + /// String representation of the lifetime. 
+ pub fn as_str(self) -> &'static str { + match self { + Lifetime::Ping => "ping", + Lifetime::Application => "app", + Lifetime::User => "user", + } + } +} + +impl TryFrom<i32> for Lifetime { + type Error = Error; + + fn try_from(value: i32) -> Result<Lifetime, Self::Error> { + match value { + 0 => Ok(Lifetime::Ping), + 1 => Ok(Lifetime::Application), + 2 => Ok(Lifetime::User), + e => Err(ErrorKind::Lifetime(e).into()), + } + } +} + +/// The common set of data shared across all different metric types. +#[derive(Default, Debug, Clone)] +pub struct CommonMetricData { + /// The metric's name. + pub name: String, + /// The metric's category. + pub category: String, + /// List of ping names to include this metric in. + pub send_in_pings: Vec<String>, + /// The metric's lifetime. + pub lifetime: Lifetime, + /// Whether or not the metric is disabled. + /// + /// Disabled metrics are never recorded. + pub disabled: bool, + /// Dynamic label. + /// + /// When a [`LabeledMetric<T>`](LabeledMetric) factory creates the specific + /// metric to be recorded to, dynamic labels are stored in the specific + /// label so that we can validate them when the Glean singleton is + /// available. + pub dynamic_label: Option<String>, +} + +impl CommonMetricData { + /// Creates a new metadata object. + pub fn new<A: Into<String>, B: Into<String>, C: Into<String>>( + category: A, + name: B, + ping_name: C, + ) -> CommonMetricData { + CommonMetricData { + name: name.into(), + category: category.into(), + send_in_pings: vec![ping_name.into()], + ..Default::default() + } + } + + /// The metric's base identifier, including the category and name, but not the label. + /// + /// If `category` is empty, it's omitted. + /// Otherwise, it's the combination of the metric's `category` and `name`. 
+ pub(crate) fn base_identifier(&self) -> String { + if self.category.is_empty() { + self.name.clone() + } else { + format!("{}.{}", self.category, self.name) + } + } + + /// The metric's unique identifier, including the category, name and label. + /// + /// If `category` is empty, it's omitted. + /// Otherwise, it's the combination of the metric's `category`, `name` and `label`. + pub(crate) fn identifier(&self, glean: &Glean) -> String { + let base_identifier = self.base_identifier(); + + if let Some(label) = &self.dynamic_label { + dynamic_label(glean, self, &base_identifier, label) + } else { + base_identifier + } + } + + /// Whether this metric should be recorded. + pub fn should_record(&self) -> bool { + !self.disabled + } + + /// The list of storages this metric should be recorded into. + pub fn storage_names(&self) -> &[String] { + &self.send_in_pings + } +} diff --git a/third_party/rust/glean-core/src/database/mod.rs b/third_party/rust/glean-core/src/database/mod.rs new file mode 100644 index 0000000000..9287c4a8e9 --- /dev/null +++ b/third_party/rust/glean-core/src/database/mod.rs @@ -0,0 +1,1651 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::fs; +use std::num::NonZeroU64; +use std::path::Path; +use std::str; +use std::sync::RwLock; + +use rkv::StoreOptions; + +// Select the LMDB-powered storage backend when the feature is not activated. 
+#[cfg(not(feature = "rkv-safe-mode"))] +mod backend { + use std::path::Path; + + /// cbindgen:ignore + pub type Rkv = rkv::Rkv<rkv::backend::LmdbEnvironment>; + /// cbindgen:ignore + pub type SingleStore = rkv::SingleStore<rkv::backend::LmdbDatabase>; + /// cbindgen:ignore + pub type Writer<'t> = rkv::Writer<rkv::backend::LmdbRwTransaction<'t>>; + + pub fn rkv_new(path: &Path) -> Result<Rkv, rkv::StoreError> { + Rkv::new::<rkv::backend::Lmdb>(path) + } + + /// No migration necessary when staying with LMDB. + pub fn migrate(_path: &Path, _dst_env: &Rkv) { + // Intentionally left empty. + } +} + +// Select the "safe mode" storage backend when the feature is activated. +#[cfg(feature = "rkv-safe-mode")] +mod backend { + use rkv::migrator::Migrator; + use std::{fs, path::Path}; + + /// cbindgen:ignore + pub type Rkv = rkv::Rkv<rkv::backend::SafeModeEnvironment>; + /// cbindgen:ignore + pub type SingleStore = rkv::SingleStore<rkv::backend::SafeModeDatabase>; + /// cbindgen:ignore + pub type Writer<'t> = rkv::Writer<rkv::backend::SafeModeRwTransaction<'t>>; + + pub fn rkv_new(path: &Path) -> Result<Rkv, rkv::StoreError> { + match Rkv::new::<rkv::backend::SafeMode>(path) { + // An invalid file can mean: + // 1. An empty file. + // 2. A corrupted file. + // + // In both instances there's not much we can do. + // Drop the data by removing the file, and start over. + Err(rkv::StoreError::FileInvalid) => { + let safebin = path.join("data.safe.bin"); + fs::remove_file(safebin).map_err(|_| rkv::StoreError::FileInvalid)?; + // Now try again, we only handle that error once. + Rkv::new::<rkv::backend::SafeMode>(path) + } + other => other, + } + } + + fn delete_and_log(path: &Path, msg: &str) { + if let Err(err) = fs::remove_file(path) { + match err.kind() { + std::io::ErrorKind::NotFound => { + // Silently drop this error, the file was already non-existing. 
+ } + _ => log::warn!("{}", msg), + } + } + } + + fn delete_lmdb_database(path: &Path) { + let datamdb = path.join("data.mdb"); + delete_and_log(&datamdb, "Failed to delete old data."); + + let lockmdb = path.join("lock.mdb"); + delete_and_log(&lockmdb, "Failed to delete old lock."); + } + + /// Migrate from LMDB storage to safe-mode storage. + /// + /// This migrates the data once, then deletes the LMDB storage. + /// The safe-mode storage must be empty for it to work. + /// Existing data will not be overwritten. + /// If the destination database is not empty the LMDB database is deleted + /// without migrating data. + /// This is a no-op if no LMDB database file exists. + pub fn migrate(path: &Path, dst_env: &Rkv) { + use rkv::{MigrateError, StoreError}; + + log::debug!("Migrating files in {}", path.display()); + + // Shortcut if no data to migrate is around. + let datamdb = path.join("data.mdb"); + if !datamdb.exists() { + log::debug!("No data to migrate."); + return; + } + + // We're handling the same error cases as `easy_migrate_lmdb_to_safe_mode`, + // but annotate each why they don't cause problems for Glean. + // Additionally for known cases we delete the LMDB database regardless. + let should_delete = + match Migrator::open_and_migrate_lmdb_to_safe_mode(path, |builder| builder, dst_env) { + // Source environment is corrupted. + // We start fresh with the new database. + Err(MigrateError::StoreError(StoreError::FileInvalid)) => true, + Err(MigrateError::StoreError(StoreError::DatabaseCorrupted)) => true, + // Path not accessible. + // Somehow our directory vanished between us creating it and reading from it. + // Nothing we can do really. + Err(MigrateError::StoreError(StoreError::IoError(_))) => true, + // Path accessible but incompatible for configuration. + // This should not happen, we never used storages that safe-mode doesn't understand. + // If it does happen, let's start fresh and use the safe-mode from now on. 
+ Err(MigrateError::StoreError(StoreError::UnsuitableEnvironmentPath(_))) => true, + // Nothing to migrate. + // Source database was empty. We just start fresh anyway. + Err(MigrateError::SourceEmpty) => true, + // Migrating would overwrite. + // Either a previous migration failed and we still started writing data, + // or someone placed back an old data file. + // In any case we better stay on the new data and delete the old one. + Err(MigrateError::DestinationNotEmpty) => { + log::warn!("Failed to migrate old data. Destination was not empty"); + true + } + // An internal lock was poisoned. + // This would only happen if multiple things run concurrently and one crashes. + Err(MigrateError::ManagerPoisonError) => false, + // Couldn't close source environment and delete files on disk (e.g. other stores still open). + // This could only happen if multiple instances are running, + // we leave files in place. + Err(MigrateError::CloseError(_)) => false, + // Other store errors are never returned from the migrator. + // We need to handle them to please rustc. + Err(MigrateError::StoreError(_)) => false, + // Other errors can't happen, so this leaves us with the Ok case. + // This already deleted the LMDB files. + Ok(()) => false, + }; + + if should_delete { + log::debug!("Need to delete remaining LMDB files."); + delete_lmdb_database(&path); + } + + log::debug!("Migration ended. Safe-mode database in {}", path.display()); + } +} + +use crate::metrics::Metric; +use crate::CommonMetricData; +use crate::Glean; +use crate::Lifetime; +use crate::Result; +use backend::*; + +pub struct Database { + /// Handle to the database environment. + rkv: Rkv, + + /// Handles to the "lifetime" stores. + /// + /// A "store" is a handle to the underlying database. + /// We keep them open for fast and frequent access. 
+ user_store: SingleStore, + ping_store: SingleStore, + application_store: SingleStore, + + /// If the `delay_ping_lifetime_io` Glean config option is `true`, + /// we will save metrics with 'ping' lifetime data in a map temporarily + /// so as to persist them to disk using rkv in bulk on demand. + ping_lifetime_data: Option<RwLock<BTreeMap<String, Metric>>>, + + // Initial file size when opening the database. + file_size: Option<NonZeroU64>, +} + +impl std::fmt::Debug for Database { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("Database") + .field("rkv", &self.rkv) + .field("user_store", &"SingleStore") + .field("ping_store", &"SingleStore") + .field("application_store", &"SingleStore") + .field("ping_lifetime_data", &self.ping_lifetime_data) + .finish() + } +} + +/// Calculate the database size from all the files in the directory. +/// +/// Only regular files directly inside `dir` are counted: subdirectories are +/// not descended into and entries that cannot be read are skipped. +/// +/// # Arguments +/// +/// * `dir` - The path to the directory +/// +/// # Returns +/// +/// Returns the non-zero combined size of all files in a directory, +/// or `None` on error or if the size is `0`. +fn database_size(dir: &Path) -> Option<NonZeroU64> { + let mut total_size = 0; + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries { + if let Ok(entry) = entry { + if let Ok(file_type) = entry.file_type() { + if file_type.is_file() { + let path = entry.path(); + if let Ok(metadata) = fs::metadata(path) { + total_size += metadata.len(); + } else { + continue; + } + } + } + } + } + } + + NonZeroU64::new(total_size) +} + +impl Database { + /// Initializes the data store. + /// + /// This opens the underlying rkv store and creates + /// the underlying directory structure. + /// + /// It also loads any Lifetime::Ping data that might be + /// persisted, in case `delay_ping_lifetime_io` is set. 
+ pub fn new(data_path: &str, delay_ping_lifetime_io: bool) -> Result<Self> { + let path = Path::new(data_path).join("db"); + log::debug!("Database path: {:?}", path.display()); + let file_size = database_size(&path); + + let rkv = Self::open_rkv(&path)?; + let user_store = rkv.open_single(Lifetime::User.as_str(), StoreOptions::create())?; + let ping_store = rkv.open_single(Lifetime::Ping.as_str(), StoreOptions::create())?; + let application_store = + rkv.open_single(Lifetime::Application.as_str(), StoreOptions::create())?; + let ping_lifetime_data = if delay_ping_lifetime_io { + Some(RwLock::new(BTreeMap::new())) + } else { + None + }; + + let db = Self { + rkv, + user_store, + ping_store, + application_store, + ping_lifetime_data, + file_size, + }; + + db.load_ping_lifetime_data(); + + Ok(db) + } + + /// Get the initial database file size. + pub fn file_size(&self) -> Option<NonZeroU64> { + self.file_size + } + + fn get_store(&self, lifetime: Lifetime) -> &SingleStore { + match lifetime { + Lifetime::User => &self.user_store, + Lifetime::Ping => &self.ping_store, + Lifetime::Application => &self.application_store, + } + } + + /// Creates the storage directories and inits rkv. + fn open_rkv(path: &Path) -> Result<Rkv> { + fs::create_dir_all(&path)?; + + let rkv = rkv_new(&path)?; + migrate(path, &rkv); + + log::info!("Database initialized"); + Ok(rkv) + } + + /// Build the key of the final location of the data in the database. + /// Such location is built using the storage name and the metric + /// key/name (if available). + /// + /// # Arguments + /// + /// * `storage_name` - the name of the storage to store/fetch data from. + /// * `metric_key` - the optional metric key/name. + /// + /// # Returns + /// + /// A string representing the location in the database. 
+ fn get_storage_key(storage_name: &str, metric_key: Option<&str>) -> String { + match metric_key { + Some(k) => format!("{}#{}", storage_name, k), + None => format!("{}#", storage_name), + } + } + + /// Loads Lifetime::Ping data from rkv to memory, + /// if `delay_ping_lifetime_io` is set to true. + /// + /// Does nothing if it isn't or if there is no data to load. + /// + /// # Panics + /// + /// Panics if the lock guarding the in-memory ping lifetime data is poisoned. + fn load_ping_lifetime_data(&self) { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't write ping lifetime data"); + + let reader = unwrap_or!(self.rkv.read(), return); + let store = self.get_store(Lifetime::Ping); + let mut iter = unwrap_or!(store.iter_start(&reader), return); + + // Loading stops at the first entry the iterator fails to read. + while let Some(Ok((metric_id, value))) = iter.next() { + let metric_id = match str::from_utf8(metric_id) { + Ok(metric_id) => metric_id.to_string(), + _ => continue, + }; + let metric: Metric = match value { + rkv::Value::Blob(blob) => unwrap_or!(bincode::deserialize(blob), continue), + _ => continue, + }; + + data.insert(metric_id, metric); + } + } + } + + /// Iterates with the provided transaction function + /// over the requested data from the given storage. + /// + /// * If the storage is unavailable, the transaction function is never invoked. + /// * If the read data cannot be deserialized it will be silently skipped. + /// + /// # Arguments + /// + /// * `lifetime` - The metric lifetime to iterate over. + /// * `storage_name` - The storage name to iterate over. + /// * `metric_key` - The metric key to iterate over. All metrics iterated over + /// will have this prefix. For example, if `metric_key` is of the form `{category}.`, + /// it will iterate over all metrics in the given category. If the `metric_key` is of the + /// form `{category}.{name}/`, the iterator will iterate over all specific metrics for + /// a given labeled metric. If not provided, the entire storage for the given lifetime + /// will be iterated over. 
+ /// * `transaction_fn` - Called for each entry being iterated over. It is + /// passed two arguments: `(metric_id: &[u8], metric: &Metric)`. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. + pub fn iter_store_from<F>( + &self, + lifetime: Lifetime, + storage_name: &str, + metric_key: Option<&str>, + mut transaction_fn: F, + ) where + F: FnMut(&[u8], &Metric), + { + let iter_start = Self::get_storage_key(storage_name, metric_key); + let len = iter_start.len(); + + // Lifetime::Ping data is not immediately persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let data = ping_lifetime_data + .read() + .expect("Can't read ping lifetime data"); + for (key, value) in data.iter() { + if key.starts_with(&iter_start) { + let key = &key[len..]; + transaction_fn(key.as_bytes(), value); + } + } + return; + } + } + + let reader = unwrap_or!(self.rkv.read(), return); + let mut iter = unwrap_or!( + self.get_store(lifetime).iter_from(&reader, &iter_start), + return + ); + + while let Some(Ok((metric_id, value))) = iter.next() { + if !metric_id.starts_with(iter_start.as_bytes()) { + break; + } + + let metric_id = &metric_id[len..]; + let metric: Metric = match value { + rkv::Value::Blob(blob) => unwrap_or!(bincode::deserialize(blob), continue), + _ => continue, + }; + transaction_fn(metric_id, &metric); + } + } + + /// Determines if the storage has the given metric. + /// + /// If data cannot be read it is assumed that the storage does not have the metric. + /// + /// # Arguments + /// + /// * `lifetime` - The lifetime of the metric. + /// * `storage_name` - The storage name to look in. + /// * `metric_identifier` - The metric identifier. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
+ pub fn has_metric( + &self, + lifetime: Lifetime, + storage_name: &str, + metric_identifier: &str, + ) -> bool { + let key = Self::get_storage_key(storage_name, Some(metric_identifier)); + + // Lifetime::Ping data is not persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + return ping_lifetime_data + .read() + .map(|data| data.contains_key(&key)) + .unwrap_or(false); + } + } + + let reader = unwrap_or!(self.rkv.read(), return false); + self.get_store(lifetime) + .get(&reader, &key) + .unwrap_or(None) + .is_some() + } + + /// Writes to the specified storage with the provided transaction function. + /// + /// If the storage is unavailable, it will return an error. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + fn write_with_store<F>(&self, store_name: Lifetime, mut transaction_fn: F) -> Result<()> + where + F: FnMut(Writer, &SingleStore) -> Result<()>, + { + let writer = self.rkv.write().unwrap(); + let store = self.get_store(store_name); + transaction_fn(writer, store) + } + + /// Records a metric in the underlying storage system. + pub fn record(&self, glean: &Glean, data: &CommonMetricData, value: &Metric) { + // If upload is disabled we don't want to record. + if !glean.is_upload_enabled() { + return; + } + + let name = data.identifier(glean); + + for ping_name in data.storage_names() { + if let Err(e) = self.record_per_lifetime(data.lifetime, ping_name, &name, value) { + log::error!("Failed to record metric into {}: {:?}", ping_name, e); + } + } + } + + /// Records a metric in the underlying storage system, for a single lifetime. + /// + /// # Returns + /// + /// If the storage is unavailable or the write fails, no data will be stored and an error will be returned. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
+ fn record_per_lifetime( + &self, + lifetime: Lifetime, + storage_name: &str, + key: &str, + metric: &Metric, + ) -> Result<()> { + let final_key = Self::get_storage_key(storage_name, Some(key)); + + // Lifetime::Ping data is not immediately persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't read ping lifetime data"); + data.insert(final_key, metric.clone()); + return Ok(()); + } + } + + let encoded = bincode::serialize(&metric).expect("IMPOSSIBLE: Serializing metric failed"); + let value = rkv::Value::Blob(&encoded); + + let mut writer = self.rkv.write()?; + self.get_store(lifetime) + .put(&mut writer, final_key, &value)?; + writer.commit()?; + Ok(()) + } + + /// Records the provided value, with the given lifetime, + /// after applying a transformation function. + pub fn record_with<F>(&self, glean: &Glean, data: &CommonMetricData, mut transform: F) + where + F: FnMut(Option<Metric>) -> Metric, + { + // If upload is disabled we don't want to record. + if !glean.is_upload_enabled() { + return; + } + + let name = data.identifier(glean); + for ping_name in data.storage_names() { + if let Err(e) = + self.record_per_lifetime_with(data.lifetime, ping_name, &name, &mut transform) + { + log::error!("Failed to record metric into {}: {:?}", ping_name, e); + } + } + } + + /// Records a metric in the underlying storage system, + /// after applying the given transformation function, for a single lifetime. + /// + /// # Returns + /// + /// If the storage is unavailable or the write fails, no data will be stored and an error will be returned. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
+ fn record_per_lifetime_with<F>( + &self, + lifetime: Lifetime, + storage_name: &str, + key: &str, + mut transform: F, + ) -> Result<()> + where + F: FnMut(Option<Metric>) -> Metric, + { + let final_key = Self::get_storage_key(storage_name, Some(key)); + + // Lifetime::Ping data is not persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't access ping lifetime data as writable"); + let entry = data.entry(final_key); + match entry { + Entry::Vacant(entry) => { + entry.insert(transform(None)); + } + Entry::Occupied(mut entry) => { + let old_value = entry.get().clone(); + entry.insert(transform(Some(old_value))); + } + } + return Ok(()); + } + } + + let mut writer = self.rkv.write()?; + let store = self.get_store(lifetime); + let new_value: Metric = { + let old_value = store.get(&writer, &final_key)?; + + match old_value { + Some(rkv::Value::Blob(blob)) => { + let old_value = bincode::deserialize(blob).ok(); + transform(old_value) + } + _ => transform(None), + } + }; + + let encoded = + bincode::serialize(&new_value).expect("IMPOSSIBLE: Serializing metric failed"); + let value = rkv::Value::Blob(&encoded); + store.put(&mut writer, final_key, &value)?; + writer.commit()?; + Ok(()) + } + + /// Clears a storage (only Ping Lifetime). + /// + /// # Returns + /// + /// * If the storage is unavailable an error is returned. + /// * If any individual delete fails, an error is returned, but other deletions might have + /// happened. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
+ pub fn clear_ping_lifetime_storage(&self, storage_name: &str) -> Result<()> { + // Lifetime::Ping data will be saved to `ping_lifetime_data` + // in case `delay_ping_lifetime_io` is set to true + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + ping_lifetime_data + .write() + .expect("Can't access ping lifetime data as writable") + .clear(); + } + + self.write_with_store(Lifetime::Ping, |mut writer, store| { + let mut metrics = Vec::new(); + { + let mut iter = store.iter_from(&writer, &storage_name)?; + while let Some(Ok((metric_id, _))) = iter.next() { + if let Ok(metric_id) = std::str::from_utf8(metric_id) { + if !metric_id.starts_with(&storage_name) { + break; + } + metrics.push(metric_id.to_owned()); + } + } + } + + let mut res = Ok(()); + for to_delete in metrics { + if let Err(e) = store.delete(&mut writer, to_delete) { + log::warn!("Can't delete from store: {:?}", e); + res = Err(e); + } + } + + writer.commit()?; + Ok(res?) + }) + } + + /// Removes a single metric from the storage. + /// + /// # Arguments + /// + /// * `lifetime` - the lifetime of the storage in which to look for the metric. + /// * `storage_name` - the name of the storage to store/fetch data from. + /// * `metric_id` - the metric category + name. + /// + /// # Returns + /// + /// * If the storage is unavailable an error is returned. + /// * If the metric could not be deleted, an error is returned. + /// + /// Otherwise `Ok(())` is returned. + /// + /// # Panics + /// + /// This function will **not** panic on database errors. 
+ pub fn remove_single_metric( + &self, + lifetime: Lifetime, + storage_name: &str, + metric_id: &str, + ) -> Result<()> { + let final_key = Self::get_storage_key(storage_name, Some(metric_id)); + + // Lifetime::Ping data is not persisted to disk if + // Glean has `delay_ping_lifetime_io` set to true + if lifetime == Lifetime::Ping { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let mut data = ping_lifetime_data + .write() + .expect("Can't access app lifetime data as writable"); + data.remove(&final_key); + } + } + + self.write_with_store(lifetime, |mut writer, store| { + if let Err(e) = store.delete(&mut writer, final_key.clone()) { + if self.ping_lifetime_data.is_some() { + // If ping_lifetime_data exists, it might be + // that data is in memory, but not yet in rkv. + return Ok(()); + } + return Err(e.into()); + } + writer.commit()?; + Ok(()) + }) + } + + /// Clears all the metrics in the database, for the provided lifetime. + /// + /// Errors are logged. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + pub fn clear_lifetime(&self, lifetime: Lifetime) { + let res = self.write_with_store(lifetime, |mut writer, store| { + store.clear(&mut writer)?; + writer.commit()?; + Ok(()) + }); + if let Err(e) = res { + log::warn!("Could not clear store for lifetime {:?}: {:?}", lifetime, e); + } + } + + /// Clears all metrics in the database. + /// + /// Errors are logged. + /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + pub fn clear_all(&self) { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + ping_lifetime_data + .write() + .expect("Can't access ping lifetime data as writable") + .clear(); + } + + for lifetime in [Lifetime::User, Lifetime::Ping, Lifetime::Application].iter() { + self.clear_lifetime(*lifetime); + } + } + + /// Persists ping_lifetime_data to disk. + /// + /// Does nothing in case there is nothing to persist. 
+ /// + /// # Panics + /// + /// * This function will **not** panic on database errors. + pub fn persist_ping_lifetime_data(&self) -> Result<()> { + if let Some(ping_lifetime_data) = &self.ping_lifetime_data { + let data = ping_lifetime_data + .read() + .expect("Can't read ping lifetime data"); + + self.write_with_store(Lifetime::Ping, |mut writer, store| { + for (key, value) in data.iter() { + let encoded = + bincode::serialize(&value).expect("IMPOSSIBLE: Serializing metric failed"); + // There is no need for `get_storage_key` here because + // the key is already formatted from when it was saved + // to ping_lifetime_data. + store.put(&mut writer, &key, &rkv::Value::Blob(&encoded))?; + } + writer.commit()?; + Ok(()) + })?; + } + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::tests::new_glean; + use crate::CommonMetricData; + use std::collections::HashMap; + use tempfile::tempdir; + + #[test] + fn test_panicks_if_fails_dir_creation() { + assert!(Database::new("/!#\"'@#°ç", false).is_err()); + } + + #[test] + fn test_data_dir_rkv_inits() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + Database::new(&str_dir, false).unwrap(); + + assert!(dir.path().exists()); + } + + #[test] + fn test_ping_lifetime_metric_recorded() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + let db = Database::new(&str_dir, false).unwrap(); + + assert!(db.ping_lifetime_data.is_none()); + + // Attempt to record a known value. + let test_value = "test-value"; + let test_storage = "test-storage"; + let test_metric_id = "telemetry_test.test_name"; + db.record_per_lifetime( + Lifetime::Ping, + test_storage, + test_metric_id, + &Metric::String(test_value.to_string()), + ) + .unwrap(); + + // Verify that the data is correctly recorded. 
+ let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(test_metric_id, metric_id); + match metric { + Metric::String(s) => assert_eq!(test_value, s), + _ => panic!("Unexpected data found"), + } + }; + + db.iter_store_from(Lifetime::Ping, test_storage, None, &mut snapshotter); + assert_eq!(1, found_metrics, "We only expect 1 Lifetime.Ping metric."); + } + + #[test] + fn test_application_lifetime_metric_recorded() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + let db = Database::new(&str_dir, false).unwrap(); + + // Attempt to record a known value. + let test_value = "test-value"; + let test_storage = "test-storage1"; + let test_metric_id = "telemetry_test.test_name"; + db.record_per_lifetime( + Lifetime::Application, + test_storage, + test_metric_id, + &Metric::String(test_value.to_string()), + ) + .unwrap(); + + // Verify that the data is correctly recorded. + let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(test_metric_id, metric_id); + match metric { + Metric::String(s) => assert_eq!(test_value, s), + _ => panic!("Unexpected data found"), + } + }; + + db.iter_store_from(Lifetime::Application, test_storage, None, &mut snapshotter); + assert_eq!( + 1, found_metrics, + "We only expect 1 Lifetime.Application metric." + ); + } + + #[test] + fn test_user_lifetime_metric_recorded() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + let db = Database::new(&str_dir, false).unwrap(); + + // Attempt to record a known value. 
+ let test_value = "test-value"; + let test_storage = "test-storage2"; + let test_metric_id = "telemetry_test.test_name"; + db.record_per_lifetime( + Lifetime::User, + test_storage, + test_metric_id, + &Metric::String(test_value.to_string()), + ) + .unwrap(); + + // Verify that the data is correctly recorded. + let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(test_metric_id, metric_id); + match metric { + Metric::String(s) => assert_eq!(test_value, s), + _ => panic!("Unexpected data found"), + } + }; + + db.iter_store_from(Lifetime::User, test_storage, None, &mut snapshotter); + assert_eq!(1, found_metrics, "We only expect 1 Lifetime.User metric."); + } + + #[test] + fn test_clear_ping_storage() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + let db = Database::new(&str_dir, false).unwrap(); + + // Attempt to record a known value for every single lifetime. + let test_storage = "test-storage"; + db.record_per_lifetime( + Lifetime::User, + test_storage, + "telemetry_test.test_name_user", + &Metric::String("test-value-user".to_string()), + ) + .unwrap(); + db.record_per_lifetime( + Lifetime::Ping, + test_storage, + "telemetry_test.test_name_ping", + &Metric::String("test-value-ping".to_string()), + ) + .unwrap(); + db.record_per_lifetime( + Lifetime::Application, + test_storage, + "telemetry_test.test_name_application", + &Metric::String("test-value-application".to_string()), + ) + .unwrap(); + + // Take a snapshot for the data, all the lifetimes. 
+ { + let mut snapshot: HashMap<String, String> = HashMap::new(); + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + match metric { + Metric::String(s) => snapshot.insert(metric_id, s.to_string()), + _ => panic!("Unexpected data found"), + }; + }; + + db.iter_store_from(Lifetime::User, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Ping, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Application, test_storage, None, &mut snapshotter); + + assert_eq!(3, snapshot.len(), "We expect all lifetimes to be present."); + assert!(snapshot.contains_key("telemetry_test.test_name_user")); + assert!(snapshot.contains_key("telemetry_test.test_name_ping")); + assert!(snapshot.contains_key("telemetry_test.test_name_application")); + } + + // Clear the Ping lifetime. + db.clear_ping_lifetime_storage(test_storage).unwrap(); + + // Take a snapshot again and check that we're only clearing the Ping lifetime. + { + let mut snapshot: HashMap<String, String> = HashMap::new(); + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + match metric { + Metric::String(s) => snapshot.insert(metric_id, s.to_string()), + _ => panic!("Unexpected data found"), + }; + }; + + db.iter_store_from(Lifetime::User, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Ping, test_storage, None, &mut snapshotter); + db.iter_store_from(Lifetime::Application, test_storage, None, &mut snapshotter); + + assert_eq!(2, snapshot.len(), "We only expect 2 metrics to be left."); + assert!(snapshot.contains_key("telemetry_test.test_name_user")); + assert!(snapshot.contains_key("telemetry_test.test_name_application")); + } + } + + #[test] + fn test_remove_single_metric() { + // Init the database in a temporary directory. 
+ let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + let db = Database::new(&str_dir, false).unwrap(); + + let test_storage = "test-storage-single-lifetime"; + let metric_id_pattern = "telemetry_test.single_metric"; + + // Write sample metrics to the database. + let lifetimes = vec![Lifetime::User, Lifetime::Ping, Lifetime::Application]; + + for lifetime in lifetimes.iter() { + for value in &["retain", "delete"] { + db.record_per_lifetime( + *lifetime, + test_storage, + &format!("{}_{}", metric_id_pattern, value), + &Metric::String((*value).to_string()), + ) + .unwrap(); + } + } + + // Remove "telemetry_test.single_metric_delete" from each lifetime. + for lifetime in lifetimes.iter() { + db.remove_single_metric( + *lifetime, + test_storage, + &format!("{}_delete", metric_id_pattern), + ) + .unwrap(); + } + + // Verify that "telemetry_test.single_metric_retain" is still around for all lifetimes. + for lifetime in lifetimes.iter() { + let mut found_metrics = 0; + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + found_metrics += 1; + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + assert_eq!(format!("{}_retain", metric_id_pattern), metric_id); + match metric { + Metric::String(s) => assert_eq!("retain", s), + _ => panic!("Unexpected data found"), + } + }; + + // Check the User lifetime. + db.iter_store_from(*lifetime, test_storage, None, &mut snapshotter); + assert_eq!( + 1, found_metrics, + "We only expect 1 metric for this lifetime." + ); + } + } + + #[test] + fn test_delayed_ping_lifetime_persistence() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + let db = Database::new(&str_dir, true).unwrap(); + let test_storage = "test-storage"; + + assert!(db.ping_lifetime_data.is_some()); + + // Attempt to record a known value. 
+ let test_value1 = "test-value1"; + let test_metric_id1 = "telemetry_test.test_name1"; + db.record_per_lifetime( + Lifetime::Ping, + test_storage, + test_metric_id1, + &Metric::String(test_value1.to_string()), + ) + .unwrap(); + + // Attempt to persist data. + db.persist_ping_lifetime_data().unwrap(); + + // Attempt to record another known value. + let test_value2 = "test-value2"; + let test_metric_id2 = "telemetry_test.test_name2"; + db.record_per_lifetime( + Lifetime::Ping, + test_storage, + test_metric_id2, + &Metric::String(test_value2.to_string()), + ) + .unwrap(); + + { + // At this stage we expect `test_value1` to be persisted and in memory, + // since it was recorded before calling `persist_ping_lifetime_data`, + // and `test_value2` to be only in memory, since it was recorded after. + let store: SingleStore = db + .rkv + .open_single(Lifetime::Ping.as_str(), StoreOptions::create()) + .unwrap(); + let reader = db.rkv.read().unwrap(); + + // Verify that test_value1 is in rkv. + assert!(store + .get(&reader, format!("{}#{}", test_storage, test_metric_id1)) + .unwrap_or(None) + .is_some()); + // Verifiy that test_value2 is **not** in rkv. + assert!(store + .get(&reader, format!("{}#{}", test_storage, test_metric_id2)) + .unwrap_or(None) + .is_none()); + + let data = match &db.ping_lifetime_data { + Some(ping_lifetime_data) => ping_lifetime_data, + None => panic!("Expected `ping_lifetime_data` to exist here!"), + }; + let data = data.read().unwrap(); + // Verify that test_value1 is also in memory. + assert!(data + .get(&format!("{}#{}", test_storage, test_metric_id1)) + .is_some()); + // Verify that test_value2 is in memory. + assert!(data + .get(&format!("{}#{}", test_storage, test_metric_id2)) + .is_some()); + } + + // Attempt to persist data again. + db.persist_ping_lifetime_data().unwrap(); + + { + // At this stage we expect `test_value1` and `test_value2` to + // be persisted, since both were created before a call to + // `persist_ping_lifetime_data`. 
+ let store: SingleStore = db + .rkv + .open_single(Lifetime::Ping.as_str(), StoreOptions::create()) + .unwrap(); + let reader = db.rkv.read().unwrap(); + + // Verify that test_value1 is in rkv. + assert!(store + .get(&reader, format!("{}#{}", test_storage, test_metric_id1)) + .unwrap_or(None) + .is_some()); + // Verifiy that test_value2 is also in rkv. + assert!(store + .get(&reader, format!("{}#{}", test_storage, test_metric_id2)) + .unwrap_or(None) + .is_some()); + + let data = match &db.ping_lifetime_data { + Some(ping_lifetime_data) => ping_lifetime_data, + None => panic!("Expected `ping_lifetime_data` to exist here!"), + }; + let data = data.read().unwrap(); + // Verify that test_value1 is also in memory. + assert!(data + .get(&format!("{}#{}", test_storage, test_metric_id1)) + .is_some()); + // Verify that test_value2 is also in memory. + assert!(data + .get(&format!("{}#{}", test_storage, test_metric_id2)) + .is_some()); + } + } + + #[test] + fn test_load_ping_lifetime_data_from_memory() { + // Init the database in a temporary directory. + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + let test_storage = "test-storage"; + let test_value = "test-value"; + let test_metric_id = "telemetry_test.test_name"; + + { + let db = Database::new(&str_dir, true).unwrap(); + + // Attempt to record a known value. + db.record_per_lifetime( + Lifetime::Ping, + test_storage, + test_metric_id, + &Metric::String(test_value.to_string()), + ) + .unwrap(); + + // Verify that test_value is in memory. + let data = match &db.ping_lifetime_data { + Some(ping_lifetime_data) => ping_lifetime_data, + None => panic!("Expected `ping_lifetime_data` to exist here!"), + }; + let data = data.read().unwrap(); + assert!(data + .get(&format!("{}#{}", test_storage, test_metric_id)) + .is_some()); + + // Attempt to persist data. + db.persist_ping_lifetime_data().unwrap(); + + // Verify that test_value is now in rkv. 
+ let store: SingleStore = db + .rkv + .open_single(Lifetime::Ping.as_str(), StoreOptions::create()) + .unwrap(); + let reader = db.rkv.read().unwrap(); + assert!(store + .get(&reader, format!("{}#{}", test_storage, test_metric_id)) + .unwrap_or(None) + .is_some()); + } + + // Now create a new instace of the db and check if data was + // correctly loaded from rkv to memory. + { + let db = Database::new(&str_dir, true).unwrap(); + + // Verify that test_value is in memory. + let data = match &db.ping_lifetime_data { + Some(ping_lifetime_data) => ping_lifetime_data, + None => panic!("Expected `ping_lifetime_data` to exist here!"), + }; + let data = data.read().unwrap(); + assert!(data + .get(&format!("{}#{}", test_storage, test_metric_id)) + .is_some()); + + // Verify that test_value is also in rkv. + let store: SingleStore = db + .rkv + .open_single(Lifetime::Ping.as_str(), StoreOptions::create()) + .unwrap(); + let reader = db.rkv.read().unwrap(); + assert!(store + .get(&reader, format!("{}#{}", test_storage, test_metric_id)) + .unwrap_or(None) + .is_some()); + } + } + + #[test] + fn doesnt_record_when_upload_is_disabled() { + let (mut glean, dir) = new_glean(None); + + // Init the database in a temporary directory. + let str_dir = dir.path().display().to_string(); + + let test_storage = "test-storage"; + let test_data = CommonMetricData::new("category", "name", test_storage); + let test_metric_id = test_data.identifier(&glean); + + // Attempt to record metric with the record and record_with functions, + // this should work since upload is enabled. 
+ let db = Database::new(&str_dir, true).unwrap(); + db.record(&glean, &test_data, &Metric::String("record".to_owned())); + db.iter_store_from( + Lifetime::Ping, + test_storage, + None, + &mut |metric_id: &[u8], metric: &Metric| { + assert_eq!( + String::from_utf8_lossy(metric_id).into_owned(), + test_metric_id + ); + match metric { + Metric::String(v) => assert_eq!("record", *v), + _ => panic!("Unexpected data found"), + } + }, + ); + + db.record_with(&glean, &test_data, |_| { + Metric::String("record_with".to_owned()) + }); + db.iter_store_from( + Lifetime::Ping, + test_storage, + None, + &mut |metric_id: &[u8], metric: &Metric| { + assert_eq!( + String::from_utf8_lossy(metric_id).into_owned(), + test_metric_id + ); + match metric { + Metric::String(v) => assert_eq!("record_with", *v), + _ => panic!("Unexpected data found"), + } + }, + ); + + // Disable upload + glean.set_upload_enabled(false); + + // Attempt to record metric with the record and record_with functions, + // this should work since upload is now **disabled**. + db.record(&glean, &test_data, &Metric::String("record_nop".to_owned())); + db.iter_store_from( + Lifetime::Ping, + test_storage, + None, + &mut |metric_id: &[u8], metric: &Metric| { + assert_eq!( + String::from_utf8_lossy(metric_id).into_owned(), + test_metric_id + ); + match metric { + Metric::String(v) => assert_eq!("record_with", *v), + _ => panic!("Unexpected data found"), + } + }, + ); + db.record_with(&glean, &test_data, |_| { + Metric::String("record_with_nop".to_owned()) + }); + db.iter_store_from( + Lifetime::Ping, + test_storage, + None, + &mut |metric_id: &[u8], metric: &Metric| { + assert_eq!( + String::from_utf8_lossy(metric_id).into_owned(), + test_metric_id + ); + match metric { + Metric::String(v) => assert_eq!("record_with", *v), + _ => panic!("Unexpected data found"), + } + }, + ); + } + + /// LDMB ignores an empty database file just fine. 
+ #[cfg(not(feature = "rkv-safe-mode"))] + #[test] + fn empty_data_file() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + // Create database directory structure. + let database_dir = dir.path().join("db"); + fs::create_dir_all(&database_dir).expect("create database dir"); + + // Create empty database file. + let datamdb = database_dir.join("data.mdb"); + let f = fs::File::create(datamdb).expect("create database file"); + drop(f); + + Database::new(&str_dir, false).unwrap(); + + assert!(dir.path().exists()); + } + + #[cfg(feature = "rkv-safe-mode")] + mod safe_mode { + use std::fs::File; + + use super::*; + use rkv::Value; + + #[test] + fn empty_data_file() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + // Create database directory structure. + let database_dir = dir.path().join("db"); + fs::create_dir_all(&database_dir).expect("create database dir"); + + // Create empty database file. + let safebin = database_dir.join("data.safe.bin"); + let f = File::create(safebin).expect("create database file"); + drop(f); + + Database::new(&str_dir, false).unwrap(); + + assert!(dir.path().exists()); + } + + #[test] + fn corrupted_data_file() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + // Create database directory structure. + let database_dir = dir.path().join("db"); + fs::create_dir_all(&database_dir).expect("create database dir"); + + // Create empty database file. 
+ let safebin = database_dir.join("data.safe.bin"); + fs::write(safebin, "<broken>").expect("write to database file"); + + Database::new(&str_dir, false).unwrap(); + + assert!(dir.path().exists()); + } + + #[test] + fn migration_works_on_startup() { + let dir = tempdir().unwrap(); + let str_dir = dir.path().display().to_string(); + + let database_dir = dir.path().join("db"); + let datamdb = database_dir.join("data.mdb"); + let lockmdb = database_dir.join("lock.mdb"); + let safebin = database_dir.join("data.safe.bin"); + + assert!(!safebin.exists()); + assert!(!datamdb.exists()); + assert!(!lockmdb.exists()); + + let store_name = "store1"; + let metric_name = "bool"; + let key = Database::get_storage_key(store_name, Some(metric_name)); + + // Ensure some old data in the LMDB format exists. + { + fs::create_dir_all(&database_dir).expect("create dir"); + let rkv_db = rkv::Rkv::new::<rkv::backend::Lmdb>(&database_dir).expect("rkv env"); + + let store = rkv_db + .open_single("ping", StoreOptions::create()) + .expect("opened"); + let mut writer = rkv_db.write().expect("writer"); + let metric = Metric::Boolean(true); + let value = bincode::serialize(&metric).expect("serialized"); + store + .put(&mut writer, &key, &Value::Blob(&value)) + .expect("wrote"); + writer.commit().expect("committed"); + + assert!(datamdb.exists()); + assert!(lockmdb.exists()); + assert!(!safebin.exists()); + } + + // First open should migrate the data. 
/// Existing safe-mode data must win over leftover LMDB data: the migration is
/// skipped, the LMDB files are removed and the safe-mode value is kept.
#[test]
fn migration_doesnt_overwrite() {
    let tmp = tempdir().unwrap();
    let data_path = tmp.path().display().to_string();

    let database_dir = tmp.path().join("db");
    let datamdb = database_dir.join("data.mdb");
    let lockmdb = database_dir.join("lock.mdb");
    let safebin = database_dir.join("data.safe.bin");

    // Start from a clean slate: none of the database files exist yet.
    assert!(!safebin.exists());
    assert!(!datamdb.exists());
    assert!(!lockmdb.exists());

    let store_name = "store1";
    let metric_name = "counter";
    let key = Database::get_storage_key(store_name, Some(metric_name));

    // Seed the old LMDB database. The environment is scoped so that it is
    // dropped before the next step touches the directory.
    {
        fs::create_dir_all(&database_dir).expect("create dir");
        let lmdb_env = rkv::Rkv::new::<rkv::backend::Lmdb>(&database_dir).expect("rkv env");

        let ping_store = lmdb_env
            .open_single("ping", StoreOptions::create())
            .expect("opened");
        let mut txn = lmdb_env.write().expect("writer");
        // This value will be ignored by the migration.
        let stale_metric = Metric::Counter(734);
        let encoded = bincode::serialize(&stale_metric).expect("serialized");
        ping_store
            .put(&mut txn, &key, &Value::Blob(&encoded))
            .expect("wrote");
        txn.commit().expect("committed");

        assert!(datamdb.exists());
        assert!(lockmdb.exists());
    }

    // Seed the new safe-mode database with a different value under the same key.
    {
        fs::create_dir_all(&database_dir).expect("create dir");
        let safe_env =
            rkv::Rkv::new::<rkv::backend::SafeMode>(&database_dir).expect("rkv env");

        let ping_store = safe_env
            .open_single("ping", StoreOptions::create())
            .expect("opened");
        let mut txn = safe_env.write().expect("writer");
        let current_metric = Metric::Counter(2);
        let encoded = bincode::serialize(&current_metric).expect("serialized");
        ping_store
            .put(&mut txn, &key, &Value::Blob(&encoded))
            .expect("wrote");
        txn.commit().expect("committed");

        assert!(safebin.exists());
    }

    // First open should try migration and ignore it, because destination is not empty.
    // It also deletes the leftover LMDB database.
    {
        let db = Database::new(&data_path, false).unwrap();
        assert!(safebin.exists(), "safe-mode file should exist");
        assert!(!datamdb.exists(), "LMDB data should be deleted");
        assert!(!lockmdb.exists(), "LMDB lock should be deleted");

        let mut found: Vec<(String, Metric)> = Vec::new();
        let mut collect = |raw_name: &[u8], metric: &Metric| {
            let name = str::from_utf8(raw_name).unwrap().to_string();
            found.push((name, metric.clone()));
        };
        db.iter_store_from(Lifetime::Ping, store_name, None, &mut collect);

        // Only the safe-mode value survives.
        assert_eq!(1, found.len());
        assert_eq!(metric_name, found[0].0);
        assert_eq!(&Metric::Counter(2), &found[0].1);
    }
}
/// An empty LMDB database has nothing to migrate: opening the database must
/// remove the LMDB files and must not create a safe-mode file.
#[test]
fn migration_ignores_empty_database() {
    let dir = tempdir().unwrap();
    let str_dir = dir.path().display().to_string();

    let database_dir = dir.path().join("db");
    let datamdb = database_dir.join("data.mdb");
    let lockmdb = database_dir.join("lock.mdb");
    let safebin = database_dir.join("data.safe.bin");

    assert!(!safebin.exists());
    assert!(!datamdb.exists());
    assert!(!lockmdb.exists());

    // Ensure old LMDB database exists, but is empty.
    {
        fs::create_dir_all(&database_dir).expect("create dir");
        let rkv_db = rkv::Rkv::new::<rkv::backend::Lmdb>(&database_dir).expect("rkv env");
        drop(rkv_db);
        assert!(datamdb.exists());
        assert!(lockmdb.exists());
    }

    // First open should try migration, but find no data.
    // safe-mode does not write an empty database to disk.
    // It also deletes the leftover LMDB database.
    {
        let _db = Database::new(&str_dir, false).unwrap();
        // The failure message now matches the (negated) condition being checked;
        // `safebin` from the outer scope is reused instead of being re-joined.
        assert!(!safebin.exists(), "safe-mode file should not exist");
        assert!(!datamdb.exists(), "LMDB data should be deleted");
        assert!(!lockmdb.exists(), "LMDB lock should be deleted");
    }
}
or by setting the environment variable GLEAN_DEBUG_VIEW_TAG=<some tag>; +//! * **Source tagging** - Adding the X-Source-Tags header to every ping request, +//! allowing pings to be tagged with custom labels. +//! This may be set by calling glean.set_source_tags(value: Vec<String>) +//! or by setting the environment variable GLEAN_SOURCE_TAGS=<some, tags>; +//! +//! Bindings may implement other debugging features, e.g. sending pings on demand. + +use std::env; + +const GLEAN_LOG_PINGS: &str = "GLEAN_LOG_PINGS"; +const GLEAN_DEBUG_VIEW_TAG: &str = "GLEAN_DEBUG_VIEW_TAG"; +const GLEAN_SOURCE_TAGS: &str = "GLEAN_SOURCE_TAGS"; +const GLEAN_MAX_SOURCE_TAGS: usize = 5; + +/// A representation of all of Glean's debug options. +pub struct DebugOptions { + /// Option to log the payload of pings that are successfully assembled into a ping request. + pub log_pings: DebugOption<bool>, + /// Option to add the X-Debug-ID header to every ping request. + pub debug_view_tag: DebugOption<String>, + /// Option to add the X-Source-Tags header to ping requests. This will allow the data + /// consumers to classify data depending on the applied tags. + pub source_tags: DebugOption<Vec<String>>, +} + +impl std::fmt::Debug for DebugOptions { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + fmt.debug_struct("DebugOptions") + .field("log_pings", &self.log_pings.get()) + .field("debug_view_tag", &self.debug_view_tag.get()) + .field("source_tags", &self.source_tags.get()) + .finish() + } +} + +impl DebugOptions { + pub fn new() -> Self { + Self { + log_pings: DebugOption::new(GLEAN_LOG_PINGS, get_bool_from_str, None), + debug_view_tag: DebugOption::new(GLEAN_DEBUG_VIEW_TAG, Some, Some(validate_tag)), + source_tags: DebugOption::new( + GLEAN_SOURCE_TAGS, + tokenize_string, + Some(validate_source_tags), + ), + } + } +} + +/// A representation of a debug option, +/// where the value can be set programmatically or come from an environment variable. 
+#[derive(Debug)] +pub struct DebugOption<T, E = fn(String) -> Option<T>, V = fn(&T) -> bool> { + /// The name of the environment variable related to this debug option. + env: String, + /// The actual value of this option. + value: Option<T>, + /// Function to extract the data of type `T` from a `String`, used when + /// extracting data from the environment. + extraction: E, + /// Optional function to validate the value parsed from the environment + /// or passed to the `set` function. + validation: Option<V>, +} + +impl<T, E, V> DebugOption<T, E, V> +where + T: Clone, + E: Fn(String) -> Option<T>, + V: Fn(&T) -> bool, +{ + /// Creates a new debug option. + /// + /// Tries to get the initial value of the option from the environment. + pub fn new(env: &str, extraction: E, validation: Option<V>) -> Self { + let mut option = Self { + env: env.into(), + value: None, + extraction, + validation, + }; + + option.set_from_env(); + option + } + + fn validate(&self, value: &T) -> bool { + if let Some(f) = self.validation.as_ref() { + f(value) + } else { + true + } + } + + fn set_from_env(&mut self) { + let extract = &self.extraction; + match env::var(&self.env) { + Ok(env_value) => match extract(env_value.clone()) { + Some(v) => { + self.set(v); + } + None => { + log::error!( + "Unable to parse debug option {}={} into {}. Ignoring.", + self.env, + env_value, + std::any::type_name::<T>() + ); + } + }, + Err(env::VarError::NotUnicode(_)) => { + log::error!("The value of {} is not valid unicode. Ignoring.", self.env) + } + // The other possible error is that the env var is not set, + // which is not an error for us and can safely be ignored. + Err(_) => {} + } + } + + /// Tries to set a value for this debug option. + /// + /// Validates the value in case a validation function is available. + /// + /// # Returns + /// + /// Whether the option passed validation and was succesfully set. 
+ pub fn set(&mut self, value: T) -> bool { + let validated = self.validate(&value); + if validated { + log::info!("Setting the debug option {}.", self.env); + self.value = Some(value); + return true; + } + log::error!("Invalid value for debug option {}.", self.env); + false + } + + /// Gets the value of this debug option. + pub fn get(&self) -> Option<&T> { + self.value.as_ref() + } +} + +fn get_bool_from_str(value: String) -> Option<bool> { + std::str::FromStr::from_str(&value).ok() +} + +fn tokenize_string(value: String) -> Option<Vec<String>> { + let trimmed = value.trim(); + if trimmed.is_empty() { + return None; + } + + Some(trimmed.split(',').map(|s| s.trim().to_string()).collect()) +} + +/// A tag is the value used in both the `X-Debug-ID` and `X-Source-Tags` headers +/// of tagged ping requests, thus is it must be a valid header value. +/// +/// In other words, it must match the regex: "[a-zA-Z0-9-]{1,20}" +/// +/// The regex crate isn't used here because it adds to the binary size, +/// and the Glean SDK doesn't use regular expressions anywhere else. +#[allow(clippy::ptr_arg)] +fn validate_tag(value: &String) -> bool { + if value.is_empty() { + log::error!("A tag must have at least one character."); + return false; + } + + let mut iter = value.chars(); + let mut count = 0; + + loop { + match iter.next() { + // We are done, so the whole expression is valid. + None => return true, + // Valid characters. + Some('-') | Some('a'..='z') | Some('A'..='Z') | Some('0'..='9') => (), + // An invalid character + Some(c) => { + log::error!("Invalid character '{}' in the tag.", c); + return false; + } + } + count += 1; + if count == 20 { + log::error!("A tag cannot exceed 20 characters."); + return false; + } + } +} + +/// Validate the list of source tags. +/// +/// This builds upon the existing `validate_tag` function, since all the +/// tags should respect the same rules to make the pipeline happy. 
+#[allow(clippy::ptr_arg)] +fn validate_source_tags(tags: &Vec<String>) -> bool { + if tags.is_empty() { + return false; + } + + if tags.len() > GLEAN_MAX_SOURCE_TAGS { + log::error!( + "A list of tags cannot contain more than {} elements.", + GLEAN_MAX_SOURCE_TAGS + ); + return false; + } + + // Filter out tags starting with "glean". They are reserved. + if tags.iter().any(|s| s.starts_with("glean")) { + log::error!("Tags starting with `glean` are reserved and must not be used."); + return false; + } + + tags.iter().all(|x| validate_tag(&x)) +} + +#[cfg(test)] +mod test { + use super::*; + use std::env; + + #[test] + fn debug_option_is_correctly_loaded_from_env() { + env::set_var("GLEAN_TEST_1", "test"); + let option: DebugOption<String> = DebugOption::new("GLEAN_TEST_1", Some, None); + assert_eq!(option.get().unwrap(), "test"); + } + + #[test] + fn debug_option_is_correctly_validated_when_necessary() { + #[allow(clippy::ptr_arg)] + fn validate(value: &String) -> bool { + value == "test" + } + + // Invalid values from the env are not set + env::set_var("GLEAN_TEST_2", "invalid"); + let mut option: DebugOption<String> = + DebugOption::new("GLEAN_TEST_2", Some, Some(validate)); + assert!(option.get().is_none()); + + // Valid values are set using the `set` function + assert!(option.set("test".into())); + assert_eq!(option.get().unwrap(), "test"); + + // Invalid values are not set using the `set` function + assert!(!option.set("invalid".into())); + assert_eq!(option.get().unwrap(), "test"); + } + + #[test] + fn tokenize_string_splits_correctly() { + // Valid list is properly tokenized and spaces are trimmed. + assert_eq!( + Some(vec!["test1".to_string(), "test2".to_string()]), + tokenize_string(" test1, test2 ".to_string()) + ); + + // Empty strings return no item. 
/// Every list below breaks one rule of `validate_source_tags` and must be rejected.
#[test]
fn validates_source_tags_correctly() {
    /// Turns string literals into the owned list the validator takes.
    fn tags(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    // A list whose only entry is the empty tag.
    assert!(!validate_source_tags(&tags(&[""])));
    // Too many tags.
    assert!(!validate_source_tags(&tags(&["1", "2", "3", "4", "5", "6"])));
    // Invalid tags.
    assert!(!validate_source_tags(&tags(&["!nv@lid-val*e"])));
    // An entry starting with 'glean' makes the whole list invalid.
    assert!(!validate_source_tags(&tags(&["glean-test1", "test2"])));
}
/// A list enumerating the categories of errors in this crate.
///
/// This list is intended to grow over time and it is not recommended to
/// exhaustively match against it.
#[derive(Debug)]
#[non_exhaustive]
pub enum ErrorKind {
    /// Lifetime conversion failed
    Lifetime(i32),

    /// FFI-Support error
    Handle(HandleError),

    /// IO error
    IoError(io::Error),

    /// Rkv error
    Rkv(StoreError),

    /// JSON error
    Json(serde_json::error::Error),

    /// TimeUnit conversion failed
    TimeUnit(i32),

    /// MemoryUnit conversion failed
    MemoryUnit(i32),

    /// HistogramType conversion failed
    HistogramType(i32),

    /// [`OsString`] conversion failed
    OsString(OsString),

    /// Invalid UTF-8 byte sequence in a string
    Utf8Error,

    /// Glean initialization was attempted with an invalid configuration
    InvalidConfig,

    /// Glean not initialized
    NotInitialized,

    /// Ping request body size overflowed
    PingBodyOverflow(usize),
}
+ pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +impl std::error::Error for Error {} + +impl Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ErrorKind::*; + match self.kind() { + Lifetime(l) => write!(f, "Lifetime conversion from {} failed", l), + Handle(e) => write!(f, "Invalid handle: {}", e), + IoError(e) => write!(f, "An I/O error occurred: {}", e), + Rkv(e) => write!(f, "An Rkv error occurred: {}", e), + Json(e) => write!(f, "A JSON error occurred: {}", e), + TimeUnit(t) => write!(f, "TimeUnit conversion from {} failed", t), + MemoryUnit(m) => write!(f, "MemoryUnit conversion from {} failed", m), + HistogramType(h) => write!(f, "HistogramType conversion from {} failed", h), + OsString(s) => write!(f, "OsString conversion from {:?} failed", s), + Utf8Error => write!(f, "Invalid UTF-8 byte sequence in string"), + InvalidConfig => write!(f, "Invalid Glean configuration provided"), + NotInitialized => write!(f, "Global Glean object missing"), + PingBodyOverflow(s) => write!( + f, + "Ping request body size exceeded maximum size allowed: {}kB.", + s / 1024 + ), + } + } +} + +impl From<ErrorKind> for Error { + fn from(kind: ErrorKind) -> Error { + Error { kind } + } +} + +impl From<HandleError> for Error { + fn from(error: HandleError) -> Error { + Error { + kind: ErrorKind::Handle(error), + } + } +} + +impl From<io::Error> for Error { + fn from(error: io::Error) -> Error { + Error { + kind: ErrorKind::IoError(error), + } + } +} + +impl From<StoreError> for Error { + fn from(error: StoreError) -> Error { + Error { + kind: ErrorKind::Rkv(error), + } + } +} + +impl From<Error> for ExternError { + fn from(error: Error) -> ExternError { + ffi_support::ExternError::new_error(ffi_support::ErrorCode::new(42), format!("{}", error)) + } +} + +impl From<serde_json::error::Error> for Error { + fn from(error: serde_json::error::Error) -> Error { + Error { + kind: ErrorKind::Json(error), + } + } +} + +impl From<OsString> for Error 
{ + fn from(error: OsString) -> Error { + Error { + kind: ErrorKind::OsString(error), + } + } +} + +/// To satisfy integer conversion done by the macros on the FFI side, we need to be able to turn +/// something infallible into an error. +/// This will never actually be reached, as an integer-to-integer conversion is infallible. +impl From<std::convert::Infallible> for Error { + fn from(_: std::convert::Infallible) -> Error { + unreachable!() + } +} diff --git a/third_party/rust/glean-core/src/error_recording.rs b/third_party/rust/glean-core/src/error_recording.rs new file mode 100644 index 0000000000..e70848d109 --- /dev/null +++ b/third_party/rust/glean-core/src/error_recording.rs @@ -0,0 +1,223 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! # Error Recording +//! +//! Glean keeps track of errors that occured due to invalid labels or invalid values when recording +//! other metrics. +//! +//! Error counts are stored in labeled counters in the `glean.error` category. +//! The labeled counter metrics that store the errors are defined in the `metrics.yaml` for documentation purposes, +//! but are not actually used directly, since the `send_in_pings` value needs to match the pings of the metric that is erroring (plus the "metrics" ping), +//! not some constant value that we could define in `metrics.yaml`. + +use std::convert::TryFrom; +use std::fmt::Display; + +use crate::error::{Error, ErrorKind}; +use crate::metrics::CounterMetric; +use crate::metrics::{combine_base_identifier_and_label, strip_label}; +use crate::CommonMetricData; +use crate::Glean; +use crate::Lifetime; + +/// The possible error types for metric recording. +/// Note: the cases in this enum must be kept in sync with the ones +/// in the platform-specific code (e.g. `ErrorType.kt`) and with the +/// metrics in the registry files. 
/// The kinds of error that can be recorded while recording a metric.
/// Note: the cases in this enum must be kept in sync with the ones
/// in the platform-specific code (e.g. `ErrorType.kt`) and with the
/// metrics in the registry files.
#[derive(Debug, PartialEq)]
pub enum ErrorType {
    /// For when the value to be recorded does not match the metric-specific restrictions
    InvalidValue,
    /// For when the label of a labeled metric does not match the restrictions
    InvalidLabel,
    /// For when the metric caught an invalid state while recording
    InvalidState,
    /// For when the value to be recorded overflows the metric-specific upper range
    InvalidOverflow,
}

impl ErrorType {
    /// Returns the metric id under which this error type is recorded.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::InvalidValue => "invalid_value",
            Self::InvalidLabel => "invalid_label",
            Self::InvalidState => "invalid_state",
            Self::InvalidOverflow => "invalid_overflow",
        }
    }
}
+/// +/// Errors are recorded as labeled counters in the `glean.error` category. +/// +/// *Note*: We do make assumptions here how labeled metrics are encoded, namely by having the name +/// `<name>/<label>`. +/// Errors do not adhere to the usual "maximum label" restriction. +/// +/// # Arguments +/// +/// * `glean` - The Glean instance containing the database +/// * `meta` - The metric's meta data +/// * `error` - The error type to record +/// * `message` - The message to log. This message is not sent with the ping. +/// It does not need to include the metric id, as that is automatically prepended to the message. +/// * `num_errors` - The number of errors of the same type to report. +pub fn record_error<O: Into<Option<i32>>>( + glean: &Glean, + meta: &CommonMetricData, + error: ErrorType, + message: impl Display, + num_errors: O, +) { + let metric = get_error_metric_for_metric(meta, error); + + log::warn!("{}: {}", meta.base_identifier(), message); + let to_report = num_errors.into().unwrap_or(1); + debug_assert!(to_report > 0); + metric.add(glean, to_report); +} + +/// Gets the number of recorded errors for the given metric and error type. +/// +/// *Notes: This is a **test-only** API, but we need to expose it to be used in integration tests. +/// +/// # Arguments +/// +/// * `glean` - The Glean object holding the database +/// * `meta` - The metadata of the metric instance +/// * `error` - The type of error +/// +/// # Returns +/// +/// The number of errors reported. 
+pub fn test_get_num_recorded_errors( + glean: &Glean, + meta: &CommonMetricData, + error: ErrorType, + ping_name: Option<&str>, +) -> Result<i32, String> { + let use_ping_name = ping_name.unwrap_or(&meta.send_in_pings[0]); + let metric = get_error_metric_for_metric(meta, error); + + metric.test_get_value(glean, use_ping_name).ok_or_else(|| { + format!( + "No error recorded for {} in '{}' store", + meta.base_identifier(), + use_ping_name + ) + }) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::metrics::*; + use crate::tests::new_glean; + + #[test] + fn error_type_i32_mapping() { + let error: ErrorType = std::convert::TryFrom::try_from(0).unwrap(); + assert_eq!(error, ErrorType::InvalidValue); + let error: ErrorType = std::convert::TryFrom::try_from(1).unwrap(); + assert_eq!(error, ErrorType::InvalidLabel); + let error: ErrorType = std::convert::TryFrom::try_from(2).unwrap(); + assert_eq!(error, ErrorType::InvalidState); + let error: ErrorType = std::convert::TryFrom::try_from(3).unwrap(); + assert_eq!(error, ErrorType::InvalidOverflow); + } + + #[test] + fn recording_of_all_error_types() { + let (glean, _t) = new_glean(None); + + let string_metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into(), "store2".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + let expected_invalid_values_errors: i32 = 1; + let expected_invalid_labels_errors: i32 = 2; + + record_error( + &glean, + string_metric.meta(), + ErrorType::InvalidValue, + "Invalid value", + None, + ); + + record_error( + &glean, + string_metric.meta(), + ErrorType::InvalidLabel, + "Invalid label", + expected_invalid_labels_errors, + ); + + for store in &["store1", "store2", "metrics"] { + assert_eq!( + Ok(expected_invalid_values_errors), + test_get_num_recorded_errors( + &glean, + string_metric.meta(), + ErrorType::InvalidValue, + Some(store) + ) + ); + assert_eq!( + 
Ok(expected_invalid_labels_errors), + test_get_num_recorded_errors( + &glean, + string_metric.meta(), + ErrorType::InvalidLabel, + Some(store) + ) + ); + } + } +} diff --git a/third_party/rust/glean-core/src/event_database/mod.rs b/third_party/rust/glean-core/src/event_database/mod.rs new file mode 100644 index 0000000000..23ff9ca6f1 --- /dev/null +++ b/third_party/rust/glean-core/src/event_database/mod.rs @@ -0,0 +1,502 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; +use std::fs; +use std::fs::{create_dir_all, File, OpenOptions}; +use std::io::BufRead; +use std::io::BufReader; +use std::io::Write; +use std::iter::FromIterator; +use std::path::{Path, PathBuf}; +use std::sync::RwLock; + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value as JsonValue}; + +use crate::CommonMetricData; +use crate::Glean; +use crate::Result; + +/// Represents the recorded data for a single event. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct RecordedEvent { + /// The timestamp of when the event was recorded. + /// + /// This allows to order events from a single process run. + pub timestamp: u64, + + /// The event's category. + /// + /// This is defined by users in the metrics file. + pub category: String, + + /// The event's name. + /// + /// This is defined by users in the metrics file. + pub name: String, + + /// A map of all extra data values. + /// + /// The set of allowed extra keys is defined by users in the metrics file. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub extra: Option<HashMap<String, String>>, +} + +impl RecordedEvent { + /// Serialize an event to JSON, adjusting its timestamp relative to a base timestamp + fn serialize_relative(&self, timestamp_offset: u64) -> JsonValue { + json!(&RecordedEvent { + timestamp: self.timestamp - timestamp_offset, + category: self.category.clone(), + name: self.name.clone(), + extra: self.extra.clone(), + }) + } +} + +/// This struct handles the in-memory and on-disk storage logic for events. +/// +/// So that the data survives shutting down of the application, events are stored +/// in an append-only file on disk, in addition to the store in memory. Each line +/// of this file records a single event in JSON, exactly as it will be sent in the +/// ping. There is one file per store. +/// +/// When restarting the application, these on-disk files are checked, and if any are +/// found, they are loaded, queued for sending and flushed immediately before any +/// further events are collected. This is because the timestamps for these events +/// may have come from a previous boot of the device, and therefore will not be +/// compatible with any newly-collected events. +#[derive(Debug)] +pub struct EventDatabase { + /// Path to directory of on-disk event files + pub path: PathBuf, + /// The in-memory list of events + event_stores: RwLock<HashMap<String, Vec<RecordedEvent>>>, + /// A lock to be held when doing operations on the filesystem + file_lock: RwLock<()>, +} + +impl EventDatabase { + /// Creates a new event database. + /// + /// # Arguments + /// + /// * `data_path` - The directory to store events in. A new directory + /// * `events` - will be created inside of this directory. 
+ pub fn new(data_path: &str) -> Result<Self> { + let path = Path::new(data_path).join("events"); + create_dir_all(&path)?; + + Ok(Self { + path, + event_stores: RwLock::new(HashMap::new()), + file_lock: RwLock::new(()), + }) + } + + /// Initializes events storage after Glean is fully initialized and ready to send pings. + /// + /// This must be called once on application startup, e.g. from + /// [Glean.initialize], but after we are ready to send pings, since this + /// could potentially collect and send pings. + /// + /// If there are any events queued on disk, it loads them into memory so + /// that the memory and disk representations are in sync. + /// + /// Secondly, if this is the first time the application has been run since + /// rebooting, any pings containing events are assembled into pings and cleared + /// immediately, since their timestamps won't be compatible with the timestamps + /// we would create during this boot of the device. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance. + /// + /// # Returns + /// + /// Whether at least one ping was generated. + pub fn flush_pending_events_on_startup(&self, glean: &Glean) -> bool { + match self.load_events_from_disk() { + Ok(_) => self.send_all_events(glean), + Err(err) => { + log::warn!("Error loading events from disk: {}", err); + false + } + } + } + + fn load_events_from_disk(&self) -> Result<()> { + // NOTE: The order of locks here is important. + // In other code parts we might acquire the `file_lock` when we already have acquired + // a lock on `event_stores`. + // This is a potential lock-order-inversion. + let mut db = self.event_stores.write().unwrap(); // safe unwrap, only error case is poisoning + let _lock = self.file_lock.read().unwrap(); // safe unwrap, only error case is poisoning + + for entry in fs::read_dir(&self.path)? 
{ + let entry = entry?; + if entry.file_type()?.is_file() { + let store_name = entry.file_name().into_string()?; + let file = BufReader::new(File::open(entry.path())?); + db.insert( + store_name, + file.lines() + .filter_map(|line| line.ok()) + .filter_map(|line| serde_json::from_str::<RecordedEvent>(&line).ok()) + .collect(), + ); + } + } + Ok(()) + } + + fn send_all_events(&self, glean: &Glean) -> bool { + let store_names = { + let db = self.event_stores.read().unwrap(); // safe unwrap, only error case is poisoning + db.keys().cloned().collect::<Vec<String>>() + }; + + let mut ping_sent = false; + for store_name in store_names { + if let Err(err) = glean.submit_ping_by_name(&store_name, Some("startup")) { + log::warn!( + "Error flushing existing events to the '{}' ping: {}", + store_name, + err + ); + } else { + ping_sent = true; + } + } + + ping_sent + } + + /// Records an event in the desired stores. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance. + /// * `meta` - The metadata about the event metric. Used to get the category, + /// name and stores for the metric. + /// * `timestamp` - The timestamp of the event, in milliseconds. Must use a + /// monotonically increasing timer (this value is obtained on the + /// platform-specific side). + /// * `extra` - Extra data values, mapping strings to strings. + pub fn record( + &self, + glean: &Glean, + meta: &CommonMetricData, + timestamp: u64, + extra: Option<HashMap<String, String>>, + ) { + // If upload is disabled we don't want to record. + if !glean.is_upload_enabled() { + return; + } + + // Create RecordedEvent object, and its JSON form for serialization + // on disk. + let event = RecordedEvent { + timestamp, + category: meta.category.to_string(), + name: meta.name.to_string(), + extra, + }; + let event_json = serde_json::to_string(&event).unwrap(); // safe unwrap, event can always be serialized + + // Store the event in memory and on disk to each of the stores. 
+ let mut stores_to_submit: Vec<&str> = Vec::new(); + { + let mut db = self.event_stores.write().unwrap(); // safe unwrap, only error case is poisoning + for store_name in meta.send_in_pings.iter() { + let store = db.entry(store_name.to_string()).or_insert_with(Vec::new); + store.push(event.clone()); + self.write_event_to_disk(store_name, &event_json); + if store.len() == glean.get_max_events() { + stores_to_submit.push(&store_name); + } + } + } + + // If any of the event stores reached maximum size, submit the pings + // containing those events immediately. + for store_name in stores_to_submit { + if let Err(err) = glean.submit_ping_by_name(store_name, Some("max_capacity")) { + log::warn!( + "Got more than {} events, but could not send {} ping: {}", + glean.get_max_events(), + store_name, + err + ); + } + } + } + + /// Writes an event to a single store on disk. + /// + /// # Arguments + /// + /// * `store_name` - The name of the store. + /// * `event_json` - The event content, as a single-line JSON-encoded string. + fn write_event_to_disk(&self, store_name: &str, event_json: &str) { + let _lock = self.file_lock.write().unwrap(); // safe unwrap, only error case is poisoning + if let Err(err) = OpenOptions::new() + .create(true) + .append(true) + .open(self.path.join(store_name)) + .and_then(|mut file| writeln!(file, "{}", event_json)) + { + log::warn!("IO error writing event to store '{}': {}", store_name, err); + } + } + + /// Gets a snapshot of the stored event data as a JsonValue. + /// + /// # Arguments + /// + /// * `store_name` - The name of the desired store. + /// * `clear_store` - Whether to clear the store after snapshotting. + /// + /// # Returns + /// + /// A array of events, JSON encoded, if any. Otherwise `None`. 
+ pub fn snapshot_as_json(&self, store_name: &str, clear_store: bool) -> Option<JsonValue> { + let result = { + let mut db = self.event_stores.write().unwrap(); // safe unwrap, only error case is poisoning + db.get_mut(&store_name.to_string()).and_then(|store| { + if !store.is_empty() { + // Timestamps may have been recorded out-of-order, so sort the events + // by the timestamp. + // We can't insert events in order as-we-go, because we also append + // events to a file on disk, where this would be expensive. Best to + // handle this in every case (whether events came from disk or memory) + // in a single location. + store.sort_by(|a, b| a.timestamp.cmp(&b.timestamp)); + let first_timestamp = store[0].timestamp; + Some(JsonValue::from_iter( + store.iter().map(|e| e.serialize_relative(first_timestamp)), + )) + } else { + log::warn!("Unexpectly got empty event store for '{}'", store_name); + None + } + }) + }; + + if clear_store { + self.event_stores + .write() + .unwrap() // safe unwrap, only error case is poisoning + .remove(&store_name.to_string()); + + let _lock = self.file_lock.write().unwrap(); // safe unwrap, only error case is poisoning + if let Err(err) = fs::remove_file(self.path.join(store_name)) { + match err.kind() { + std::io::ErrorKind::NotFound => { + // silently drop this error, the file was already non-existing + } + _ => log::warn!("Error removing events queue file '{}': {}", store_name, err), + } + } + } + + result + } + + /// Clears all stored events, both in memory and on-disk. + pub fn clear_all(&self) -> Result<()> { + // safe unwrap, only error case is poisoning + self.event_stores.write().unwrap().clear(); + + // safe unwrap, only error case is poisoning + let _lock = self.file_lock.write().unwrap(); + std::fs::remove_dir_all(&self.path)?; + create_dir_all(&self.path)?; + + Ok(()) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Returns whether there are any events currently stored for the given even + /// metric. 
+ /// + /// This doesn't clear the stored value. + pub fn test_has_value<'a>(&'a self, meta: &'a CommonMetricData, store_name: &str) -> bool { + self.event_stores + .read() + .unwrap() // safe unwrap, only error case is poisoning + .get(&store_name.to_string()) + .into_iter() + .flatten() + .any(|event| event.name == meta.name && event.category == meta.category) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the vector of currently stored events for the given event metric in + /// the given store. + /// + /// This doesn't clear the stored value. + pub fn test_get_value<'a>( + &'a self, + meta: &'a CommonMetricData, + store_name: &str, + ) -> Option<Vec<RecordedEvent>> { + let value: Vec<RecordedEvent> = self + .event_stores + .read() + .unwrap() // safe unwrap, only error case is poisoning + .get(&store_name.to_string()) + .into_iter() + .flatten() + .filter(|event| event.name == meta.name && event.category == meta.category) + .cloned() + .collect(); + if !value.is_empty() { + Some(value) + } else { + None + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::tests::new_glean; + use crate::CommonMetricData; + + #[test] + fn handle_truncated_events_on_disk() { + let t = tempfile::tempdir().unwrap(); + + { + let db = EventDatabase::new(&t.path().display().to_string()).unwrap(); + db.write_event_to_disk("events", "{\"timestamp\": 500"); + db.write_event_to_disk("events", "{\"timestamp\""); + db.write_event_to_disk( + "events", + "{\"timestamp\": 501, \"category\": \"ui\", \"name\": \"click\"}", + ); + } + + { + let db = EventDatabase::new(&t.path().display().to_string()).unwrap(); + db.load_events_from_disk().unwrap(); + let events = &db.event_stores.read().unwrap()["events"]; + assert_eq!(1, events.len()); + } + } + + #[test] + fn stable_serialization() { + let event_empty = RecordedEvent { + timestamp: 2, + category: "cat".to_string(), + name: "name".to_string(), + extra: None, + }; + + let mut data = HashMap::new(); + 
data.insert("a key".to_string(), "a value".to_string()); + let event_data = RecordedEvent { + timestamp: 2, + category: "cat".to_string(), + name: "name".to_string(), + extra: Some(data), + }; + + let event_empty_json = ::serde_json::to_string_pretty(&event_empty).unwrap(); + let event_data_json = ::serde_json::to_string_pretty(&event_data).unwrap(); + + assert_eq!( + event_empty, + serde_json::from_str(&event_empty_json).unwrap() + ); + assert_eq!(event_data, serde_json::from_str(&event_data_json).unwrap()); + } + + #[test] + fn deserialize_existing_data() { + let event_empty_json = r#" +{ + "timestamp": 2, + "category": "cat", + "name": "name" +} + "#; + + let event_data_json = r#" +{ + "timestamp": 2, + "category": "cat", + "name": "name", + "extra": { + "a key": "a value" + } +} + "#; + + let event_empty = RecordedEvent { + timestamp: 2, + category: "cat".to_string(), + name: "name".to_string(), + extra: None, + }; + + let mut data = HashMap::new(); + data.insert("a key".to_string(), "a value".to_string()); + let event_data = RecordedEvent { + timestamp: 2, + category: "cat".to_string(), + name: "name".to_string(), + extra: Some(data), + }; + + assert_eq!( + event_empty, + serde_json::from_str(&event_empty_json).unwrap() + ); + assert_eq!(event_data, serde_json::from_str(&event_data_json).unwrap()); + } + + #[test] + fn doesnt_record_when_upload_is_disabled() { + let (mut glean, dir) = new_glean(None); + let db = EventDatabase::new(dir.path().to_str().unwrap()).unwrap(); + + let test_storage = "test-storage"; + let test_category = "category"; + let test_name = "name"; + let test_timestamp = 2; + let test_meta = CommonMetricData::new(test_category, test_name, test_storage); + let event_data = RecordedEvent { + timestamp: test_timestamp, + category: test_category.to_string(), + name: test_name.to_string(), + extra: None, + }; + + // Upload is not yet disabled, + // so let's check that everything is getting recorded as expected. 
+ db.record(&glean, &test_meta, 2, None); + { + let event_stores = db.event_stores.read().unwrap(); + assert_eq!(&event_data, &event_stores.get(test_storage).unwrap()[0]); + assert_eq!(event_stores.get(test_storage).unwrap().len(), 1); + } + + glean.set_upload_enabled(false); + + // Now that upload is disabled, let's check nothing is recorded. + db.record(&glean, &test_meta, 2, None); + { + let event_stores = db.event_stores.read().unwrap(); + assert_eq!(event_stores.get(test_storage).unwrap().len(), 1); + } + } +} diff --git a/third_party/rust/glean-core/src/histogram/exponential.rs b/third_party/rust/glean-core/src/histogram/exponential.rs new file mode 100644 index 0000000000..5ccb441210 --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/exponential.rs @@ -0,0 +1,206 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use once_cell::sync::OnceCell; +use serde::{Deserialize, Serialize}; + +use super::{Bucketing, Histogram}; + +use crate::util::floating_point_context::FloatingPointContext; + +/// Create the possible ranges in an exponential distribution from `min` to `max` with +/// `bucket_count` buckets. +/// +/// This algorithm calculates the bucket sizes using a natural log approach to get `bucket_count` number of buckets, +/// exponentially spaced between `min` and `max` +/// +/// Bucket limits are the minimal bucket value. +/// That means values in a bucket `i` are `bucket[i] <= value < bucket[i+1]`. +/// It will always contain an underflow bucket (`< 1`). 
+fn exponential_range(min: u64, max: u64, bucket_count: usize) -> Vec<u64> { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + let log_max = (max as f64).ln(); + + let mut ranges = Vec::with_capacity(bucket_count); + let mut current = min; + if current == 0 { + current = 1; + } + + // undeflow bucket + ranges.push(0); + ranges.push(current); + + for i in 2..bucket_count { + let log_current = (current as f64).ln(); + let log_ratio = (log_max - log_current) / (bucket_count - i) as f64; + let log_next = log_current + log_ratio; + let next_value = log_next.exp().round() as u64; + current = if next_value > current { + next_value + } else { + current + 1 + }; + ranges.push(current); + } + + ranges +} + +/// An exponential bucketing algorithm. +/// +/// Buckets are pre-computed at instantiation with an exponential distribution from `min` to `max` +/// and `bucket_count` buckets. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct PrecomputedExponential { + // Don't serialize the (potentially large) array of ranges, instead compute them on first + // access. + #[serde(skip)] + bucket_ranges: OnceCell<Vec<u64>>, + min: u64, + max: u64, + bucket_count: usize, +} + +impl Bucketing for PrecomputedExponential { + /// Get the bucket for the sample. + /// + /// This uses a binary search to locate the index `i` of the bucket such that: + /// bucket[i] <= sample < bucket[i+1] + fn sample_to_bucket_minimum(&self, sample: u64) -> u64 { + let limit = match self.ranges().binary_search(&sample) { + // Found an exact match to fit it in + Ok(i) => i, + // Sorted it fits after the bucket's limit, therefore it fits into the previous bucket + Err(i) => i - 1, + }; + + self.ranges()[limit] + } + + fn ranges(&self) -> &[u64] { + // Create the exponential range on first access. 
+ self.bucket_ranges + .get_or_init(|| exponential_range(self.min, self.max, self.bucket_count)) + } +} + +impl Histogram<PrecomputedExponential> { + /// Creates a histogram with `count` exponential buckets in the range `min` to `max`. + pub fn exponential( + min: u64, + max: u64, + bucket_count: usize, + ) -> Histogram<PrecomputedExponential> { + Histogram { + values: HashMap::new(), + count: 0, + sum: 0, + bucketing: PrecomputedExponential { + bucket_ranges: OnceCell::new(), + min, + max, + bucket_count, + }, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + const DEFAULT_BUCKET_COUNT: usize = 100; + const DEFAULT_RANGE_MIN: u64 = 0; + const DEFAULT_RANGE_MAX: u64 = 60_000; + + #[test] + fn can_count() { + let mut hist = Histogram::exponential(1, 500, 10); + assert!(hist.is_empty()); + + for i in 1..=10 { + hist.accumulate(i); + } + + assert_eq!(10, hist.count()); + assert_eq!(55, hist.sum()); + } + + #[test] + fn overflow_values_accumulate_in_the_last_bucket() { + let mut hist = + Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); + + hist.accumulate(DEFAULT_RANGE_MAX + 100); + assert_eq!(1, hist.values[&DEFAULT_RANGE_MAX]); + } + + #[test] + fn short_exponential_buckets_are_correct() { + let test_buckets = vec![0, 1, 2, 3, 5, 9, 16, 29, 54, 100]; + + assert_eq!(test_buckets, exponential_range(1, 100, 10)); + // There's always a zero bucket, so we increase the lower limit. + assert_eq!(test_buckets, exponential_range(0, 100, 10)); + } + + #[test] + fn default_exponential_buckets_are_correct() { + // Hand calculated values using current default range 0 - 60000 and bucket count of 100. 
+ // NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any + // values beyond the maximum (in this case the maximum is 60000) + let test_buckets = vec![ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21, 23, 25, 28, 31, 34, + 38, 42, 46, 51, 56, 62, 68, 75, 83, 92, 101, 111, 122, 135, 149, 164, 181, 200, 221, + 244, 269, 297, 328, 362, 399, 440, 485, 535, 590, 651, 718, 792, 874, 964, 1064, 1174, + 1295, 1429, 1577, 1740, 1920, 2118, 2337, 2579, 2846, 3140, 3464, 3822, 4217, 4653, + 5134, 5665, 6250, 6896, 7609, 8395, 9262, 10219, 11275, 12440, 13726, 15144, 16709, + 18436, 20341, 22443, 24762, 27321, 30144, 33259, 36696, 40488, 44672, 49288, 54381, + 60000, + ]; + + assert_eq!( + test_buckets, + exponential_range(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT) + ); + } + + #[test] + fn default_buckets_correctly_accumulate() { + let mut hist = + Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); + + for i in &[1, 10, 100, 1000, 10000] { + hist.accumulate(*i); + } + + assert_eq!(11111, hist.sum()); + assert_eq!(5, hist.count()); + + assert_eq!(None, hist.values.get(&0)); // underflow is empty + assert_eq!(1, hist.values[&1]); // bucket_ranges[1] = 1 + assert_eq!(1, hist.values[&10]); // bucket_ranges[10] = 10 + assert_eq!(1, hist.values[&92]); // bucket_ranges[33] = 92 + assert_eq!(1, hist.values[&964]); // bucket_ranges[57] = 964 + assert_eq!(1, hist.values[&9262]); // bucket_ranges[80] = 9262 + } + + #[test] + fn accumulate_large_numbers() { + let mut hist = Histogram::exponential(1, 500, 10); + + hist.accumulate(u64::max_value()); + hist.accumulate(u64::max_value()); + + assert_eq!(2, hist.count()); + // Saturate before overflowing + assert_eq!(u64::max_value(), hist.sum()); + assert_eq!(2, hist.values[&500]); + } +} diff --git a/third_party/rust/glean-core/src/histogram/functional.rs b/third_party/rust/glean-core/src/histogram/functional.rs new file mode 100644 index 
0000000000..64df9a1a4d --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/functional.rs @@ -0,0 +1,174 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use super::{Bucketing, Histogram}; + +use crate::util::floating_point_context::FloatingPointContext; + +/// A functional bucketing algorithm. +/// +/// Bucketing is performed by a function, rather than pre-computed buckets. +/// The bucket index of a given sample is determined with the following function: +/// +/// i = ⌊n log<sub>base</sub>(𝑥)⌋ +/// +/// In other words, there are n buckets for each power of `base` magnitude. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Functional { + exponent: f64, +} + +impl Functional { + /// Instantiate a new functional bucketing. + fn new(log_base: f64, buckets_per_magnitude: f64) -> Functional { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + let exponent = log_base.powf(1.0 / buckets_per_magnitude); + + Functional { exponent } + } + + /// Maps a sample to a "bucket index" that it belongs in. + /// A "bucket index" is the consecutive integer index of each bucket, useful as a + /// mathematical concept, even though the internal representation is stored and + /// sent using the minimum value in each bucket. + fn sample_to_bucket_index(&self, sample: u64) -> u64 { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + ((sample.saturating_add(1)) as f64).log(self.exponent) as u64 + } + + /// Determines the minimum value of a bucket, given a bucket index. 
+ fn bucket_index_to_bucket_minimum(&self, index: u64) -> u64 { + // Set the FPU control flag to the required state within this function + let _fpc = FloatingPointContext::new(); + + self.exponent.powf(index as f64) as u64 + } +} + +impl Bucketing for Functional { + fn sample_to_bucket_minimum(&self, sample: u64) -> u64 { + if sample == 0 { + return 0; + } + + let index = self.sample_to_bucket_index(sample); + self.bucket_index_to_bucket_minimum(index) + } + + fn ranges(&self) -> &[u64] { + unimplemented!("Bucket ranges for functional bucketing are not precomputed") + } +} + +impl Histogram<Functional> { + /// Creates a histogram with functional buckets. + pub fn functional(log_base: f64, buckets_per_magnitude: f64) -> Histogram<Functional> { + Histogram { + values: HashMap::new(), + count: 0, + sum: 0, + bucketing: Functional::new(log_base, buckets_per_magnitude), + } + } + + /// Gets a snapshot of all contiguous values. + /// + /// **Caution** This is a more specific implementation of `snapshot_values` on functional + /// histograms. `snapshot_values` cannot be used with those, due to buckets not being + /// precomputed. + pub fn snapshot(&self) -> HashMap<u64, u64> { + if self.values.is_empty() { + return HashMap::new(); + } + + let mut min_key = None; + let mut max_key = None; + + // `Iterator#min` and `Iterator#max` would do the same job independently, + // but we want to avoid iterating the keys twice, so we loop ourselves. + for key in self.values.keys() { + let key = *key; + + // safe unwrap, we checked it's not none + if min_key.is_none() || key < min_key.unwrap() { + min_key = Some(key); + } + + // safe unwrap, we checked it's not none + if max_key.is_none() || key > max_key.unwrap() { + max_key = Some(key); + } + } + + // Non-empty values, therefore minimum/maximum exists. + // safe unwraps, we set it at least once. 
+ let min_bucket = self.bucketing.sample_to_bucket_index(min_key.unwrap()); + let max_bucket = self.bucketing.sample_to_bucket_index(max_key.unwrap()) + 1; + + let mut values = self.values.clone(); + + for idx in min_bucket..=max_bucket { + // Fill in missing entries. + let min_bucket = self.bucketing.bucket_index_to_bucket_minimum(idx); + let _ = values.entry(min_bucket).or_insert(0); + } + + values + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn can_count() { + let mut hist = Histogram::functional(2.0, 8.0); + assert!(hist.is_empty()); + + for i in 1..=10 { + hist.accumulate(i); + } + + assert_eq!(10, hist.count()); + assert_eq!(55, hist.sum()); + } + + #[test] + fn sample_to_bucket_minimum_correctly_rounds_down() { + let hist = Histogram::functional(2.0, 8.0); + + // Check each of the first 100 integers, where numerical accuracy of the round-tripping + // is most potentially problematic + for value in 0..100 { + let bucket_minimum = hist.bucketing.sample_to_bucket_minimum(value); + assert!(bucket_minimum <= value); + + assert_eq!( + bucket_minimum, + hist.bucketing.sample_to_bucket_minimum(bucket_minimum) + ); + } + + // Do an exponential sampling of higher numbers + for i in 11..500 { + let value = 1.5f64.powi(i); + let value = value as u64; + let bucket_minimum = hist.bucketing.sample_to_bucket_minimum(value); + assert!(bucket_minimum <= value); + assert_eq!( + bucket_minimum, + hist.bucketing.sample_to_bucket_minimum(bucket_minimum) + ); + } + } +} diff --git a/third_party/rust/glean-core/src/histogram/linear.rs b/third_party/rust/glean-core/src/histogram/linear.rs new file mode 100644 index 0000000000..18a5761099 --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/linear.rs @@ -0,0 +1,178 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::cmp; +use std::collections::HashMap; + +use once_cell::sync::OnceCell; +use serde::{Deserialize, Serialize}; + +use super::{Bucketing, Histogram}; + +/// Create the possible ranges in a linear distribution from `min` to `max` with +/// `bucket_count` buckets. +/// +/// This algorithm calculates `bucket_count` number of buckets of equal sizes between `min` and `max`. +/// +/// Bucket limits are the minimal bucket value. +/// That means values in a bucket `i` are `bucket[i] <= value < bucket[i+1]`. +/// It will always contain an underflow bucket (`< 1`). +fn linear_range(min: u64, max: u64, count: usize) -> Vec<u64> { + let mut ranges = Vec::with_capacity(count); + ranges.push(0); + + let min = cmp::max(1, min); + let count = count as u64; + for i in 1..count { + let range = (min * (count - 1 - i) + max * (i - 1)) / (count - 2); + ranges.push(range); + } + + ranges +} + +/// A linear bucketing algorithm. +/// +/// Buckets are pre-computed at instantiation with a linear distribution from `min` to `max` +/// and `bucket_count` buckets. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct PrecomputedLinear { + // Don't serialize the (potentially large) array of ranges, instead compute them on first + // access. + #[serde(skip)] + bucket_ranges: OnceCell<Vec<u64>>, + min: u64, + max: u64, + bucket_count: usize, +} + +impl Bucketing for PrecomputedLinear { + /// Get the bucket for the sample. + /// + /// This uses a binary search to locate the index `i` of the bucket such that: + /// bucket[i] <= sample < bucket[i+1] + fn sample_to_bucket_minimum(&self, sample: u64) -> u64 { + let limit = match self.ranges().binary_search(&sample) { + // Found an exact match to fit it in + Ok(i) => i, + // Sorted it fits after the bucket's limit, therefore it fits into the previous bucket + Err(i) => i - 1, + }; + + self.ranges()[limit] + } + + fn ranges(&self) -> &[u64] { + // Create the linear range on first access. 
+ self.bucket_ranges + .get_or_init(|| linear_range(self.min, self.max, self.bucket_count)) + } +} + +impl Histogram<PrecomputedLinear> { + /// Creates a histogram with `bucket_count` linear buckets in the range `min` to `max`. + pub fn linear(min: u64, max: u64, bucket_count: usize) -> Histogram<PrecomputedLinear> { + Histogram { + values: HashMap::new(), + count: 0, + sum: 0, + bucketing: PrecomputedLinear { + bucket_ranges: OnceCell::new(), + min, + max, + bucket_count, + }, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + const DEFAULT_BUCKET_COUNT: usize = 100; + const DEFAULT_RANGE_MIN: u64 = 0; + const DEFAULT_RANGE_MAX: u64 = 100; + + #[test] + fn can_count() { + let mut hist = Histogram::linear(1, 500, 10); + assert!(hist.is_empty()); + + for i in 1..=10 { + hist.accumulate(i); + } + + assert_eq!(10, hist.count()); + assert_eq!(55, hist.sum()); + } + + #[test] + fn overflow_values_accumulate_in_the_last_bucket() { + let mut hist = + Histogram::linear(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); + + hist.accumulate(DEFAULT_RANGE_MAX + 100); + assert_eq!(1, hist.values[&DEFAULT_RANGE_MAX]); + } + + #[test] + fn short_linear_buckets_are_correct() { + let test_buckets = vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 10]; + + assert_eq!(test_buckets, linear_range(1, 10, 10)); + // There's always a zero bucket, so we increase the lower limit. + assert_eq!(test_buckets, linear_range(0, 10, 10)); + } + + #[test] + fn long_linear_buckets_are_correct() { + // Hand calculated values using current default range 0 - 60000 and bucket count of 100. 
+ // NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any + // values beyond the maximum (in this case the maximum is 60000) + let test_buckets = vec![ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, + 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 100, + ]; + + assert_eq!( + test_buckets, + linear_range(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT) + ); + } + + #[test] + fn default_buckets_correctly_accumulate() { + let mut hist = + Histogram::linear(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); + + for i in &[1, 10, 100, 1000, 10000] { + hist.accumulate(*i); + } + + assert_eq!(11111, hist.sum()); + assert_eq!(5, hist.count()); + + assert_eq!(None, hist.values.get(&0)); + assert_eq!(1, hist.values[&1]); + assert_eq!(1, hist.values[&10]); + assert_eq!(3, hist.values[&100]); + } + + #[test] + fn accumulate_large_numbers() { + let mut hist = Histogram::linear(1, 500, 10); + + hist.accumulate(u64::max_value()); + hist.accumulate(u64::max_value()); + + assert_eq!(2, hist.count()); + // Saturate before overflowing + assert_eq!(u64::max_value(), hist.sum()); + assert_eq!(2, hist.values[&500]); + } +} diff --git a/third_party/rust/glean-core/src/histogram/mod.rs b/third_party/rust/glean-core/src/histogram/mod.rs new file mode 100644 index 0000000000..be783fb321 --- /dev/null +++ b/third_party/rust/glean-core/src/histogram/mod.rs @@ -0,0 +1,139 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A simple histogram implementation for exponential histograms. 
+ +use std::collections::HashMap; +use std::convert::TryFrom; + +use serde::{Deserialize, Serialize}; + +use crate::error::{Error, ErrorKind}; + +pub use exponential::PrecomputedExponential; +pub use functional::Functional; +pub use linear::PrecomputedLinear; + +mod exponential; +mod functional; +mod linear; + +/// Different kinds of histograms. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum HistogramType { + /// A histogram with linear distributed buckets. + Linear, + /// A histogram with exponential distributed buckets. + Exponential, +} + +impl TryFrom<i32> for HistogramType { + type Error = Error; + + fn try_from(value: i32) -> Result<HistogramType, Self::Error> { + match value { + 0 => Ok(HistogramType::Linear), + 1 => Ok(HistogramType::Exponential), + e => Err(ErrorKind::HistogramType(e).into()), + } + } +} + +/// A histogram. +/// +/// Stores the counts per bucket and tracks the count of added samples and the total sum. +/// The bucketing algorithm can be changed. +/// +/// ## Example +/// +/// ```rust,ignore +/// let mut hist = Histogram::exponential(1, 500, 10); +/// +/// for i in 1..=10 { +/// hist.accumulate(i); +/// } +/// +/// assert_eq!(10, hist.count()); +/// assert_eq!(55, hist.sum()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Histogram<B> { + /// Mapping bucket's minimum to sample count. + values: HashMap<u64, u64>, + + /// The count of samples added. + count: u64, + /// The total sum of samples. + sum: u64, + + /// The bucketing algorithm used. + bucketing: B, +} + +/// A bucketing algorithm for histograms. +/// +/// It's responsible to calculate the bucket a sample goes into. +/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed. +pub trait Bucketing { + /// Get the bucket's minimum value the sample falls into. 
+ fn sample_to_bucket_minimum(&self, sample: u64) -> u64; + + /// The computed bucket ranges for this bucketing algorithm. + fn ranges(&self) -> &[u64]; +} + +impl<B: Bucketing> Histogram<B> { + /// Gets the number of buckets in this histogram. + pub fn bucket_count(&self) -> usize { + self.values.len() + } + + /// Adds a single value to this histogram. + pub fn accumulate(&mut self, sample: u64) { + let bucket_min = self.bucketing.sample_to_bucket_minimum(sample); + let entry = self.values.entry(bucket_min).or_insert(0); + *entry += 1; + self.sum = self.sum.saturating_add(sample); + self.count += 1; + } + + /// Gets the total sum of values recorded in this histogram. + pub fn sum(&self) -> u64 { + self.sum + } + + /// Gets the total count of values recorded in this histogram. + pub fn count(&self) -> u64 { + self.count + } + + /// Gets the filled values. + pub fn values(&self) -> &HashMap<u64, u64> { + &self.values + } + + /// Checks if this histogram recorded any values. + pub fn is_empty(&self) -> bool { + self.count() == 0 + } + + /// Gets a snapshot of all values from the first bucket until one past the last filled bucket, + /// filling in empty buckets with 0. + pub fn snapshot_values(&self) -> HashMap<u64, u64> { + let mut res = self.values.clone(); + + let max_bucket = self.values.keys().max().cloned().unwrap_or(0); + + for &min_bucket in self.bucketing.ranges() { + // Fill in missing entries. + let _ = res.entry(min_bucket).or_insert(0); + // stop one after the last filled bucket + if min_bucket > max_bucket { + break; + } + } + res + } +} diff --git a/third_party/rust/glean-core/src/internal_metrics.rs b/third_party/rust/glean-core/src/internal_metrics.rs new file mode 100644 index 0000000000..feca2a3bd6 --- /dev/null +++ b/third_party/rust/glean-core/src/internal_metrics.rs @@ -0,0 +1,175 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::{metrics::*, CommonMetricData, Lifetime}; + +#[derive(Debug)] +pub struct CoreMetrics { + pub client_id: UuidMetric, + pub first_run_date: DatetimeMetric, + pub first_run_hour: DatetimeMetric, + pub os: StringMetric, + + /// The number of times we encountered an IO error + /// when writing a pending ping to disk. + /// + /// **Note**: Not a _core_ metric, but an error metric, + /// placed here for the lack of a more suitable part in the Glean struct. + pub io_errors: CounterMetric, +} + +impl CoreMetrics { + pub fn new() -> CoreMetrics { + CoreMetrics { + client_id: UuidMetric::new(CommonMetricData { + name: "client_id".into(), + category: "".into(), + send_in_pings: vec!["glean_client_info".into()], + lifetime: Lifetime::User, + disabled: false, + dynamic_label: None, + }), + + first_run_date: DatetimeMetric::new( + CommonMetricData { + name: "first_run_date".into(), + category: "".into(), + send_in_pings: vec!["glean_client_info".into()], + lifetime: Lifetime::User, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Day, + ), + + first_run_hour: DatetimeMetric::new( + CommonMetricData { + name: "first_run_hour".into(), + category: "glean.validation".into(), + send_in_pings: vec!["metrics".into(), "baseline".into()], + lifetime: Lifetime::User, + disabled: false, + dynamic_label: None, + }, + TimeUnit::Hour, + ), + + os: StringMetric::new(CommonMetricData { + name: "os".into(), + category: "".into(), + send_in_pings: vec!["glean_client_info".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }), + + io_errors: CounterMetric::new(CommonMetricData { + name: "io".into(), + category: "glean.error".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + } + } +} + +#[derive(Debug)] +pub struct UploadMetrics { + pub 
ping_upload_failure: LabeledMetric<CounterMetric>, + pub discarded_exceeding_pings_size: MemoryDistributionMetric, + pub pending_pings_directory_size: MemoryDistributionMetric, + pub deleted_pings_after_quota_hit: CounterMetric, + pub pending_pings: CounterMetric, +} + +impl UploadMetrics { + pub fn new() -> UploadMetrics { + UploadMetrics { + ping_upload_failure: LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "ping_upload_failure".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + Some(vec![ + "status_code_4xx".into(), + "status_code_5xx".into(), + "status_code_unknown".into(), + "unrecoverable".into(), + "recoverable".into(), + ]), + ), + + discarded_exceeding_pings_size: MemoryDistributionMetric::new( + CommonMetricData { + name: "discarded_exceeding_ping_size".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Kilobyte, + ), + + pending_pings_directory_size: MemoryDistributionMetric::new( + CommonMetricData { + name: "pending_pings_directory_size".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Kilobyte, + ), + + deleted_pings_after_quota_hit: CounterMetric::new(CommonMetricData { + name: "deleted_pings_after_quota_hit".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + + pending_pings: CounterMetric::new(CommonMetricData { + name: "pending_pings".into(), + category: "glean.upload".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }), + } + } +} + +#[derive(Debug)] +pub struct DatabaseMetrics { + pub size: 
MemoryDistributionMetric, +} + +impl DatabaseMetrics { + pub fn new() -> DatabaseMetrics { + DatabaseMetrics { + size: MemoryDistributionMetric::new( + CommonMetricData { + name: "size".into(), + category: "glean.database".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Ping, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Byte, + ), + } + } +} diff --git a/third_party/rust/glean-core/src/internal_pings.rs b/third_party/rust/glean-core/src/internal_pings.rs new file mode 100644 index 0000000000..159f20e4bc --- /dev/null +++ b/third_party/rust/glean-core/src/internal_pings.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::metrics::PingType; + +/// Glean-provided pings, all enabled by default. +/// +/// These pings are defined in `glean-core/pings.yaml` and for now manually translated into Rust code. +/// This might get auto-generated when the Rust API lands ([Bug 1579146](https://bugzilla.mozilla.org/show_bug.cgi?id=1579146)). +/// +/// They are parsed and registered by the platform-specific wrappers, but might be used Glean-internal directly. 
+#[derive(Debug)] +pub struct InternalPings { + pub baseline: PingType, + pub metrics: PingType, + pub events: PingType, + pub deletion_request: PingType, +} + +impl InternalPings { + pub fn new() -> InternalPings { + InternalPings { + baseline: PingType::new("baseline", true, false, vec![]), + metrics: PingType::new( + "metrics", + true, + false, + vec![ + "overdue".to_string(), + "reschedule".to_string(), + "today".to_string(), + "tomorrow".to_string(), + "upgrade".to_string(), + ], + ), + events: PingType::new("events", true, false, vec![]), + deletion_request: PingType::new("deletion-request", true, true, vec![]), + } + } +} diff --git a/third_party/rust/glean-core/src/lib.rs b/third_party/rust/glean-core/src/lib.rs new file mode 100644 index 0000000000..a2619dceb4 --- /dev/null +++ b/third_party/rust/glean-core/src/lib.rs @@ -0,0 +1,940 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![deny(broken_intra_doc_links)] +#![deny(missing_docs)] + +//! Glean is a modern approach for recording and sending Telemetry data. +//! +//! It's in use at Mozilla. +//! +//! All documentation can be found online: +//! +//! ## [The Glean SDK Book](https://mozilla.github.io/glean) + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use chrono::{DateTime, FixedOffset}; +use once_cell::sync::Lazy; +use once_cell::sync::OnceCell; +use std::sync::Mutex; +use uuid::Uuid; + +// This needs to be included first, and the space below prevents rustfmt from +// alphabetizing it. 
mod macros;

mod common_metric_data;
mod database;
mod debug;
mod error;
mod error_recording;
mod event_database;
mod histogram;
mod internal_metrics;
mod internal_pings;
pub mod metrics;
pub mod ping;
pub mod storage;
mod system;
pub mod traits;
pub mod upload;
mod util;

pub use crate::common_metric_data::{CommonMetricData, Lifetime};
use crate::database::Database;
use crate::debug::DebugOptions;
pub use crate::error::{Error, ErrorKind, Result};
pub use crate::error_recording::{test_get_num_recorded_errors, ErrorType};
use crate::event_database::EventDatabase;
pub use crate::histogram::HistogramType;
use crate::internal_metrics::{CoreMetrics, DatabaseMetrics};
use crate::internal_pings::InternalPings;
use crate::metrics::{Metric, MetricType, PingType};
use crate::ping::PingMaker;
use crate::storage::StorageManager;
use crate::upload::{PingUploadManager, PingUploadTask, UploadResult};
use crate::util::{local_now_with_offset, sanitize_application_id};

// The crate version as set by Cargo at build time; logged when a Glean object is created.
const GLEAN_VERSION: &str = env!("CARGO_PKG_VERSION");
// The schema version; it is part of the submission path built for every ping.
const GLEAN_SCHEMA_VERSION: u32 = 1;
// Used when the configuration does not specify `max_events`.
const DEFAULT_MAX_EVENTS: usize = 500;
// The placeholder client ID that is stored while upload is disabled.
static KNOWN_CLIENT_ID: Lazy<Uuid> =
    Lazy::new(|| Uuid::parse_str("c0ffeec0-ffee-c0ff-eec0-ffeec0ffeec0").unwrap());
// An internal ping name, not to be touched by anything else.
pub(crate) const INTERNAL_STORAGE: &str = "glean_internal_info";

// The names of the pings directories.
pub(crate) const PENDING_PINGS_DIRECTORY: &str = "pending_pings";
pub(crate) const DELETION_REQUEST_PINGS_DIRECTORY: &str = "deletion_request";

/// The global Glean instance.
///
/// This is the singleton used by all wrappers to allow for a nice API.
/// All state for Glean is kept inside this object (such as the database handle and `upload_enabled` flag).
///
/// It should be initialized with `glean_core::initialize` at the start of the application using
/// Glean.
+static GLEAN: OnceCell<Mutex<Glean>> = OnceCell::new(); + +/// Gets a reference to the global Glean object. +pub fn global_glean() -> Option<&'static Mutex<Glean>> { + GLEAN.get() +} + +/// Sets or replaces the global Glean object. +pub fn setup_glean(glean: Glean) -> Result<()> { + // The `OnceCell` type wrapping our Glean is thread-safe and can only be set once. + // Therefore even if our check for it being empty succeeds, setting it could fail if a + // concurrent thread is quicker in setting it. + // However this will not cause a bigger problem, as the second `set` operation will just fail. + // We can log it and move on. + // + // For all wrappers this is not a problem, as the Glean object is intialized exactly once on + // calling `initialize` on the global singleton and further operations check that it has been + // initialized. + if GLEAN.get().is_none() { + if GLEAN.set(Mutex::new(glean)).is_err() { + log::warn!( + "Global Glean object is initialized already. This probably happened concurrently." + ) + } + } else { + // We allow overriding the global Glean object to support test mode. + // In test mode the Glean object is fully destroyed and recreated. + // This all happens behind a mutex and is therefore also thread-safe.. + let mut lock = GLEAN.get().unwrap().lock().unwrap(); + *lock = glean; + } + Ok(()) +} + +/// The Glean configuration. +/// +/// Optional values will be filled in with default values. +#[derive(Debug, Clone)] +pub struct Configuration { + /// Whether upload should be enabled. + pub upload_enabled: bool, + /// Path to a directory to store all data in. + pub data_path: String, + /// The application ID (will be sanitized during initialization). + pub application_id: String, + /// The name of the programming language used by the binding creating this instance of Glean. + pub language_binding_name: String, + /// The maximum number of events to store before sending a ping containing events. 
+ pub max_events: Option<usize>, + /// Whether Glean should delay persistence of data from metrics with ping lifetime. + pub delay_ping_lifetime_io: bool, +} + +/// The object holding meta information about a Glean instance. +/// +/// ## Example +/// +/// Create a new Glean instance, register a ping, record a simple counter and then send the final +/// ping. +/// +/// ```rust,no_run +/// # use glean_core::{Glean, Configuration, CommonMetricData, metrics::*}; +/// let cfg = Configuration { +/// data_path: "/tmp/glean".into(), +/// application_id: "glean.sample.app".into(), +/// language_binding_name: "Rust".into(), +/// upload_enabled: true, +/// max_events: None, +/// delay_ping_lifetime_io: false, +/// }; +/// let mut glean = Glean::new(cfg).unwrap(); +/// let ping = PingType::new("sample", true, false, vec![]); +/// glean.register_ping_type(&ping); +/// +/// let call_counter: CounterMetric = CounterMetric::new(CommonMetricData { +/// name: "calls".into(), +/// category: "local".into(), +/// send_in_pings: vec!["sample".into()], +/// ..Default::default() +/// }); +/// +/// call_counter.add(&glean, 1); +/// +/// glean.submit_ping(&ping, None).unwrap(); +/// ``` +/// +/// ## Note +/// +/// In specific language bindings, this is usually wrapped in a singleton and all metric recording goes to a single instance of this object. +/// In the Rust core, it is possible to create multiple instances, which is used in testing. +#[derive(Debug)] +pub struct Glean { + upload_enabled: bool, + data_store: Option<Database>, + event_data_store: EventDatabase, + core_metrics: CoreMetrics, + database_metrics: DatabaseMetrics, + internal_pings: InternalPings, + data_path: PathBuf, + application_id: String, + ping_registry: HashMap<String, PingType>, + start_time: DateTime<FixedOffset>, + max_events: usize, + is_first_run: bool, + upload_manager: PingUploadManager, + debug: DebugOptions, +} + +impl Glean { + /// Creates and initializes a new Glean object for use in a subprocess. 
+ /// + /// Importantly, this will not send any pings at startup, since that + /// sort of management should only happen in the main process. + pub fn new_for_subprocess(cfg: &Configuration, scan_directories: bool) -> Result<Self> { + log::info!("Creating new Glean v{}", GLEAN_VERSION); + + let application_id = sanitize_application_id(&cfg.application_id); + if application_id.is_empty() { + return Err(ErrorKind::InvalidConfig.into()); + } + + // Creating the data store creates the necessary path as well. + // If that fails we bail out and don't initialize further. + let data_store = Some(Database::new(&cfg.data_path, cfg.delay_ping_lifetime_io)?); + let event_data_store = EventDatabase::new(&cfg.data_path)?; + + // Create an upload manager with rate limiting of 15 pings every 60 seconds. + let mut upload_manager = PingUploadManager::new(&cfg.data_path, &cfg.language_binding_name); + upload_manager.set_rate_limiter( + /* seconds per interval */ 60, /* max pings per interval */ 15, + ); + + // We only scan the pending ping sdirectories when calling this from a subprocess, + // when calling this from ::new we need to scan the directories after dealing with the upload state. + if scan_directories { + let _scanning_thread = upload_manager.scan_pending_pings_directories(); + } + + Ok(Self { + upload_enabled: cfg.upload_enabled, + data_store, + event_data_store, + core_metrics: CoreMetrics::new(), + database_metrics: DatabaseMetrics::new(), + internal_pings: InternalPings::new(), + upload_manager, + data_path: PathBuf::from(&cfg.data_path), + application_id, + ping_registry: HashMap::new(), + start_time: local_now_with_offset(), + max_events: cfg.max_events.unwrap_or(DEFAULT_MAX_EVENTS), + is_first_run: false, + debug: DebugOptions::new(), + }) + } + + /// Creates and initializes a new Glean object. + /// + /// This will create the necessary directories and files in + /// [`cfg.data_path`](Configuration::data_path). This will also initialize + /// the core metrics. 
+ pub fn new(cfg: Configuration) -> Result<Self> { + let mut glean = Self::new_for_subprocess(&cfg, false)?; + + // The upload enabled flag may have changed since the last run, for + // example by the changing of a config file. + if cfg.upload_enabled { + // If upload is enabled, just follow the normal code path to + // instantiate the core metrics. + glean.on_upload_enabled(); + } else { + // If upload is disabled, and we've never run before, only set the + // client_id to KNOWN_CLIENT_ID, but do not send a deletion request + // ping. + // If we have run before, and if the client_id is not equal to + // the KNOWN_CLIENT_ID, do the full upload disabled operations to + // clear metrics, set the client_id to KNOWN_CLIENT_ID, and send a + // deletion request ping. + match glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + { + None => glean.clear_metrics(), + Some(uuid) => { + if uuid != *KNOWN_CLIENT_ID { + // Temporarily enable uploading so we can submit a + // deletion request ping. + glean.upload_enabled = true; + glean.on_upload_disabled(); + } + } + } + } + + // We only scan the pendings pings directories **after** dealing with the upload state. + // If upload is disabled, we delete all pending pings files + // and we need to do that **before** scanning the pending pings folder + // to ensure we don't enqueue pings before their files are deleted. + let _scanning_thread = glean.upload_manager.scan_pending_pings_directories(); + + Ok(glean) + } + + /// For tests make it easy to create a Glean object using only the required configuration. 
+ #[cfg(test)] + pub(crate) fn with_options( + data_path: &str, + application_id: &str, + upload_enabled: bool, + ) -> Self { + let cfg = Configuration { + data_path: data_path.into(), + application_id: application_id.into(), + language_binding_name: "Rust".into(), + upload_enabled, + max_events: None, + delay_ping_lifetime_io: false, + }; + + let mut glean = Self::new(cfg).unwrap(); + + // Disable all upload manager policies for testing + glean.upload_manager = PingUploadManager::no_policy(data_path); + + glean + } + + /// Destroys the database. + /// + /// After this Glean needs to be reinitialized. + pub fn destroy_db(&mut self) { + self.data_store = None; + } + + /// Initializes the core metrics managed by Glean's Rust core. + fn initialize_core_metrics(&mut self) { + let need_new_client_id = match self + .core_metrics + .client_id + .get_value(self, "glean_client_info") + { + None => true, + Some(uuid) => uuid == *KNOWN_CLIENT_ID, + }; + if need_new_client_id { + self.core_metrics.client_id.generate_and_set(self); + } + + if self + .core_metrics + .first_run_date + .get_value(self, "glean_client_info") + .is_none() + { + self.core_metrics.first_run_date.set(self, None); + self.core_metrics.first_run_hour.set(self, None); + // The `first_run_date` field is generated on the very first run + // and persisted across upload toggling. We can assume that, the only + // time it is set, that's indeed our "first run". + self.is_first_run = true; + } + + self.set_application_lifetime_core_metrics(); + } + + /// Initializes the database metrics managed by Glean's Rust core. + fn initialize_database_metrics(&mut self) { + log::trace!("Initializing database metrics"); + + if let Some(size) = self + .data_store + .as_ref() + .and_then(|database| database.file_size()) + { + log::trace!("Database file size: {}", size.get()); + self.database_metrics.size.accumulate(self, size.get()) + } + } + + /// Signals that the environment is ready to submit pings. 
+ /// + /// Should be called when Glean is initialized to the point where it can correctly assemble pings. + /// Usually called from the language binding after all of the core metrics have been set + /// and the ping types have been registered. + /// + /// # Returns + /// + /// Whether at least one ping was generated. + pub fn on_ready_to_submit_pings(&self) -> bool { + self.event_data_store.flush_pending_events_on_startup(&self) + } + + /// Sets whether upload is enabled or not. + /// + /// When uploading is disabled, metrics aren't recorded at all and no + /// data is uploaded. + /// + /// When disabling, all pending metrics, events and queued pings are cleared. + /// + /// When enabling, the core Glean metrics are recreated. + /// + /// If the value of this flag is not actually changed, this is a no-op. + /// + /// # Arguments + /// + /// * `flag` - When true, enable metric collection. + /// + /// # Returns + /// + /// Whether the flag was different from the current value, + /// and actual work was done to clear or reinstate metrics. + pub fn set_upload_enabled(&mut self, flag: bool) -> bool { + log::info!("Upload enabled: {:?}", flag); + + if self.upload_enabled != flag { + if flag { + self.on_upload_enabled(); + } else { + self.on_upload_disabled(); + } + true + } else { + false + } + } + + /// Determines whether upload is enabled. + /// + /// When upload is disabled, no data will be recorded. + pub fn is_upload_enabled(&self) -> bool { + self.upload_enabled + } + + /// Handles the changing of state from upload disabled to enabled. + /// + /// Should only be called when the state actually changes. + /// + /// The `upload_enabled` flag is set to true and the core Glean metrics are + /// recreated. + fn on_upload_enabled(&mut self) { + self.upload_enabled = true; + self.initialize_core_metrics(); + self.initialize_database_metrics(); + } + + /// Handles the changing of state from upload enabled to disabled. 
+ /// + /// Should only be called when the state actually changes. + /// + /// A deletion_request ping is sent, all pending metrics, events and queued + /// pings are cleared, and the client_id is set to KNOWN_CLIENT_ID. + /// Afterward, the upload_enabled flag is set to false. + fn on_upload_disabled(&mut self) { + // The upload_enabled flag should be true here, or the deletion ping + // won't be submitted. + if let Err(err) = self.internal_pings.deletion_request.submit(self, None) { + log::error!("Failed to submit deletion-request ping on optout: {}", err); + } + self.clear_metrics(); + self.upload_enabled = false; + } + + /// Clear any pending metrics when telemetry is disabled. + fn clear_metrics(&mut self) { + // Clear the pending pings queue and acquire the lock + // so that it can't be accessed until this function is done. + let _lock = self.upload_manager.clear_ping_queue(); + + // There are only two metrics that we want to survive after clearing all + // metrics: first_run_date and first_run_hour. Here, we store their values + // so we can restore them after clearing the metrics. + let existing_first_run_date = self + .core_metrics + .first_run_date + .get_value(self, "glean_client_info"); + + let existing_first_run_hour = self.core_metrics.first_run_hour.get_value(self, "metrics"); + + // Clear any pending pings. + let ping_maker = PingMaker::new(); + if let Err(err) = ping_maker.clear_pending_pings(self.get_data_path()) { + log::warn!("Error clearing pending pings: {}", err); + } + + // Delete all stored metrics. + // Note that this also includes the ping sequence numbers, so it has + // the effect of resetting those to their initial values. 
+ if let Some(data) = self.data_store.as_ref() { + data.clear_all() + } + if let Err(err) = self.event_data_store.clear_all() { + log::warn!("Error clearing pending events: {}", err); + } + + // This does not clear the experiments store (which isn't managed by the + // StorageEngineManager), since doing so would mean we would have to have the + // application tell us again which experiments are active if telemetry is + // re-enabled. + + { + // We need to briefly set upload_enabled to true here so that `set` + // is not a no-op. This is safe, since nothing on the Rust side can + // run concurrently to this since we hold a mutable reference to the + // Glean object. Additionally, the pending pings have been cleared + // from disk, so the PingUploader can't wake up and start sending + // pings. + self.upload_enabled = true; + + // Store a "dummy" KNOWN_CLIENT_ID in the client_id metric. This will + // make it easier to detect if pings were unintentionally sent after + // uploading is disabled. + self.core_metrics.client_id.set(self, *KNOWN_CLIENT_ID); + + // Restore the first_run_date. + if let Some(existing_first_run_date) = existing_first_run_date { + self.core_metrics + .first_run_date + .set(self, Some(existing_first_run_date)); + } + + // Restore the first_run_hour. + if let Some(existing_first_run_hour) = existing_first_run_hour { + self.core_metrics + .first_run_hour + .set(self, Some(existing_first_run_hour)); + } + + self.upload_enabled = false; + } + } + + /// Gets the application ID as specified on instantiation. + pub fn get_application_id(&self) -> &str { + &self.application_id + } + + /// Gets the data path of this instance. + pub fn get_data_path(&self) -> &Path { + &self.data_path + } + + /// Gets a handle to the database. + pub fn storage(&self) -> &Database { + &self.data_store.as_ref().expect("No database found") + } + + /// Gets a handle to the event database. 
+ pub fn event_storage(&self) -> &EventDatabase { + &self.event_data_store + } + + /// Gets the maximum number of events to store before sending a ping. + pub fn get_max_events(&self) -> usize { + self.max_events + } + + /// Gets the next task for an uploader. + /// + /// This can be one of: + /// + /// * [`Wait`](PingUploadTask::Wait) - which means the requester should ask + /// again later; + /// * [`Upload(PingRequest)`](PingUploadTask::Upload) - which means there is + /// a ping to upload. This wraps the actual request object; + /// * [`Done`](PingUploadTask::Done) - which means requester should stop + /// asking for now. + /// + /// # Returns + /// + /// A [`PingUploadTask`] representing the next task. + pub fn get_upload_task(&self) -> PingUploadTask { + self.upload_manager.get_upload_task(self, self.log_pings()) + } + + /// Processes the response from an attempt to upload a ping. + /// + /// # Arguments + /// + /// * `uuid` - The UUID of the ping in question. + /// * `status` - The upload result. + pub fn process_ping_upload_response(&self, uuid: &str, status: UploadResult) { + self.upload_manager + .process_ping_upload_response(self, uuid, status); + } + + /// Takes a snapshot for the given store and optionally clear it. + /// + /// # Arguments + /// + /// * `store_name` - The store to snapshot. + /// * `clear_store` - Whether to clear the store after snapshotting. + /// + /// # Returns + /// + /// The snapshot in a string encoded as JSON. If the snapshot is empty, returns an empty string. + pub fn snapshot(&mut self, store_name: &str, clear_store: bool) -> String { + StorageManager + .snapshot(&self.storage(), store_name, clear_store) + .unwrap_or_else(|| String::from("")) + } + + fn make_path(&self, ping_name: &str, doc_id: &str) -> String { + format!( + "/submit/{}/{}/{}/{}", + self.get_application_id(), + ping_name, + GLEAN_SCHEMA_VERSION, + doc_id + ) + } + + /// Collects and submits a ping for eventual uploading. 
+ /// + /// The ping content is assembled as soon as possible, but upload is not + /// guaranteed to happen immediately, as that depends on the upload policies. + /// + /// If the ping currently contains no content, it will not be sent, + /// unless it is configured to be sent if empty. + /// + /// # Arguments + /// + /// * `ping` - The ping to submit + /// * `reason` - A reason code to include in the ping + /// + /// # Returns + /// + /// Whether the ping was succesfully assembled and queued. + /// + /// # Errors + /// + /// If collecting or writing the ping to disk failed. + pub fn submit_ping(&self, ping: &PingType, reason: Option<&str>) -> Result<bool> { + if !self.is_upload_enabled() { + log::info!("Glean disabled: not submitting any pings."); + return Ok(false); + } + + let ping_maker = PingMaker::new(); + let doc_id = Uuid::new_v4().to_string(); + let url_path = self.make_path(&ping.name, &doc_id); + match ping_maker.collect(self, &ping, reason) { + None => { + log::info!( + "No content for ping '{}', therefore no ping queued.", + ping.name + ); + Ok(false) + } + Some(content) => { + if let Err(e) = ping_maker.store_ping( + self, + &doc_id, + &ping.name, + &self.get_data_path(), + &url_path, + &content, + ) { + log::warn!("IO error while writing ping to file: {}", e); + self.core_metrics.io_errors.add(self, 1); + return Err(e.into()); + } + + self.upload_manager.enqueue_ping_from_file(self, &doc_id); + + log::info!( + "The ping '{}' was submitted and will be sent as soon as possible", + ping.name + ); + Ok(true) + } + } + } + + /// Collects and submits a ping by name for eventual uploading. + /// + /// The ping content is assembled as soon as possible, but upload is not + /// guaranteed to happen immediately, as that depends on the upload policies. + /// + /// If the ping currently contains no content, it will not be sent, + /// unless it is configured to be sent if empty. 
+ /// + /// # Arguments + /// + /// * `ping_name` - The name of the ping to submit + /// * `reason` - A reason code to include in the ping + /// + /// # Returns + /// + /// Whether the ping was succesfully assembled and queued. + /// + /// # Errors + /// + /// If collecting or writing the ping to disk failed. + pub fn submit_ping_by_name(&self, ping_name: &str, reason: Option<&str>) -> Result<bool> { + match self.get_ping_by_name(ping_name) { + None => { + log::error!("Attempted to submit unknown ping '{}'", ping_name); + Ok(false) + } + Some(ping) => self.submit_ping(ping, reason), + } + } + + /// Gets a [`PingType`] by name. + /// + /// # Returns + /// + /// The [`PingType`] of a ping if the given name was registered before, [`None`] + /// otherwise. + pub fn get_ping_by_name(&self, ping_name: &str) -> Option<&PingType> { + self.ping_registry.get(ping_name) + } + + /// Register a new [`PingType`](metrics/struct.PingType.html). + pub fn register_ping_type(&mut self, ping: &PingType) { + if self.ping_registry.contains_key(&ping.name) { + log::debug!("Duplicate ping named '{}'", ping.name) + } + + self.ping_registry.insert(ping.name.clone(), ping.clone()); + } + + /// Get create time of the Glean object. + pub(crate) fn start_time(&self) -> DateTime<FixedOffset> { + self.start_time + } + + /// Indicates that an experiment is running. + /// + /// Glean will then add an experiment annotation to the environment + /// which is sent with pings. This information is not persisted between runs. + /// + /// # Arguments + /// + /// * `experiment_id` - The id of the active experiment (maximum 30 bytes). + /// * `branch` - The experiment branch (maximum 30 bytes). + /// * `extra` - Optional metadata to output with the ping. 
+ pub fn set_experiment_active( + &self, + experiment_id: String, + branch: String, + extra: Option<HashMap<String, String>>, + ) { + let metric = metrics::ExperimentMetric::new(&self, experiment_id); + metric.set_active(&self, branch, extra); + } + + /// Indicates that an experiment is no longer running. + /// + /// # Arguments + /// + /// * `experiment_id` - The id of the active experiment to deactivate (maximum 30 bytes). + pub fn set_experiment_inactive(&self, experiment_id: String) { + let metric = metrics::ExperimentMetric::new(&self, experiment_id); + metric.set_inactive(&self); + } + + /// Persists [`Lifetime::Ping`] data that might be in memory in case + /// [`delay_ping_lifetime_io`](Configuration::delay_ping_lifetime_io) is set + /// or was set at a previous time. + /// + /// If there is no data to persist, this function does nothing. + pub fn persist_ping_lifetime_data(&self) -> Result<()> { + if let Some(data) = self.data_store.as_ref() { + return data.persist_ping_lifetime_data(); + } + + Ok(()) + } + + /// Sets internally-handled application lifetime metrics. + fn set_application_lifetime_core_metrics(&self) { + self.core_metrics.os.set(self, system::OS); + } + + /// **This is not meant to be used directly.** + /// + /// Clears all the metrics that have [`Lifetime::Application`]. + pub fn clear_application_lifetime_metrics(&self) { + log::trace!("Clearing Lifetime::Application metrics"); + if let Some(data) = self.data_store.as_ref() { + data.clear_lifetime(Lifetime::Application); + } + + // Set internally handled app lifetime metrics again. + self.set_application_lifetime_core_metrics(); + } + + /// Whether or not this is the first run on this profile. + pub fn is_first_run(&self) -> bool { + self.is_first_run + } + + /// Sets a debug view tag. + /// + /// This will return `false` in case `value` is not a valid tag. 
    ///
    /// When the debug view tag is set, pings are sent with a `X-Debug-ID` header with the value of the tag
    /// and are sent to the ["Ping Debug Viewer"](https://mozilla.github.io/glean/book/dev/core/internal/debug-pings.html).
    ///
    /// # Arguments
    ///
    /// * `value` - A valid HTTP header value. Must match the regex: "[a-zA-Z0-9-]{1,20}".
    pub fn set_debug_view_tag(&mut self, value: &str) -> bool {
        self.debug.debug_view_tag.set(value.into())
    }

    /// Returns the value for the debug view tag or [`None`] if it hasn't been set.
    ///
    /// The `debug_view_tag` may be set from an environment variable
    /// (`GLEAN_DEBUG_VIEW_TAG`) or through the [`Glean::set_debug_view_tag`] function.
    pub(crate) fn debug_view_tag(&self) -> Option<&String> {
        self.debug.debug_view_tag.get()
    }

    /// Sets source tags.
    ///
    /// This will return `false` in case `value` contains invalid tags.
    ///
    /// Ping tags will show in the destination datasets, after ingestion.
    ///
    /// # Arguments
    ///
    /// * `value` - A vector of at most 5 valid HTTP header values. Individual tags must match the regex: "[a-zA-Z0-9-]{1,20}".
    pub fn set_source_tags(&mut self, value: Vec<String>) -> bool {
        self.debug.source_tags.set(value)
    }

    /// Returns the value for the source tags or [`None`] if it hasn't been set.
    ///
    /// The `source_tags` may be set from an environment variable (`GLEAN_SOURCE_TAGS`)
    /// or through the [`Glean::set_source_tags`] function.
    pub(crate) fn source_tags(&self) -> Option<&Vec<String>> {
        self.debug.source_tags.get()
    }

    /// Sets the log pings debug option.
    ///
    /// This will return `false` in case we are unable to set the option.
    ///
    /// When the log pings debug option is `true`,
    /// we log the payload of all successfully assembled pings.
    ///
    /// # Arguments
    ///
    /// * `value` - The value of the log pings option
    pub fn set_log_pings(&mut self, value: bool) -> bool {
        self.debug.log_pings.set(value)
    }

    /// Returns the value for the log pings debug option, or `false` if it hasn't been set.
    ///
    /// The `log_pings` option may be set from an environment variable (`GLEAN_LOG_PINGS`)
    /// or through the [`Glean::set_log_pings`] function.
    pub(crate) fn log_pings(&self) -> bool {
        self.debug.log_pings.get().copied().unwrap_or(false)
    }

    /// Builds the boolean metric backing the "dirty flag".
    ///
    /// The metric has user lifetime and is stored in the internal storage only.
    fn get_dirty_bit_metric(&self) -> metrics::BooleanMetric {
        metrics::BooleanMetric::new(CommonMetricData {
            name: "dirtybit".into(),
            // We don't need a category, the name is already unique
            category: "".into(),
            send_in_pings: vec![INTERNAL_STORAGE.into()],
            lifetime: Lifetime::User,
            ..Default::default()
        })
    }

    /// **This is not meant to be used directly.**
    ///
    /// Sets the value of a "dirty flag" in the permanent storage.
    ///
    /// The "dirty flag" is meant to have the following behaviour, implemented
    /// by the consumers of the FFI layer:
    ///
    /// - on mobile: set to `false` when going to background or shutting down,
    ///   set to `true` at startup and when going to foreground.
    /// - on non-mobile platforms: set to `true` at startup and `false` at
    ///   shutdown.
    ///
    /// At startup, before setting its new value, if the "dirty flag" value is
    /// `true`, then Glean knows it did not exit cleanly and can implement
    /// coping mechanisms (e.g. sending a `baseline` ping).
    pub fn set_dirty_flag(&self, new_value: bool) {
        self.get_dirty_bit_metric().set(self, new_value);
    }

    /// **This is not meant to be used directly.**
    ///
    /// Checks the stored value of the "dirty flag".
+ pub fn is_dirty_flag_set(&self) -> bool { + let dirty_bit_metric = self.get_dirty_bit_metric(); + match StorageManager.snapshot_metric( + self.storage(), + INTERNAL_STORAGE, + &dirty_bit_metric.meta().identifier(self), + dirty_bit_metric.meta().lifetime, + ) { + Some(Metric::Boolean(b)) => b, + _ => false, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Checks if an experiment is currently active. + /// + /// # Arguments + /// + /// * `experiment_id` - The id of the experiment (maximum 30 bytes). + /// + /// # Returns + /// + /// Whether the experiment is active. + pub fn test_is_experiment_active(&self, experiment_id: String) -> bool { + self.test_get_experiment_data_as_json(experiment_id) + .is_some() + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets stored data for the requested experiment. + /// + /// # Arguments + /// + /// * `experiment_id` - The id of the active experiment (maximum 30 bytes). + /// + /// # Returns + /// + /// A JSON string with the following format: + /// + /// `{ 'branch': 'the-branch-name', 'extra': {'key': 'value', ...}}` + /// + /// if the requested experiment is active, `None` otherwise. + pub fn test_get_experiment_data_as_json(&self, experiment_id: String) -> Option<String> { + let metric = metrics::ExperimentMetric::new(&self, experiment_id); + metric.test_get_value_as_json_string(&self) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Deletes all stored metrics. + /// + /// Note that this also includes the ping sequence numbers, so it has + /// the effect of resetting those to their initial values. + pub fn test_clear_all_stores(&self) { + if let Some(data) = self.data_store.as_ref() { + data.clear_all() + } + // We don't care about this failing, maybe the data does just not exist. + let _ = self.event_data_store.clear_all(); + } +} + +// Split unit tests to a separate file, to reduce the file of this one. 
+#[cfg(test)] +#[cfg(test)] +#[path = "lib_unit_tests.rs"] +mod tests; diff --git a/third_party/rust/glean-core/src/lib_unit_tests.rs b/third_party/rust/glean-core/src/lib_unit_tests.rs new file mode 100644 index 0000000000..1e1a36be92 --- /dev/null +++ b/third_party/rust/glean-core/src/lib_unit_tests.rs @@ -0,0 +1,908 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// NOTE: This is a test-only file that contains unit tests for +// the lib.rs file. + +use std::collections::HashSet; +use std::iter::FromIterator; + +use super::*; +use crate::metrics::RecordedExperimentData; +use crate::metrics::{StringMetric, TimeUnit, TimespanMetric, TimingDistributionMetric}; + +const GLOBAL_APPLICATION_ID: &str = "org.mozilla.glean.test.app"; +pub fn new_glean(tempdir: Option<tempfile::TempDir>) -> (Glean, tempfile::TempDir) { + let dir = match tempdir { + Some(tempdir) => tempdir, + None => tempfile::tempdir().unwrap(), + }; + let tmpname = dir.path().display().to_string(); + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + (glean, dir) +} + +#[test] +fn path_is_constructed_from_data() { + let (glean, _) = new_glean(None); + + assert_eq!( + "/submit/org-mozilla-glean-test-app/baseline/1/this-is-a-docid", + glean.make_path("baseline", "this-is-a-docid") + ); +} + +// Experiment's API tests: the next two tests come from glean-ac's +// ExperimentsStorageEngineTest.kt. +#[test] +fn experiment_id_and_branch_get_truncated_if_too_long() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean.tests", true); + + // Generate long strings for the used ids. + let very_long_id = "test-experiment-id".repeat(10); + let very_long_branch_id = "test-branch-id".repeat(10); + + // Mark the experiment as active. 
+ glean.set_experiment_active(very_long_id.clone(), very_long_branch_id.clone(), None); + + // Generate the expected id and branch strings. + let mut expected_id = very_long_id; + expected_id.truncate(100); + let mut expected_branch_id = very_long_branch_id; + expected_branch_id.truncate(100); + + assert!( + glean.test_is_experiment_active(expected_id.clone()), + "An experiment with the truncated id should be available" + ); + + // Make sure the branch id was truncated as well. + let experiment_data = glean.test_get_experiment_data_as_json(expected_id); + assert!( + !experiment_data.is_none(), + "Experiment data must be available" + ); + + let parsed_json: RecordedExperimentData = + ::serde_json::from_str(&experiment_data.unwrap()).unwrap(); + assert_eq!(expected_branch_id, parsed_json.branch); +} + +#[test] +fn limits_on_experiments_extras_are_applied_correctly() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean.tests", true); + + let experiment_id = "test-experiment_id".to_string(); + let branch_id = "test-branch-id".to_string(); + let mut extras = HashMap::new(); + + let too_long_key = "0123456789".repeat(11); + let too_long_value = "0123456789".repeat(11); + + // Build an extras HashMap that's a little too long in every way + for n in 0..21 { + extras.insert(format!("{}-{}", n, too_long_key), too_long_value.clone()); + } + + // Mark the experiment as active. 
+ glean.set_experiment_active(experiment_id.clone(), branch_id, Some(extras)); + + // Make sure it is active + assert!( + glean.test_is_experiment_active(experiment_id.clone()), + "An experiment with the truncated id should be available" + ); + + // Get the data + let experiment_data = glean.test_get_experiment_data_as_json(experiment_id); + assert!( + !experiment_data.is_none(), + "Experiment data must be available" + ); + + // Parse the JSON and validate the lengths + let parsed_json: RecordedExperimentData = + ::serde_json::from_str(&experiment_data.unwrap()).unwrap(); + assert_eq!( + 20, + parsed_json.clone().extra.unwrap().len(), + "Experiments extra must be less than max length" + ); + + for (key, value) in parsed_json.extra.as_ref().unwrap().iter() { + assert!( + key.len() <= 100, + "Experiments extra key must be less than max length" + ); + assert!( + value.len() <= 100, + "Experiments extra value must be less than max length" + ); + } +} + +#[test] +fn experiments_status_is_correctly_toggled() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean.tests", true); + + // Define the experiment's data. + let experiment_id: String = "test-toggle-experiment".into(); + let branch_id: String = "test-branch-toggle".into(); + let extra: HashMap<String, String> = [("test-key".into(), "test-value".into())] + .iter() + .cloned() + .collect(); + + // Activate an experiment. + glean.set_experiment_active(experiment_id.clone(), branch_id, Some(extra.clone())); + + // Check that the experiment is marked as active. + assert!( + glean.test_is_experiment_active(experiment_id.clone()), + "The experiment must be marked as active." + ); + + // Check that the extra data was stored. 
+ let experiment_data = glean.test_get_experiment_data_as_json(experiment_id.clone()); + assert!( + experiment_data.is_some(), + "Experiment data must be available" + ); + + let parsed_data: RecordedExperimentData = + ::serde_json::from_str(&experiment_data.unwrap()).unwrap(); + assert_eq!(parsed_data.extra.unwrap(), extra); + + // Disable the experiment and check that is no longer available. + glean.set_experiment_inactive(experiment_id.clone()); + assert!( + !glean.test_is_experiment_active(experiment_id), + "The experiment must not be available any more." + ); +} + +#[test] +fn client_id_and_first_run_date_and_first_run_hour_must_be_regenerated() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + { + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + glean.data_store.as_ref().unwrap().clear_all(); + + assert!(glean + .core_metrics + .client_id + .test_get_value(&glean, "glean_client_info") + .is_none()); + assert!(glean + .core_metrics + .first_run_date + .test_get_value_as_string(&glean, "glean_client_info") + .is_none()); + assert!(glean + .core_metrics + .first_run_hour + .test_get_value_as_string(&glean, "metrics") + .is_none()); + } + + { + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + assert!(glean + .core_metrics + .client_id + .test_get_value(&glean, "glean_client_info") + .is_some()); + assert!(glean + .core_metrics + .first_run_date + .test_get_value_as_string(&glean, "glean_client_info") + .is_some()); + assert!(glean + .core_metrics + .first_run_hour + .test_get_value_as_string(&glean, "metrics") + .is_some()); + } +} + +#[test] +fn basic_metrics_should_be_cleared_when_uploading_is_disabled() { + let (mut glean, _t) = new_glean(None); + let metric = StringMetric::new(CommonMetricData::new( + "category", + "string_metric", + "baseline", + )); + + metric.set(&glean, "TEST VALUE"); + assert!(metric.test_get_value(&glean, "baseline").is_some()); + + 
glean.set_upload_enabled(false); + assert!(metric.test_get_value(&glean, "baseline").is_none()); + + metric.set(&glean, "TEST VALUE"); + assert!(metric.test_get_value(&glean, "baseline").is_none()); + + glean.set_upload_enabled(true); + assert!(metric.test_get_value(&glean, "baseline").is_none()); + + metric.set(&glean, "TEST VALUE"); + assert!(metric.test_get_value(&glean, "baseline").is_some()); +} + +#[test] +fn first_run_date_is_managed_correctly_when_toggling_uploading() { + let (mut glean, _) = new_glean(None); + + let original_first_run_date = glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info"); + + glean.set_upload_enabled(false); + assert_eq!( + original_first_run_date, + glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info") + ); + + glean.set_upload_enabled(true); + assert_eq!( + original_first_run_date, + glean + .core_metrics + .first_run_date + .get_value(&glean, "glean_client_info") + ); +} + +#[test] +fn first_run_hour_is_managed_correctly_when_toggling_uploading() { + let (mut glean, _) = new_glean(None); + + let original_first_run_hour = glean + .core_metrics + .first_run_hour + .get_value(&glean, "metrics"); + + glean.set_upload_enabled(false); + assert_eq!( + original_first_run_hour, + glean + .core_metrics + .first_run_hour + .get_value(&glean, "metrics") + ); + + glean.set_upload_enabled(true); + assert_eq!( + original_first_run_hour, + glean + .core_metrics + .first_run_hour + .get_value(&glean, "metrics") + ); +} + +#[test] +fn client_id_is_managed_correctly_when_toggling_uploading() { + let (mut glean, _) = new_glean(None); + + let original_client_id = glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info"); + assert!(original_client_id.is_some()); + assert_ne!(*KNOWN_CLIENT_ID, original_client_id.unwrap()); + + glean.set_upload_enabled(false); + assert_eq!( + *KNOWN_CLIENT_ID, + glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + 
.unwrap() + ); + + glean.set_upload_enabled(true); + let current_client_id = glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info"); + assert!(current_client_id.is_some()); + assert_ne!(*KNOWN_CLIENT_ID, current_client_id.unwrap()); + assert_ne!(original_client_id, current_client_id); +} + +#[test] +fn client_id_is_set_to_known_value_when_uploading_disabled_at_start() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, false); + + assert_eq!( + *KNOWN_CLIENT_ID, + glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info") + .unwrap() + ); +} + +#[test] +fn client_id_is_set_to_random_value_when_uploading_enabled_at_start() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + let current_client_id = glean + .core_metrics + .client_id + .get_value(&glean, "glean_client_info"); + assert!(current_client_id.is_some()); + assert_ne!(*KNOWN_CLIENT_ID, current_client_id.unwrap()); +} + +#[test] +fn enabling_when_already_enabled_is_a_noop() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let mut glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + assert!(!glean.set_upload_enabled(true)); +} + +#[test] +fn disabling_when_already_disabled_is_a_noop() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + let mut glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, false); + + assert!(!glean.set_upload_enabled(false)); +} + +// Test that the enum variants keep a stable discriminant when serialized. +// Discriminant values are taken from a stable ordering from v20.0.0. +// New metrics after that should be added in order. 
+#[test] +#[rustfmt::skip] // Let's not add newlines unnecessary +fn correct_order() { + use histogram::Histogram; + use metrics::{Metric::*, TimeUnit}; + use std::time::Duration; + use util::local_now_with_offset; + + // Extract the discriminant of the serialized value, + // that is: the first 4 bytes. + fn discriminant(metric: &metrics::Metric) -> u32 { + let ser = bincode::serialize(metric).unwrap(); + (ser[0] as u32) + | (ser[1] as u32) << 8 + | (ser[2] as u32) << 16 + | (ser[3] as u32) << 24 + } + + // One of every metric type. The values are arbitrary and don't matter. + let all_metrics = vec![ + Boolean(false), + Counter(0), + CustomDistributionExponential(Histogram::exponential(1, 500, 10)), + CustomDistributionLinear(Histogram::linear(1, 500, 10)), + Datetime(local_now_with_offset(), TimeUnit::Second), + Experiment(RecordedExperimentData { branch: "branch".into(), extra: None, }), + Quantity(0), + String("glean".into()), + StringList(vec!["glean".into()]), + Uuid("082c3e52-0a18-11ea-946f-0fe0c98c361c".into()), + Timespan(Duration::new(5, 0), TimeUnit::Second), + TimingDistribution(Histogram::functional(2.0, 8.0)), + MemoryDistribution(Histogram::functional(2.0, 8.0)), + Jwe("eyJhbGciOiJSU0EtT0FFUCIsImVuYyI6IkEyNTZHQ00ifQ.OKOawDo13gRp2ojaHV7LFpZcgV7T6DVZKTyKOMTYUmKoTCVJRgckCL9kiMT03JGeipsEdY3mx_etLbbWSrFr05kLzcSr4qKAq7YN7e9jwQRb23nfa6c9d-StnImGyFDbSv04uVuxIp5Zms1gNxKKK2Da14B8S4rzVRltdYwam_lDp5XnZAYpQdb76FdIKLaVmqgfwX7XWRxv2322i-vDxRfqNzo_tETKzpVLzfiwQyeyPGLBIO56YJ7eObdv0je81860ppamavo35UgoRdbYaBcoh9QcfylQr66oc6vFWXRcZ_ZT2LawVCWTIy3brGPi6UklfCpIMfIjf7iGdXKHzg.48V1_ALb6US04U3b.5eym8TW_c8SuK0ltJ3rpYIzOeDQz7TALvtu6UG9oMo4vpzs9tX_EFShS8iB7j6jiSdiwkIr3ajwQzaBtQD_A.XFBoMYUZodetZdvTiFvSkQ".into()), + ]; + + for metric in all_metrics { + let disc = discriminant(&metric); + + // DO NOT TOUCH THE EXPECTED VALUE. + // If this test fails because of non-equal discriminants, that is a bug in the code, not + // the test. 
+ + // We're matching here, thus fail the build if new variants are added. + match metric { + Boolean(..) => assert_eq!( 0, disc), + Counter(..) => assert_eq!( 1, disc), + CustomDistributionExponential(..) => assert_eq!( 2, disc), + CustomDistributionLinear(..) => assert_eq!( 3, disc), + Datetime(..) => assert_eq!( 4, disc), + Experiment(..) => assert_eq!( 5, disc), + Quantity(..) => assert_eq!( 6, disc), + String(..) => assert_eq!( 7, disc), + StringList(..) => assert_eq!( 8, disc), + Uuid(..) => assert_eq!( 9, disc), + Timespan(..) => assert_eq!(10, disc), + TimingDistribution(..) => assert_eq!(11, disc), + MemoryDistribution(..) => assert_eq!(12, disc), + Jwe(..) => assert_eq!(13, disc), + } + } +} + +#[test] +#[rustfmt::skip] // Let's not merge lines +fn backwards_compatible_deserialization() { + use std::env; + use std::time::Duration; + use chrono::prelude::*; + use histogram::Histogram; + use metrics::{Metric::*, TimeUnit}; + + // Prepare some data to fill in + let dt = FixedOffset::east(9*3600).ymd(2014, 11, 28).and_hms_nano(21, 45, 59, 12); + + let mut custom_dist_exp = Histogram::exponential(1, 500, 10); + custom_dist_exp.accumulate(10); + + let mut custom_dist_linear = Histogram::linear(1, 500, 10); + custom_dist_linear.accumulate(10); + + let mut time_dist = Histogram::functional(2.0, 8.0); + time_dist.accumulate(10); + + let mut mem_dist = Histogram::functional(2.0, 16.0); + mem_dist.accumulate(10); + + // One of every metric type. The values are arbitrary, but stable. 
+ let all_metrics = vec![ + ( + "boolean", + vec![0, 0, 0, 0, 1], + Boolean(true) + ), + ( + "counter", + vec![1, 0, 0, 0, 20, 0, 0, 0], + Counter(20) + ), + ( + "custom exponential distribution", + vec![2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 244, 1, 0, 0, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0], + CustomDistributionExponential(custom_dist_exp) + ), + ( + "custom linear distribution", + vec![3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 244, 1, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0], + CustomDistributionLinear(custom_dist_linear) + ), + ( + "datetime", + vec![4, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 50, 48, 49, 52, 45, 49, 49, 45, + 50, 56, 84, 50, 49, 58, 52, 53, 58, 53, 57, 46, 48, 48, 48, 48, 48, + 48, 48, 49, 50, 43, 48, 57, 58, 48, 48, 3, 0, 0, 0], + Datetime(dt, TimeUnit::Second), + ), + ( + "experiment", + vec![5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 98, 114, 97, 110, 99, 104, 0], + Experiment(RecordedExperimentData { branch: "branch".into(), extra: None, }), + ), + ( + "quantity", + vec![6, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0], + Quantity(17) + ), + ( + "string", + vec![7, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 103, 108, 101, 97, 110], + String("glean".into()) + ), + ( + "string list", + vec![8, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 103, 108, 101, 97, 110], + StringList(vec!["glean".into()]) + ), + ( + "uuid", + vec![9, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 48, 56, 50, 99, 51, 101, 53, 50, + 45, 48, 97, 49, 56, 45, 49, 49, 101, 97, 45, 57, 52, 54, 102, 45, 48, + 102, 101, 48, 99, 57, 56, 99, 51, 54, 49, 99], + Uuid("082c3e52-0a18-11ea-946f-0fe0c98c361c".into()), + ), + ( + "timespan", + vec![10, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0], + Timespan(Duration::new(5, 0), TimeUnit::Second), + ), + ( + "timing 
distribution", + vec![11, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 123, 81, 125, + 60, 184, 114, 241, 63], + TimingDistribution(time_dist), + ), + ( + "memory distribution", + vec![12, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 15, 137, 249, + 108, 88, 181, 240, 63], + MemoryDistribution(mem_dist), + ), + ]; + + for (name, data, metric) in all_metrics { + // Helper to print serialization data if instructed by environment variable + // Run with: + // + // ```text + // PRINT_DATA=1 cargo test -p glean-core --lib -- --nocapture backwards + // ``` + // + // This should not be necessary to re-run and change here, unless a bincode upgrade + // requires us to also migrate existing data. + if env::var("PRINT_DATA").is_ok() { + let bindata = bincode::serialize(&metric).unwrap(); + println!("(\n {:?},\n vec!{:?},", name, bindata); + } else { + // Otherwise run the test + let deserialized = bincode::deserialize(&data).unwrap(); + if let CustomDistributionExponential(hist) = &deserialized { + hist.snapshot_values(); // Force initialization of the ranges + } + if let CustomDistributionLinear(hist) = &deserialized { + hist.snapshot_values(); // Force initialization of the ranges + } + + assert_eq!( + metric, deserialized, + "Expected properly deserialized {}", + name + ); + } + } +} + +#[test] +fn test_first_run() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + { + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + // Check that this is indeed the first run. + assert!(glean.is_first_run()); + } + + { + // Other runs must be not marked as "first run". 
+ let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + assert!(!glean.is_first_run()); + } +} + +#[test] +fn test_dirty_bit() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + { + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + // The dirty flag must not be set the first time Glean runs. + assert!(!glean.is_dirty_flag_set()); + + // Set the dirty flag and check that it gets correctly set. + glean.set_dirty_flag(true); + assert!(glean.is_dirty_flag_set()); + } + + { + // Check that next time Glean runs, it correctly picks up the "dirty flag". + // It is expected to be 'true'. + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + assert!(glean.is_dirty_flag_set()); + + // Set the dirty flag to false. + glean.set_dirty_flag(false); + assert!(!glean.is_dirty_flag_set()); + } + + { + // Check that next time Glean runs, it correctly picks up the "dirty flag". + // It is expected to be 'false'. + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + assert!(!glean.is_dirty_flag_set()); + } +} + +#[test] +fn test_change_metric_type_runtime() { + let dir = tempfile::tempdir().unwrap(); + + let (glean, _) = new_glean(Some(dir)); + + // We attempt to create two metrics: one with a 'string' type and the other + // with a 'timespan' type, both being sent in the same pings and having the + // same lifetime. 
+ let metric_name = "type_swap"; + let metric_category = "test"; + let metric_lifetime = Lifetime::Ping; + let ping_name = "store1"; + + let string_metric = StringMetric::new(CommonMetricData { + name: metric_name.into(), + category: metric_category.into(), + send_in_pings: vec![ping_name.into()], + disabled: false, + lifetime: metric_lifetime, + ..Default::default() + }); + + let string_value = "definitely-a-string!"; + string_metric.set(&glean, string_value); + + assert_eq!( + string_metric.test_get_value(&glean, ping_name).unwrap(), + string_value, + "Expected properly deserialized string" + ); + + let mut timespan_metric = TimespanMetric::new( + CommonMetricData { + name: metric_name.into(), + category: metric_category.into(), + send_in_pings: vec![ping_name.into()], + disabled: false, + lifetime: metric_lifetime, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let duration = 60; + timespan_metric.set_start(&glean, 0); + timespan_metric.set_stop(&glean, duration); + + assert_eq!( + timespan_metric.test_get_value(&glean, ping_name).unwrap(), + 60, + "Expected properly deserialized time" + ); + + // We expect old data to be lost forever. See the following bug comment + // https://bugzilla.mozilla.org/show_bug.cgi?id=1621757#c1 for more context. 
+ assert_eq!(None, string_metric.test_get_value(&glean, ping_name)); +} + +#[test] +fn timing_distribution_truncation() { + let dir = tempfile::tempdir().unwrap(); + + let (glean, _) = new_glean(Some(dir)); + let max_sample_time = 1000 * 1000 * 1000 * 60 * 10; + + for (unit, expected_keys) in &[ + ( + TimeUnit::Nanosecond, + HashSet::<u64>::from_iter(vec![961_548, 939, 599_512_966_122, 1]), + ), + ( + TimeUnit::Microsecond, + HashSet::<u64>::from_iter(vec![939, 562_949_953_421_318, 599_512_966_122, 961_548]), + ), + ( + TimeUnit::Millisecond, + HashSet::<u64>::from_iter(vec![ + 961_548, + 576_460_752_303_431_040, + 599_512_966_122, + 562_949_953_421_318, + ]), + ), + ] { + let mut dist = TimingDistributionMetric::new( + CommonMetricData { + name: format!("local_metric_{:?}", unit), + category: "local".into(), + send_in_pings: vec!["baseline".into()], + ..Default::default() + }, + *unit, + ); + + for &value in &[ + 1, + 1_000, + 1_000_000, + max_sample_time, + max_sample_time * 1_000, + max_sample_time * 1_000_000, + ] { + let timer_id = dist.set_start(0); + dist.set_stop_and_accumulate(&glean, timer_id, value); + } + + let snapshot = dist.test_get_value(&glean, "baseline").unwrap(); + + let mut keys = HashSet::new(); + let mut recorded_values = 0; + + for (&key, &value) in &snapshot.values { + // A snapshot potentially includes buckets with a 0 count. + // We can ignore them here. + if value > 0 { + assert!(key < max_sample_time * unit.as_nanos(1)); + keys.insert(key); + recorded_values += 1; + } + } + + assert_eq!(4, recorded_values); + assert_eq!(keys, *expected_keys); + + // The number of samples was originally designed around 1ns to + // 10minutes, with 8 steps per power of 2, which works out to 316 items. + // This is to ensure that holds even when the time unit is changed. 
+ assert!(snapshot.values.len() < 316); + } +} + +#[test] +fn timing_distribution_truncation_accumulate() { + let dir = tempfile::tempdir().unwrap(); + + let (glean, _) = new_glean(Some(dir)); + let max_sample_time = 1000 * 1000 * 1000 * 60 * 10; + + for &unit in &[ + TimeUnit::Nanosecond, + TimeUnit::Microsecond, + TimeUnit::Millisecond, + ] { + let mut dist = TimingDistributionMetric::new( + CommonMetricData { + name: format!("local_metric_{:?}", unit), + category: "local".into(), + send_in_pings: vec!["baseline".into()], + ..Default::default() + }, + unit, + ); + + dist.accumulate_samples_signed( + &glean, + vec![ + 1, + 1_000, + 1_000_000, + max_sample_time, + max_sample_time * 1_000, + max_sample_time * 1_000_000, + ], + ); + + let snapshot = dist.test_get_value(&glean, "baseline").unwrap(); + + // The number of samples was originally designed around 1ns to + // 10minutes, with 8 steps per power of 2, which works out to 316 items. + // This is to ensure that holds even when the time unit is changed. 
+ assert!(snapshot.values.len() < 316); + } +} + +#[test] +fn test_setting_debug_view_tag() { + let dir = tempfile::tempdir().unwrap(); + + let (mut glean, _) = new_glean(Some(dir)); + + let valid_tag = "valid-tag"; + assert_eq!(true, glean.set_debug_view_tag(valid_tag)); + assert_eq!(valid_tag, glean.debug_view_tag().unwrap()); + + let invalid_tag = "invalid tag"; + assert_eq!(false, glean.set_debug_view_tag(invalid_tag)); + assert_eq!(valid_tag, glean.debug_view_tag().unwrap()); +} + +#[test] +fn test_setting_log_pings() { + let dir = tempfile::tempdir().unwrap(); + + let (mut glean, _) = new_glean(Some(dir)); + assert!(!glean.log_pings()); + + glean.set_log_pings(true); + assert!(glean.log_pings()); + + glean.set_log_pings(false); + assert!(!glean.log_pings()); +} + +#[test] +#[should_panic] +fn test_empty_application_id() { + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + + let glean = Glean::with_options(&tmpname, "", true); + // Check that this is indeed the first run. + assert!(glean.is_first_run()); +} + +#[test] +fn records_database_file_size() { + let _ = env_logger::builder().is_test(true).try_init(); + + // Note: We don't use `new_glean` because we need to re-use the database directory. + + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + + // Initialize Glean once to ensure we create the database. + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + let database_size = &glean.database_metrics.size; + let data = database_size.test_get_value(&glean, "metrics"); + assert!(data.is_none()); + drop(glean); + + // Initialize Glean again to record file size. 
+ let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true); + + let database_size = &glean.database_metrics.size; + let data = database_size.test_get_value(&glean, "metrics"); + assert!(data.is_some()); + let data = data.unwrap(); + + // We should see the database containing some data. + assert!(data.sum > 0); +} + +#[test] +fn records_io_errors() { + use std::fs; + let _ = env_logger::builder().is_test(true).try_init(); + + let (glean, _data_dir) = new_glean(None); + let pending_pings_dir = glean.get_data_path().join(crate::PENDING_PINGS_DIRECTORY); + fs::create_dir_all(&pending_pings_dir).unwrap(); + let attr = fs::metadata(&pending_pings_dir).unwrap(); + let original_permissions = attr.permissions(); + + // Remove write permissions on the pending_pings directory. + let mut permissions = original_permissions.clone(); + permissions.set_readonly(true); + fs::set_permissions(&pending_pings_dir, permissions).unwrap(); + + // Writing the ping file should fail. + let submitted = glean.internal_pings.metrics.submit(&glean, None); + assert!(submitted.is_err()); + + let metric = &glean.core_metrics.io_errors; + assert_eq!( + 1, + metric.test_get_value(&glean, "metrics").unwrap(), + "Should have recorded an IO error" + ); + + // Restore write permissions. + fs::set_permissions(&pending_pings_dir, original_permissions).unwrap(); + + // Now we can submit a ping + let submitted = glean.internal_pings.metrics.submit(&glean, None); + assert!(submitted.is_ok()); +} diff --git a/third_party/rust/glean-core/src/macros.rs b/third_party/rust/glean-core/src/macros.rs new file mode 100644 index 0000000000..9fdcd2380e --- /dev/null +++ b/third_party/rust/glean-core/src/macros.rs @@ -0,0 +1,22 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![macro_use] + +//! Utility macros used in this crate. 
+ +/// Unwrap a `Result`s `Ok` value or do the specified action. +/// +/// This is an alternative to the question-mark operator (`?`), +/// when the other action should not be to return the error. +macro_rules! unwrap_or { + ($expr:expr, $or:expr) => { + match $expr { + Ok(x) => x, + Err(_) => { + $or; + } + } + }; +} diff --git a/third_party/rust/glean-core/src/metrics/boolean.rs b/third_party/rust/glean-core/src/metrics/boolean.rs new file mode 100644 index 0000000000..b434594781 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/boolean.rs @@ -0,0 +1,70 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// A boolean metric. +/// +/// Records a simple flag. +#[derive(Clone, Debug)] +pub struct BooleanMetric { + meta: CommonMetricData, +} + +impl MetricType for BooleanMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl BooleanMetric { + /// Creates a new boolean metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { meta } + } + + /// Sets to the specified boolean value. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `value` - the value to set. + pub fn set(&self, glean: &Glean, value: bool) { + if !self.should_record(glean) { + return; + } + + let value = Metric::Boolean(value); + glean.storage().record(glean, &self.meta, &value) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a boolean. 
+ /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<bool> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Boolean(b)) => Some(b), + _ => None, + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/counter.rs b/third_party/rust/glean-core/src/metrics/counter.rs new file mode 100644 index 0000000000..af762071be --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/counter.rs @@ -0,0 +1,93 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// A counter metric. +/// +/// Used to count things. +/// The value can only be incremented, not decremented. +#[derive(Clone, Debug)] +pub struct CounterMetric { + meta: CommonMetricData, +} + +impl MetricType for CounterMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl CounterMetric { + /// Creates a new counter metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { meta } + } + + /// Increases the counter by `amount`. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `amount` - The amount to increase by. Should be positive. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is 0 or negative. 
+ pub fn add(&self, glean: &Glean, amount: i32) { + if !self.should_record(glean) { + return; + } + + if amount <= 0 { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + format!("Added negative or zero value {}", amount), + None, + ); + return; + } + + glean + .storage() + .record_with(glean, &self.meta, |old_value| match old_value { + Some(Metric::Counter(old_value)) => { + Metric::Counter(old_value.saturating_add(amount)) + } + _ => Metric::Counter(amount), + }) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<i32> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Counter(i)) => Some(i), + _ => None, + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/custom_distribution.rs b/third_party/rust/glean-core/src/metrics/custom_distribution.rs new file mode 100644 index 0000000000..f015d1cd75 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/custom_distribution.rs @@ -0,0 +1,187 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::error_recording::{record_error, ErrorType}; +use crate::histogram::{Bucketing, Histogram, HistogramType}; +use crate::metrics::{DistributionData, Metric, MetricType}; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// A custom distribution metric. +/// +/// Memory distributions are used to accumulate and store memory sizes. 
+#[derive(Debug)] +pub struct CustomDistributionMetric { + meta: CommonMetricData, + range_min: u64, + range_max: u64, + bucket_count: u64, + histogram_type: HistogramType, +} + +/// Create a snapshot of the histogram. +/// +/// The snapshot can be serialized into the payload format. +pub(crate) fn snapshot<B: Bucketing>(hist: &Histogram<B>) -> DistributionData { + DistributionData { + values: hist.snapshot_values(), + sum: hist.sum(), + } +} + +impl MetricType for CustomDistributionMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl CustomDistributionMetric { + /// Creates a new memory distribution metric. + pub fn new( + meta: CommonMetricData, + range_min: u64, + range_max: u64, + bucket_count: u64, + histogram_type: HistogramType, + ) -> Self { + Self { + meta, + range_min, + range_max, + bucket_count, + histogram_type, + } + } + + /// Accumulates the provided signed samples in the metric. + /// + /// This is required so that the platform-specific code can provide us with + /// 64 bit signed integers if no `u64` comparable type is available. This + /// will take care of filtering and reporting errors for any provided negative + /// sample. + /// + /// # Arguments + /// + /// - `samples` - The vector holding the samples to be recorded by the metric. + /// + /// ## Notes + /// + /// Discards any negative value in `samples` and report an [`ErrorType::InvalidValue`] + /// for each of them. + pub fn accumulate_samples_signed(&self, glean: &Glean, samples: Vec<i64>) { + if !self.should_record(glean) { + return; + } + + let mut num_negative_samples = 0; + + // Generic accumulation function to handle the different histogram types and count negative + // samples. 
+ fn accumulate<B: Bucketing, F>( + samples: &[i64], + mut hist: Histogram<B>, + metric: F, + ) -> (i32, Metric) + where + F: Fn(Histogram<B>) -> Metric, + { + let mut num_negative_samples = 0; + for &sample in samples.iter() { + if sample < 0 { + num_negative_samples += 1; + } else { + let sample = sample as u64; + hist.accumulate(sample); + } + } + (num_negative_samples, metric(hist)) + } + + glean.storage().record_with(glean, &self.meta, |old_value| { + let (num_negative, hist) = match self.histogram_type { + HistogramType::Linear => { + let hist = if let Some(Metric::CustomDistributionLinear(hist)) = old_value { + hist + } else { + Histogram::linear( + self.range_min, + self.range_max, + self.bucket_count as usize, + ) + }; + accumulate(&samples, hist, Metric::CustomDistributionLinear) + } + HistogramType::Exponential => { + let hist = if let Some(Metric::CustomDistributionExponential(hist)) = old_value + { + hist + } else { + Histogram::exponential( + self.range_min, + self.range_max, + self.bucket_count as usize, + ) + }; + accumulate(&samples, hist, Metric::CustomDistributionExponential) + } + }; + + num_negative_samples = num_negative; + hist + }); + + if num_negative_samples > 0 { + let msg = format!("Accumulated {} negative samples", num_negative_samples); + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + msg, + num_negative_samples, + ); + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored histogram. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<DistributionData> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + // Boxing the value, in order to return either of the possible buckets + Some(Metric::CustomDistributionExponential(hist)) => Some(snapshot(&hist)), + Some(Metric::CustomDistributionLinear(hist)) => Some(snapshot(&hist)), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored histogram as a JSON String of the serialized value. + /// + /// This doesn't clear the stored value. + pub fn test_get_value_as_json_string( + &self, + glean: &Glean, + storage_name: &str, + ) -> Option<String> { + self.test_get_value(glean, storage_name) + .map(|snapshot| serde_json::to_string(&snapshot).unwrap()) + } +} diff --git a/third_party/rust/glean-core/src/metrics/datetime.rs b/third_party/rust/glean-core/src/metrics/datetime.rs new file mode 100644 index 0000000000..7020f9d99e --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/datetime.rs @@ -0,0 +1,224 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(clippy::too_many_arguments)] + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::time_unit::TimeUnit; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::{get_iso_time_string, local_now_with_offset}; +use crate::CommonMetricData; +use crate::Glean; + +use chrono::{DateTime, FixedOffset, TimeZone, Timelike}; + +/// A datetime type. +/// +/// Used to feed data to the `DatetimeMetric`. +pub type Datetime = DateTime<FixedOffset>; + +/// A datetime metric. 
+/// +/// Used to record an absolute date and time, such as the time the user first ran +/// the application. +#[derive(Debug)] +pub struct DatetimeMetric { + meta: CommonMetricData, + time_unit: TimeUnit, +} + +impl MetricType for DatetimeMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl DatetimeMetric { + /// Creates a new datetime metric. + pub fn new(meta: CommonMetricData, time_unit: TimeUnit) -> Self { + Self { meta, time_unit } + } + + /// Sets the metric to a date/time including the timezone offset. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `year` - the year to set the metric to. + /// * `month` - the month to set the metric to (1-12). + /// * `day` - the day to set the metric to (1-based). + /// * `hour` - the hour to set the metric to. + /// * `minute` - the minute to set the metric to. + /// * `second` - the second to set the metric to. + /// * `nano` - the nanosecond fraction to the last whole second. + /// * `offset_seconds` - the timezone difference, in seconds, for the Eastern + /// Hemisphere. Negative seconds mean Western Hemisphere. + pub fn set_with_details( + &self, + glean: &Glean, + year: i32, + month: u32, + day: u32, + hour: u32, + minute: u32, + second: u32, + nano: u32, + offset_seconds: i32, + ) { + if !self.should_record(glean) { + return; + } + + let timezone_offset = FixedOffset::east_opt(offset_seconds); + if timezone_offset.is_none() { + let msg = format!("Invalid timezone offset {}. 
Not recording.", offset_seconds); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + return; + }; + + let datetime_obj = FixedOffset::east(offset_seconds) + .ymd_opt(year, month, day) + .and_hms_nano_opt(hour, minute, second, nano); + + match datetime_obj.single() { + Some(d) => self.set(glean, Some(d)), + _ => { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + "Invalid input data. Not recording.", + None, + ); + } + } + } + + /// Sets the metric to a date/time which including the timezone offset. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `value` - Some [`DateTime`] value, with offset, to set the metric to. + /// If none, the current local time is used. + pub fn set(&self, glean: &Glean, value: Option<Datetime>) { + if !self.should_record(glean) { + return; + } + + let value = value.unwrap_or_else(local_now_with_offset); + let value = Metric::Datetime(value, self.time_unit); + glean.storage().record(glean, &self.meta, &value) + } + + /// Gets the stored datetime value. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `storage_name` - the storage name to look into. + /// + /// # Returns + /// + /// The stored value or `None` if nothing stored. + pub(crate) fn get_value(&self, glean: &Glean, storage_name: &str) -> Option<Datetime> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Datetime(dt, _)) => Some(dt), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the stored datetime value. + /// + /// The precision of this value is truncated to the `time_unit` precision. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `storage_name` - the storage name to look into. 
+ /// + /// # Returns + /// + /// The stored value or `None` if nothing stored. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<Datetime> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Datetime(d, tu)) => { + // The string version of the test function truncates using string + // parsing. Unfortunately `parse_from_str` errors with `NotEnough` if we + // try to truncate with `get_iso_time_string` and then parse it back + // in a `Datetime`. So we need to truncate manually. + let time = d.time(); + match tu { + TimeUnit::Nanosecond => d.date().and_hms_nano_opt( + time.hour(), + time.minute(), + time.second(), + time.nanosecond(), + ), + TimeUnit::Microsecond => d.date().and_hms_nano_opt( + time.hour(), + time.minute(), + time.second(), + time.nanosecond() / 1000, + ), + TimeUnit::Millisecond => d.date().and_hms_nano_opt( + time.hour(), + time.minute(), + time.second(), + time.nanosecond() / 1000000, + ), + TimeUnit::Second => { + d.date() + .and_hms_nano_opt(time.hour(), time.minute(), time.second(), 0) + } + TimeUnit::Minute => d.date().and_hms_nano_opt(time.hour(), time.minute(), 0, 0), + TimeUnit::Hour => d.date().and_hms_nano_opt(time.hour(), 0, 0, 0), + TimeUnit::Day => d.date().and_hms_nano_opt(0, 0, 0, 0), + } + } + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a String. + /// + /// The precision of this value is truncated to the `time_unit` precision. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value_as_string(&self, glean: &Glean, storage_name: &str) -> Option<String> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Datetime(d, tu)) => Some(get_iso_time_string(d, tu)), + _ => None, + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/event.rs b/third_party/rust/glean-core/src/metrics/event.rs new file mode 100644 index 0000000000..69251c30bd --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/event.rs @@ -0,0 +1,139 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::HashMap; + +use serde_json::{json, Value as JsonValue}; + +use crate::error_recording::{record_error, ErrorType}; +use crate::event_database::RecordedEvent; +use crate::metrics::MetricType; +use crate::util::truncate_string_at_boundary_with_error; +use crate::CommonMetricData; +use crate::Glean; + +const MAX_LENGTH_EXTRA_KEY_VALUE: usize = 100; + +/// An event metric. +/// +/// Events allow recording of e.g. individual occurences of user actions, say +/// every time a view was open and from where. Each time you record an event, it +/// records a timestamp, the event's name and a set of custom values. +#[derive(Clone, Debug)] +pub struct EventMetric { + meta: CommonMetricData, + allowed_extra_keys: Vec<String>, +} + +impl MetricType for EventMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl EventMetric { + /// Creates a new event metric. 
+ pub fn new(meta: CommonMetricData, allowed_extra_keys: Vec<String>) -> Self { + Self { + meta, + allowed_extra_keys, + } + } + + /// Records an event. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `timestamp` - A monotonically increasing timestamp, in milliseconds. + /// This must be provided since the actual recording of the event may + /// happen some time later than the moment the event occurred. + /// * `extra` - A [`HashMap`] of (key, value) pairs. The key is an index into + /// the metric's `allowed_extra_keys` vector where the key's string is + /// looked up. If any key index is out of range, an error is reported and + /// no event is recorded. + pub fn record<M: Into<Option<HashMap<i32, String>>>>( + &self, + glean: &Glean, + timestamp: u64, + extra: M, + ) { + if !self.should_record(glean) { + return; + } + + let extra = extra.into(); + let extra_strings: Option<HashMap<String, String>> = if let Some(extra) = extra { + if extra.is_empty() { + None + } else { + let mut extra_strings = HashMap::new(); + for (k, v) in extra.into_iter() { + match self.allowed_extra_keys.get(k as usize) { + Some(k) => extra_strings.insert( + k.to_string(), + truncate_string_at_boundary_with_error( + glean, + &self.meta, + v, + MAX_LENGTH_EXTRA_KEY_VALUE, + ), + ), + None => { + let msg = format!("Invalid key index {}", k); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + return; + } + }; + } + Some(extra_strings) + } + } else { + None + }; + + glean + .event_storage() + .record(glean, &self.meta, timestamp, extra_strings); + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Tests whether there are currently stored events for this event metric. + /// + /// This doesn't clear the stored value. 
+ pub fn test_has_value(&self, glean: &Glean, store_name: &str) -> bool { + glean.event_storage().test_has_value(&self.meta, store_name) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Get the vector of currently stored events for this event metric. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, store_name: &str) -> Option<Vec<RecordedEvent>> { + glean.event_storage().test_get_value(&self.meta, store_name) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored events for this event metric as a JSON-encoded string. + /// + /// This doesn't clear the stored value. + pub fn test_get_value_as_json_string(&self, glean: &Glean, store_name: &str) -> String { + match self.test_get_value(glean, store_name) { + Some(value) => json!(value), + None => json!(JsonValue::Null), + } + .to_string() + } +} diff --git a/third_party/rust/glean-core/src/metrics/experiment.rs b/third_party/rust/glean-core/src/metrics/experiment.rs new file mode 100644 index 0000000000..5cf2139b05 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/experiment.rs @@ -0,0 +1,291 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use serde::{Deserialize, Serialize}; +use serde_json::{json, Map as JsonMap, Value as JsonValue}; +use std::collections::HashMap; + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::{truncate_string_at_boundary, truncate_string_at_boundary_with_error}; +use crate::CommonMetricData; +use crate::Glean; +use crate::Lifetime; +use crate::INTERNAL_STORAGE; + +/// The maximum length of the experiment id, the branch id, and the keys of the +/// `extra` map. 
Identifiers longer than this number of characters are truncated. +const MAX_EXPERIMENTS_IDS_LEN: usize = 100; +/// The maximum length of the experiment `extra` values. Values longer than this +/// limit will be truncated. +const MAX_EXPERIMENT_VALUE_LEN: usize = MAX_EXPERIMENTS_IDS_LEN; +/// The maximum number of extras allowed in the `extra` hash map. Any items added +/// beyond this limit will be dropped. Note that truncation of a hash map is +/// nondeterministic in which items are truncated. +const MAX_EXPERIMENTS_EXTRAS_SIZE: usize = 20; + +/// The data for a single experiment. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct RecordedExperimentData { + pub branch: String, + pub extra: Option<HashMap<String, String>>, +} + +impl RecordedExperimentData { + /// Gets the recorded experiment data as a JSON value. + /// + /// For JSON, we don't want to include `{"extra": null}` -- we just want to skip + /// `extra` entirely. Unfortunately, we can't use a serde field annotation for this, + /// since that would break bincode serialization, which doesn't support skipping + /// fields. Therefore, we use a custom serialization function just for JSON here. + pub fn as_json(&self) -> JsonValue { + let mut value = JsonMap::new(); + value.insert("branch".to_string(), json!(self.branch)); + if self.extra.is_some() { + value.insert("extra".to_string(), json!(self.extra)); + } + JsonValue::Object(value) + } +} + +/// An experiment metric. +/// +/// Used to store active experiments. +/// This is used through the `set_experiment_active`/`set_experiment_inactive` Glean SDK API. +#[derive(Clone, Debug)] +pub struct ExperimentMetric { + meta: CommonMetricData, +} + +impl MetricType for ExperimentMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +impl ExperimentMetric { + /// Creates a new experiment metric. 
+ /// + /// # Arguments + /// + /// * `id` - the id of the experiment. Please note that this will be + /// truncated to `MAX_EXPERIMENTS_IDS_LEN`, if needed. + pub fn new(glean: &Glean, id: String) -> Self { + let mut error = None; + + // Make sure that experiment id is within the expected limit. + let truncated_id = if id.len() > MAX_EXPERIMENTS_IDS_LEN { + let msg = format!( + "Value length {} for experiment id exceeds maximum of {}", + id.len(), + MAX_EXPERIMENTS_IDS_LEN + ); + error = Some(msg); + truncate_string_at_boundary(id, MAX_EXPERIMENTS_IDS_LEN) + } else { + id + }; + + let new_experiment = Self { + meta: CommonMetricData { + name: format!("{}#experiment", truncated_id), + // We don't need a category, the name is already unique + category: "".into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::Application, + ..Default::default() + }, + }; + + // Check for a truncation error to record + if let Some(msg) = error { + record_error( + glean, + &new_experiment.meta, + ErrorType::InvalidValue, + msg, + None, + ); + } + + new_experiment + } + + /// Records an experiment as active. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `branch` - the active branch of the experiment. Please note that this will be + /// truncated to `MAX_EXPERIMENTS_IDS_LEN`, if needed. + /// * `extra` - an optional, user defined String to String map used to provide richer + /// experiment context if needed. + pub fn set_active( + &self, + glean: &Glean, + branch: String, + extra: Option<HashMap<String, String>>, + ) { + if !self.should_record(glean) { + return; + } + + // Make sure that branch id is within the expected limit. 
+ let truncated_branch = if branch.len() > MAX_EXPERIMENTS_IDS_LEN { + truncate_string_at_boundary_with_error( + glean, + &self.meta, + branch, + MAX_EXPERIMENTS_IDS_LEN, + ) + } else { + branch + }; + + // Apply limits to extras + let truncated_extras = extra.map(|extra| { + if extra.len() > MAX_EXPERIMENTS_EXTRAS_SIZE { + let msg = format!( + "Extra hash map length {} exceeds maximum of {}", + extra.len(), + MAX_EXPERIMENTS_EXTRAS_SIZE + ); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + } + + let mut temp_map = HashMap::new(); + for (key, value) in extra.into_iter().take(MAX_EXPERIMENTS_EXTRAS_SIZE) { + let truncated_key = if key.len() > MAX_EXPERIMENTS_IDS_LEN { + truncate_string_at_boundary_with_error( + glean, + &self.meta, + key, + MAX_EXPERIMENTS_IDS_LEN, + ) + } else { + key + }; + let truncated_value = if value.len() > MAX_EXPERIMENT_VALUE_LEN { + truncate_string_at_boundary_with_error( + glean, + &self.meta, + value, + MAX_EXPERIMENT_VALUE_LEN, + ) + } else { + value + }; + + temp_map.insert(truncated_key, truncated_value); + } + temp_map + }); + + let value = Metric::Experiment(RecordedExperimentData { + branch: truncated_branch, + extra: truncated_extras, + }); + glean.storage().record(glean, &self.meta, &value) + } + + /// Records an experiment as inactive. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + pub fn set_inactive(&self, glean: &Glean) { + if !self.should_record(glean) { + return; + } + + if let Err(e) = glean.storage().remove_single_metric( + Lifetime::Application, + INTERNAL_STORAGE, + &self.meta.name, + ) { + log::error!("Failed to set experiment as inactive: {:?}", e); + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored experiment data as a JSON representation of + /// the RecordedExperimentData. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value_as_json_string(&self, glean: &Glean) -> Option<String> { + match StorageManager.snapshot_metric( + glean.storage(), + INTERNAL_STORAGE, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Experiment(e)) => Some(json!(e).to_string()), + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn stable_serialization() { + let experiment_empty = RecordedExperimentData { + branch: "branch".into(), + extra: None, + }; + + let mut data = HashMap::new(); + data.insert("a key".to_string(), "a value".to_string()); + let experiment_data = RecordedExperimentData { + branch: "branch".into(), + extra: Some(data), + }; + + let experiment_empty_bin = bincode::serialize(&experiment_empty).unwrap(); + let experiment_data_bin = bincode::serialize(&experiment_data).unwrap(); + + assert_eq!( + experiment_empty, + bincode::deserialize(&experiment_empty_bin).unwrap() + ); + assert_eq!( + experiment_data, + bincode::deserialize(&experiment_data_bin).unwrap() + ); + } + + #[test] + #[rustfmt::skip] // Let's not add newlines unnecessary + fn deserialize_old_encoding() { + // generated by `bincode::serialize` as of Glean commit ac27fceb7c0d5a7288d7d569e8c5c5399a53afb2 + // empty was generated from: `RecordedExperimentData { branch: "branch".into(), extra: None, }` + let empty_bin = vec![6, 0, 0, 0, 0, 0, 0, 0, 98, 114, 97, 110, 99, 104]; + // data was generated from: RecordedExperimentData { branch: "branch".into(), extra: Some({"a key": "a value"}), }; + let data_bin = vec![6, 0, 0, 0, 0, 0, 0, 0, 98, 114, 97, 110, 99, 104, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, + 97, 32, 107, 101, 121, 7, 0, 0, 0, 0, 0, 0, 0, 97, + 32, 118, 97, 108, 117, 101]; + + + let mut data = HashMap::new(); + data.insert("a key".to_string(), "a value".to_string()); + let experiment_data = RecordedExperimentData { branch: "branch".into(), extra: Some(data), }; + + // We can't actually decode old experiment data. 
+ // Luckily Glean did store experiments in the database before commit ac27fceb7c0d5a7288d7d569e8c5c5399a53afb2. + let experiment_empty: Result<RecordedExperimentData, _> = bincode::deserialize(&empty_bin); + assert!(experiment_empty.is_err()); + + assert_eq!(experiment_data, bincode::deserialize(&data_bin).unwrap()); + } +} diff --git a/third_party/rust/glean-core/src/metrics/jwe.rs b/third_party/rust/glean-core/src/metrics/jwe.rs new file mode 100644 index 0000000000..f054275a59 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/jwe.rs @@ -0,0 +1,473 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::fmt; +use std::str::FromStr; + +use serde::Serialize; + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::{Metric, MetricType}; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +const DEFAULT_MAX_CHARS_PER_VARIABLE_SIZE_ELEMENT: usize = 1024; + +/// Verifies if a string is [`BASE64URL`](https://tools.ietf.org/html/rfc4648#section-5) compliant. +/// +/// As such, the string must match the regex: `[a-zA-Z0-9\-\_]*`. +/// +/// > **Note** As described in the [JWS specification](https://tools.ietf.org/html/rfc7515#section-2), +/// > the BASE64URL encoding used by JWE discards any padding, +/// > that is why we can ignore that for this validation. +/// +/// The regex crate isn't used here because it adds to the binary size, +/// and the Glean SDK doesn't use regular expressions anywhere else. +fn validate_base64url_encoding(value: &str) -> bool { + let mut iter = value.chars(); + + loop { + match iter.next() { + // We are done, so the whole expression is valid. + None => return true, + // Valid characters. + Some('_') | Some('-') | Some('a'..='z') | Some('A'..='Z') | Some('0'..='9') => (), + // An invalid character. 
+ Some(_) => return false, + } + } +} + +/// Representation of a [JWE](https://tools.ietf.org/html/rfc7516). +/// +/// **Note** Variable sized elements will be constrained to a length of DEFAULT_MAX_CHARS_PER_VARIABLE_SIZE_ELEMENT, +/// this is a constraint introduced by Glean to prevent abuses and not part of the spec. +#[derive(Serialize)] +struct Jwe { + /// A variable-size JWE protected header. + header: String, + /// A variable-size [encrypted key](https://tools.ietf.org/html/rfc7516#appendix-A.1.3). + /// This can be an empty octet sequence. + key: String, + /// A fixed-size, 96-bit, base64 encoded [JWE Initialization vector](https://tools.ietf.org/html/rfc7516#appendix-A.1.4) (e.g. “48V1_ALb6US04U3b”). + /// If not required by the encryption algorithm, can be an empty octet sequence. + init_vector: String, + /// The variable-size base64 encoded cipher text. + cipher_text: String, + /// A fixed-size, 132-bit, base64 encoded authentication tag. + /// Can be an empty octet sequence. + auth_tag: String, +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl Jwe { + /// Create a new JWE struct. 
+ fn new<S: Into<String>>( + header: S, + key: S, + init_vector: S, + cipher_text: S, + auth_tag: S, + ) -> Result<Self, (ErrorType, String)> { + let mut header = header.into(); + header = Self::validate_non_empty("header", header)?; + header = Self::validate_max_size("header", header)?; + header = Self::validate_base64url_encoding("header", header)?; + + let mut key = key.into(); + key = Self::validate_max_size("key", key)?; + key = Self::validate_base64url_encoding("key", key)?; + + let mut init_vector = init_vector.into(); + init_vector = Self::validate_fixed_size_or_empty("init_vector", init_vector, 96)?; + init_vector = Self::validate_base64url_encoding("init_vector", init_vector)?; + + let mut cipher_text = cipher_text.into(); + cipher_text = Self::validate_non_empty("cipher_text", cipher_text)?; + cipher_text = Self::validate_max_size("cipher_text", cipher_text)?; + cipher_text = Self::validate_base64url_encoding("cipher_text", cipher_text)?; + + let mut auth_tag = auth_tag.into(); + auth_tag = Self::validate_fixed_size_or_empty("auth_tag", auth_tag, 128)?; + auth_tag = Self::validate_base64url_encoding("auth_tag", auth_tag)?; + + Ok(Self { + header, + key, + init_vector, + cipher_text, + auth_tag, + }) + } + + fn validate_base64url_encoding( + name: &str, + value: String, + ) -> Result<String, (ErrorType, String)> { + if !validate_base64url_encoding(&value) { + return Err(( + ErrorType::InvalidValue, + format!("`{}` element in JWE value is not valid BASE64URL.", name), + )); + } + + Ok(value) + } + + fn validate_non_empty(name: &str, value: String) -> Result<String, (ErrorType, String)> { + if value.is_empty() { + return Err(( + ErrorType::InvalidValue, + format!("`{}` element in JWE value must not be empty.", name), + )); + } + + Ok(value) + } + + fn validate_max_size(name: &str, value: String) -> Result<String, (ErrorType, String)> { + if value.len() > DEFAULT_MAX_CHARS_PER_VARIABLE_SIZE_ELEMENT { + return Err(( + ErrorType::InvalidOverflow, + format!( + 
"`{}` element in JWE value must not exceed {} characters.", + name, DEFAULT_MAX_CHARS_PER_VARIABLE_SIZE_ELEMENT + ), + )); + } + + Ok(value) + } + + fn validate_fixed_size_or_empty( + name: &str, + value: String, + size_in_bits: usize, + ) -> Result<String, (ErrorType, String)> { + // Each Base64 digit represents exactly 6 bits of data. + // By dividing the size_in_bits by 6 and ceiling the result, + // we get the amount of characters the value should have. + let num_chars = (size_in_bits as f32 / 6f32).ceil() as usize; + if !value.is_empty() && value.len() != num_chars { + return Err(( + ErrorType::InvalidOverflow, + format!( + "`{}` element in JWE value must have exactly {}-bits or be empty.", + name, size_in_bits + ), + )); + } + + Ok(value) + } +} + +/// Trait implementation to convert a JWE [`compact representation`](https://tools.ietf.org/html/rfc7516#appendix-A.2.7) +/// string into a Jwe struct. +impl FromStr for Jwe { + type Err = (ErrorType, String); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let mut elements: Vec<&str> = s.split('.').collect(); + + if elements.len() != 5 { + return Err(( + ErrorType::InvalidValue, + "JWE value is not formatted as expected.".into(), + )); + } + + // Consume the vector extracting each part of the JWE from it. + // + // Safe unwraps, we already defined that the slice has five elements. + let auth_tag = elements.pop().unwrap(); + let cipher_text = elements.pop().unwrap(); + let init_vector = elements.pop().unwrap(); + let key = elements.pop().unwrap(); + let header = elements.pop().unwrap(); + + Self::new(header, key, init_vector, cipher_text, auth_tag) + } +} + +/// Trait implementation to print the Jwe struct as the proper JWE [`compact representation`](https://tools.ietf.org/html/rfc7516#appendix-A.2.7). 
/// A JWE metric.
///
/// This metric works as a "transport" for JWE encrypted data.
///
/// The actual encryption is done somewhere else;
/// Glean only makes sure the data is a valid JWE.
#[derive(Clone, Debug)]
pub struct JweMetric {
    meta: CommonMetricData,
}
+ pub fn set<S: Into<String>>( + &self, + glean: &Glean, + header: S, + key: S, + init_vector: S, + cipher_text: S, + auth_tag: S, + ) { + if !self.should_record(glean) { + return; + } + + match Jwe::new(header, key, init_vector, cipher_text, auth_tag) { + Ok(jwe) => glean + .storage() + .record(glean, &self.meta, &Metric::Jwe(jwe.to_string())), + Err((error_type, msg)) => record_error(glean, &self.meta, error_type, msg, None), + }; + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<String> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Jwe(b)) => Some(b), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored JWE as a JSON String of the serialized value. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value_as_json_string( + &self, + glean: &Glean, + storage_name: &str, + ) -> Option<String> { + self.test_get_value(glean, storage_name).map(|snapshot| { + serde_json::to_string( + &Jwe::from_str(&snapshot).expect("Stored JWE metric should be valid JWE value."), + ) + .unwrap() + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + + const HEADER: &str = "eyJhbGciOiJSU0EtT0FFUCIsImVuYyI6IkEyNTZHQ00ifQ"; + const KEY: &str = "OKOawDo13gRp2ojaHV7LFpZcgV7T6DVZKTyKOMTYUmKoTCVJRgckCL9kiMT03JGeipsEdY3mx_etLbbWSrFr05kLzcSr4qKAq7YN7e9jwQRb23nfa6c9d-StnImGyFDbSv04uVuxIp5Zms1gNxKKK2Da14B8S4rzVRltdYwam_lDp5XnZAYpQdb76FdIKLaVmqgfwX7XWRxv2322i-vDxRfqNzo_tETKzpVLzfiwQyeyPGLBIO56YJ7eObdv0je81860ppamavo35UgoRdbYaBcoh9QcfylQr66oc6vFWXRcZ_ZT2LawVCWTIy3brGPi6UklfCpIMfIjf7iGdXKHzg"; + const INIT_VECTOR: &str = "48V1_ALb6US04U3b"; + const CIPHER_TEXT: &str = + "5eym8TW_c8SuK0ltJ3rpYIzOeDQz7TALvtu6UG9oMo4vpzs9tX_EFShS8iB7j6jiSdiwkIr3ajwQzaBtQD_A"; + const AUTH_TAG: &str = "XFBoMYUZodetZdvTiFvSkQ"; + const JWE: &str = "eyJhbGciOiJSU0EtT0FFUCIsImVuYyI6IkEyNTZHQ00ifQ.OKOawDo13gRp2ojaHV7LFpZcgV7T6DVZKTyKOMTYUmKoTCVJRgckCL9kiMT03JGeipsEdY3mx_etLbbWSrFr05kLzcSr4qKAq7YN7e9jwQRb23nfa6c9d-StnImGyFDbSv04uVuxIp5Zms1gNxKKK2Da14B8S4rzVRltdYwam_lDp5XnZAYpQdb76FdIKLaVmqgfwX7XWRxv2322i-vDxRfqNzo_tETKzpVLzfiwQyeyPGLBIO56YJ7eObdv0je81860ppamavo35UgoRdbYaBcoh9QcfylQr66oc6vFWXRcZ_ZT2LawVCWTIy3brGPi6UklfCpIMfIjf7iGdXKHzg.48V1_ALb6US04U3b.5eym8TW_c8SuK0ltJ3rpYIzOeDQz7TALvtu6UG9oMo4vpzs9tX_EFShS8iB7j6jiSdiwkIr3ajwQzaBtQD_A.XFBoMYUZodetZdvTiFvSkQ"; + + #[test] + fn generates_jwe_from_correct_input() { + let jwe = Jwe::from_str(JWE).unwrap(); + assert_eq!(jwe.header, HEADER); + assert_eq!(jwe.key, KEY); + assert_eq!(jwe.init_vector, INIT_VECTOR); + assert_eq!(jwe.cipher_text, CIPHER_TEXT); + assert_eq!(jwe.auth_tag, AUTH_TAG); + + assert!(Jwe::new(HEADER, KEY, INIT_VECTOR, CIPHER_TEXT, AUTH_TAG).is_ok()); + } + + #[test] + fn jwe_validates_header_value_correctly() { + // When header is 
empty, correct error is returned + match Jwe::new("", KEY, INIT_VECTOR, CIPHER_TEXT, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + + // When header is bigger than max size, correct error is returned + let too_long = (0..1025).map(|_| "X").collect::<String>(); + match Jwe::new( + too_long, + KEY.into(), + INIT_VECTOR.into(), + CIPHER_TEXT.into(), + AUTH_TAG.into(), + ) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidOverflow), + } + + // When header is not valid BASE64URL, correct error is returned + let not64 = "inv@alid value!"; + match Jwe::new(not64, KEY, INIT_VECTOR, CIPHER_TEXT, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + } + + #[test] + fn jwe_validates_key_value_correctly() { + // When key is empty,JWE is created + assert!(Jwe::new(HEADER, "", INIT_VECTOR, CIPHER_TEXT, AUTH_TAG).is_ok()); + + // When key is bigger than max size, correct error is returned + let too_long = (0..1025).map(|_| "X").collect::<String>(); + match Jwe::new(HEADER, &too_long, INIT_VECTOR, CIPHER_TEXT, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidOverflow), + } + + // When key is not valid BASE64URL, correct error is returned + let not64 = "inv@alid value!"; + match Jwe::new(HEADER, not64, INIT_VECTOR, CIPHER_TEXT, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + } + + #[test] + fn jwe_validates_init_vector_value_correctly() { + // When init_vector is empty, JWE is created + assert!(Jwe::new(HEADER, KEY, "", CIPHER_TEXT, AUTH_TAG).is_ok()); + + // When init_vector is not the correct size, 
correct error is returned + match Jwe::new(HEADER, KEY, "foo", CIPHER_TEXT, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidOverflow), + } + + // When init_vector is not valid BASE64URL, correct error is returned + let not64 = "inv@alid value!!"; + match Jwe::new(HEADER, KEY, not64, CIPHER_TEXT, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + } + + #[test] + fn jwe_validates_cipher_text_value_correctly() { + // When cipher_text is empty, correct error is returned + match Jwe::new(HEADER, KEY, INIT_VECTOR, "", AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + + // When cipher_text is bigger than max size, correct error is returned + let too_long = (0..1025).map(|_| "X").collect::<String>(); + match Jwe::new(HEADER, KEY, INIT_VECTOR, &too_long, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidOverflow), + } + + // When cipher_text is not valid BASE64URL, correct error is returned + let not64 = "inv@alid value!"; + match Jwe::new(HEADER, KEY, INIT_VECTOR, not64, AUTH_TAG) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + } + + #[test] + fn jwe_validates_auth_tag_value_correctly() { + // When auth_tag is empty, JWE is created + assert!(Jwe::new(HEADER, KEY, INIT_VECTOR, CIPHER_TEXT, "").is_ok()); + + // When auth_tag is not the correct size, correct error is returned + match Jwe::new(HEADER, KEY, INIT_VECTOR, CIPHER_TEXT, "foo") { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidOverflow), + } + + // When 
auth_tag is not valid BASE64URL, correct error is returned + let not64 = "inv@alid value!!!!!!!!"; + match Jwe::new(HEADER, KEY, INIT_VECTOR, CIPHER_TEXT, not64) { + Ok(_) => panic!("Should not have built JWE successfully."), + Err((error_type, _)) => assert_eq!(error_type, ErrorType::InvalidValue), + } + } + + #[test] + fn tranforms_jwe_struct_to_string_correctly() { + let jwe = Jwe::from_str(JWE).unwrap(); + assert_eq!(jwe.to_string(), JWE); + } + + #[test] + fn validates_base64url_correctly() { + assert!(validate_base64url_encoding( + "0987654321AaBbCcDdEeFfGgHhIiKkLlMmNnOoPpQqRrSsTtUuVvXxWwYyZz-_" + )); + assert!(validate_base64url_encoding("")); + assert!(!validate_base64url_encoding("aa aa")); + assert!(!validate_base64url_encoding("aa.aa")); + assert!(!validate_base64url_encoding("!nv@lid-val*e")); + } +} diff --git a/third_party/rust/glean-core/src/metrics/labeled.rs b/third_party/rust/glean-core/src/metrics/labeled.rs new file mode 100644 index 0000000000..6620862bab --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/labeled.rs @@ -0,0 +1,252 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::common_metric_data::CommonMetricData; +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::{Metric, MetricType}; +use crate::Glean; + +const MAX_LABELS: usize = 16; +const OTHER_LABEL: &str = "__other__"; +const MAX_LABEL_LENGTH: usize = 61; + +/// Checks whether the given value matches the label regex. +/// +/// This regex is used for matching against labels and should allow for dots, +/// underscores, and/or hyphens. Labels are also limited to starting with either +/// a letter or an underscore character. 
/// Checks whether the given value matches the label regex.
///
/// This regex is used for matching against labels and should allow for dots,
/// underscores, and/or hyphens. Labels are also limited to starting with either
/// a letter or an underscore character.
///
/// The exact regex (from the pipeline schema [here](https://github.com/mozilla-services/mozilla-pipeline-schemas/blob/master/templates/include/glean/dot_separated_short_id.1.schema.json)) is:
///
///    "^[a-z_][a-z0-9_-]{0,29}(\\.[a-z_][a-z0-9_-]{0,29})*$"
///
/// That is: one or more dot-separated words, each starting with a lowercase
/// letter or underscore and being at most 30 characters long in total.
///
/// The regex crate isn't used here because it adds to the binary size, and the
/// Glean SDK doesn't use regular expressions anywhere else.
///
/// Some examples of good and bad labels:
///
/// Good:
/// * `this.is.fine`
/// * `this_is_fine_too`
/// * `this.is_still_fine`
/// * `thisisfine`
/// * `_.is_fine`
/// * `this.is-fine`
/// * `this-is-fine`
/// Bad:
/// * `this.is.not_fine_due_tu_the_length_being_too_long_i_thing.i.guess`
/// * `1.not_fine`
/// * `this.$isnotfine`
/// * `-.not_fine`
fn matches_label_regex(value: &str) -> bool {
    let mut iter = value.chars();

    loop {
        // Match the first letter in the word.
        match iter.next() {
            Some('_') | Some('a'..='z') => (),
            _ => return false,
        };

        // Match subsequent letters in the word.
        let mut count = 0;
        loop {
            match iter.next() {
                // We are done, so the whole expression is valid.
                None => return true,
                // Valid characters.
                Some('_') | Some('-') | Some('a'..='z') | Some('0'..='9') => (),
                // We ended a word, so iterate over the outer loop again.
                Some('.') => break,
                // An invalid character
                _ => return false,
            }
            count += 1;
            // We allow 30 characters per word, but the first one is handled
            // above outside of this loop, so we have a maximum of 29 here.
            // Only the 30th subsequent character makes the word too long.
            if count > 29 {
                return false;
            }
        }
    }
}
+ submetric: T, +} + +impl<T> LabeledMetric<T> +where + T: MetricType + Clone, +{ + /// Creates a new labeled metric from the given metric instance and optional list of labels. + /// + /// See [`get`](LabeledMetric::get) for information on how static or dynamic labels are handled. + pub fn new(submetric: T, labels: Option<Vec<String>>) -> LabeledMetric<T> { + LabeledMetric { labels, submetric } + } + + /// Creates a new metric with a specific label. + /// + /// This is used for static labels where we can just set the name to be `name/label`. + fn new_metric_with_name(&self, name: String) -> T { + let mut t = self.submetric.clone(); + t.meta_mut().name = name; + t + } + + /// Creates a new metric with a specific label. + /// + /// This is used for dynamic labels where we have to actually validate and correct the + /// label later when we have a Glean object. + fn new_metric_with_dynamic_label(&self, label: String) -> T { + let mut t = self.submetric.clone(); + t.meta_mut().dynamic_label = Some(label); + t + } + + /// Creates a static label. + /// + /// # Safety + /// + /// Should only be called when static labels are available on this metric. + /// + /// # Arguments + /// + /// * `label` - The requested label + /// + /// # Returns + /// + /// The requested label if it is in the list of allowed labels. + /// Otherwise `OTHER_LABEL` is returned. + fn static_label<'a>(&self, label: &'a str) -> &'a str { + debug_assert!(self.labels.is_some()); + let labels = self.labels.as_ref().unwrap(); + if labels.iter().any(|l| l == label) { + label + } else { + OTHER_LABEL + } + } + + /// Gets a specific metric for a given label. + /// + /// If a set of acceptable labels were specified in the `metrics.yaml` file, + /// and the given label is not in the set, it will be recorded under the special `OTHER_LABEL` label. + /// + /// If a set of acceptable labels was not specified in the `metrics.yaml` file, + /// only the first 16 unique labels will be used. 
/// Strips the label off of a complete identifier.
///
/// Returns everything before the first `/`, or the whole identifier
/// when it contains no `/` at all.
pub fn strip_label(identifier: &str) -> &str {
    match identifier.find('/') {
        Some(separator) => &identifier[..separator],
        None => identifier,
    }
}
+pub fn dynamic_label( + glean: &Glean, + meta: &CommonMetricData, + base_identifier: &str, + label: &str, +) -> String { + let key = combine_base_identifier_and_label(base_identifier, label); + for store in &meta.send_in_pings { + if glean.storage().has_metric(meta.lifetime, store, &key) { + return key; + } + } + + let mut label_count = 0; + let prefix = &key[..=base_identifier.len()]; + let mut snapshotter = |_: &[u8], _: &Metric| { + label_count += 1; + }; + + let lifetime = meta.lifetime; + for store in &meta.send_in_pings { + glean + .storage() + .iter_store_from(lifetime, store, Some(&prefix), &mut snapshotter); + } + + let error = if label_count >= MAX_LABELS { + true + } else if label.len() > MAX_LABEL_LENGTH { + let msg = format!( + "label length {} exceeds maximum of {}", + label.len(), + MAX_LABEL_LENGTH + ); + record_error(glean, meta, ErrorType::InvalidLabel, msg, None); + true + } else if !matches_label_regex(label) { + let msg = format!("label must be snake_case, got '{}'", label); + record_error(glean, meta, ErrorType::InvalidLabel, msg, None); + true + } else { + false + }; + + if error { + combine_base_identifier_and_label(base_identifier, OTHER_LABEL) + } else { + key + } +} diff --git a/third_party/rust/glean-core/src/metrics/memory_distribution.rs b/third_party/rust/glean-core/src/metrics/memory_distribution.rs new file mode 100644 index 0000000000..40687e7dc3 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/memory_distribution.rs @@ -0,0 +1,213 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::error_recording::{record_error, ErrorType}; +use crate::histogram::{Functional, Histogram}; +use crate::metrics::memory_unit::MemoryUnit; +use crate::metrics::{DistributionData, Metric, MetricType}; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +// The base of the logarithm used to determine bucketing +const LOG_BASE: f64 = 2.0; + +// The buckets per each order of magnitude of the logarithm. +const BUCKETS_PER_MAGNITUDE: f64 = 16.0; + +// Set a maximum recordable value of 1 terabyte so the buckets aren't +// completely unbounded. +const MAX_BYTES: u64 = 1 << 40; + +/// A memory distribution metric. +/// +/// Memory distributions are used to accumulate and store memory sizes. +#[derive(Debug)] +pub struct MemoryDistributionMetric { + meta: CommonMetricData, + memory_unit: MemoryUnit, +} + +/// Create a snapshot of the histogram. +/// +/// The snapshot can be serialized into the payload format. +pub(crate) fn snapshot(hist: &Histogram<Functional>) -> DistributionData { + DistributionData { + // **Caution**: This cannot use `Histogram::snapshot_values` and needs to use the more + // specialized snapshot function. + values: hist.snapshot(), + sum: hist.sum(), + } +} + +impl MetricType for MemoryDistributionMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl MemoryDistributionMetric { + /// Creates a new memory distribution metric. + pub fn new(meta: CommonMetricData, memory_unit: MemoryUnit) -> Self { + Self { meta, memory_unit } + } + + /// Accumulates the provided sample in the metric. + /// + /// # Arguments + /// + /// * `sample` - The sample to be recorded by the metric. The sample is assumed to be in the + /// configured memory unit of the metric. 
+ /// + /// ## Notes + /// + /// Values bigger than 1 Terabyte (2<sup>40</sup> bytes) are truncated + /// and an [`ErrorType::InvalidValue`] error is recorded. + pub fn accumulate(&self, glean: &Glean, sample: u64) { + if !self.should_record(glean) { + return; + } + + let mut sample = self.memory_unit.as_bytes(sample); + + if sample > MAX_BYTES { + let msg = "Sample is bigger than 1 terabyte"; + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + sample = MAX_BYTES; + } + + glean + .storage() + .record_with(glean, &self.meta, |old_value| match old_value { + Some(Metric::MemoryDistribution(mut hist)) => { + hist.accumulate(sample); + Metric::MemoryDistribution(hist) + } + _ => { + let mut hist = Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE); + hist.accumulate(sample); + Metric::MemoryDistribution(hist) + } + }); + } + + /// Accumulates the provided signed samples in the metric. + /// + /// This is required so that the platform-specific code can provide us with + /// 64 bit signed integers if no `u64` comparable type is available. This + /// will take care of filtering and reporting errors for any provided negative + /// sample. + /// + /// Please note that this assumes that the provided samples are already in + /// the "unit" declared by the instance of the metric type (e.g. if the the + /// instance this method was called on is using [`MemoryUnit::Kilobyte`], then + /// `samples` are assumed to be in that unit). + /// + /// # Arguments + /// + /// * `samples` - The vector holding the samples to be recorded by the metric. + /// + /// ## Notes + /// + /// Discards any negative value in `samples` and report an [`ErrorType::InvalidValue`] + /// for each of them. + /// + /// Values bigger than 1 Terabyte (2<sup>40</sup> bytes) are truncated + /// and an [`ErrorType::InvalidValue`] error is recorded. 
+ pub fn accumulate_samples_signed(&self, glean: &Glean, samples: Vec<i64>) { + if !self.should_record(glean) { + return; + } + + let mut num_negative_samples = 0; + let mut num_too_log_samples = 0; + + glean.storage().record_with(glean, &self.meta, |old_value| { + let mut hist = match old_value { + Some(Metric::MemoryDistribution(hist)) => hist, + _ => Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE), + }; + + for &sample in samples.iter() { + if sample < 0 { + num_negative_samples += 1; + } else { + let sample = sample as u64; + let mut sample = self.memory_unit.as_bytes(sample); + if sample > MAX_BYTES { + num_too_log_samples += 1; + sample = MAX_BYTES; + } + + hist.accumulate(sample); + } + } + Metric::MemoryDistribution(hist) + }); + + if num_negative_samples > 0 { + let msg = format!("Accumulated {} negative samples", num_negative_samples); + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + msg, + num_negative_samples, + ); + } + + if num_too_log_samples > 0 { + let msg = format!( + "Accumulated {} samples larger than 1TB", + num_too_log_samples + ); + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + msg, + num_too_log_samples, + ); + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<DistributionData> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::MemoryDistribution(hist)) => Some(snapshot(&hist)), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently-stored histogram as a JSON String of the serialized value. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value_as_json_string( + &self, + glean: &Glean, + storage_name: &str, + ) -> Option<String> { + self.test_get_value(glean, storage_name) + .map(|snapshot| serde_json::to_string(&snapshot).unwrap()) + } +} diff --git a/third_party/rust/glean-core/src/metrics/memory_unit.rs b/third_party/rust/glean-core/src/metrics/memory_unit.rs new file mode 100644 index 0000000000..ce51b975fa --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/memory_unit.rs @@ -0,0 +1,64 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::convert::TryFrom; + +use serde::{Deserialize, Serialize}; + +use crate::error::{Error, ErrorKind}; + +/// Different resolutions supported by the memory related metric types (e.g. +/// MemoryDistributionMetric). +#[derive(Copy, Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +#[repr(i32)] // use i32 to be compatible with our JNA definition +pub enum MemoryUnit { + /// 1 byte + Byte, + /// 2^10 bytes + Kilobyte, + /// 2^20 bytes + Megabyte, + /// 2^30 bytes + Gigabyte, +} + +impl MemoryUnit { + /// Converts a value in the given unit to bytes. + /// + /// # Arguments + /// + /// * `value` - the value to convert. + /// + /// # Returns + /// + /// The integer representation of the byte value. + pub fn as_bytes(self, value: u64) -> u64 { + use MemoryUnit::*; + match self { + Byte => value, + Kilobyte => value << 10, + Megabyte => value << 20, + Gigabyte => value << 30, + } + } +} + +/// Trait implementation for converting an integer value +/// to a [`MemoryUnit`]. This is used in the FFI code. Please +/// note that values should match the ordering of the platform +/// specific side of things (e.g. Kotlin implementation). 
+impl TryFrom<i32> for MemoryUnit { + type Error = Error; + + fn try_from(value: i32) -> Result<MemoryUnit, Self::Error> { + match value { + 0 => Ok(MemoryUnit::Byte), + 1 => Ok(MemoryUnit::Kilobyte), + 2 => Ok(MemoryUnit::Megabyte), + 3 => Ok(MemoryUnit::Gigabyte), + e => Err(ErrorKind::MemoryUnit(e).into()), + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/mod.rs b/third_party/rust/glean-core/src/metrics/mod.rs new file mode 100644 index 0000000000..ca3acac514 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/mod.rs @@ -0,0 +1,187 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The different metric types supported by the Glean SDK to handle data. + +use std::collections::HashMap; + +use chrono::{DateTime, FixedOffset}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value as JsonValue}; + +mod boolean; +mod counter; +mod custom_distribution; +mod datetime; +mod event; +mod experiment; +mod jwe; +mod labeled; +mod memory_distribution; +mod memory_unit; +mod ping; +mod quantity; +mod string; +mod string_list; +mod time_unit; +mod timespan; +mod timing_distribution; +mod uuid; + +pub use crate::event_database::RecordedEvent; +use crate::histogram::{Functional, Histogram, PrecomputedExponential, PrecomputedLinear}; +pub use crate::metrics::datetime::Datetime; +use crate::util::get_iso_time_string; +use crate::CommonMetricData; +use crate::Glean; + +pub use self::boolean::BooleanMetric; +pub use self::counter::CounterMetric; +pub use self::custom_distribution::CustomDistributionMetric; +pub use self::datetime::DatetimeMetric; +pub use self::event::EventMetric; +pub(crate) use self::experiment::ExperimentMetric; +pub use crate::histogram::HistogramType; +// Note: only expose RecordedExperimentData to tests in +// the next line, so that glean-core\src\lib.rs won't 
/// A snapshot of all buckets and the accumulated sum of a distribution.
#[derive(Debug, Serialize)]
pub struct DistributionData {
    /// A map containing the bucket index mapped to the accumulated count.
    ///
    /// This can contain buckets with a count of `0`.
    pub values: HashMap<u64, u64>,

    /// The accumulated sum of all the samples in the distribution.
    pub sum: u64,
}
/// A [`MetricType`] describes common behavior across all metrics.
pub trait MetricType {
    /// Accesses the stored metadata.
    fn meta(&self) -> &CommonMetricData;

    /// Accesses the stored metadata mutably.
    fn meta_mut(&mut self) -> &mut CommonMetricData;

    /// Whether this metric should currently be recorded.
    ///
    /// This depends on the metric's own state, as determined by its metadata,
    /// and whether upload is enabled on the Glean object.
    fn should_record(&self, glean: &Glean) -> bool {
        glean.is_upload_enabled() && self.meta().should_record()
    }
}
+ pub fn ping_section(&self) -> &'static str { + match self { + Metric::Boolean(_) => "boolean", + Metric::Counter(_) => "counter", + // Custom distributions are in the same section, no matter what bucketing. + Metric::CustomDistributionExponential(_) => "custom_distribution", + Metric::CustomDistributionLinear(_) => "custom_distribution", + Metric::Datetime(_, _) => "datetime", + Metric::Experiment(_) => panic!("Experiments should not be serialized through this"), + Metric::Quantity(_) => "quantity", + Metric::String(_) => "string", + Metric::StringList(_) => "string_list", + Metric::Timespan(..) => "timespan", + Metric::TimingDistribution(_) => "timing_distribution", + Metric::Uuid(_) => "uuid", + Metric::MemoryDistribution(_) => "memory_distribution", + Metric::Jwe(_) => "jwe", + } + } + + /// The JSON representation of the metric's data + pub fn as_json(&self) -> JsonValue { + match self { + Metric::Boolean(b) => json!(b), + Metric::Counter(c) => json!(c), + Metric::CustomDistributionExponential(hist) => { + json!(custom_distribution::snapshot(hist)) + } + Metric::CustomDistributionLinear(hist) => json!(custom_distribution::snapshot(hist)), + Metric::Datetime(d, time_unit) => json!(get_iso_time_string(*d, *time_unit)), + Metric::Experiment(e) => e.as_json(), + Metric::Quantity(q) => json!(q), + Metric::String(s) => json!(s), + Metric::StringList(v) => json!(v), + Metric::Timespan(time, time_unit) => { + json!({"value": time_unit.duration_convert(*time), "time_unit": time_unit}) + } + Metric::TimingDistribution(hist) => json!(timing_distribution::snapshot(hist)), + Metric::Uuid(s) => json!(s), + Metric::MemoryDistribution(hist) => json!(memory_distribution::snapshot(hist)), + Metric::Jwe(s) => json!(s), + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/ping.rs b/third_party/rust/glean-core/src/metrics/ping.rs new file mode 100644 index 0000000000..fd44b06dec --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/ping.rs @@ -0,0 +1,78 @@ +// 
This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::error::Result; +use crate::Glean; + +/// Stores information about a ping. +/// +/// This is required so that given metric data queued on disk we can send +/// pings with the correct settings, e.g. whether it has a client_id. +#[derive(Clone, Debug)] +pub struct PingType { + /// The name of the ping. + pub name: String, + /// Whether the ping should include the client ID. + pub include_client_id: bool, + /// Whether the ping should be sent if it is empty + pub send_if_empty: bool, + /// The "reason" codes that this ping can send + pub reason_codes: Vec<String>, +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl PingType { + /// Creates a new ping type for the given name, whether to include the client ID and whether to + /// send this ping empty. + /// + /// # Arguments + /// + /// * `name` - The name of the ping. + /// * `include_client_id` - Whether to include the client ID in the assembled ping when submitting. + /// * `send_if_empty` - Whether the ping should be sent empty or not. + /// * `reason_codes` - The valid reason codes for this ping. + pub fn new<A: Into<String>>( + name: A, + include_client_id: bool, + send_if_empty: bool, + reason_codes: Vec<String>, + ) -> Self { + Self { + name: name.into(), + include_client_id, + send_if_empty, + reason_codes, + } + } + + /// Submits the ping for eventual uploading + /// + /// # Arguments + /// + /// * `glean` - the Glean instance to use to send the ping. + /// * `reason` - the reason the ping was triggered. Included in the + /// `ping_info.reason` part of the payload. + /// + /// # Returns + /// + /// See [`Glean::submit_ping`](crate::Glean::submit_ping) for details. 
+ pub fn submit(&self, glean: &Glean, reason: Option<&str>) -> Result<bool> { + let corrected_reason = match reason { + Some(reason) => { + if self.reason_codes.contains(&reason.to_string()) { + Some(reason) + } else { + log::error!("Invalid reason code {} for ping {}", reason, self.name); + None + } + } + None => None, + }; + + glean.submit_ping(self, corrected_reason) + } +} diff --git a/third_party/rust/glean-core/src/metrics/quantity.rs b/third_party/rust/glean-core/src/metrics/quantity.rs new file mode 100644 index 0000000000..128761d4c6 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/quantity.rs @@ -0,0 +1,87 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// A quantity metric. +/// +/// Used to store explicit non-negative integers. +#[derive(Clone, Debug)] +pub struct QuantityMetric { + meta: CommonMetricData, +} + +impl MetricType for QuantityMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl QuantityMetric { + /// Creates a new quantity metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { meta } + } + + /// Sets the value. Must be non-negative. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The value. Must be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `value` is negative. 
+ pub fn set(&self, glean: &Glean, value: i64) { + if !self.should_record(glean) { + return; + } + + if value < 0 { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + format!("Set negative value {}", value), + None, + ); + return; + } + + glean + .storage() + .record(glean, &self.meta, &Metric::Quantity(value)) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<i64> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Quantity(i)) => Some(i), + _ => None, + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/string.rs b/third_party/rust/glean-core/src/metrics/string.rs new file mode 100644 index 0000000000..e280d08c32 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/string.rs @@ -0,0 +1,116 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::truncate_string_at_boundary_with_error; +use crate::CommonMetricData; +use crate::Glean; + +const MAX_LENGTH_VALUE: usize = 100; + +/// A string metric. +/// +/// Record an Unicode string value with arbitrary content. +/// Strings are length-limited to `MAX_LENGTH_VALUE` bytes. 
+#[derive(Clone, Debug)] +pub struct StringMetric { + meta: CommonMetricData, +} + +impl MetricType for StringMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl StringMetric { + /// Creates a new string metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { meta } + } + + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The string to set the metric to. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_LENGTH_VALUE` bytes and logs an error. + pub fn set<S: Into<String>>(&self, glean: &Glean, value: S) { + if !self.should_record(glean) { + return; + } + + let s = truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_LENGTH_VALUE); + + let value = Metric::String(s); + glean.storage().record(glean, &self.meta, &value) + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<String> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::String(s)) => Some(s), + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::test_get_num_recorded_errors; + use crate::tests::new_glean; + use crate::util::truncate_string_at_boundary; + use crate::ErrorType; + use crate::Lifetime; + + #[test] + fn setting_a_long_string_records_an_error() { + let (glean, _) = new_glean(None); + + let metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }); + + let sample_string = "0123456789".repeat(11); + metric.set(&glean, sample_string.clone()); + + let truncated = truncate_string_at_boundary(sample_string, MAX_LENGTH_VALUE); + assert_eq!(truncated, metric.test_get_value(&glean, "store1").unwrap()); + + assert_eq!( + 1, + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow, None) + .unwrap() + ); + } +} diff --git a/third_party/rust/glean-core/src/metrics/string_list.rs b/third_party/rust/glean-core/src/metrics/string_list.rs new file mode 100644 index 0000000000..e61183c018 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/string_list.rs @@ -0,0 +1,161 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::util::truncate_string_at_boundary_with_error; +use crate::CommonMetricData; +use crate::Glean; + +// Maximum length of any list +const MAX_LIST_LENGTH: usize = 20; +// Maximum length of any string in the list +const MAX_STRING_LENGTH: usize = 50; + +/// A string list metric. +/// +/// This allows appending a string value with arbitrary content to a list. +#[derive(Clone, Debug)] +pub struct StringListMetric { + meta: CommonMetricData, +} + +impl MetricType for StringListMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl StringListMetric { + /// Creates a new string list metric. + pub fn new(meta: CommonMetricData) -> Self { + Self { meta } + } + + /// Adds a new string to the list. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The string to add. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_STRING_LENGTH` bytes and logs an error. 
+ pub fn add<S: Into<String>>(&self, glean: &Glean, value: S) { + if !self.should_record(glean) { + return; + } + + let value = + truncate_string_at_boundary_with_error(glean, &self.meta, value, MAX_STRING_LENGTH); + let mut error = None; + glean + .storage() + .record_with(glean, &self.meta, |old_value| match old_value { + Some(Metric::StringList(mut old_value)) => { + if old_value.len() == MAX_LIST_LENGTH { + let msg = format!( + "String list length of {} exceeds maximum of {}", + old_value.len() + 1, + MAX_LIST_LENGTH + ); + error = Some(msg); + } else { + old_value.push(value.clone()); + } + Metric::StringList(old_value) + } + _ => Metric::StringList(vec![value.clone()]), + }); + + if let Some(msg) = error { + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + } + } + + /// Sets to a specific list of strings. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The list of string to set the metric to. + /// + /// ## Notes + /// + /// If passed an empty list, records an error and returns. + /// + /// Truncates the list if it is longer than `MAX_LIST_LENGTH` and logs an error. + /// + /// Truncates any value in the list if it is longer than `MAX_STRING_LENGTH` and logs an error. 
+ pub fn set(&self, glean: &Glean, value: Vec<String>) { + if !self.should_record(glean) { + return; + } + + let value = if value.len() > MAX_LIST_LENGTH { + let msg = format!( + "StringList length {} exceeds maximum of {}", + value.len(), + MAX_LIST_LENGTH + ); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + value[0..MAX_LIST_LENGTH].to_vec() + } else { + value + }; + + let value = value + .into_iter() + .map(|elem| { + truncate_string_at_boundary_with_error(glean, &self.meta, elem, MAX_STRING_LENGTH) + }) + .collect(); + + let value = Metric::StringList(value); + glean.storage().record(glean, &self.meta, &value); + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently-stored values. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<Vec<String>> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::StringList(values)) => Some(values), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently-stored values as a JSON String of the format + /// ["string1", "string2", ...] + /// + /// This doesn't clear the stored value. + pub fn test_get_value_as_json_string( + &self, + glean: &Glean, + storage_name: &str, + ) -> Option<String> { + self.test_get_value(glean, storage_name) + .map(|values| serde_json::to_string(&values).unwrap()) + } +} diff --git a/third_party/rust/glean-core/src/metrics/time_unit.rs b/third_party/rust/glean-core/src/metrics/time_unit.rs new file mode 100644 index 0000000000..09084527bc --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/time_unit.rs @@ -0,0 +1,117 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::convert::TryFrom; +use std::time::Duration; + +use serde::{Deserialize, Serialize}; + +use crate::error::{Error, ErrorKind}; + +/// Different resolutions supported by the time related +/// metric types (e.g. DatetimeMetric). +#[derive(Copy, Clone, Debug, Deserialize, Serialize, PartialEq)] +#[serde(rename_all = "lowercase")] +#[repr(i32)] // use i32 to be compatible with our JNA definition +pub enum TimeUnit { + /// Truncate to nanosecond precision. + Nanosecond, + /// Truncate to microsecond precision. + Microsecond, + /// Truncate to millisecond precision. + Millisecond, + /// Truncate to second precision. + Second, + /// Truncate to minute precision. + Minute, + /// Truncate to hour precision. + Hour, + /// Truncate to day precision. + Day, +} + +impl TimeUnit { + /// Formats the given time unit, truncating the time if needed. + pub fn format_pattern(self) -> &'static str { + use TimeUnit::*; + match self { + Nanosecond => "%Y-%m-%dT%H:%M:%S%.f%:z", + Microsecond => "%Y-%m-%dT%H:%M:%S%.6f%:z", + Millisecond => "%Y-%m-%dT%H:%M:%S%.3f%:z", + Second => "%Y-%m-%dT%H:%M:%S%:z", + Minute => "%Y-%m-%dT%H:%M%:z", + Hour => "%Y-%m-%dT%H%:z", + Day => "%Y-%m-%d%:z", + } + } + + /// Converts a duration to the requested time unit. + /// + /// # Arguments + /// + /// * `duration` - the duration to convert. + /// + /// # Returns + /// + /// The integer representation of the converted duration. + pub fn duration_convert(self, duration: Duration) -> u64 { + use TimeUnit::*; + match self { + Nanosecond => duration.as_nanos() as u64, + Microsecond => duration.as_micros() as u64, + Millisecond => duration.as_millis() as u64, + Second => duration.as_secs(), + Minute => duration.as_secs() / 60, + Hour => duration.as_secs() / 60 / 60, + Day => duration.as_secs() / 60 / 60 / 24, + } + } + + /// Converts a duration in the given unit to nanoseconds. 
+ /// + /// # Arguments + /// + /// * `duration` - the duration to convert. + /// + /// # Returns + /// + /// The integer representation of the nanosecond duration. + pub fn as_nanos(self, duration: u64) -> u64 { + use TimeUnit::*; + let duration = match self { + Nanosecond => Duration::from_nanos(duration), + Microsecond => Duration::from_micros(duration), + Millisecond => Duration::from_millis(duration), + Second => Duration::from_secs(duration), + Minute => Duration::from_secs(duration * 60), + Hour => Duration::from_secs(duration * 60 * 60), + Day => Duration::from_secs(duration * 60 * 60 * 24), + }; + + duration.as_nanos() as u64 + } +} + +/// Trait implementation for converting an integer value to a TimeUnit. +/// +/// This is used in the FFI code. +/// +/// Please note that values should match the ordering of the +/// platform specific side of things (e.g. Kotlin implementation). +impl TryFrom<i32> for TimeUnit { + type Error = Error; + + fn try_from(value: i32) -> Result<TimeUnit, Self::Error> { + match value { + 0 => Ok(TimeUnit::Nanosecond), + 1 => Ok(TimeUnit::Microsecond), + 2 => Ok(TimeUnit::Millisecond), + 3 => Ok(TimeUnit::Second), + 4 => Ok(TimeUnit::Minute), + 5 => Ok(TimeUnit::Hour), + 6 => Ok(TimeUnit::Day), + e => Err(ErrorKind::TimeUnit(e).into()), + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/timespan.rs b/third_party/rust/glean-core/src/metrics/timespan.rs new file mode 100644 index 0000000000..ef2c329467 --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/timespan.rs @@ -0,0 +1,192 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::time::Duration; + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::time_unit::TimeUnit; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// A timespan metric. +/// +/// Timespans are used to make a measurement of how much time is spent in a particular task. +#[derive(Debug)] +pub struct TimespanMetric { + meta: CommonMetricData, + time_unit: TimeUnit, + start_time: Option<u64>, +} + +impl MetricType for TimespanMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl TimespanMetric { + /// Creates a new timespan metric. + pub fn new(meta: CommonMetricData, time_unit: TimeUnit) -> Self { + Self { + meta, + time_unit, + start_time: None, + } + } + + /// Starts tracking time for the provided metric. + /// + /// This records an error if it's already tracking time (i.e. start was + /// already called with no corresponding + /// [`set_stop`](TimespanMetric::set_stop)): in that case the original start + /// time will be preserved. + pub fn set_start(&mut self, glean: &Glean, start_time: u64) { + if !self.should_record(glean) { + return; + } + + if self.start_time.is_some() { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan already started", + None, + ); + return; + } + + self.start_time = Some(start_time); + } + + /// Stops tracking time for the provided metric. Sets the metric to the elapsed time. + /// + /// This will record an error if no [`set_start`](TimespanMetric::set_start) was called. 
+ pub fn set_stop(&mut self, glean: &Glean, stop_time: u64) { + if !self.should_record(glean) { + // Reset timer when disabled, so that we don't record timespans across + // disabled/enabled toggling. + self.start_time = None; + return; + } + + if self.start_time.is_none() { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan not running", + None, + ); + return; + } + + let start_time = self.start_time.take().unwrap(); + let duration = match stop_time.checked_sub(start_time) { + Some(duration) => duration, + None => { + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + "Timespan was negative", + None, + ); + return; + } + }; + let duration = Duration::from_nanos(duration); + self.set_raw(glean, duration); + } + + /// Aborts a previous [`set_start`](TimespanMetric::set_start) call. No + /// error is recorded if no [`set_start`](TimespanMetric::set_start) was + /// called. + pub fn cancel(&mut self) { + self.start_time = None; + } + + /// Explicitly sets the timespan value. + /// + /// This API should only be used if your library or application requires + /// recording times in a way that can not make use of + /// [`set_start`](TimespanMetric::set_start)/[`set_stop`](TimespanMetric::set_stop)/[`cancel`](TimespanMetric::cancel). + /// + /// Care should be taken using this if the ping lifetime might contain more + /// than one timespan measurement. To be safe, + /// [`set_raw`](TimespanMetric::set_raw) should generally be followed by + /// sending a custom ping containing the timespan. + /// + /// # Arguments + /// + /// * `elapsed` - The elapsed time to record. + pub fn set_raw(&self, glean: &Glean, elapsed: Duration) { + if !self.should_record(glean) { + return; + } + + if self.start_time.is_some() { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan already running. 
Raw value not recorded.", + None, + ); + return; + } + + let mut report_value_exists: bool = false; + glean.storage().record_with(glean, &self.meta, |old_value| { + match old_value { + Some(old @ Metric::Timespan(..)) => { + // If some value already exists, report an error. + // We do this out of the storage since recording an + // error accesses the storage as well. + report_value_exists = true; + old + } + _ => Metric::Timespan(elapsed, self.time_unit), + } + }); + + if report_value_exists { + record_error( + glean, + &self.meta, + ErrorType::InvalidState, + "Timespan value already recorded. New value discarded.", + None, + ); + }; + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<u64> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Timespan(time, time_unit)) => Some(time_unit.duration_convert(time)), + _ => None, + } + } +} diff --git a/third_party/rust/glean-core/src/metrics/timing_distribution.rs b/third_party/rust/glean-core/src/metrics/timing_distribution.rs new file mode 100644 index 0000000000..3cb34d330d --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/timing_distribution.rs @@ -0,0 +1,411 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::collections::HashMap; + +use crate::error_recording::{record_error, ErrorType}; +use crate::histogram::{Functional, Histogram}; +use crate::metrics::time_unit::TimeUnit; +use crate::metrics::{DistributionData, Metric, MetricType}; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +// The base of the logarithm used to determine bucketing +const LOG_BASE: f64 = 2.0; + +// The buckets per each order of magnitude of the logarithm. +const BUCKETS_PER_MAGNITUDE: f64 = 8.0; + +// Maximum time, which means we retain a maximum of 316 buckets. +// It is automatically adjusted based on the `time_unit` parameter +// so that: +// +// - `nanosecond` - 10 minutes +// - `microsecond` - ~6.94 days +// - `millisecond` - ~19 years +const MAX_SAMPLE_TIME: u64 = 1000 * 1000 * 1000 * 60 * 10; + +/// Identifier for a running timer. +pub type TimerId = u64; + +#[derive(Debug, Clone)] +struct Timings { + next_id: TimerId, + start_times: HashMap<TimerId, u64>, +} + +/// Track different running timers, identified by a `TimerId`. +impl Timings { + /// Create a new timing manager. + fn new() -> Self { + Self { + next_id: 0, + start_times: HashMap::new(), + } + } + + /// Start a new timer and set it to the `start_time`. + /// + /// Returns a new [`TimerId`] identifying the timer. + fn set_start(&mut self, start_time: u64) -> TimerId { + let id = self.next_id; + self.next_id += 1; + self.start_times.insert(id, start_time); + id + } + + /// Stop the timer and return the elapsed time. + /// + /// Returns an error if the `id` does not correspond to a running timer. + /// Returns an error if the stop time is before the start time. + /// + /// ## Note + /// + /// This API exists to satisfy the FFI requirements, where the clock is handled on the + /// application side and passed in as a timestamp. 
+ fn set_stop(&mut self, id: TimerId, stop_time: u64) -> Result<u64, (ErrorType, &str)> { + let start_time = match self.start_times.remove(&id) { + Some(start_time) => start_time, + None => return Err((ErrorType::InvalidState, "Timing not running")), + }; + + let duration = match stop_time.checked_sub(start_time) { + Some(duration) => duration, + None => { + return Err(( + ErrorType::InvalidValue, + "Timer stopped with negative duration", + )) + } + }; + + Ok(duration) + } + + /// Cancel and remove the timer. + fn cancel(&mut self, id: TimerId) { + self.start_times.remove(&id); + } +} + +/// A timing distribution metric. +/// +/// Timing distributions are used to accumulate and store time measurement, for analyzing distributions of the timing data. +#[derive(Debug)] +pub struct TimingDistributionMetric { + meta: CommonMetricData, + time_unit: TimeUnit, + timings: Timings, +} + +/// Create a snapshot of the histogram with a time unit. +/// +/// The snapshot can be serialized into the payload format. +pub(crate) fn snapshot(hist: &Histogram<Functional>) -> DistributionData { + DistributionData { + // **Caution**: This cannot use `Histogram::snapshot_values` and needs to use the more + // specialized snapshot function. + values: hist.snapshot(), + sum: hist.sum(), + } +} + +impl MetricType for TimingDistributionMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl TimingDistributionMetric { + /// Creates a new timing distribution metric. + pub fn new(meta: CommonMetricData, time_unit: TimeUnit) -> Self { + Self { + meta, + time_unit, + timings: Timings::new(), + } + } + + /// Starts tracking time for the provided metric. + /// + /// This records an error if it’s already tracking time (i.e. 
+ /// [`set_start`](TimingDistributionMetric::set_start) was already called with no + /// corresponding [`set_stop_and_accumulate`](TimingDistributionMetric::set_stop_and_accumulate)): in + /// that case the original start time will be preserved. + /// + /// # Arguments + /// + /// * `start_time` - Timestamp in nanoseconds. + /// + /// # Returns + /// + /// A unique [`TimerId`] for the new timer. + pub fn set_start(&mut self, start_time: u64) -> TimerId { + self.timings.set_start(start_time) + } + + /// Stops tracking time for the provided metric and associated timer id. + /// + /// Adds a count to the corresponding bucket in the timing distribution. + /// This will record an error if no + /// [`set_start`](TimingDistributionMetric::set_start) was called. + /// + /// # Arguments + /// + /// * `id` - The [`TimerId`] to associate with this timing. This allows + /// for concurrent timing of events associated with different ids to the + /// same timespan metric. + /// * `stop_time` - Timestamp in nanoseconds. + pub fn set_stop_and_accumulate(&mut self, glean: &Glean, id: TimerId, stop_time: u64) { + // Duration is in nanoseconds. + let mut duration = match self.timings.set_stop(id, stop_time) { + Err((err_type, err_msg)) => { + record_error(glean, &self.meta, err_type, err_msg, None); + return; + } + Ok(duration) => duration, + }; + + let min_sample_time = self.time_unit.as_nanos(1); + let max_sample_time = self.time_unit.as_nanos(MAX_SAMPLE_TIME); + + duration = if duration < min_sample_time { + // If measurement is less than the minimum, just truncate. This is + // not recorded as an error. 
+ min_sample_time + } else if duration > max_sample_time { + let msg = format!( + "Sample is longer than the max for a time_unit of {:?} ({} ns)", + self.time_unit, max_sample_time + ); + record_error(glean, &self.meta, ErrorType::InvalidOverflow, msg, None); + max_sample_time + } else { + duration + }; + + if !self.should_record(glean) { + return; + } + + glean + .storage() + .record_with(glean, &self.meta, |old_value| match old_value { + Some(Metric::TimingDistribution(mut hist)) => { + hist.accumulate(duration); + Metric::TimingDistribution(hist) + } + _ => { + let mut hist = Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE); + hist.accumulate(duration); + Metric::TimingDistribution(hist) + } + }); + } + + /// Aborts a previous [`set_start`](TimingDistributionMetric::set_start) + /// call. No error is recorded if no + /// [`set_start`](TimingDistributionMetric.set_start) was called. + /// + /// # Arguments + /// + /// * `id` - The [`TimerId`] to associate with this timing. This allows + /// for concurrent timing of events associated with different ids to the + /// same timing distribution metric. + pub fn cancel(&mut self, id: TimerId) { + self.timings.cancel(id); + } + + /// Accumulates the provided signed samples in the metric. + /// + /// This is required so that the platform-specific code can provide us with + /// 64 bit signed integers if no `u64` comparable type is available. This + /// will take care of filtering and reporting errors for any provided negative + /// sample. + /// + /// Please note that this assumes that the provided samples are already in + /// the "unit" declared by the instance of the metric type (e.g. if the + /// instance this method was called on is using [`TimeUnit::Second`], then + /// `samples` are assumed to be in that unit). + /// + /// # Arguments + /// + /// * `samples` - The vector holding the samples to be recorded by the metric. 
+ /// + /// ## Notes + /// + /// Discards any negative value in `samples` and report an [`ErrorType::InvalidValue`] + /// for each of them. Reports an [`ErrorType::InvalidOverflow`] error for samples that + /// are longer than `MAX_SAMPLE_TIME`. + pub fn accumulate_samples_signed(&mut self, glean: &Glean, samples: Vec<i64>) { + if !self.should_record(glean) { + return; + } + + let mut num_negative_samples = 0; + let mut num_too_long_samples = 0; + let max_sample_time = self.time_unit.as_nanos(MAX_SAMPLE_TIME); + + glean.storage().record_with(glean, &self.meta, |old_value| { + let mut hist = match old_value { + Some(Metric::TimingDistribution(hist)) => hist, + _ => Histogram::functional(LOG_BASE, BUCKETS_PER_MAGNITUDE), + }; + + for &sample in samples.iter() { + if sample < 0 { + num_negative_samples += 1; + } else { + let mut sample = sample as u64; + + // Check the range prior to converting the incoming unit to + // nanoseconds, so we can compare against the constant + // MAX_SAMPLE_TIME. + if sample == 0 { + sample = 1; + } else if sample > MAX_SAMPLE_TIME { + num_too_long_samples += 1; + sample = MAX_SAMPLE_TIME; + } + + sample = self.time_unit.as_nanos(sample); + + hist.accumulate(sample); + } + } + + Metric::TimingDistribution(hist) + }); + + if num_negative_samples > 0 { + let msg = format!("Accumulated {} negative samples", num_negative_samples); + record_error( + glean, + &self.meta, + ErrorType::InvalidValue, + msg, + num_negative_samples, + ); + } + + if num_too_long_samples > 0 { + let msg = format!( + "{} samples are longer than the maximum of {}", + num_too_long_samples, max_sample_time + ); + record_error( + glean, + &self.meta, + ErrorType::InvalidOverflow, + msg, + num_too_long_samples, + ); + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. 
+ pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<DistributionData> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta.identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::TimingDistribution(hist)) => Some(snapshot(&hist)), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently-stored histogram as a JSON String of the serialized value. + /// + /// This doesn't clear the stored value. + pub fn test_get_value_as_json_string( + &self, + glean: &Glean, + storage_name: &str, + ) -> Option<String> { + self.test_get_value(glean, storage_name) + .map(|snapshot| serde_json::to_string(&snapshot).unwrap()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn can_snapshot() { + use serde_json::json; + + let mut hist = Histogram::functional(2.0, 8.0); + + for i in 1..=10 { + hist.accumulate(i); + } + + let snap = snapshot(&hist); + + let expected_json = json!({ + "sum": 55, + "values": { + "1": 1, + "2": 1, + "3": 1, + "4": 1, + "5": 1, + "6": 1, + "7": 1, + "8": 1, + "9": 1, + "10": 1, + "11": 0, + }, + }); + + assert_eq!(expected_json, json!(snap)); + } + + #[test] + fn can_snapshot_sparse() { + use serde_json::json; + + let mut hist = Histogram::functional(2.0, 8.0); + + hist.accumulate(1024); + hist.accumulate(1024); + hist.accumulate(1116); + hist.accumulate(1448); + + let snap = snapshot(&hist); + + let expected_json = json!({ + "sum": 4612, + "values": { + "1024": 2, + "1116": 1, + "1217": 0, + "1327": 0, + "1448": 1, + "1579": 0, + }, + }); + + assert_eq!(expected_json, json!(snap)); + } +} diff --git a/third_party/rust/glean-core/src/metrics/uuid.rs b/third_party/rust/glean-core/src/metrics/uuid.rs new file mode 100644 index 0000000000..2bfe84cadb --- /dev/null +++ b/third_party/rust/glean-core/src/metrics/uuid.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use uuid::Uuid; + +use crate::error_recording::{record_error, ErrorType}; +use crate::metrics::Metric; +use crate::metrics::MetricType; +use crate::storage::StorageManager; +use crate::CommonMetricData; +use crate::Glean; + +/// An UUID metric. +/// +/// Stores UUID v4 (randomly generated) values. +#[derive(Clone, Debug)] +pub struct UuidMetric { + meta: CommonMetricData, +} + +impl MetricType for UuidMetric { + fn meta(&self) -> &CommonMetricData { + &self.meta + } + + fn meta_mut(&mut self) -> &mut CommonMetricData { + &mut self.meta + } +} + +// IMPORTANT: +// +// When changing this implementation, make sure all the operations are +// also declared in the related trait in `../traits/`. +impl UuidMetric { + /// Creates a new UUID metric + pub fn new(meta: CommonMetricData) -> Self { + Self { meta } + } + + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The [`Uuid`] to set the metric to. + pub fn set(&self, glean: &Glean, value: Uuid) { + if !self.should_record(glean) { + return; + } + + let s = value.to_string(); + let value = Metric::Uuid(s); + glean.storage().record(glean, &self.meta, &value) + } + + /// Sets to the specified value, from a string. + /// + /// This should only be used from FFI. When calling directly from Rust, it + /// is better to use [`set`](UuidMetric::set). + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + /// * `value` - The [`Uuid`] to set the metric to. 
+ pub fn set_from_str(&self, glean: &Glean, value: &str) { + if !self.should_record(glean) { + return; + } + + if let Ok(uuid) = uuid::Uuid::parse_str(&value) { + self.set(glean, uuid); + } else { + let msg = format!("Unexpected UUID value '{}'", value); + record_error(glean, &self.meta, ErrorType::InvalidValue, msg, None); + } + } + + /// Generates a new random [`Uuid`'] and sets the metric to it. + /// + /// # Arguments + /// + /// * `glean` - The Glean instance this metric belongs to. + pub fn generate_and_set(&self, storage: &Glean) -> Uuid { + let uuid = Uuid::new_v4(); + self.set(storage, uuid); + uuid + } + + /// Gets the stored Uuid value. + /// + /// # Arguments + /// + /// * `glean` - the Glean instance this metric belongs to. + /// * `storage_name` - the storage name to look into. + /// + /// # Returns + /// + /// The stored value or `None` if nothing stored. + pub(crate) fn get_value(&self, glean: &Glean, storage_name: &str) -> Option<Uuid> { + match StorageManager.snapshot_metric( + glean.storage(), + storage_name, + &self.meta().identifier(glean), + self.meta.lifetime, + ) { + Some(Metric::Uuid(uuid)) => Uuid::parse_str(&uuid).ok(), + _ => None, + } + } + + /// **Test-only API (exported for FFI purposes).** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + pub fn test_get_value(&self, glean: &Glean, storage_name: &str) -> Option<Uuid> { + self.get_value(glean, storage_name) + } +} diff --git a/third_party/rust/glean-core/src/ping/mod.rs b/third_party/rust/glean-core/src/ping/mod.rs new file mode 100644 index 0000000000..c1315c8b80 --- /dev/null +++ b/third_party/rust/glean-core/src/ping/mod.rs @@ -0,0 +1,392 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Ping collection, assembly & submission. 
+ +use std::fs::{create_dir_all, File}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use log::info; +use serde_json::{json, Value as JsonValue}; + +use crate::common_metric_data::{CommonMetricData, Lifetime}; +use crate::metrics::{CounterMetric, DatetimeMetric, Metric, MetricType, PingType, TimeUnit}; +use crate::storage::StorageManager; +use crate::util::{get_iso_time_string, local_now_with_offset}; +use crate::{ + Glean, Result, DELETION_REQUEST_PINGS_DIRECTORY, INTERNAL_STORAGE, PENDING_PINGS_DIRECTORY, +}; + +/// Collect a ping's data, assemble it into its full payload and store it on disk. +pub struct PingMaker; + +fn merge(a: &mut JsonValue, b: &JsonValue) { + match (a, b) { + (&mut JsonValue::Object(ref mut a), &JsonValue::Object(ref b)) => { + for (k, v) in b { + merge(a.entry(k.clone()).or_insert(JsonValue::Null), v); + } + } + (a, b) => { + *a = b.clone(); + } + } +} + +impl Default for PingMaker { + fn default() -> Self { + Self::new() + } +} + +impl PingMaker { + /// Creates a new [`PingMaker`]. + pub fn new() -> Self { + Self + } + + /// Gets, and then increments, the sequence number for a given ping. + /// + /// This is crate-internal exclusively for enabling the migration tests. 
+ pub(super) fn get_ping_seq(&self, glean: &Glean, storage_name: &str) -> usize { + // Sequence numbers are stored as a counter under a name that includes the storage name + let seq = CounterMetric::new(CommonMetricData { + name: format!("{}#sequence", storage_name), + // We don't need a category, the name is already unique + category: "".into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::User, + ..Default::default() + }); + + let current_seq = match StorageManager.snapshot_metric( + glean.storage(), + INTERNAL_STORAGE, + &seq.meta().identifier(glean), + seq.meta().lifetime, + ) { + Some(Metric::Counter(i)) => i, + _ => 0, + }; + + // Increase to next sequence id + seq.add(glean, 1); + + current_seq as usize + } + + /// Gets the formatted start and end times for this ping and update for the next ping. + fn get_start_end_times(&self, glean: &Glean, storage_name: &str) -> (String, String) { + let time_unit = TimeUnit::Minute; + + let start_time = DatetimeMetric::new( + CommonMetricData { + name: format!("{}#start", storage_name), + category: "".into(), + send_in_pings: vec![INTERNAL_STORAGE.into()], + lifetime: Lifetime::User, + ..Default::default() + }, + time_unit, + ); + + // "start_time" is the time the ping was generated the last time. + // If not available, we use the date the Glean object was initialized. + let start_time_data = start_time + .get_value(glean, INTERNAL_STORAGE) + .unwrap_or_else(|| glean.start_time()); + let end_time_data = local_now_with_offset(); + + // Update the start time with the current time. + start_time.set(glean, Some(end_time_data)); + + // Format the times. 
+ let start_time_data = get_iso_time_string(start_time_data, time_unit); + let end_time_data = get_iso_time_string(end_time_data, time_unit); + (start_time_data, end_time_data) + } + + fn get_ping_info(&self, glean: &Glean, storage_name: &str, reason: Option<&str>) -> JsonValue { + let (start_time, end_time) = self.get_start_end_times(glean, storage_name); + let mut map = json!({ + "seq": self.get_ping_seq(glean, storage_name), + "start_time": start_time, + "end_time": end_time, + }); + + if let Some(reason) = reason { + map.as_object_mut() + .unwrap() // safe unwrap, we created the object above + .insert("reason".to_string(), JsonValue::String(reason.to_string())); + }; + + // Get the experiment data, if available. + if let Some(experiment_data) = + StorageManager.snapshot_experiments_as_json(glean.storage(), INTERNAL_STORAGE) + { + map.as_object_mut() + .unwrap() // safe unwrap, we created the object above + .insert("experiments".to_string(), experiment_data); + }; + + map + } + + fn get_client_info(&self, glean: &Glean, include_client_id: bool) -> JsonValue { + // Add the "telemetry_sdk_build", which is the glean-core version. + let mut map = json!({ + "telemetry_sdk_build": crate::GLEAN_VERSION, + }); + + // Flatten the whole thing. + if let Some(client_info) = + StorageManager.snapshot_as_json(glean.storage(), "glean_client_info", true) + { + let client_info_obj = client_info.as_object().unwrap(); // safe unwrap, snapshot always returns an object. + for (_key, value) in client_info_obj { + merge(&mut map, value); + } + } else { + log::warn!("Empty client info data."); + } + + if !include_client_id { + // safe unwrap, we created the object above + map.as_object_mut().unwrap().remove("client_id"); + } + + json!(map) + } + + /// Build the metadata JSON to be persisted with a ping. + /// + /// Currently the only type of metadata we need to persist is the value of the `X-Debug-ID` header. 
+ /// + /// # Arguments + /// + /// * `glean` - the [`Glean`] instance to collect metadata from. + /// + /// # Returns + /// + /// A JSON object representing the metadata that needs to be persisted with this ping. + /// + /// The structure of the metadata json is: + /// + /// ```json + /// { + /// "headers": { + /// "X-Debug-ID": "test-tag" + /// } + /// } + /// ``` + fn get_metadata(&self, glean: &Glean) -> Option<JsonValue> { + let mut headers_map = json!({}); + + if let Some(debug_view_tag) = glean.debug_view_tag() { + headers_map + .as_object_mut() + .unwrap() // safe unwrap, we created the object above + .insert( + "X-Debug-ID".to_string(), + JsonValue::String(debug_view_tag.to_string()), + ); + } + + if let Some(source_tags) = glean.source_tags() { + headers_map + .as_object_mut() + .unwrap() // safe unwrap, we created the object above + .insert( + "X-Source-Tags".to_string(), + JsonValue::String(source_tags.join(",")), + ); + } + + // safe unwrap, we created the object above + if !headers_map.as_object().unwrap().is_empty() { + Some(json!({ + "headers": headers_map, + })) + } else { + None + } + } + + /// Collects a snapshot for the given ping from storage and attach required meta information. + /// + /// # Arguments + /// + /// * `glean` - the [`Glean`] instance to collect data from. + /// * `ping` - the ping to collect for. + /// * `reason` - an optional reason code to include in the ping. + /// + /// # Returns + /// + /// A fully assembled JSON representation of the ping payload. + /// If there is no data stored for the ping, `None` is returned. 
+ pub fn collect( + &self, + glean: &Glean, + ping: &PingType, + reason: Option<&str>, + ) -> Option<JsonValue> { + info!("Collecting {}", ping.name); + + let metrics_data = StorageManager.snapshot_as_json(glean.storage(), &ping.name, true); + let events_data = glean.event_storage().snapshot_as_json(&ping.name, true); + + let is_empty = metrics_data.is_none() && events_data.is_none(); + if !ping.send_if_empty && is_empty { + info!("Storage for {} empty. Bailing out.", ping.name); + return None; + } else if is_empty { + info!("Storage for {} empty. Ping will still be sent.", ping.name); + } + + let ping_info = self.get_ping_info(glean, &ping.name, reason); + let client_info = self.get_client_info(glean, ping.include_client_id); + + let mut json = json!({ + "ping_info": ping_info, + "client_info": client_info + }); + let json_obj = json.as_object_mut()?; + if let Some(metrics_data) = metrics_data { + json_obj.insert("metrics".to_string(), metrics_data); + } + if let Some(events_data) = events_data { + json_obj.insert("events".to_string(), events_data); + } + + Some(json) + } + + /// Collects a snapshot for the given ping from storage and attach required meta information. + /// + /// # Arguments + /// + /// * `glean` - the [`Glean`] instance to collect data from. + /// * `ping` - the ping to collect for. + /// * `reason` - an optional reason code to include in the ping. + /// + /// # Returns + /// + /// A fully assembled ping payload in a string encoded as JSON. + /// If there is no data stored for the ping, `None` is returned. + pub fn collect_string( + &self, + glean: &Glean, + ping: &PingType, + reason: Option<&str>, + ) -> Option<String> { + self.collect(glean, ping, reason) + .map(|ping| ::serde_json::to_string_pretty(&ping).unwrap()) + } + + /// Gets the path to a directory for ping storage. + /// + /// The directory will be created inside the `data_path`. + /// The `pings` directory (and its parents) is created if it does not exist. 
+ fn get_pings_dir(&self, data_path: &Path, ping_type: Option<&str>) -> std::io::Result<PathBuf> { + // Use a special directory for deletion-request pings + let pings_dir = match ping_type { + Some(ping_type) if ping_type == "deletion-request" => { + data_path.join(DELETION_REQUEST_PINGS_DIRECTORY) + } + _ => data_path.join(PENDING_PINGS_DIRECTORY), + }; + + create_dir_all(&pings_dir)?; + Ok(pings_dir) + } + + /// Gets path to a directory for temporary storage. + /// + /// The directory will be created inside the `data_path`. + /// The `tmp` directory (and its parents) is created if it does not exist. + fn get_tmp_dir(&self, data_path: &Path) -> std::io::Result<PathBuf> { + let pings_dir = data_path.join("tmp"); + create_dir_all(&pings_dir)?; + Ok(pings_dir) + } + + /// Stores a ping to disk in the pings directory. + pub fn store_ping( + &self, + glean: &Glean, + doc_id: &str, + ping_name: &str, + data_path: &Path, + url_path: &str, + ping_content: &JsonValue, + ) -> std::io::Result<()> { + let pings_dir = self.get_pings_dir(data_path, Some(ping_name))?; + let temp_dir = self.get_tmp_dir(data_path)?; + + // Write to a temporary location and then move when done, + // for transactional writes. + let temp_ping_path = temp_dir.join(doc_id); + let ping_path = pings_dir.join(doc_id); + + log::debug!("Storing ping '{}' at '{}'", doc_id, ping_path.display()); + + { + let mut file = File::create(&temp_ping_path)?; + file.write_all(url_path.as_bytes())?; + file.write_all(b"\n")?; + file.write_all(::serde_json::to_string(ping_content)?.as_bytes())?; + if let Some(metadata) = self.get_metadata(glean) { + file.write_all(b"\n")?; + file.write_all(::serde_json::to_string(&metadata)?.as_bytes())?; + } + } + + if let Err(e) = std::fs::rename(&temp_ping_path, &ping_path) { + log::warn!( + "Unable to move '{}' to '{}", + temp_ping_path.display(), + ping_path.display() + ); + return Err(e); + } + + Ok(()) + } + + /// Clears any pending pings in the queue. 
+ pub fn clear_pending_pings(&self, data_path: &Path) -> Result<()> { + let pings_dir = self.get_pings_dir(data_path, None)?; + + std::fs::remove_dir_all(&pings_dir)?; + create_dir_all(&pings_dir)?; + + log::debug!("All pending pings deleted"); + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::tests::new_glean; + + #[test] + fn sequence_numbers_should_be_reset_when_toggling_uploading() { + let (mut glean, _) = new_glean(None); + let ping_maker = PingMaker::new(); + + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + assert_eq!(1, ping_maker.get_ping_seq(&glean, "custom")); + + glean.set_upload_enabled(false); + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + + glean.set_upload_enabled(true); + assert_eq!(0, ping_maker.get_ping_seq(&glean, "custom")); + assert_eq!(1, ping_maker.get_ping_seq(&glean, "custom")); + } +} diff --git a/third_party/rust/glean-core/src/storage/mod.rs b/third_party/rust/glean-core/src/storage/mod.rs new file mode 100644 index 0000000000..144b37ff7b --- /dev/null +++ b/third_party/rust/glean-core/src/storage/mod.rs @@ -0,0 +1,257 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(non_upper_case_globals)] + +//! Storage snapshotting. + +use std::collections::HashMap; + +use serde_json::{json, Value as JsonValue}; + +use crate::database::Database; +use crate::metrics::Metric; +use crate::Lifetime; + +/// Snapshot metrics from the underlying database. +pub struct StorageManager; + +/// Labeled metrics are stored as `<metric id>/<label>`. +/// They need to go into a nested object in the final snapshot. +/// +/// We therefore extract the metric id and the label from the key and construct the new object or +/// add to it. 
fn snapshot_labeled_metrics(
    snapshot: &mut HashMap<String, HashMap<String, JsonValue>>,
    metric_id: &str,
    metric: &Metric,
) {
    // Split `<metric id>/<label>` at the first '/'.
    let mut parts = metric_id.splitn(2, '/');
    let base_id = parts.next().unwrap(); // Safe unwrap, the function is only called when the id does contain a '/'
    let label = parts.next().unwrap(); // Safe unwrap, the function is only called when the name does contain a '/'

    // Labeled metrics live in their own `labeled_<section>` ping section.
    let section = snapshot
        .entry(format!("labeled_{}", metric.ping_section()))
        .or_default();

    // One JSON object per metric id, holding one entry per label.
    let labels = section
        .entry(base_id.into())
        .or_insert_with(|| json!({}))
        .as_object_mut()
        .unwrap(); // safe unwrap, we constructed the object above
    labels.insert(label.into(), metric.as_json());
}

impl StorageManager {
    /// Snapshots the given store and optionally clear it.
    ///
    /// # Arguments
    ///
    /// * `storage` - the database to read from.
    /// * `store_name` - the store to snapshot.
    /// * `clear_store` - whether to clear the data after snapshotting.
    ///
    /// # Returns
    ///
    /// The stored data in a string encoded as (pretty-printed) JSON.
    /// If no data for the store exists, `None` is returned.
    pub fn snapshot(
        &self,
        storage: &Database,
        store_name: &str,
        clear_store: bool,
    ) -> Option<String> {
        let data = self.snapshot_as_json(storage, store_name, clear_store)?;
        Some(::serde_json::to_string_pretty(&data).unwrap())
    }

    /// Snapshots the given store and optionally clear it.
    ///
    /// # Arguments
    ///
    /// * `storage` - the database to read from.
    /// * `store_name` - the store to snapshot.
    /// * `clear_store` - whether to clear the data after snapshotting.
    ///
    /// # Returns
    ///
    /// A JSON representation of the stored data.
    /// If no data for the store exists, `None` is returned.
+ pub fn snapshot_as_json( + &self, + storage: &Database, + store_name: &str, + clear_store: bool, + ) -> Option<JsonValue> { + let mut snapshot: HashMap<String, HashMap<String, JsonValue>> = HashMap::new(); + + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + if metric_id.contains('/') { + snapshot_labeled_metrics(&mut snapshot, &metric_id, &metric); + } else { + let map = snapshot + .entry(metric.ping_section().into()) + .or_insert_with(HashMap::new); + map.insert(metric_id, metric.as_json()); + } + }; + + storage.iter_store_from(Lifetime::Ping, &store_name, None, &mut snapshotter); + storage.iter_store_from(Lifetime::Application, &store_name, None, &mut snapshotter); + storage.iter_store_from(Lifetime::User, &store_name, None, &mut snapshotter); + + if clear_store { + if let Err(e) = storage.clear_ping_lifetime_storage(store_name) { + log::warn!("Failed to clear lifetime storage: {:?}", e); + } + } + + if snapshot.is_empty() { + None + } else { + Some(json!(snapshot)) + } + } + + /// Gets the current value of a single metric identified by name. + /// + /// This look for a value in stores for all lifetimes. + /// + /// # Arguments + /// + /// * `storage` - The database to get data from. + /// * `store_name` - The store name to look into. + /// * `metric_id` - The full metric identifier. + /// + /// # Returns + /// + /// The decoded metric or `None` if no data is found. + pub fn snapshot_metric( + &self, + storage: &Database, + store_name: &str, + metric_id: &str, + metric_lifetime: Lifetime, + ) -> Option<Metric> { + let mut snapshot: Option<Metric> = None; + + let mut snapshotter = |id: &[u8], metric: &Metric| { + let id = String::from_utf8_lossy(id).into_owned(); + if id == metric_id { + snapshot = Some(metric.clone()) + } + }; + + storage.iter_store_from(metric_lifetime, &store_name, None, &mut snapshotter); + + snapshot + } + + /// Snapshots the experiments. 
+ /// + /// # Arguments + /// + /// * `storage` - The database to get data from. + /// * `store_name` - The store name to look into. + /// + /// # Returns + /// + /// A JSON representation of the experiment data, in the following format: + /// + /// ```json + /// { + /// "experiment-id": { + /// "branch": "branch-id", + /// "extra": { + /// "additional": "property", + /// // ... + /// } + /// } + /// } + /// ``` + /// + /// If no data for the store exists, `None` is returned. + pub fn snapshot_experiments_as_json( + &self, + storage: &Database, + store_name: &str, + ) -> Option<JsonValue> { + let mut snapshot: HashMap<String, JsonValue> = HashMap::new(); + + let mut snapshotter = |metric_id: &[u8], metric: &Metric| { + let metric_id = String::from_utf8_lossy(metric_id).into_owned(); + if metric_id.ends_with("#experiment") { + let name = metric_id.splitn(2, '#').next().unwrap(); // safe unwrap, first field of a split always valid + snapshot.insert(name.to_string(), metric.as_json()); + } + }; + + storage.iter_store_from(Lifetime::Application, store_name, None, &mut snapshotter); + + if snapshot.is_empty() { + None + } else { + Some(json!(snapshot)) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::metrics::ExperimentMetric; + use crate::Glean; + + // Experiment's API tests: the next test comes from glean-ac's + // ExperimentsStorageEngineTest.kt. 
+ #[test] + fn test_experiments_json_serialization() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean", true); + + let extra: HashMap<String, String> = [("test-key".into(), "test-value".into())] + .iter() + .cloned() + .collect(); + + let metric = ExperimentMetric::new(&glean, "some-experiment".to_string()); + + metric.set_active(&glean, "test-branch".to_string(), Some(extra)); + let snapshot = StorageManager + .snapshot_experiments_as_json(glean.storage(), "glean_internal_info") + .unwrap(); + assert_eq!( + json!({"some-experiment": {"branch": "test-branch", "extra": {"test-key": "test-value"}}}), + snapshot + ); + + metric.set_inactive(&glean); + + let empty_snapshot = + StorageManager.snapshot_experiments_as_json(glean.storage(), "glean_internal_info"); + assert!(empty_snapshot.is_none()); + } + + #[test] + fn test_experiments_json_serialization_empty() { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + let glean = Glean::with_options(&name, "org.mozilla.glean", true); + + let metric = ExperimentMetric::new(&glean, "some-experiment".to_string()); + + metric.set_active(&glean, "test-branch".to_string(), None); + let snapshot = StorageManager + .snapshot_experiments_as_json(glean.storage(), "glean_internal_info") + .unwrap(); + assert_eq!( + json!({"some-experiment": {"branch": "test-branch"}}), + snapshot + ); + + metric.set_inactive(&glean); + + let empty_snapshot = + StorageManager.snapshot_experiments_as_json(glean.storage(), "glean_internal_info"); + assert!(empty_snapshot.is_none()); + } +} diff --git a/third_party/rust/glean-core/src/system.rs b/third_party/rust/glean-core/src/system.rs new file mode 100644 index 0000000000..1a8c10a4c3 --- /dev/null +++ b/third_party/rust/glean-core/src/system.rs @@ -0,0 +1,81 @@ +// Copyright (c) 2017 The Rust Project Developers +// Licensed under the MIT License. 
+// Original license: +// https://github.com/RustSec/platforms-crate/blob/ebbd3403243067ba3096f31684557285e352b639/LICENSE-MIT +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//! Detect and expose `target_os` as a constant. +//! +//! Code adopted from the "platforms" crate: <https://github.com/RustSec/platforms-crate>. 
+ +#[cfg(target_os = "android")] +/// `target_os` when building this crate: `android` +pub const OS: &str = "Android"; + +#[cfg(target_os = "ios")] +/// `target_os` when building this crate: `ios` +pub const OS: &str = "iOS"; + +#[cfg(target_os = "linux")] +/// `target_os` when building this crate: `linux` +pub const OS: &str = "Linux"; + +#[cfg(target_os = "macos")] +/// `target_os` when building this crate: `macos` +pub const OS: &str = "Darwin"; + +#[cfg(target_os = "windows")] +/// `target_os` when building this crate: `windows` +pub const OS: &str = "Windows"; + +#[cfg(target_os = "freebsd")] +/// `target_os` when building this crate: `freebsd` +pub const OS: &str = "FreeBSD"; + +#[cfg(target_os = "netbsd")] +/// `target_os` when building this crate: `netbsd` +pub const OS: &str = "NetBSD"; + +#[cfg(target_os = "openbsd")] +/// `target_os` when building this crate: `openbsd` +pub const OS: &str = "OpenBSD"; + +#[cfg(target_os = "solaris")] +/// `target_os` when building this crate: `solaris` +pub const OS: &str = "Solaris"; + +#[cfg(not(any( + target_os = "android", + target_os = "ios", + target_os = "linux", + target_os = "macos", + target_os = "windows", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "solaris", +)))] +pub const OS: &str = "unknown"; diff --git a/third_party/rust/glean-core/src/traits/boolean.rs b/third_party/rust/glean-core/src/traits/boolean.rs new file mode 100644 index 0000000000..4443fc4e08 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/boolean.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// A description for the [`BooleanMetric`](crate::metrics::BooleanMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. 
+pub trait Boolean { + /// Sets to the specified boolean value. + /// + /// # Arguments + /// + /// * `value` - the value to set. + fn set(&self, value: bool); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a boolean. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<bool>; +} diff --git a/third_party/rust/glean-core/src/traits/counter.rs b/third_party/rust/glean-core/src/traits/counter.rs new file mode 100644 index 0000000000..673730fc2c --- /dev/null +++ b/third_party/rust/glean-core/src/traits/counter.rs @@ -0,0 +1,53 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`CounterMetric`](crate::metrics::CounterMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Counter { + /// Increases the counter by `amount`. + /// + /// # Arguments + /// + /// * `amount` - The amount to increase by. Should be positive. + /// + /// ## Notes + /// + /// Logs an error if the `amount` is 0 or negative. + fn add(&self, amount: i32); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. 
+ fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<i32>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/custom_distribution.rs b/third_party/rust/glean-core/src/traits/custom_distribution.rs new file mode 100644 index 0000000000..12e2ef3061 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/custom_distribution.rs @@ -0,0 +1,64 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the +/// [`CustomDistributionMetric`](crate::metrics::CustomDistributionMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait CustomDistribution { + /// Accumulates the provided signed samples in the metric. + /// + /// This is required so that the platform-specific code can provide us with + /// 64 bit signed integers if no `u64` comparable type is available. This + /// will take care of filtering and reporting errors for any provided negative + /// sample. + /// + /// # Arguments + /// + /// - `samples` - The vector holding the samples to be recorded by the metric. 
+ /// + /// ## Notes + /// + /// Discards any negative value in `samples` and report an + /// [`ErrorType::InvalidValue`](crate::ErrorType::InvalidValue) for each of + /// them. + fn accumulate_samples_signed(&self, samples: Vec<i64>); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored histogram. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<crate::metrics::DistributionData>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors recorded. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/datetime.rs b/third_party/rust/glean-core/src/traits/datetime.rs new file mode 100644 index 0000000000..78a9c02dce --- /dev/null +++ b/third_party/rust/glean-core/src/traits/datetime.rs @@ -0,0 +1,58 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +#![allow(clippy::too_many_arguments)] + +use crate::ErrorType; + +/// A description for the [`DatetimeMetric`](crate::metrics::DatetimeMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. 
+pub trait Datetime { + /// Sets the metric to a date/time, including the timezone offset. + /// + /// # Arguments + /// + /// * `value` - Some [`Datetime`](crate::metrics::Datetime), with offset, to + /// set the metric to. If [`None`], the current local time is + /// used. + fn set(&self, value: Option<crate::metrics::Datetime>); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a Datetime. + /// + /// The precision of this value is truncated to the `time_unit` precision. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<crate::metrics::Datetime>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/event.rs b/third_party/rust/glean-core/src/traits/event.rs new file mode 100644 index 0000000000..08277b8cf4 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/event.rs @@ -0,0 +1,129 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::collections::HashMap; +use std::convert::TryFrom; +use std::hash::Hash; + +use crate::event_database::RecordedEvent; +use crate::ErrorType; + +/// Extra keys for events. +/// +/// Extra keys need to be pre-defined and map to a string representation. +/// +/// For user-defined `EventMetric`s these will be defined as `enums`. +/// Each variant will correspond to an entry in the `ALLOWED_KEYS` list. +/// The Glean SDK requires the keys as strings for submission in pings, +/// whereas in code we want to provide users a type to work with +/// (e.g. to avoid typos or misuse of the API). +pub trait ExtraKeys: Hash + Eq + PartialEq + Copy { + /// List of allowed extra keys as strings. + const ALLOWED_KEYS: &'static [&'static str]; + + /// The index of the extra key. + /// + /// It corresponds to its position in the associated `ALLOWED_KEYS` list. + /// + /// *Note*: An index of `-1` indicates an invalid / non-existing extra key. + /// Invalid / non-existing extra keys will be recorded as an error. + /// This cannot happen for generated code. + fn index(self) -> i32; +} + +/// Default of no extra keys for events. +/// +/// An enum with no values for convenient use as the default set of extra keys +/// that an [`EventMetric`](crate::metrics::EventMetric) can accept. +/// +/// *Note*: There exist no values for this enum, it can never exist. +/// It is equivalent to the [`never / !` type](https://doc.rust-lang.org/std/primitive.never.html). +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] +pub enum NoExtraKeys {} + +impl ExtraKeys for NoExtraKeys { + const ALLOWED_KEYS: &'static [&'static str] = &[]; + + fn index(self) -> i32 { + // This index will never be used. + -1 + } +} + +/// The possible errors when parsing to an extra key. 
+pub enum EventRecordingError { + /// The id doesn't correspond to a valid extra key + InvalidId, + /// The value doesn't correspond to a valid extra key + InvalidExtraKey, +} + +impl TryFrom<i32> for NoExtraKeys { + type Error = EventRecordingError; + + fn try_from(_value: i32) -> Result<Self, Self::Error> { + Err(EventRecordingError::InvalidExtraKey) + } +} + +impl TryFrom<&str> for NoExtraKeys { + type Error = EventRecordingError; + + fn try_from(_value: &str) -> Result<Self, Self::Error> { + Err(EventRecordingError::InvalidExtraKey) + } +} + +/// A description for the [`EventMetric`](crate::metrics::EventMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Event { + /// The type of the allowed extra keys for this event. + type Extra: ExtraKeys; + + /// Records an event. + /// + /// # Arguments + /// + /// * `extra` - A [`HashMap`] of (key, value) pairs. The key is one of the allowed extra keys as + /// set in the metric definition. + /// If a wrong key is used or a value is larger than allowed, an error is reported + /// and no event is recorded. + fn record<M: Into<Option<HashMap<Self::Extra, String>>>>(&self, extra: M); + + /// **Exported for test purposes.** + /// + /// Get the vector of currently stored events for this event metric. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<Vec<RecordedEvent>>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. 
Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/jwe.rs b/third_party/rust/glean-core/src/traits/jwe.rs new file mode 100644 index 0000000000..a994f93edf --- /dev/null +++ b/third_party/rust/glean-core/src/traits/jwe.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// A description for the [`JweMetric`](crate::metrics::JweMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Jwe { + /// Sets to the specified JWE value. + /// + /// # Arguments + /// + /// * `value` - the [`compact representation`](https://tools.ietf.org/html/rfc7516#appendix-A.2.7) of a JWE value. + fn set_with_compact_representation<S: Into<String>>(&self, value: S); + + /// Builds a JWE value from its elements and set to it. + /// + /// # Arguments + /// + /// * `header` - the JWE Protected Header element. + /// * `key` - the JWE Encrypted Key element. + /// * `init_vector` - the JWE Initialization Vector element. + /// * `cipher_text` - the JWE Ciphertext element. + /// * `auth_tag` - the JWE Authentication Tag element. + fn set<S: Into<String>>(&self, header: S, key: S, init_vector: S, cipher_text: S, auth_tag: S); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. 
+ fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<String>; + + /// **Exported for test purposes.** + /// + /// Gets the currently stored JWE as a JSON String of the serialized value. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value_as_json_string<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<String>; +} diff --git a/third_party/rust/glean-core/src/traits/labeled.rs b/third_party/rust/glean-core/src/traits/labeled.rs new file mode 100644 index 0000000000..25ab573733 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/labeled.rs @@ -0,0 +1,46 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`LabeledMetric`](crate::metrics::LabeledMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Labeled<T> +where + T: Clone, +{ + /// Gets a specific metric for a given label. + /// + /// If a set of acceptable labels were specified in the `metrics.yaml` file, + /// and the given label is not in the set, it will be recorded under the special `OTHER_LABEL` label. + /// + /// If a set of acceptable labels was not specified in the `metrics.yaml` file, + /// only the first 16 unique labels will be used. + /// After that, any additional labels will be recorded under the special `OTHER_LABEL` label. + /// + /// Labels must be `snake_case` and less than 30 characters. + /// If an invalid label is used, the metric will be recorded in the special `OTHER_LABEL` label. 
+ fn get(&self, label: &str) -> T; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/memory_distribution.rs b/third_party/rust/glean-core/src/traits/memory_distribution.rs new file mode 100644 index 0000000000..93f0f83210 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/memory_distribution.rs @@ -0,0 +1,60 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::metrics::DistributionData; +use crate::ErrorType; + +/// A description for the +/// [`MemoryDistributionMetric`](crate::metrics::MemoryDistributionMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait MemoryDistribution { + /// Accumulates the provided sample in the metric. + /// + /// # Arguments + /// + /// * `sample` - The sample to be recorded by the metric. The sample is assumed to be in the + /// configured memory unit of the metric. + /// + /// ## Notes + /// + /// Values bigger than 1 Terabyte (2<sup>40</sup> bytes) are truncated + /// and an `ErrorType::InvalidValue` error is recorded. + fn accumulate(&self, sample: u64); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a DistributionData of the serialized value. + /// + /// This doesn't clear the stored value. 
+ /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<DistributionData>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors recorded. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/mod.rs b/third_party/rust/glean-core/src/traits/mod.rs new file mode 100644 index 0000000000..002411e8c3 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/mod.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Important: consider this module unstable / experimental. +//! +//! The different metric types supported by the Glean SDK to handle data. 
+ +mod boolean; +mod counter; +mod custom_distribution; +mod datetime; +mod event; +mod jwe; +mod labeled; +mod memory_distribution; +mod ping; +mod quantity; +mod string; +mod string_list; +mod timespan; +mod timing_distribution; +mod uuid; + +pub use self::boolean::Boolean; +pub use self::counter::Counter; +pub use self::custom_distribution::CustomDistribution; +pub use self::datetime::Datetime; +pub use self::event::Event; +pub use self::event::EventRecordingError; +pub use self::event::ExtraKeys; +pub use self::event::NoExtraKeys; +pub use self::jwe::Jwe; +pub use self::labeled::Labeled; +pub use self::memory_distribution::MemoryDistribution; +pub use self::ping::Ping; +pub use self::quantity::Quantity; +pub use self::string::String; +pub use self::string_list::StringList; +pub use self::timespan::Timespan; +pub use self::timing_distribution::TimingDistribution; +pub use self::uuid::Uuid; +pub use crate::histogram::HistogramType; diff --git a/third_party/rust/glean-core/src/traits/ping.rs b/third_party/rust/glean-core/src/traits/ping.rs new file mode 100644 index 0000000000..e94b3e72e7 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/ping.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// A description for the [`PingType`](crate::metrics::PingType) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Ping { + /// Submits the ping for eventual uploading + /// + /// # Arguments + /// + /// * `reason` - the reason the ping was triggered. Included in the + /// `ping_info.reason` part of the payload. 
+ fn submit(&self, reason: Option<&str>); +} diff --git a/third_party/rust/glean-core/src/traits/quantity.rs b/third_party/rust/glean-core/src/traits/quantity.rs new file mode 100644 index 0000000000..bab2a528c8 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/quantity.rs @@ -0,0 +1,53 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`QuantityMetric`](crate::metrics::QuantityMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Quantity { + /// Sets the value. Must be non-negative. + /// + /// # Arguments + /// + /// * `value` - The value. Must be non-negative. + /// + /// ## Notes + /// + /// Logs an error if the `value` is negative. + fn set(&self, value: i64); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<i64>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. 
+ fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/string.rs b/third_party/rust/glean-core/src/traits/string.rs new file mode 100644 index 0000000000..a40cb75a21 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/string.rs @@ -0,0 +1,56 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`StringMetric`](crate::metrics::StringMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait String { + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `value` - The string to set the metric to. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_LENGTH_VALUE` bytes and logs an error. + fn set<S: Into<std::string::String>>(&self, value: S); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a string. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<std::string::String>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. 
+ fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/string_list.rs b/third_party/rust/glean-core/src/traits/string_list.rs new file mode 100644 index 0000000000..967b941398 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/string_list.rs @@ -0,0 +1,66 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`StringListMetric`](crate::metrics::StringListMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait StringList { + /// Adds a new string to the list. + /// + /// # Arguments + /// + /// * `value` - The string to add. + /// + /// ## Notes + /// + /// Truncates the value if it is longer than `MAX_STRING_LENGTH` bytes and logs an error. + fn add<S: Into<String>>(&self, value: S); + + /// Sets to a specific list of strings. + /// + /// # Arguments + /// + /// * `value` - The list of strings to set the metric to. + /// + /// ## Notes + /// + /// If passed an empty list, records an error and returns. + /// Truncates the list if it is longer than `MAX_LIST_LENGTH` and logs an error. + /// Truncates any value in the list if it is longer than `MAX_STRING_LENGTH` and logs an error. + fn set(&self, value: Vec<String>); + + /// **Exported for test purposes.** + /// + /// Gets the currently-stored values. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. 
+ fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<Vec<String>>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors recorded. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/timespan.rs b/third_party/rust/glean-core/src/traits/timespan.rs new file mode 100644 index 0000000000..7cf41481c8 --- /dev/null +++ b/third_party/rust/glean-core/src/traits/timespan.rs @@ -0,0 +1,61 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`TimespanMetric`](crate::metrics::TimespanMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Timespan { + /// Starts tracking time for the provided metric. + /// + /// This uses an internal monotonic timer. + /// + /// This records an error if it's already tracking time (i.e. + /// [`start`](Timespan::start) was already called with no corresponding + /// [`stop`](Timespan::stop)): in that case the original start time will be + /// preserved. + fn start(&self); + + /// Stops tracking time for the provided metric. Sets the metric to the elapsed time. + /// + /// This will record an error if no [`start`](Timespan::start) was called. + fn stop(&self); + + /// Aborts a previous [`start`](Timespan::start) call. 
No error is recorded + /// if no [`start`](Timespan::start) was called. + fn cancel(&self); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as an integer. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<u64>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/timing_distribution.rs b/third_party/rust/glean-core/src/traits/timing_distribution.rs new file mode 100644 index 0000000000..1cb967c43a --- /dev/null +++ b/third_party/rust/glean-core/src/traits/timing_distribution.rs @@ -0,0 +1,83 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::metrics::DistributionData; +use crate::metrics::TimerId; +use crate::ErrorType; + +/// A description for the [`TimingDistributionMetric`](crate::metrics::TimingDistributionMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait TimingDistribution { + /// Starts tracking time for the provided metric. + /// + /// This records an error if it’s already tracking time (i.e. 
+ /// [`start`](TimingDistribution::start) was already called with no corresponding + /// [`stop_and_accumulate`](TimingDistribution::stop_and_accumulate)): in that case the + /// original start time will be preserved. + /// + /// # Returns + /// + /// A unique [`TimerId`] for the new timer. + fn start(&self) -> TimerId; + + /// Stops tracking time for the provided metric and associated timer id. + /// + /// Adds a count to the corresponding bucket in the timing distribution. + /// This will record an error if no [`start`](TimingDistribution::start) was + /// called. + /// + /// # Arguments + /// + /// * `id` - The [`TimerId`] to associate with this timing. This allows + /// for concurrent timing of events associated with different ids to the + /// same timing distribution metric. + fn stop_and_accumulate(&self, id: TimerId); + + /// Aborts a previous [`start`](TimingDistribution::start) call. No + /// error is recorded if no [`start`](TimingDistribution::start) was + /// called. + /// + /// # Arguments + /// + /// * `id` - The [`TimerId`] to associate with this timing. This allows + /// for concurrent timing of events associated with different ids to the + /// same timing distribution metric. + fn cancel(&self, id: TimerId); + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value of the metric. + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>( + &self, + ping_name: S, + ) -> Option<DistributionData>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. 
+ /// + /// # Returns + /// + /// The number of errors recorded. + fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/traits/uuid.rs b/third_party/rust/glean-core/src/traits/uuid.rs new file mode 100644 index 0000000000..61411009de --- /dev/null +++ b/third_party/rust/glean-core/src/traits/uuid.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::ErrorType; + +/// A description for the [`UuidMetric`](crate::metrics::UuidMetric) type. +/// +/// When changing this trait, make sure all the operations are +/// implemented in the related type in `../metrics/`. +pub trait Uuid { + /// Sets to the specified value. + /// + /// # Arguments + /// + /// * `value` - The [`Uuid`](uuid::Uuid) to set the metric to. + fn set(&self, value: uuid::Uuid); + + /// Generates a new random [`Uuid`](uuid::Uuid) and sets the metric to it. + fn generate_and_set(&self) -> uuid::Uuid; + + /// **Exported for test purposes.** + /// + /// Gets the currently stored value as a [`Uuid`](uuid::Uuid). + /// + /// This doesn't clear the stored value. + /// + /// # Arguments + /// + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + fn test_get_value<'a, S: Into<Option<&'a str>>>(&self, ping_name: S) -> Option<uuid::Uuid>; + + /// **Exported for test purposes.** + /// + /// Gets the number of recorded errors for the given metric and error type. + /// + /// # Arguments + /// + /// * `error` - The type of error + /// * `ping_name` - represents the optional name of the ping to retrieve the + /// metric for. Defaults to the first value in `send_in_pings`. + /// + /// # Returns + /// + /// The number of errors reported. 
+ fn test_get_num_recorded_errors<'a, S: Into<Option<&'a str>>>( + &self, + error: ErrorType, + ping_name: S, + ) -> i32; +} diff --git a/third_party/rust/glean-core/src/upload/directory.rs b/third_party/rust/glean-core/src/upload/directory.rs new file mode 100644 index 0000000000..6b4a58156b --- /dev/null +++ b/third_party/rust/glean-core/src/upload/directory.rs @@ -0,0 +1,421 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Pings directory processing utilities. + +use std::cmp::Ordering; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; + +use serde::Deserialize; +use uuid::Uuid; + +use super::request::HeaderMap; +use crate::{DELETION_REQUEST_PINGS_DIRECTORY, PENDING_PINGS_DIRECTORY}; + +/// A representation of the data extracted from a ping file, +/// this will contain the document_id, path, JSON encoded body of a ping and the persisted headers. +pub type PingPayload = (String, String, String, Option<HeaderMap>); + +/// A struct to hold the result of scanning all pings directories. +#[derive(Clone, Debug, Default)] +pub struct PingPayloadsByDirectory { + pub pending_pings: Vec<(u64, PingPayload)>, + pub deletion_request_pings: Vec<(u64, PingPayload)>, +} + +impl PingPayloadsByDirectory { + /// Extends the data of this instance of PingPayloadsByDirectory + /// with the data from another instance of PingPayloadsByDirectory. + pub fn extend(&mut self, other: PingPayloadsByDirectory) { + self.pending_pings.extend(other.pending_pings); + self.deletion_request_pings + .extend(other.deletion_request_pings); + } + + // Get the sum of the number of deletion request and regular pending pings. + pub fn len(&self) -> usize { + self.pending_pings.len() + self.deletion_request_pings.len() + } +} + +/// Gets the file name from a path as a &str. 
+/// +/// # Panics +/// +/// Won't panic if not able to get file name. +fn get_file_name_as_str(path: &Path) -> Option<&str> { + match path.file_name() { + None => { + log::warn!("Error getting file name from path: {}", path.display()); + None + } + Some(file_name) => { + let file_name = file_name.to_str(); + if file_name.is_none() { + log::warn!("File name is not valid unicode: {}", path.display()); + } + file_name + } + } +} + +/// Processes a ping's metadata. +/// +/// The metadata is an optional third line in the ping file, +/// currently it contains only additional headers to be added to each ping request. +/// Therefore, we will process the contents of this line +/// and return a HeaderMap of the persisted headers. +fn process_metadata(path: &str, metadata: &str) -> Option<HeaderMap> { + #[derive(Deserialize)] + struct PingMetadata { + pub headers: HeaderMap, + } + + if let Ok(metadata) = serde_json::from_str::<PingMetadata>(metadata) { + return Some(metadata.headers); + } else { + log::warn!("Error while parsing ping metadata: {}", path); + } + None +} + +/// Manages the pings directories. +#[derive(Debug, Clone)] +pub struct PingDirectoryManager { + /// Path to the pending pings directory. + pending_pings_dir: PathBuf, + /// Path to the deletion-request pings directory. + deletion_request_pings_dir: PathBuf, +} + +impl PingDirectoryManager { + /// Creates a new directory manager. + /// + /// # Arguments + /// + /// * `data_path` - Path to the pending pings directory. + pub fn new<P: Into<PathBuf>>(data_path: P) -> Self { + let data_path = data_path.into(); + Self { + pending_pings_dir: data_path.join(PENDING_PINGS_DIRECTORY), + deletion_request_pings_dir: data_path.join(DELETION_REQUEST_PINGS_DIRECTORY), + } + } + + /// Attempts to delete a ping file. + /// + /// # Arguments + /// + /// * `uuid` - The UUID of the ping file to be deleted + /// + /// # Returns + /// + /// Whether the file was successfully deleted. 
+ /// + /// # Panics + /// + /// Won't panic if unable to delete the file. + pub fn delete_file(&self, uuid: &str) -> bool { + let path = match self.get_file_path(uuid) { + Some(path) => path, + None => { + log::warn!("Cannot find ping file to delete {}", uuid); + return false; + } + }; + + match fs::remove_file(&path) { + Err(e) => { + log::warn!("Error deleting file {}. {}", path.display(), e); + return false; + } + _ => log::info!("File was deleted {}", path.display()), + }; + + true + } + + /// Reads a ping file and returns the data from it. + /// + /// If the file is not properly formatted, it will be deleted and `None` will be returned. + /// + /// # Arguments + /// + /// * `document_id` - The UUID of the ping file to be processed + pub fn process_file(&self, document_id: &str) -> Option<PingPayload> { + let path = match self.get_file_path(document_id) { + Some(path) => path, + None => { + log::warn!("Cannot find ping file to process {}", document_id); + return None; + } + }; + let file = match File::open(&path) { + Ok(file) => file, + Err(e) => { + log::warn!("Error reading ping file {}. {}", path.display(), e); + return None; + } + }; + + log::info!("Processing ping at: {}", path.display()); + + // The way the ping file is structured: + // first line should always have the path, + // second line should have the body with the ping contents in JSON format + // and third line might contain ping metadata e.g. additional headers. + let mut lines = BufReader::new(file).lines(); + if let (Some(Ok(path)), Some(Ok(body)), Ok(metadata)) = + (lines.next(), lines.next(), lines.next().transpose()) + { + let headers = metadata.map(|m| process_metadata(&path, &m)).flatten(); + return Some((document_id.into(), path, body, headers)); + } else { + log::warn!( + "Error processing ping file: {}. Ping file is not formatted as expected.", + document_id + ); + } + self.delete_file(document_id); + None + } + + /// Processes both ping directories. 
+ pub fn process_dirs(&self) -> PingPayloadsByDirectory { + PingPayloadsByDirectory { + pending_pings: self.process_dir(&self.pending_pings_dir), + deletion_request_pings: self.process_dir(&self.deletion_request_pings_dir), + } + } + + /// Processes one of the pings directory and return a vector with the ping data + /// corresponding to each valid ping file in the directory. + /// This vector will be ordered by file `modified_date`. + /// + /// Any files that don't match the UUID regex will be deleted + /// to prevent files from polluting the pings directory. + /// + /// # Returns + /// + /// A vector of tuples with the file size and payload of each ping file in the directory. + fn process_dir(&self, dir: &Path) -> Vec<(u64, PingPayload)> { + log::trace!("Processing persisted pings."); + + let entries = match dir.read_dir() { + Ok(entries) => entries, + Err(_) => { + // This may error simply because the directory doesn't exist, + // which is expected if no pings were stored yet. + return Vec::new(); + } + }; + + let mut pending_pings: Vec<_> = entries + .filter_map(|entry| entry.ok()) + .filter_map(|entry| { + let path = entry.path(); + if let Some(file_name) = get_file_name_as_str(&path) { + // Delete file if it doesn't match the pattern. + if Uuid::parse_str(file_name).is_err() { + log::warn!("Pattern mismatch. Deleting {}", path.display()); + self.delete_file(file_name); + return None; + } + if let Some(data) = self.process_file(file_name) { + let metadata = match fs::metadata(&path) { + Ok(metadata) => metadata, + Err(e) => { + // There's a rare case where this races against a parallel deletion + // of all pending ping files. + // This could therefore fail, in which case we don't care about the + // result and can ignore the ping, it's already been deleted. 
+ log::warn!( + "Unable to read metadata for file: {}, error: {:?}", + path.display(), + e + ); + return None; + } + }; + return Some((metadata, data)); + } + }; + None + }) + .collect(); + + // This will sort the pings by date in ascending order (oldest -> newest). + pending_pings.sort_by(|(a, _), (b, _)| { + // We might not be able to get the modified date for a given file, + // in which case we just put it at the end. + if let (Ok(a), Ok(b)) = (a.modified(), b.modified()) { + a.cmp(&b) + } else { + Ordering::Less + } + }); + + pending_pings + .into_iter() + .map(|(metadata, data)| (metadata.len(), data)) + .collect() + } + + /// Gets the path for a ping file based on its document_id. + /// + /// Will look for files in each ping directory until something is found. + /// If nothing is found, returns `None`. + fn get_file_path(&self, document_id: &str) -> Option<PathBuf> { + for dir in [&self.pending_pings_dir, &self.deletion_request_pings_dir].iter() { + let path = dir.join(document_id); + if path.exists() { + return Some(path); + } + } + None + } +} + +#[cfg(test)] +mod test { + use std::fs::File; + + use super::*; + use crate::metrics::PingType; + use crate::tests::new_glean; + + #[test] + fn doesnt_panic_if_no_pending_pings_directory() { + let dir = tempfile::tempdir().unwrap(); + let directory_manager = PingDirectoryManager::new(dir.path()); + + // Verify that processing the directory didn't panic + let data = directory_manager.process_dirs(); + assert_eq!(data.pending_pings.len(), 0); + assert_eq!(data.deletion_request_pings.len(), 0); + } + + #[test] + fn gets_correct_data_from_valid_ping_file() { + let (mut glean, dir) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping to populate the pending_pings directory + glean.submit_ping(&ping_type, None).unwrap(); + + let directory_manager = PingDirectoryManager::new(dir.path()); + + // Try 
/// Files in the pending pings directory whose name is not a UUID
/// must be removed from disk and must not show up in the scan result.
#[test]
fn non_uuid_files_are_deleted_and_ignored() {
    let (mut glean, dir) = new_glean(None);

    // Register a ping for testing
    let ping_type = PingType::new("test", true, true, vec![]);
    glean.register_ping_type(&ping_type);

    // Submit the ping to populate the pending_pings directory
    glean.submit_ping(&ping_type, None).unwrap();

    let directory_manager = PingDirectoryManager::new(dir.path());

    // Drop a file with an invalid name next to the real ping.
    let not_uuid_path = dir
        .path()
        .join(PENDING_PINGS_DIRECTORY)
        .join("not-uuid-file-name.txt");
    File::create(&not_uuid_path).unwrap();

    // Try and process the pings directories
    let data = directory_manager.process_dirs();

    // Verify there is just the one request
    assert_eq!(data.pending_pings.len(), 1);
    assert_eq!(data.deletion_request_pings.len(), 0);

    // Verify request was returned for the "test" ping
    let ping = &data.pending_pings[0].1;
    let request_ping_type = ping.1.split('/').nth(3).unwrap();
    assert_eq!(request_ping_type, "test");

    // Verify that file was indeed deleted
    assert!(!not_uuid_path.exists());
}
+ .join(Uuid::new_v4().to_string()); + File::create(&wrong_contents_file_path).unwrap(); + + // Try and process the pings directories + let data = directory_manager.process_dirs(); + + // Verify there is just the one request + assert_eq!(data.pending_pings.len(), 1); + assert_eq!(data.deletion_request_pings.len(), 0); + + // Verify request was returned for the "test" ping + let ping = &data.pending_pings[0].1; + let request_ping_type = ping.1.split('/').nth(3).unwrap(); + assert_eq!(request_ping_type, "test"); + + // Verify that file was indeed deleted + assert!(!wrong_contents_file_path.exists()); + } + + #[test] + fn takes_deletion_request_pings_into_account_while_processing() { + let (glean, dir) = new_glean(None); + + // Submit a deletion request ping to populate deletion request folder. + glean + .internal_pings + .deletion_request + .submit(&glean, None) + .unwrap(); + + let directory_manager = PingDirectoryManager::new(dir.path()); + + // Try and process the pings directories + let data = directory_manager.process_dirs(); + + assert_eq!(data.pending_pings.len(), 0); + assert_eq!(data.deletion_request_pings.len(), 1); + + // Verify request was returned for the "deletion-request" ping + let ping = &data.deletion_request_pings[0].1; + let request_ping_type = ping.1.split('/').nth(3).unwrap(); + assert_eq!(request_ping_type, "deletion-request"); + } +} diff --git a/third_party/rust/glean-core/src/upload/mod.rs b/third_party/rust/glean-core/src/upload/mod.rs new file mode 100644 index 0000000000..b240ec74cf --- /dev/null +++ b/third_party/rust/glean-core/src/upload/mod.rs @@ -0,0 +1,1550 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Manages the pending pings queue and directory. +//! +//! * Keeps track of pending pings, loading any unsent ping from disk on startup; +//! 
/// Counts events within fixed-length intervals and reports
/// when the maximum count for the current interval has been reached.
#[derive(Debug)]
struct RateLimiter {
    /// The instant the current interval has started.
    started: Option<Instant>,
    /// The count for the current interval.
    count: u32,
    /// The duration of each interval.
    interval: Duration,
    /// The maximum count per interval.
    max_count: u32,
}

/// An enum to represent the current state of the RateLimiter.
#[derive(Debug, PartialEq)]
enum RateLimiterState {
    /// The RateLimiter has not reached the maximum count and is still incrementing.
    Incrementing,
    /// The RateLimiter has reached the maximum count for the current interval.
    ///
    /// This variant contains the remaining time (in milliseconds)
    /// until the rate limiter is not throttled anymore.
    Throttled(u64),
}

impl RateLimiter {
    /// Creates a rate limiter allowing `max_count` increments per `interval`.
    ///
    /// The first interval only starts on the first call to `get_state`.
    pub fn new(interval: Duration, max_count: u32) -> Self {
        Self {
            started: None,
            count: 0,
            interval,
            max_count,
        }
    }

    /// Starts a new interval now, with a count of zero.
    fn reset(&mut self) {
        self.started = Some(Instant::now());
        self.count = 0;
    }

    /// Time elapsed since the current interval started.
    ///
    /// Returns a zero duration (instead of panicking) if no interval was started yet.
    fn elapsed(&self) -> Duration {
        self.started
            .map_or_else(|| Duration::from_secs(0), |started| started.elapsed())
    }

    // The counter should reset if
    //
    // 1. It has never started;
    // 2. It has been started more than the interval time ago.
    fn should_reset(&self) -> bool {
        match self.started {
            None => true,
            Some(started) => started.elapsed() > self.interval,
        }
    }

    /// Milliseconds left in the current interval after `elapsed` has passed.
    ///
    /// Saturates at zero: the clock keeps running between the reset check and this
    /// computation, so `elapsed` may already exceed the interval and a plain
    /// subtraction would underflow.
    fn remaining_millis(&self, elapsed: Duration) -> u64 {
        let remaining = self
            .interval
            .checked_sub(elapsed)
            .unwrap_or_else(|| Duration::from_secs(0));

        remaining
            .as_millis()
            .try_into()
            // Only reachable for intervals that don't fit into `u64` milliseconds.
            .unwrap_or_else(|_| self.interval.as_secs().saturating_mul(1000))
    }

    /// Tries to increment the internal counter.
    ///
    /// # Returns
    ///
    /// The current state of the RateLimiter: `Incrementing` if the counter was incremented,
    /// `Throttled` with the remaining wait time if the maximum count was already reached.
    pub fn get_state(&mut self) -> RateLimiterState {
        if self.should_reset() {
            self.reset();
        }

        if self.count == self.max_count {
            return RateLimiterState::Throttled(self.remaining_millis(self.elapsed()));
        }

        self.count += 1;
        RateLimiterState::Incrementing
    }
}
+ Upload(PingRequest), + /// A flag signaling that the pending pings directories are not done being processed, + /// thus the requester should wait and come back later. + /// + /// Contains the amount of time in milliseconds + /// the requester should wait before requesting a new task. + Wait(u64), + /// A flag signaling that requester doesn't need to request any more upload tasks at this moment. + /// + /// There are three possibilities for this scenario: + /// * Pending pings queue is empty, no more pings to request; + /// * Requester has gotten more than MAX_WAIT_ATTEMPTS (3, by default) `PingUploadTask::Wait` responses in a row; + /// * Requester has reported more than MAX_RECOVERABLE_FAILURES_PER_UPLOADING_WINDOW + /// recoverable upload failures on the same uploading window (see below) + /// and should stop requesting at this moment. + /// + /// An "uploading window" starts when a requester gets a new + /// `PingUploadTask::Upload(PingRequest)` response and finishes when they + /// finally get a `PingUploadTask::Done` or `PingUploadTask::Wait` response. + Done, +} + +impl PingUploadTask { + /// Whether the current task is an upload task. + pub fn is_upload(&self) -> bool { + matches!(self, PingUploadTask::Upload(_)) + } + + /// Whether the current task is wait task. + pub fn is_wait(&self) -> bool { + matches!(self, PingUploadTask::Wait(_)) + } +} + +/// Manages the pending pings queue and directory. +#[derive(Debug)] +pub struct PingUploadManager { + /// A FIFO queue storing a `PingRequest` for each pending ping. + queue: RwLock<VecDeque<PingRequest>>, + /// A manager for the pending pings directories. + directory_manager: PingDirectoryManager, + /// A flag signaling if we are done processing the pending pings directories. + processed_pending_pings: Arc<AtomicBool>, + /// A vector to store the pending pings processed off-thread. + cached_pings: Arc<RwLock<PingPayloadsByDirectory>>, + /// The number of upload failures for the current uploading window. 
+ recoverable_failure_count: AtomicU32, + /// The number or times in a row a user has received a `PingUploadTask::Wait` response. + wait_attempt_count: AtomicU32, + /// A ping counter to help rate limit the ping uploads. + /// + /// To keep resource usage in check, + /// we may want to limit the amount of pings sent in a given interval. + rate_limiter: Option<RwLock<RateLimiter>>, + /// The name of the programming language used by the binding creating this instance of PingUploadManager. + /// + /// This will be used to build the value User-Agent header for each ping request. + language_binding_name: String, + /// Metrics related to ping uploading. + upload_metrics: UploadMetrics, + /// Policies for ping storage, uploading and requests. + policy: Policy, +} + +impl PingUploadManager { + /// Creates a new PingUploadManager. + /// + /// # Arguments + /// + /// * `data_path` - Path to the pending pings directory. + /// * `language_binding_name` - The name of the language binding calling this managers instance. + /// + /// # Panics + /// + /// Will panic if unable to spawn a new thread. + pub fn new<P: Into<PathBuf>>(data_path: P, language_binding_name: &str) -> Self { + Self { + queue: RwLock::new(VecDeque::new()), + directory_manager: PingDirectoryManager::new(data_path), + processed_pending_pings: Arc::new(AtomicBool::new(false)), + cached_pings: Arc::new(RwLock::new(PingPayloadsByDirectory::default())), + recoverable_failure_count: AtomicU32::new(0), + wait_attempt_count: AtomicU32::new(0), + rate_limiter: None, + language_binding_name: language_binding_name.into(), + upload_metrics: UploadMetrics::new(), + policy: Policy::default(), + } + } + + /// Spawns a new thread and processes the pending pings directories, + /// filling up the queue with whatever pings are in there. 
+ /// + /// # Returns + /// + /// The `JoinHandle` to the spawned thread + pub fn scan_pending_pings_directories(&self) -> std::thread::JoinHandle<()> { + let local_manager = self.directory_manager.clone(); + let local_cached_pings = self.cached_pings.clone(); + let local_flag = self.processed_pending_pings.clone(); + thread::Builder::new() + .name("glean.ping_directory_manager.process_dir".to_string()) + .spawn(move || { + let mut local_cached_pings = local_cached_pings + .write() + .expect("Can't write to pending pings cache."); + local_cached_pings.extend(local_manager.process_dirs()); + local_flag.store(true, Ordering::SeqCst); + }) + .expect("Unable to spawn thread to process pings directories.") + } + + /// Creates a new upload manager with no limitations, for tests. + #[cfg(test)] + pub fn no_policy<P: Into<PathBuf>>(data_path: P) -> Self { + let mut upload_manager = Self::new(data_path, "Test"); + + // Disable all policies for tests, if necessary individuals tests can re-enable them. + upload_manager.policy.set_max_recoverable_failures(None); + upload_manager.policy.set_max_wait_attempts(None); + upload_manager.policy.set_max_ping_body_size(None); + upload_manager + .policy + .set_max_pending_pings_directory_size(None); + upload_manager.policy.set_max_pending_pings_count(None); + + // When building for tests, always scan the pending pings directories and do it sync. + upload_manager + .scan_pending_pings_directories() + .join() + .unwrap(); + + upload_manager + } + + fn processed_pending_pings(&self) -> bool { + self.processed_pending_pings.load(Ordering::SeqCst) + } + + fn recoverable_failure_count(&self) -> u32 { + self.recoverable_failure_count.load(Ordering::SeqCst) + } + + fn wait_attempt_count(&self) -> u32 { + self.wait_attempt_count.load(Ordering::SeqCst) + } + + /// Attempts to build a ping request from a ping file payload. 
+ /// + /// Returns the `PingRequest` or `None` if unable to build, + /// in which case it will delete the ping file and record an error. + fn build_ping_request( + &self, + glean: &Glean, + document_id: &str, + path: &str, + body: &str, + headers: Option<HeaderMap>, + ) -> Option<PingRequest> { + let mut request = PingRequest::builder( + &self.language_binding_name, + self.policy.max_ping_body_size(), + ) + .document_id(document_id) + .path(path) + .body(body); + + if let Some(headers) = headers { + request = request.headers(headers); + } + + match request.build() { + Ok(request) => Some(request), + Err(e) => { + log::warn!("Error trying to build ping request: {}", e); + self.directory_manager.delete_file(&document_id); + + // Record the error. + // Currently the only possible error is PingBodyOverflow. + if let ErrorKind::PingBodyOverflow(s) = e.kind() { + self.upload_metrics + .discarded_exceeding_pings_size + .accumulate(glean, *s as u64 / 1024); + } + + None + } + } + } + + fn enqueue_ping( + &self, + glean: &Glean, + document_id: &str, + path: &str, + body: &str, + headers: Option<HeaderMap>, + ) { + let mut queue = self + .queue + .write() + .expect("Can't write to pending pings queue."); + + // Checks if a ping with this `document_id` is already enqueued. + if queue + .iter() + .any(|request| request.document_id == document_id) + { + log::warn!( + "Attempted to enqueue a duplicate ping {} at {}.", + document_id, + path + ); + return; + } + + log::trace!("Enqueuing ping {} at {}", document_id, path); + if let Some(request) = self.build_ping_request(glean, document_id, path, body, headers) { + queue.push_back(request) + } + } + + /// Enqueues pings that might have been cached. + /// + /// The size of the PENDING_PINGS_DIRECTORY directory will be calculated + /// (by accumulating each ping's size in that directory) + /// and in case we exceed the quota, defined by the `quota` arg, + /// outstanding pings get deleted and are not enqueued. 
+ /// + /// The size of the DELETION_REQUEST_PINGS_DIRECTORY will not be calculated + /// and no deletion-request pings will be deleted. Deletion request pings + /// are not very common and usually don't contain any data, + /// we don't expect that directory to ever reach quota. + /// Most importantly, we don't want to ever delete deletion-request pings. + /// + /// # Arguments + /// + /// * `glean` - The Glean object holding the database. + fn enqueue_cached_pings(&self, glean: &Glean) { + let mut cached_pings = self + .cached_pings + .write() + .expect("Can't write to pending pings cache."); + + if cached_pings.len() > 0 { + let mut pending_pings_directory_size: u64 = 0; + let mut pending_pings_count = 0; + let mut deleting = false; + + let total = cached_pings.pending_pings.len() as u64; + self.upload_metrics + .pending_pings + .add(glean, total.try_into().unwrap_or(0)); + + if total > self.policy.max_pending_pings_count() { + log::warn!( + "More than {} pending pings in the directory, will delete {} old pings.", + self.policy.max_pending_pings_count(), + total - self.policy.max_pending_pings_count() + ); + } + + // The pending pings vector is sorted by date in ascending order (oldest -> newest). + // We need to calculate the size of the pending pings directory + // and delete the **oldest** pings in case quota is reached. + // Thus, we reverse the order of the pending pings vector, + // so that we iterate in descending order (newest -> oldest). + cached_pings.pending_pings.reverse(); + cached_pings.pending_pings.retain(|(file_size, (document_id, _, _, _))| { + pending_pings_count += 1; + pending_pings_directory_size += file_size; + + // We don't want to spam the log for every ping over the quota. 
+ if !deleting && pending_pings_directory_size > self.policy.max_pending_pings_directory_size() { + log::warn!( + "Pending pings directory has reached the size quota of {} bytes, outstanding pings will be deleted.", + self.policy.max_pending_pings_directory_size() + ); + deleting = true; + } + + // Once we reach the number of allowed pings we start deleting, + // no matter what size. + // We already log this before the loop. + if pending_pings_count > self.policy.max_pending_pings_count() { + deleting = true; + } + + if deleting && self.directory_manager.delete_file(&document_id) { + self.upload_metrics + .deleted_pings_after_quota_hit + .add(glean, 1); + return false; + } + + true + }); + // After calculating the size of the pending pings directory, + // we record the calculated number and reverse the pings array back for enqueueing. + cached_pings.pending_pings.reverse(); + self.upload_metrics + .pending_pings_directory_size + .accumulate(glean, pending_pings_directory_size as u64 / 1024); + + // Enqueue the remaining pending pings and + // enqueue all deletion-request pings. + let deletion_request_pings = cached_pings.deletion_request_pings.drain(..); + for (_, (document_id, path, body, headers)) in deletion_request_pings { + self.enqueue_ping(glean, &document_id, &path, &body, headers); + } + let pending_pings = cached_pings.pending_pings.drain(..); + for (_, (document_id, path, body, headers)) in pending_pings { + self.enqueue_ping(glean, &document_id, &path, &body, headers); + } + } + } + + /// Adds rate limiting capability to this upload manager. + /// + /// The rate limiter will limit the amount of calls to `get_upload_task` per interval. + /// + /// Setting this will restart count and timer in case there was a previous rate limiter set + /// (e.g. if we have reached the current limit and call this function, we start counting again + /// and the caller is allowed to asks for tasks). 
+ /// + /// # Arguments + /// + /// * `interval` - the amount of seconds in each rate limiting window. + /// * `max_tasks` - the maximum amount of task requests allowed per interval. + pub fn set_rate_limiter(&mut self, interval: u64, max_tasks: u32) { + self.rate_limiter = Some(RwLock::new(RateLimiter::new( + Duration::from_secs(interval), + max_tasks, + ))); + } + + /// Reads a ping file, creates a `PingRequest` and adds it to the queue. + /// + /// Duplicate requests won't be added. + /// + /// # Arguments + /// + /// * `glean` - The Glean object holding the database. + /// * `document_id` - The UUID of the ping in question. + pub fn enqueue_ping_from_file(&self, glean: &Glean, document_id: &str) { + if let Some((doc_id, path, body, headers)) = + self.directory_manager.process_file(document_id) + { + self.enqueue_ping(glean, &doc_id, &path, &body, headers) + } + } + + /// Clears the pending pings queue, leaves the deletion-request pings. + pub fn clear_ping_queue(&self) -> RwLockWriteGuard<'_, VecDeque<PingRequest>> { + log::trace!("Clearing ping queue"); + let mut queue = self + .queue + .write() + .expect("Can't write to pending pings queue."); + + queue.retain(|ping| ping.is_deletion_request()); + log::trace!( + "{} pings left in the queue (only deletion-request expected)", + queue.len() + ); + queue + } + + fn get_upload_task_internal(&self, glean: &Glean, log_ping: bool) -> PingUploadTask { + // Helper to decide whether to return PingUploadTask::Wait or PingUploadTask::Done. + // + // We want to limit the amount of PingUploadTask::Wait returned in a row, + // in case we reach MAX_WAIT_ATTEMPTS we want to actually return PingUploadTask::Done. 
+ let wait_or_done = |time: u64| { + self.wait_attempt_count.fetch_add(1, Ordering::SeqCst); + if self.wait_attempt_count() > self.policy.max_wait_attempts() { + PingUploadTask::Done + } else { + PingUploadTask::Wait(time) + } + }; + + if !self.processed_pending_pings() { + log::info!( + "Tried getting an upload task, but processing is ongoing. Will come back later." + ); + return wait_or_done(WAIT_TIME_FOR_PING_PROCESSING); + } + + // This is a no-op in case there are no cached pings. + self.enqueue_cached_pings(glean); + + if self.recoverable_failure_count() >= self.policy.max_recoverable_failures() { + log::warn!( + "Reached maximum recoverable failures for the current uploading window. You are done." + ); + return PingUploadTask::Done; + } + + let mut queue = self + .queue + .write() + .expect("Can't write to pending pings queue."); + match queue.front() { + Some(request) => { + if let Some(rate_limiter) = &self.rate_limiter { + let mut rate_limiter = rate_limiter + .write() + .expect("Can't write to the rate limiter."); + if let RateLimiterState::Throttled(remaining) = rate_limiter.get_state() { + log::info!( + "Tried getting an upload task, but we are throttled at the moment." + ); + return wait_or_done(remaining); + } + } + + log::info!( + "New upload task with id {} (path: {})", + request.document_id, + request.path + ); + + if log_ping { + if let Some(body) = request.pretty_body() { + chunked_log_info(&request.path, &body); + } else { + chunked_log_info(&request.path, "<invalid ping payload>"); + } + } + + PingUploadTask::Upload(queue.pop_front().unwrap()) + } + None => { + log::info!("No more pings to upload! You are done."); + PingUploadTask::Done + } + } + } + + /// Gets the next `PingUploadTask`. + /// + /// # Arguments + /// + /// * `glean` - The Glean object holding the database. + /// * `log_ping` - Whether to log the ping before returning. + /// + /// # Returns + /// + /// The next [`PingUploadTask`](enum.PingUploadTask.html). 
+ pub fn get_upload_task(&self, glean: &Glean, log_ping: bool) -> PingUploadTask { + let task = self.get_upload_task_internal(glean, log_ping); + + if !task.is_wait() && self.wait_attempt_count() > 0 { + self.wait_attempt_count.store(0, Ordering::SeqCst); + } + + if !task.is_upload() && self.recoverable_failure_count() > 0 { + self.recoverable_failure_count.store(0, Ordering::SeqCst); + } + + task + } + + /// Processes the response from an attempt to upload a ping. + /// + /// Based on the HTTP status of said response, + /// the possible outcomes are: + /// + /// * **200 - 299 Success** + /// Any status on the 2XX range is considered a succesful upload, + /// which means the corresponding ping file can be deleted. + /// _Known 2XX status:_ + /// * 200 - OK. Request accepted into the pipeline. + /// + /// * **400 - 499 Unrecoverable error** + /// Any status on the 4XX range means something our client did is not correct. + /// It is unlikely that the client is going to recover from this by retrying, + /// so in this case the corresponding ping file can also be deleted. + /// _Known 4XX status:_ + /// * 404 - not found - POST/PUT to an unknown namespace + /// * 405 - wrong request type (anything other than POST/PUT) + /// * 411 - missing content-length header + /// * 413 - request body too large Note that if we have badly-behaved clients that + /// retry on 4XX, we should send back 202 on body/path too long). + /// * 414 - request path too long (See above) + /// + /// * **Any other error** + /// For any other error, a warning is logged and the ping is re-enqueued. + /// _Known other errors:_ + /// * 500 - internal error + /// + /// # Note + /// + /// The disk I/O performed by this function is not done off-thread, + /// as it is expected to be called off-thread by the platform. + /// + /// # Arguments + /// + /// * `glean` - The Glean object holding the database. + /// * `document_id` - The UUID of the ping in question. 
/// Splits `payload` into consecutive chunks of at most `max_chunk_bytes` bytes each,
/// never cutting a UTF-8 character in half.
///
/// Concatenating the returned chunks yields `payload` again.
/// An empty payload yields no chunks.
/// Should `max_chunk_bytes` be smaller than a single character,
/// that character still becomes a chunk of its own, so that progress is guaranteed.
// Only the Android logger needs chunking, on other targets this helper is unused.
#[cfg_attr(not(target_os = "android"), allow(dead_code))]
fn split_at_char_boundaries(payload: &str, max_chunk_bytes: usize) -> Vec<&str> {
    let mut chunks = Vec::new();
    let mut rest = payload;

    while rest.len() > max_chunk_bytes {
        // Find char boundary from the end.
        // It's UTF-8, so it is within 4 bytes from here.
        let mut end = max_chunk_bytes;
        while end > 0 && !rest.is_char_boundary(end) {
            end -= 1;
        }
        if end == 0 {
            end = rest.chars().next().map_or(rest.len(), char::len_utf8);
        }

        let (chunk, tail) = rest.split_at(end);
        chunks.push(chunk);
        rest = tail;
    }

    // Whatever suffix is left fits into a single chunk.
    if !rest.is_empty() {
        chunks.push(rest);
    }

    chunks
}

/// Splits log message into chunks on Android.
///
/// # Arguments
///
/// * `path` - The path of the ping, used as a prefix for every log message.
/// * `payload` - The ping payload to log.
#[cfg(target_os = "android")]
pub fn chunked_log_info(path: &str, payload: &str) {
    // Since the logcat ring buffer size is configurable, but it's 'max payload' size is not,
    // we must break apart long pings into chunks no larger than the max payload size of 4076b.
    // We leave some head space for our prefix.
    const MAX_LOG_PAYLOAD_SIZE_BYTES: usize = 4000;

    // If the length of the ping will fit within one logcat payload, then we can
    // short-circuit here and avoid some overhead, otherwise we must split up the
    // message so that we don't truncate it.
    if path.len() + payload.len() <= MAX_LOG_PAYLOAD_SIZE_BYTES {
        log::info!("Glean ping to URL: {}\n{}", path, payload);
        return;
    }

    // Otherwise we break it apart into chunks of smaller size,
    // prefixing it with the path and a counter.
    // The chunks are computed up front, so that the reported total is exact.
    let chunks = split_at_char_boundaries(payload, MAX_LOG_PAYLOAD_SIZE_BYTES);
    let total_chunks = chunks.len();

    for (idx, chunk) in chunks.iter().enumerate() {
        log::info!(
            "Glean ping to URL: {} [Part {} of {}]\n{}",
            path,
            idx + 1,
            total_chunks,
            chunk
        );
    }
}
+ // Verify request was returned + let task = upload_manager.get_upload_task(&glean, false); + assert!(task.is_upload()); + } + + #[test] + fn returns_as_many_ping_requests_as_there_are() { + let (glean, dir) = new_glean(None); + + let upload_manager = PingUploadManager::no_policy(dir.path()); + + // Enqueue a ping multiple times + let n = 10; + for _ in 0..n { + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + } + + // Verify a request is returned for each submitted ping + for _ in 0..n { + let task = upload_manager.get_upload_task(&glean, false); + assert!(task.is_upload()); + } + + // Verify that after all requests are returned, none are left + assert_eq!( + upload_manager.get_upload_task(&glean, false), + PingUploadTask::Done + ); + } + + #[test] + fn limits_the_number_of_pings_when_there_is_rate_limiting() { + let (glean, dir) = new_glean(None); + + let mut upload_manager = PingUploadManager::no_policy(dir.path()); + + // Add a rate limiter to the upload mangager with max of 10 pings every 3 seconds. 
+ let max_pings_per_interval = 10; + upload_manager.set_rate_limiter(3, 10); + + // Enqueue the max number of pings allowed per uploading window + for _ in 0..max_pings_per_interval { + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + } + + // Verify a request is returned for each submitted ping + for _ in 0..max_pings_per_interval { + let task = upload_manager.get_upload_task(&glean, false); + assert!(task.is_upload()); + } + + // Enqueue just one more ping + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + + // Verify that we are indeed told to wait because we are at capacity + match upload_manager.get_upload_task(&glean, false) { + PingUploadTask::Wait(time) => { + // Wait for the uploading window to reset + thread::sleep(Duration::from_millis(time)); + } + _ => panic!("Expected upload manager to return a wait task!"), + }; + + let task = upload_manager.get_upload_task(&glean, false); + assert!(task.is_upload()); + } + + #[test] + fn clearing_the_queue_works_correctly() { + let (glean, dir) = new_glean(None); + + let upload_manager = PingUploadManager::no_policy(dir.path()); + + // Enqueue a ping multiple times + for _ in 0..10 { + upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None); + } + + // Clear the queue + drop(upload_manager.clear_ping_queue()); + + // Verify there really isn't any ping in the queue + assert_eq!( + upload_manager.get_upload_task(&glean, false), + PingUploadTask::Done + ); + } + + #[test] + fn clearing_the_queue_doesnt_clear_deletion_request_pings() { + let (mut glean, _) = new_glean(None); + + // Register a ping for testing + let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]); + glean.register_ping_type(&ping_type); + + // Submit the ping multiple times + let n = 10; + for _ in 0..n { + glean.submit_ping(&ping_type, None).unwrap(); + } + + glean + .internal_pings + .deletion_request + .submit(&glean, None) + 
.unwrap();

        // Clear the queue
        drop(glean.upload_manager.clear_ping_queue());

        // The only task left must be the deletion-request ping.
        let upload_task = glean.get_upload_task();
        match upload_task {
            PingUploadTask::Upload(request) => assert!(request.is_deletion_request()),
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Verify there aren't any other pings in the queue
        assert_eq!(glean.get_upload_task(), PingUploadTask::Done);
    }

    // A fresh upload manager pointed at the same data path picks up
    // the pings that were previously submitted through `glean`.
    #[test]
    fn fills_up_queue_successfully_from_disk() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit the ping multiple times
        let n = 10;
        for _ in 0..n {
            glean.submit_ping(&ping_type, None).unwrap();
        }

        // Create a new upload manager pointing to the same data_path as the glean instance.
        let upload_manager = PingUploadManager::no_policy(dir.path());

        // Verify the requests were properly enqueued
        for _ in 0..n {
            let task = upload_manager.get_upload_task(&glean, false);
            assert!(task.is_upload());
        }

        // Verify that after all requests are returned, none are left
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );
    }

    // An HTTP 200 response removes the ping file from the pending directory.
    #[test]
    fn processes_correctly_success_upload_response() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit a ping
        glean.submit_ping(&ping_type, None).unwrap();

        // Get the pending ping directory path
        let pending_pings_dir = dir.path().join(PENDING_PINGS_DIRECTORY);

        // Get the submitted PingRequest
        match glean.get_upload_task() {
            PingUploadTask::Upload(request) => {
                // Simulate the processing of a successful request
                let document_id = request.document_id;
                glean.process_ping_upload_response(&document_id, HttpStatus(200));
                // Verify file was deleted
                assert!(!pending_pings_dir.join(document_id).exists());
            }
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Verify that after request is returned, none are left
        assert_eq!(glean.get_upload_task(), PingUploadTask::Done);
    }

    // An HTTP 404 response also removes the ping file.
    #[test]
    fn processes_correctly_client_error_upload_response() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit a ping
        glean.submit_ping(&ping_type, None).unwrap();

        // Get the pending ping directory path
        let pending_pings_dir = dir.path().join(PENDING_PINGS_DIRECTORY);

        // Get the submitted PingRequest
        match glean.get_upload_task() {
            PingUploadTask::Upload(request) => {
                // Simulate the processing of a client error
                let document_id = request.document_id;
                glean.process_ping_upload_response(&document_id, HttpStatus(404));
                // Verify file was deleted
                assert!(!pending_pings_dir.join(document_id).exists());
            }
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Verify that after request is returned, none are left
        assert_eq!(glean.get_upload_task(), PingUploadTask::Done);
    }

    // An HTTP 500 response re-enqueues the same ping.
    #[test]
    fn processes_correctly_server_error_upload_response() {
        let (mut glean, _) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit a ping
        glean.submit_ping(&ping_type, None).unwrap();

        // Get the submitted PingRequest
        match glean.get_upload_task() {
            PingUploadTask::Upload(request) => {
                // Simulate the processing of a server error
                let document_id = request.document_id;
                glean.process_ping_upload_response(&document_id, HttpStatus(500));
                // Verify this ping was indeed re-enqueued
                match glean.get_upload_task() {
PingUploadTask::Upload(request) => {
                        // The re-enqueued task must carry the same document id.
                        assert_eq!(document_id, request.document_id);
                    }
                    _ => panic!("Expected upload manager to return the next request!"),
                }
            }
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Verify that after request is returned, none are left
        assert_eq!(glean.get_upload_task(), PingUploadTask::Done);
    }

    // An `UnrecoverableFailure` result removes the ping file.
    #[test]
    fn processes_correctly_unrecoverable_upload_response() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit a ping
        glean.submit_ping(&ping_type, None).unwrap();

        // Get the pending ping directory path
        let pending_pings_dir = dir.path().join(PENDING_PINGS_DIRECTORY);

        // Get the submitted PingRequest
        match glean.get_upload_task() {
            PingUploadTask::Upload(request) => {
                // Simulate the processing of an unrecoverable failure
                let document_id = request.document_id;
                glean.process_ping_upload_response(&document_id, UnrecoverableFailure);
                // Verify file was deleted
                assert!(!pending_pings_dir.join(document_id).exists());
            }
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Verify that after request is returned, none are left
        assert_eq!(glean.get_upload_task(), PingUploadTask::Done);
    }

    // A ping enqueued while another upload is still unanswered
    // is handed out as the next task.
    #[test]
    fn new_pings_are_added_while_upload_in_progress() {
        let (glean, dir) = new_glean(None);

        let upload_manager = PingUploadManager::no_policy(dir.path());

        let doc1 = Uuid::new_v4().to_string();
        let path1 = format!("/submit/app_id/test-ping/1/{}", doc1);

        let doc2 = Uuid::new_v4().to_string();
        let path2 = format!("/submit/app_id/test-ping/1/{}", doc2);

        // Enqueue a ping
        upload_manager.enqueue_ping(&glean, &doc1, &path1, "", None);

        // Try and get the first request.
let req = match upload_manager.get_upload_task(&glean, false) {
            PingUploadTask::Upload(req) => req,
            _ => panic!("Expected upload manager to return the next request!"),
        };
        assert_eq!(doc1, req.document_id);

        // Schedule the next one while the first one is "in progress"
        upload_manager.enqueue_ping(&glean, &doc2, &path2, "", None);

        // Mark the first one as processed
        upload_manager.process_ping_upload_response(&glean, &req.document_id, HttpStatus(200));

        // Get the second request.
        let req = match upload_manager.get_upload_task(&glean, false) {
            PingUploadTask::Upload(req) => req,
            _ => panic!("Expected upload manager to return the next request!"),
        };
        assert_eq!(doc2, req.document_id);

        // Mark the second one as processed
        upload_manager.process_ping_upload_response(&glean, &req.document_id, HttpStatus(200));

        // ... and then we're done.
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );
    }

    // With a debug view tag set, requests carry it in the `X-Debug-ID` header.
    #[test]
    fn adds_debug_view_header_to_requests_when_tag_is_set() {
        let (mut glean, _) = new_glean(None);

        glean.set_debug_view_tag("valid-tag");

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit a ping
        glean.submit_ping(&ping_type, None).unwrap();

        // Get the submitted PingRequest
        match glean.get_upload_task() {
            PingUploadTask::Upload(request) => {
                assert_eq!(request.headers.get("X-Debug-ID").unwrap(), "valid-tag")
            }
            _ => panic!("Expected upload manager to return the next request!"),
        }
    }

    // Enqueueing the same document id twice yields a single upload task.
    #[test]
    fn duplicates_are_not_enqueued() {
        let (glean, dir) = new_glean(None);

        // Create a new upload manager so that we have access to its functions directly,
        // make it synchronous so we don't have to manually wait for the scanning to finish.
let upload_manager = PingUploadManager::no_policy(dir.path());

        let doc_id = Uuid::new_v4().to_string();
        let path = format!("/submit/app_id/test-ping/1/{}", doc_id);

        // Try to enqueue a ping with the same doc_id twice
        upload_manager.enqueue_ping(&glean, &doc_id, &path, "", None);
        upload_manager.enqueue_ping(&glean, &doc_id, &path, "", None);

        // Get a task once
        let task = upload_manager.get_upload_task(&glean, false);
        assert!(task.is_upload());

        // There should be no more queued tasks
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );
    }

    // After the configured number of recoverable failures in a row the manager
    // answers `Done`, and hands out the pending uploads again on the next round.
    #[test]
    fn maximum_of_recoverable_errors_is_enforced_for_uploading_window() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit the ping multiple times
        let n = 5;
        for _ in 0..n {
            glean.submit_ping(&ping_type, None).unwrap();
        }

        let mut upload_manager = PingUploadManager::no_policy(dir.path());

        // Set a policy for max recoverable failures, this is usually disabled for tests.
        let max_recoverable_failures = 3;
        upload_manager
            .policy
            .set_max_recoverable_failures(Some(max_recoverable_failures));

        // Report the maximum number of recoverable failures in a row
        for _ in 0..max_recoverable_failures {
            match upload_manager.get_upload_task(&glean, false) {
                PingUploadTask::Upload(req) => upload_manager.process_ping_upload_response(
                    &glean,
                    &req.document_id,
                    RecoverableFailure,
                ),
                _ => panic!("Expected upload manager to return the next request!"),
            }
        }

        // Verify that after returning the max amount of recoverable failures,
        // we are done even though we haven't gotten all the enqueued requests.
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );

        // Verify all requests are returned when we try again.
for _ in 0..n {
            let task = upload_manager.get_upload_task(&glean, false);
            assert!(task.is_upload());
        }
    }

    // With a directory size quota that only fits one ping,
    // only the newest pending ping is handed out for upload.
    #[test]
    fn quota_is_enforced_when_enqueueing_cached_pings() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // Submit the ping multiple times
        let n = 10;
        for _ in 0..n {
            glean.submit_ping(&ping_type, None).unwrap();
        }

        let directory_manager = PingDirectoryManager::new(dir.path());
        let pending_pings = directory_manager.process_dirs().pending_pings;
        // The pending pings array is sorted by date in ascending order,
        // the newest element is the last one.
        let (_, newest_ping) = &pending_pings.last().unwrap();
        let (newest_ping_id, _, _, _) = &newest_ping;

        // Create a new upload manager pointing to the same data_path as the glean instance.
        let mut upload_manager = PingUploadManager::no_policy(dir.path());

        // Set the quota to just a little over the size of an empty ping file.
        // This way we can check that one ping is kept and all others are deleted.
        //
        // From manual testing I figured out an empty ping file is 324 bytes,
        // I am setting this a little over just so that minor changes to the ping structure
        // don't immediately break this.
        upload_manager
            .policy
            .set_max_pending_pings_directory_size(Some(500));

        // Get a task once
        // One ping should have been enqueued.
        // Make sure it is the newest ping.
        match upload_manager.get_upload_task(&glean, false) {
            PingUploadTask::Upload(request) => assert_eq!(&request.document_id, newest_ping_id),
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Verify that no other requests were returned,
        // they should all have been deleted because pending pings quota was hit.
assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );

        // Verify that the correct number of deleted pings was recorded:
        // everything but the single ping that was kept.
        assert_eq!(
            n - 1,
            upload_manager
                .upload_metrics
                .deleted_pings_after_quota_hit
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
        // The pending pings metric is expected to equal the number of submitted pings.
        assert_eq!(
            n as i32,
            upload_manager
                .upload_metrics
                .pending_pings
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
    }

    // With a count quota, only the newest `count_quota` pending pings are handed out.
    #[test]
    fn number_quota_is_enforced_when_enqueueing_cached_pings() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        // How many pings we allow at maximum
        let count_quota = 3;
        // The number of pings we fill the pending pings directory with.
        let n = 10;

        // Submit the ping multiple times
        for _ in 0..n {
            glean.submit_ping(&ping_type, None).unwrap();
        }

        let directory_manager = PingDirectoryManager::new(dir.path());
        let pending_pings = directory_manager.process_dirs().pending_pings;
        // The pending pings array is sorted by date in ascending order,
        // the newest element is the last one.
        // `expected_pings` therefore holds the newest ids, newest first.
        let expected_pings = pending_pings
            .iter()
            .rev()
            .take(count_quota)
            .map(|(_, ping)| ping.0.clone())
            .collect::<Vec<_>>();

        // Create a new upload manager pointing to the same data_path as the glean instance.
        let mut upload_manager = PingUploadManager::no_policy(dir.path());

        upload_manager
            .policy
            .set_max_pending_pings_count(Some(count_quota as u64));

        // Get one task per expected ping.
        // Only the newest pings should have been enqueued,
        // and they must be handed out oldest first.
for ping_id in expected_pings.iter().rev() {
            match upload_manager.get_upload_task(&glean, false) {
                PingUploadTask::Upload(request) => assert_eq!(&request.document_id, ping_id),
                _ => panic!("Expected upload manager to return the next request!"),
            }
        }

        // Verify that no other requests were returned,
        // they should all have been deleted because pending pings quota was hit.
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );

        // Verify that the correct number of deleted pings was recorded
        assert_eq!(
            (n - count_quota) as i32,
            upload_manager
                .upload_metrics
                .deleted_pings_after_quota_hit
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
        // The pending pings metric is expected to equal the number of submitted pings.
        assert_eq!(
            n as i32,
            upload_manager
                .upload_metrics
                .pending_pings
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
    }

    // Both quotas set, with the size quota being the stricter one:
    // three pings are expected to survive.
    #[test]
    fn size_and_count_quota_work_together_size_first() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        let expected_number_of_pings = 3;
        // The number of pings we fill the pending pings directory with.
        let n = 10;

        // Submit the ping multiple times
        for _ in 0..n {
            glean.submit_ping(&ping_type, None).unwrap();
        }

        let directory_manager = PingDirectoryManager::new(dir.path());
        let pending_pings = directory_manager.process_dirs().pending_pings;
        // The pending pings array is sorted by date in ascending order,
        // the newest element is the last one.
        // `expected_pings` therefore holds the newest ids, newest first.
        let expected_pings = pending_pings
            .iter()
            .rev()
            .take(expected_number_of_pings)
            .map(|(_, ping)| ping.0.clone())
            .collect::<Vec<_>>();

        // Create a new upload manager pointing to the same data_path as the glean instance.
        let mut upload_manager = PingUploadManager::no_policy(dir.path());

        // From manual testing we figured out an empty ping file is 324 bytes,
        // so this allows 3 pings.
upload_manager
            .policy
            .set_max_pending_pings_directory_size(Some(1000));
        // The count quota (5) is looser than the size quota here.
        upload_manager.policy.set_max_pending_pings_count(Some(5));

        // Get one task per expected ping.
        // Only the newest pings should have been enqueued,
        // and they must be handed out oldest first.
        for ping_id in expected_pings.iter().rev() {
            match upload_manager.get_upload_task(&glean, false) {
                PingUploadTask::Upload(request) => assert_eq!(&request.document_id, ping_id),
                _ => panic!("Expected upload manager to return the next request!"),
            }
        }

        // Verify that no other requests were returned,
        // they should all have been deleted because pending pings quota was hit.
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );

        // Verify that the correct number of deleted pings was recorded
        assert_eq!(
            (n - expected_number_of_pings) as i32,
            upload_manager
                .upload_metrics
                .deleted_pings_after_quota_hit
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
        // The pending pings metric is expected to equal the number of submitted pings.
        assert_eq!(
            n as i32,
            upload_manager
                .upload_metrics
                .pending_pings
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
    }

    // Both quotas set, with the count quota being the stricter one:
    // two pings are expected to survive.
    #[test]
    fn size_and_count_quota_work_together_count_first() {
        let (mut glean, dir) = new_glean(None);

        // Register a ping for testing
        let ping_type = PingType::new("test", true, /* send_if_empty */ true, vec![]);
        glean.register_ping_type(&ping_type);

        let expected_number_of_pings = 2;
        // The number of pings we fill the pending pings directory with.
        let n = 10;

        // Submit the ping multiple times
        for _ in 0..n {
            glean.submit_ping(&ping_type, None).unwrap();
        }

        let directory_manager = PingDirectoryManager::new(dir.path());
        let pending_pings = directory_manager.process_dirs().pending_pings;
        // The pending pings array is sorted by date in ascending order,
        // the newest element is the last one.
let expected_pings = pending_pings
            .iter()
            .rev()
            .take(expected_number_of_pings)
            .map(|(_, ping)| ping.0.clone())
            .collect::<Vec<_>>();

        // Create a new upload manager pointing to the same data_path as the glean instance.
        let mut upload_manager = PingUploadManager::no_policy(dir.path());

        // From manual testing we figured out an empty ping file is 324 bytes,
        // so the size quota allows 3 pings; the count quota of 2 is stricter.
        upload_manager
            .policy
            .set_max_pending_pings_directory_size(Some(1000));
        upload_manager.policy.set_max_pending_pings_count(Some(2));

        // Get one task per expected ping.
        // Only the newest pings should have been enqueued,
        // and they must be handed out oldest first.
        for ping_id in expected_pings.iter().rev() {
            match upload_manager.get_upload_task(&glean, false) {
                PingUploadTask::Upload(request) => assert_eq!(&request.document_id, ping_id),
                _ => panic!("Expected upload manager to return the next request!"),
            }
        }

        // Verify that no other requests were returned,
        // they should all have been deleted because pending pings quota was hit.
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );

        // Verify that the correct number of deleted pings was recorded
        assert_eq!(
            (n - expected_number_of_pings) as i32,
            upload_manager
                .upload_metrics
                .deleted_pings_after_quota_hit
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
        // The pending pings metric is expected to equal the number of submitted pings.
        assert_eq!(
            n as i32,
            upload_manager
                .upload_metrics
                .pending_pings
                .test_get_value(&glean, "metrics")
                .unwrap()
        );
    }

    // After `max_wait_attempts` consecutive `Wait` responses
    // the manager answers `Done` instead of another `Wait`.
    #[test]
    fn maximum_wait_attemps_is_enforced() {
        let (glean, dir) = new_glean(None);

        let mut upload_manager = PingUploadManager::no_policy(dir.path());

        // Define a max_wait_attempts policy, this is disabled for tests by default.
        let max_wait_attempts = 3;
        upload_manager
            .policy
            .set_max_wait_attempts(Some(max_wait_attempts));

        // Add a rate limiter to the upload manager with max of 1 ping every 5 seconds.
//
        // We arbitrarily set the maximum pings per interval to a very low number,
        // when the rate limiter reaches its limit get_upload_task returns a PingUploadTask::Wait,
        // which will allow us to test the limitations around returning too many of those in a row.
        let secs_per_interval = 5;
        let max_pings_per_interval = 1;
        upload_manager.set_rate_limiter(secs_per_interval, max_pings_per_interval);

        // Enqueue two pings
        upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None);
        upload_manager.enqueue_ping(&glean, &Uuid::new_v4().to_string(), PATH, "", None);

        // Get the first ping, it should be returned normally.
        match upload_manager.get_upload_task(&glean, false) {
            PingUploadTask::Upload(_) => {}
            _ => panic!("Expected upload manager to return the next request!"),
        }

        // Try to get the next ping,
        // we should be throttled and thus get a PingUploadTask::Wait.
        // Check that we are indeed allowed to get this response as many times as expected.
        for _ in 0..max_wait_attempts {
            let task = upload_manager.get_upload_task(&glean, false);
            assert!(task.is_wait());
        }

        // Check that after we get PingUploadTask::Wait the allowed number of times,
        // we then get PingUploadTask::Done.
        assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );

        // Wait for the rate limiter to allow upload tasks again.
        thread::sleep(Duration::from_secs(secs_per_interval));

        // Check that we are allowed again to get pings.
        let task = upload_manager.get_upload_task(&glean, false);
        assert!(task.is_upload());

        // And once we are done we don't need to wait anymore.
assert_eq!(
            upload_manager.get_upload_task(&glean, false),
            PingUploadTask::Done
        );
    }

    // A manager built with `PingUploadManager::new` answers the first request
    // with a `Wait` of `WAIT_TIME_FOR_PING_PROCESSING`.
    #[test]
    fn wait_task_contains_expected_wait_time_when_pending_pings_dir_not_processed_yet() {
        let (glean, dir) = new_glean(None);
        let upload_manager = PingUploadManager::new(dir.path(), "test");
        match upload_manager.get_upload_task(&glean, false) {
            PingUploadTask::Wait(time) => {
                assert_eq!(time, WAIT_TIME_FOR_PING_PROCESSING);
            }
            _ => panic!("Expected upload manager to return a wait task!"),
        };
    }
}
diff --git a/third_party/rust/glean-core/src/upload/policy.rs b/third_party/rust/glean-core/src/upload/policy.rs
new file mode 100644
index 0000000000..91467ebd82
--- /dev/null
+++ b/third_party/rust/glean-core/src/upload/policy.rs
@@ -0,0 +1,112 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Policies for ping storage, uploading and requests.

// Default limits used by `Policy::default()`.
const MAX_RECOVERABLE_FAILURES: u32 = 3;
const MAX_WAIT_ATTEMPTS: u32 = 3;
const MAX_PING_BODY_SIZE: usize = 1024 * 1024; // 1 MB
const MAX_PENDING_PINGS_DIRECTORY_SIZE: u64 = 10 * 1024 * 1024; // 10MB

// The average number of baseline pings per client (on Fenix) is at 15 pings a day.
// The P99 value is ~110.
// With a maximum of (a nice round) 250 we can store about 2 days worth of pings.
// A baseline ping file averages about 600 bytes, so that's a total of roughly 150 kB we store.
// With the default rate limit of 15 pings per 60s it would take roughly 16 minutes to send out all pending
// pings.
const MAX_PENDING_PINGS_COUNT: u64 = 250;

/// A struct holding the values for all the policies related to ping storage, uploading and requests.
#[derive(Debug)]
pub struct Policy {
    /// The maximum recoverable failures allowed per uploading window.
///
    /// Limiting this is necessary to avoid infinite loops on requesting upload tasks.
    max_recoverable_failures: Option<u32>,
    /// The maximum of [`PingUploadTask::Wait`] responses a user may get in a row
    /// when calling [`get_upload_task`].
    ///
    /// Limiting this is necessary to avoid infinite loops on requesting upload tasks.
    max_wait_attempts: Option<u32>,
    /// The maximum size in bytes a ping body may have to be eligible for upload.
    max_ping_body_size: Option<usize>,
    /// The maximum size in bytes the pending pings directory may have on disk.
    max_pending_pings_directory_size: Option<u64>,
    /// The maximum number of pending pings on disk.
    max_pending_pings_count: Option<u64>,
}

impl Default for Policy {
    /// Builds a policy with every limit set to its module-level default constant.
    fn default() -> Self {
        Self {
            max_recoverable_failures: Some(MAX_RECOVERABLE_FAILURES),
            max_wait_attempts: Some(MAX_WAIT_ATTEMPTS),
            max_ping_body_size: Some(MAX_PING_BODY_SIZE),
            max_pending_pings_directory_size: Some(MAX_PENDING_PINGS_DIRECTORY_SIZE),
            max_pending_pings_count: Some(MAX_PENDING_PINGS_COUNT),
        }
    }
}

// Every getter below maps an unset limit (`None`) to the maximum value
// of its integer type.
impl Policy {
    /// Returns the recoverable failure limit, or `u32::MAX` when unset.
    pub fn max_recoverable_failures(&self) -> u32 {
        self.max_recoverable_failures.unwrap_or(u32::MAX)
    }

    #[cfg(test)]
    pub fn set_max_recoverable_failures(&mut self, v: Option<u32>) {
        self.max_recoverable_failures = v;
    }

    /// Returns the limit of consecutive wait responses, or `u32::MAX` when unset.
    pub fn max_wait_attempts(&self) -> u32 {
        self.max_wait_attempts.unwrap_or(u32::MAX)
    }

    #[cfg(test)]
    pub fn set_max_wait_attempts(&mut self, v: Option<u32>) {
        self.max_wait_attempts = v;
    }

    /// Returns the ping body size limit in bytes, or `usize::MAX` when unset.
    pub fn max_ping_body_size(&self) -> usize {
        self.max_ping_body_size.unwrap_or(usize::MAX)
    }

    #[cfg(test)]
    pub fn set_max_ping_body_size(&mut self, v: Option<usize>) {
        self.max_ping_body_size = v;
    }

    /// Returns the pending pings directory size limit, or `u64::MAX` when unset.
    pub fn max_pending_pings_directory_size(&self) -> u64 {
        self.max_pending_pings_directory_size.unwrap_or(u64::MAX)
    }

    /// Returns the pending pings count limit, or `u64::MAX` when unset.
    pub fn max_pending_pings_count(&self) -> u64 {
        self.max_pending_pings_count.unwrap_or(u64::MAX)
    }

    #[cfg(test)]
    pub fn set_max_pending_pings_directory_size(&mut self, v: Option<u64>) {
        self.max_pending_pings_directory_size = v;
    }

    #[cfg(test)]
    pub fn set_max_pending_pings_count(&mut self, v: Option<u64>) {
        self.max_pending_pings_count = v;
    }
}
diff --git a/third_party/rust/glean-core/src/upload/request.rs b/third_party/rust/glean-core/src/upload/request.rs
new file mode 100644
index 0000000000..332994de53
--- /dev/null
+++ b/third_party/rust/glean-core/src/upload/request.rs
@@ -0,0 +1,291 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Ping request representation.

use std::collections::HashMap;

use chrono::prelude::{DateTime, Utc};
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use serde_json::{self, Value as JsonValue};
use std::io::prelude::*;

use crate::error::{ErrorKind, Result};
use crate::system;

/// A representation for request headers.
pub type HeaderMap = HashMap<String, String>;

/// Creates a formatted date string that can be used with Date headers.
fn create_date_header_value(current_time: DateTime<Utc>) -> String {
    // Date headers are required to be in the following format:
    //
    // <day-name>, <day> <month> <year> <hour>:<minute>:<second> GMT
    //
    // as documented here:
    // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Date
    // Unfortunately we can't use `current_time.to_rfc2822()` as it
    // formats as "Mon, 22 Jun 2020 10:40:34 +0000", with an ending
    // "+0000" instead of "GMT". That's why we need to go with manual
    // formatting.
+ current_time.format("%a, %d %b %Y %T GMT").to_string() +} + +fn create_user_agent_header_value( + version: &str, + language_binding_name: &str, + system: &str, +) -> String { + format!( + "Glean/{} ({} on {})", + version, language_binding_name, system + ) +} + +/// Attempt to gzip the contents of a ping. +fn gzip_content(path: &str, content: &[u8]) -> Option<Vec<u8>> { + let mut gzipper = GzEncoder::new(Vec::new(), Compression::default()); + + // Attempt to add the content to the gzipper. + if let Err(e) = gzipper.write_all(content) { + log::warn!("Failed to write to the gzipper: {} - {:?}", path, e); + return None; + } + + gzipper.finish().ok() +} + +pub struct Builder { + document_id: Option<String>, + path: Option<String>, + body: Option<Vec<u8>>, + headers: HeaderMap, + body_max_size: usize, +} + +impl Builder { + /// Creates a new builder for a PingRequest. + pub fn new(language_binding_name: &str, body_max_size: usize) -> Self { + let mut headers = HashMap::new(); + headers.insert("Date".to_string(), create_date_header_value(Utc::now())); + headers.insert( + "User-Agent".to_string(), + create_user_agent_header_value(crate::GLEAN_VERSION, language_binding_name, system::OS), + ); + headers.insert( + "Content-Type".to_string(), + "application/json; charset=utf-8".to_string(), + ); + headers.insert("X-Client-Type".to_string(), "Glean".to_string()); + headers.insert( + "X-Client-Version".to_string(), + crate::GLEAN_VERSION.to_string(), + ); + + Self { + document_id: None, + path: None, + body: None, + headers, + body_max_size, + } + } + + /// Sets the document_id for this request. + pub fn document_id<S: Into<String>>(mut self, value: S) -> Self { + self.document_id = Some(value.into()); + self + } + + /// Sets the path for this request. + pub fn path<S: Into<String>>(mut self, value: S) -> Self { + self.path = Some(value.into()); + self + } + + /// Sets the body for this request. 
+ /// + /// This method will also attempt to gzip the body contents + /// and add headers related to the body that was just added. + /// + /// Namely these headers are the "Content-Length" with the length of the body + /// and in case we are successfull on gzipping the contents, the "Content-Encoding"="gzip". + /// + /// **Important** + /// If we are unable to gzip we don't panic and instead just set the uncompressed body. + /// + /// # Panics + /// + /// This method will panic in case we try to set the body before setting the path. + pub fn body<S: Into<String>>(mut self, value: S) -> Self { + // Attempt to gzip the body contents. + let original_as_string = value.into(); + let gzipped_content = gzip_content( + self.path + .as_ref() + .expect("Path must be set before attempting to set the body"), + original_as_string.as_bytes(), + ); + let add_gzip_header = gzipped_content.is_some(); + let body = gzipped_content.unwrap_or_else(|| original_as_string.into_bytes()); + + // Include headers related to body + self = self.header("Content-Length", &body.len().to_string()); + if add_gzip_header { + self = self.header("Content-Encoding", "gzip"); + } + + self.body = Some(body); + self + } + + /// Sets a header for this request. + pub fn header<S: Into<String>>(mut self, key: S, value: S) -> Self { + self.headers.insert(key.into(), value.into()); + self + } + + /// Sets multiple headers for this request at once. + pub fn headers(mut self, values: HeaderMap) -> Self { + self.headers.extend(values); + self + } + + /// Consumes the builder and create a PingRequest. + /// + /// # Panics + /// + /// This method will panic if any of the required fields are missing: + /// `document_id`, `path` and `body`. 
+ pub fn build(self) -> Result<PingRequest> { + let body = self + .body + .expect("body must be set before attempting to build PingRequest"); + + if body.len() > self.body_max_size { + return Err(ErrorKind::PingBodyOverflow(body.len()).into()); + } + + Ok(PingRequest { + document_id: self + .document_id + .expect("document_id must be set before attempting to build PingRequest"), + path: self + .path + .expect("path must be set before attempting to build PingRequest"), + body, + headers: self.headers, + }) + } +} + +/// Represents a request to upload a ping. +#[derive(PartialEq, Debug, Clone)] +pub struct PingRequest { + /// The Job ID to identify this request, + /// this is the same as the ping UUID. + pub document_id: String, + /// The path for the server to upload the ping to. + pub path: String, + /// The body of the request, as a byte array. If gzip encoded, then + /// the `headers` list will contain a `Content-Encoding` header with + /// the value `gzip`. + pub body: Vec<u8>, + /// A map with all the headers to be sent with the request. + pub headers: HeaderMap, +} + +impl PingRequest { + /// Creates a new builder-style structure to help build a PingRequest. + /// + /// # Arguments + /// + /// * `language_binding_name` - The name of the language used by the binding that instantiated this Glean instance. + /// This is used to build the User-Agent header value. + /// * `body_max_size` - The maximum size in bytes the compressed ping body may have to be eligible for upload. + pub fn builder(language_binding_name: &str, body_max_size: usize) -> Builder { + Builder::new(language_binding_name, body_max_size) + } + + /// Verifies if current request is for a deletion-request ping. 
+ pub fn is_deletion_request(&self) -> bool { + // The path format should be `/submit/<app_id>/<ping_name>/<schema_version/<doc_id>` + self.path + .split('/') + .nth(3) + .map(|url| url == "deletion-request") + .unwrap_or(false) + } + + /// Decompresses and pretty-format the ping payload + /// + /// Should be used for logging when required. + /// This decompresses the payload in memory. + pub fn pretty_body(&self) -> Option<String> { + let mut gz = GzDecoder::new(&self.body[..]); + let mut s = String::with_capacity(self.body.len()); + + gz.read_to_string(&mut s) + .ok() + .map(|_| &s[..]) + .or_else(|| std::str::from_utf8(&self.body).ok()) + .and_then(|payload| serde_json::from_str::<JsonValue>(payload).ok()) + .and_then(|json| serde_json::to_string_pretty(&json).ok()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use chrono::offset::TimeZone; + + #[test] + fn date_header_resolution() { + let date: DateTime<Utc> = Utc.ymd(2018, 2, 25).and_hms(11, 10, 37); + let test_value = create_date_header_value(date); + assert_eq!("Sun, 25 Feb 2018 11:10:37 GMT", test_value); + } + + #[test] + fn user_agent_header_resolution() { + let test_value = create_user_agent_header_value("0.0.0", "Rust", "Windows"); + assert_eq!("Glean/0.0.0 (Rust on Windows)", test_value); + } + + #[test] + fn correctly_builds_ping_request() { + let request = PingRequest::builder(/* language_binding_name */ "Rust", 1024 * 1024) + .document_id("woop") + .path("/random/path/doesnt/matter") + .body("{}") + .build() + .unwrap(); + + assert_eq!(request.document_id, "woop"); + assert_eq!(request.path, "/random/path/doesnt/matter"); + + // Make sure all the expected headers were added. 
+ assert!(request.headers.contains_key("Date")); + assert!(request.headers.contains_key("User-Agent")); + assert!(request.headers.contains_key("Content-Type")); + assert!(request.headers.contains_key("X-Client-Type")); + assert!(request.headers.contains_key("X-Client-Version")); + assert!(request.headers.contains_key("Content-Length")); + } + + #[test] + fn errors_when_request_body_exceeds_max_size() { + // Create a new builder with an arbitrarily small value, + // se we can test that the builder errors when body max size exceeds the expected. + let request = Builder::new( + /* language_binding_name */ "Rust", /* body_max_size */ 1, + ) + .document_id("woop") + .path("/random/path/doesnt/matter") + .body("{}") + .build(); + + assert!(request.is_err()); + } +} diff --git a/third_party/rust/glean-core/src/upload/result.rs b/third_party/rust/glean-core/src/upload/result.rs new file mode 100644 index 0000000000..79c5acd723 --- /dev/null +++ b/third_party/rust/glean-core/src/upload/result.rs @@ -0,0 +1,83 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// Result values of attempted ping uploads encoded for FFI use. +/// +/// In a perfect world this would live in `glean-ffi`, +/// but because we also want to convert from pure integer values to a proper Rust enum +/// using Rust's `From` and `Into` trait, we need to have it in this crate +/// (The coherence rules don't allow to implement an external trait for an external type). +/// +/// Due to restrictions of cbindgen they are re-defined in `glean-core/ffi/src/upload.rs`. +/// +/// NOTE: +/// THEY MUST BE THE SAME ACROSS BOTH FILES! +pub mod ffi_upload_result { + /// A recoverable error. + pub const UPLOAD_RESULT_RECOVERABLE: u32 = 0x1; + + /// An unrecoverable error. + pub const UPLOAD_RESULT_UNRECOVERABLE: u32 = 0x2; + + /// A HTTP response code. 
+ /// + /// The actual response code is encoded in the lower bits. + pub const UPLOAD_RESULT_HTTP_STATUS: u32 = 0x8000; +} +use ffi_upload_result::*; + +/// The result of an attempted ping upload. +#[derive(Debug)] +pub enum UploadResult { + /// A recoverable failure. + /// + /// During upload something went wrong, + /// e.g. the network connection failed. + /// The upload should be retried at a later time. + RecoverableFailure, + + /// An unrecoverable upload failure. + /// + /// A possible cause might be a malformed URL. + UnrecoverableFailure, + + /// A HTTP response code. + /// + /// This can still indicate an error, depending on the status code. + HttpStatus(u32), +} + +impl From<u32> for UploadResult { + fn from(status: u32) -> Self { + match status { + status if (status & UPLOAD_RESULT_HTTP_STATUS) == UPLOAD_RESULT_HTTP_STATUS => { + // Extract the status code from the lower bits. + let http_status = status & !UPLOAD_RESULT_HTTP_STATUS; + UploadResult::HttpStatus(http_status) + } + UPLOAD_RESULT_RECOVERABLE => UploadResult::RecoverableFailure, + UPLOAD_RESULT_UNRECOVERABLE => UploadResult::UnrecoverableFailure, + + // Any unknown result code is treated as unrecoverable. + _ => UploadResult::UnrecoverableFailure, + } + } +} + +impl UploadResult { + /// Gets the label to be used in recording error counts for upload. + /// + /// Returns `None` if the upload finished succesfully. + /// Failures are recorded in the `ping_upload_failure` metric. 
/// Generates a pipeline-friendly version of an application id.
///
/// ASCII letters and digits are kept (letters are lowercased);
/// every run of other characters is collapsed into a single dash.
pub fn sanitize_application_id(application_id: &str) -> String {
    let mut sanitized = String::with_capacity(application_id.len());
    let mut previous_was_dash = false;

    for ch in application_id.chars() {
        if ch.is_ascii_alphanumeric() {
            sanitized.push(ch.to_ascii_lowercase());
            previous_was_dash = false;
        } else {
            // Only the first character of a non-alphanumeric run produces a dash.
            if !previous_was_dash {
                sanitized.push('-');
            }
            previous_was_dash = true;
        }
    }

    sanitized
}
/// Truncates a string, ensuring that it doesn't end in the middle of a codepoint.
///
/// # Arguments
///
/// * `value` - The string to truncate.
/// * `length` - The length, in bytes, to truncate to. The resulting string will
///   be at most this many bytes, but may be shorter to prevent ending in the middle
///   of a codepoint.
///
/// # Returns
///
/// A string, with at most `length` bytes.
pub(crate) fn truncate_string_at_boundary<S: Into<String>>(value: S, length: usize) -> String {
    let mut s = value.into();
    if s.len() > length {
        // Walk back from `length` to the closest character boundary.
        // Index 0 is always a boundary, so the search cannot come up empty;
        // `unwrap_or(0)` merely avoids a panic path.
        let cut = (0..=length)
            .rev()
            .find(|&i| s.is_char_boundary(i))
            .unwrap_or(0);
        // Shorten the owned string in place instead of copying the prefix.
        s.truncate(cut);
    }
    s
}
// On i686 on Windows, the CPython interpreter sets the FPU precision control
// flag to 53 bits of precision, rather than the 64 bit default. On x86_64 on
// Windows, the CPython interpreter changes the rounding control settings. This
// causes different floating point results than on other architectures. This
// context manager makes it easy to set the correct precision and rounding control
// to match our other targets and platforms.
//
// See https://bugzilla.mozilla.org/show_bug.cgi?id=1623335 for additional context.
#[cfg(all(target_os = "windows", target_env = "gnu"))]
pub mod floating_point_context {
    // `size_t` is "pointer size", which is equivalent to Rust's `usize`.
    // It's defined as such in libc:
    // * https://github.com/rust-lang/libc/blob/bcbfeb5516cd5bb055198dbfbddf8d626fa2be07/src/unix/mod.rs#L19
    // * https://github.com/rust-lang/libc/blob/bcbfeb5516cd5bb055198dbfbddf8d626fa2be07/src/windows/mod.rs#L16
    #[allow(non_camel_case_types)]
    type size_t = usize;

    #[link(name = "m")]
    extern "C" {
        // Gets and sets the floating point control word.
        // See documentation here:
        // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/controlfp-s
        fn _controlfp_s(current: *mut size_t, new: size_t, mask: size_t) -> size_t;
    }

    // Rounding control mask
    const MCW_RC: size_t = 0x00000300;
    // Round by truncation
    const RC_CHOP: size_t = 0x00000300;
    // Precision control mask
    const MCW_PC: size_t = 0x00030000;
    // Values for 64-bit precision
    const PC_64: size_t = 0x00000000;

    /// Guard that switches the floating point control word to 64-bit precision
    /// and truncating rounding, and restores the previous precision and
    /// rounding settings when dropped.
    pub struct FloatingPointContext {
        /// The control word as it was before this context changed it.
        original_value: size_t,
    }

    impl FloatingPointContext {
        /// Saves the current control word, then applies our precision and rounding settings.
        pub fn new() -> Self {
            // Query the control word first: with a mask of zero nothing is modified.
            // The value written back by the *setting* call below already reflects
            // the new settings, so it cannot be used to restore the old state.
            let mut original: size_t = 0;
            // SAFETY: `original` is a valid, writable location for the duration of the call.
            let _err = unsafe { _controlfp_s(&mut original, 0, 0) };

            let mut current: size_t = 0;
            // SAFETY: `current` is a valid, writable location for the duration of the call.
            let _err = unsafe { _controlfp_s(&mut current, PC_64 | RC_CHOP, MCW_PC | MCW_RC) };

            FloatingPointContext {
                original_value: original,
            }
        }
    }

    impl Drop for FloatingPointContext {
        /// Restores the precision and rounding control bits saved in `new`.
        fn drop(&mut self) {
            let mut current: size_t = 0;
            // SAFETY: `current` is a valid, writable location for the duration of the call.
            let _err = unsafe { _controlfp_s(&mut current, self.original_value, MCW_PC | MCW_RC) };
        }
    }
}

// On every other target no adjustment is made; this stand-in keeps call sites identical.
#[cfg(not(all(target_os = "windows", target_env = "gnu")))]
pub mod floating_point_context {
    /// No-op counterpart of the Windows floating point context guard.
    pub struct FloatingPointContext {}

    impl FloatingPointContext {
        /// Creates the no-op guard.
        pub fn new() -> Self {
            FloatingPointContext {}
        }
    }
}
assert_eq!( + "1985-07-03T12:09:14.001560+01:00", + get_iso_time_string(dt, TimeUnit::Microsecond) + ); + assert_eq!( + "1985-07-03T12:09:14.001+01:00", + get_iso_time_string(dt, TimeUnit::Millisecond) + ); + assert_eq!( + "1985-07-03T12:09:14+01:00", + get_iso_time_string(dt, TimeUnit::Second) + ); + assert_eq!( + "1985-07-03T12:09+01:00", + get_iso_time_string(dt, TimeUnit::Minute) + ); + assert_eq!( + "1985-07-03T12+01:00", + get_iso_time_string(dt, TimeUnit::Hour) + ); + assert_eq!("1985-07-03+01:00", get_iso_time_string(dt, TimeUnit::Day)); + } + + #[test] + fn local_now_gets_the_time() { + let now = Local::now(); + let fixed_now = local_now_with_offset(); + + // We can't compare across differing timezones, so we just compare the UTC timestamps. + // The second timestamp should be just a few nanoseconds later. + assert!( + fixed_now.naive_utc() >= now.naive_utc(), + "Time mismatch. Local now: {:?}, Fixed now: {:?}", + now, + fixed_now + ); + } + + #[test] + fn truncate_safely_test() { + let value = "电脑坏了".to_string(); + let truncated = truncate_string_at_boundary(value, 10); + assert_eq!("电脑坏", truncated); + + let value = "0123456789abcdef".to_string(); + let truncated = truncate_string_at_boundary(value, 10); + assert_eq!("0123456789", truncated); + } + + #[test] + #[should_panic] + fn truncate_naive() { + // Ensure that truncating the naïve way on this string would panic + let value = "电脑坏了".to_string(); + value[0..10].to_string(); + } +} diff --git a/third_party/rust/glean-core/tests/boolean.rs b/third_party/rust/glean-core/tests/boolean.rs new file mode 100644 index 0000000000..c640048d6e --- /dev/null +++ b/third_party/rust/glean-core/tests/boolean.rs @@ -0,0 +1,91 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{CommonMetricData, Lifetime}; + +// SKIPPED from glean-ac: string deserializer should correctly parse integers +// This test doesn't really apply to rkv + +#[test] +fn boolean_serializer_should_correctly_serialize_boolean() { + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. + tempdir = dir; + + let metric = BooleanMetric::new(CommonMetricData { + name: "boolean_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + metric.set(&glean, true); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"boolean": {"telemetry.boolean_metric": true}}), + snapshot + ); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _t) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"boolean": {"telemetry.boolean_metric": true}}), + snapshot + ); + } +} + +#[test] +fn set_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = BooleanMetric::new(CommonMetricData { + name: "boolean_metric".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set(&glean, true); + + // Check that the data was correctly set in each store. 
+ for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!({"boolean": {"telemetry.boolean_metric": true}}), + snapshot + ); + } +} + +// SKIPPED from glean-ac: booleans are serialized in the correct JSON format +// Completely redundant with other tests. diff --git a/third_party/rust/glean-core/tests/common/mod.rs b/third_party/rust/glean-core/tests/common/mod.rs new file mode 100644 index 0000000000..1d96e617ae --- /dev/null +++ b/third_party/rust/glean-core/tests/common/mod.rs @@ -0,0 +1,142 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// #[allow(dead_code)] is required on this module as a workaround for +// https://github.com/rust-lang/rust/issues/46379 +#![allow(dead_code)] +use glean_core::{Glean, Result}; + +use std::fs::{read_dir, File}; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +use chrono::offset::TimeZone; +use iso8601::Date::YMD; +use serde_json::Value as JsonValue; + +use ctor::ctor; + +/// Initialize the logger for all tests without individual tests requiring to call the init code. +/// Log output can be controlled via the environment variable `RUST_LOG` for the `glean_core` crate, +/// e.g.: +/// +/// ``` +/// export RUST_LOG=glean_core=debug +/// ``` +#[ctor] +fn enable_test_logging() { + // When testing we want all logs to go to stdout/stderr by default, + // without requiring each individual test to activate it. + // This only applies to glean-core tests, users of the main library still need to call + // `glean_enable_logging` of the FFI component (automatically done by the platform wrappers). 
+ let _ = env_logger::builder().is_test(true).try_init(); +} + +pub fn tempdir() -> (tempfile::TempDir, String) { + let t = tempfile::tempdir().unwrap(); + let name = t.path().display().to_string(); + (t, name) +} + +pub const GLOBAL_APPLICATION_ID: &str = "org.mozilla.glean.test.app"; + +// Creates a new instance of Glean with a temporary directory. +// We need to keep the `TempDir` alive, so that it's not deleted before we stop using it. +pub fn new_glean(tempdir: Option<tempfile::TempDir>) -> (Glean, tempfile::TempDir) { + let dir = match tempdir { + Some(tempdir) => tempdir, + None => tempfile::tempdir().unwrap(), + }; + let tmpname = dir.path().display().to_string(); + + let cfg = glean_core::Configuration { + data_path: tmpname, + application_id: GLOBAL_APPLICATION_ID.into(), + language_binding_name: "Rust".into(), + upload_enabled: true, + max_events: None, + delay_ping_lifetime_io: false, + }; + let glean = Glean::new(cfg).unwrap(); + + (glean, dir) +} + +/// Converts an iso8601::DateTime to a chrono::DateTime<FixedOffset> +pub fn iso8601_to_chrono(datetime: &iso8601::DateTime) -> chrono::DateTime<chrono::FixedOffset> { + if let YMD { year, month, day } = datetime.date { + return chrono::FixedOffset::east(datetime.time.tz_offset_hours * 3600) + .ymd(year, month, day) + .and_hms_milli( + datetime.time.hour, + datetime.time.minute, + datetime.time.second, + datetime.time.millisecond, + ); + }; + panic!("Unsupported datetime format"); +} + +/// Gets a vector of the currently queued pings. +/// +/// # Arguments +/// +/// * `data_path` - Glean's data path, as returned from Glean::get_data_path() +/// +/// # Returns +/// +/// A vector of all queued pings. +/// +/// Each entry is a pair `(url, json_data, metadata)`, +/// where `url` is the endpoint the ping will go to, `json_data` is the JSON payload +/// and metadata is optional persisted data related to the ping. 
+pub fn get_queued_pings(data_path: &Path) -> Result<Vec<(String, JsonValue, Option<JsonValue>)>> { + get_pings(&data_path.join("pending_pings")) +} + +/// Gets a vector of the currently queued `deletion-request` pings. +/// +/// # Arguments +/// +/// * `data_path` - Glean's data path, as returned from Glean::get_data_path() +/// +/// # Returns +/// +/// A vector of all queued pings. +/// +/// Each entry is a pair `(url, json_data, metadata)`, +/// where `url` is the endpoint the ping will go to, `json_data` is the JSON payload +/// and metadata is optional persisted data related to the ping. +pub fn get_deletion_pings(data_path: &Path) -> Result<Vec<(String, JsonValue, Option<JsonValue>)>> { + get_pings(&data_path.join("deletion_request")) +} + +fn get_pings(pings_dir: &Path) -> Result<Vec<(String, JsonValue, Option<JsonValue>)>> { + let entries = read_dir(pings_dir)?; + Ok(entries + .filter_map(|entry| entry.ok()) + .filter(|entry| match entry.file_type() { + Ok(file_type) => file_type.is_file(), + Err(_) => false, + }) + .filter_map(|entry| File::open(entry.path()).ok()) + .filter_map(|file| { + let mut lines = BufReader::new(file).lines(); + if let (Some(Ok(url)), Some(Ok(body)), Ok(metadata)) = + (lines.next(), lines.next(), lines.next().transpose()) + { + let parsed_metadata = metadata.map(|m| { + serde_json::from_str::<JsonValue>(&m).expect("metadata should be valid JSON") + }); + if let Ok(parsed_body) = serde_json::from_str::<JsonValue>(&body) { + Some((url, parsed_body, parsed_metadata)) + } else { + None + } + } else { + None + } + }) + .collect()) +} diff --git a/third_party/rust/glean-core/tests/counter.rs b/third_party/rust/glean-core/tests/counter.rs new file mode 100644 index 0000000000..ccada50fb0 --- /dev/null +++ b/third_party/rust/glean-core/tests/counter.rs @@ -0,0 +1,177 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// Tests ported from glean-ac + +// SKIPPED from glean-ac: counter deserializer should correctly parse integers +// This test doesn't really apply to rkv + +#[test] +fn counter_serializer_should_correctly_serialize_counters() { + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. + tempdir = dir; + + let metric = CounterMetric::new(CommonMetricData { + name: "counter_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + metric.add(&glean, 1); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"counter": {"telemetry.counter_metric": 1}}), + snapshot + ); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _t) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"counter": {"telemetry.counter_metric": 1}}), + snapshot + ); + } +} + +#[test] +fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = CounterMetric::new(CommonMetricData { + name: "counter_metric".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: 
// Adding zero or a negative amount must leave the counter untouched and
// record one `InvalidValue` error each; a positive amount is stored normally.
#[test]
fn counters_must_not_increment_when_passed_zero_or_negative() {
    let (glean, _t) = new_glean(None);

    let metric = CounterMetric::new(CommonMetricData {
        name: "counter_metric".into(),
        category: "telemetry".into(),
        send_in_pings: vec!["store1".into()],
        disabled: false,
        lifetime: Lifetime::Application,
        ..Default::default()
    });

    // Attempt to increment the counter with zero
    metric.add(&glean, 0);
    // Check that nothing was recorded
    assert!(metric.test_get_value(&glean, "store1").is_none());

    // Attempt to increment the counter with negative
    metric.add(&glean, -1);
    // Check that nothing was recorded
    assert!(metric.test_get_value(&glean, "store1").is_none());

    // Attempt to increment the counter properly
    metric.add(&glean, 1);
    // Check that the increment was recorded
    assert_eq!(1, metric.test_get_value(&glean, "store1").unwrap());

    // Make sure that the two invalid attempts above have been recorded as errors
    assert_eq!(
        Ok(2),
        test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None)
    );
}
Clearing just one store + let _ = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + counter.add(&glean, 2); + + assert_eq!(2, counter.test_get_value(&glean, "store1").unwrap()); + assert_eq!(4, counter.test_get_value(&glean, "store2").unwrap()); +} + +#[test] +fn saturates_at_boundary() { + let (glean, _t) = new_glean(None); + + let counter: CounterMetric = CounterMetric::new(CommonMetricData { + name: "transformation".into(), + category: "local".into(), + send_in_pings: vec!["store1".into()], + ..Default::default() + }); + + counter.add(&glean, 2); + counter.add(&glean, i32::max_value()); + + assert_eq!( + i32::max_value(), + counter.test_get_value(&glean, "store1").unwrap() + ); +} diff --git a/third_party/rust/glean-core/tests/custom_distribution.rs b/third_party/rust/glean-core/tests/custom_distribution.rs new file mode 100644 index 0000000000..e3a27cb60c --- /dev/null +++ b/third_party/rust/glean-core/tests/custom_distribution.rs @@ -0,0 +1,437 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// Tests ported from glean-ac + +mod linear { + use super::*; + + #[test] + fn serializer_should_correctly_serialize_custom_distribution() { + let (mut tempdir, _) = tempdir(); + + { + let (glean, dir) = new_glean(Some(tempdir)); + tempdir = dir; + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 100, + HistogramType::Linear, + ); + + metric.accumulate_samples_signed(&glean, vec![50]); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + assert_eq!(snapshot.sum, 50); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted (note: the metric above uses Ping lifetime, not User) + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!(50), + snapshot["custom_distribution"]["telemetry.distribution"]["sum"] + ); + } + } + + #[test] + fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 100, + HistogramType::Linear, + ); + + metric.accumulate_samples_signed(&glean, vec![50]); + + for store_name in store_names { + let snapshot = StorageManager + 
.snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!(50), + snapshot["custom_distribution"]["telemetry.distribution"]["sum"] + ); + assert_eq!( + json!(1), + snapshot["custom_distribution"]["telemetry.distribution"]["values"]["50"] + ); + } + } + + // SKIPPED from glean-ac: memory distributions must not accumulate negative values + // This test doesn't apply to Rust, because we're using unsigned integers. + + #[test] + fn the_accumulate_samples_api_correctly_stores_memory_values() { + let (glean, _t) = new_glean(None); + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 100, + HistogramType::Linear, + ); + + // Accumulate the samples. We intentionally do not report + // negative values to not trigger error reporting. + metric.accumulate_samples_signed(&glean, [1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Check that we got the right sum of samples. + assert_eq!(snapshot.sum, 6); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * kb)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&1]); + assert_eq!(1, snapshot.values[&2]); + assert_eq!(1, snapshot.values[&3]); + + // No errors should be reported. 
+ assert!(test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + .is_err()); + } + + #[test] + fn the_accumulate_samples_api_correctly_handles_negative_values() { + let (glean, _t) = new_glean(None); + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 100, + HistogramType::Linear, + ); + + // Accumulate the samples. + metric.accumulate_samples_signed(&glean, [-1, 1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Check that we got the right sum of samples. + assert_eq!(snapshot.sum, 6); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * kb)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&1]); + assert_eq!(1, snapshot.values[&2]); + assert_eq!(1, snapshot.values[&3]); + + // 1 error should be reported. 
+ assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + ); + } + + #[test] + fn json_snapshotting_works() { + let (glean, _t) = new_glean(None); + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 100, + HistogramType::Linear, + ); + + metric.accumulate_samples_signed(&glean, vec![50]); + + let snapshot = metric.test_get_value_as_json_string(&glean, "store1"); + assert!(snapshot.is_some()); + } +} + +mod exponential { + use super::*; + + #[test] + fn serializer_should_correctly_serialize_custom_distribution() { + let (mut tempdir, _) = tempdir(); + + { + let (glean, dir) = new_glean(Some(tempdir)); + tempdir = dir; + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 10, + HistogramType::Exponential, + ); + + metric.accumulate_samples_signed(&glean, vec![50]); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + assert_eq!(snapshot.sum, 50); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted (note: the metric above uses Ping lifetime, not User) + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!(50), + snapshot["custom_distribution"]["telemetry.distribution"]["sum"] + ); + } + } + + #[test] + fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let 
metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 10, + HistogramType::Exponential, + ); + + metric.accumulate_samples_signed(&glean, vec![50]); + + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!(50), + snapshot["custom_distribution"]["telemetry.distribution"]["sum"] + ); + assert_eq!( + json!(1), + snapshot["custom_distribution"]["telemetry.distribution"]["values"]["29"] + ); + } + } + + // SKIPPED from glean-ac: memory distributions must not accumulate negative values + // This test doesn't apply to Rust, because we're using unsigned integers. + + #[test] + fn the_accumulate_samples_api_correctly_stores_memory_values() { + let (glean, _t) = new_glean(None); + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 10, + HistogramType::Exponential, + ); + + // Accumulate the samples. We intentionally do not report + // negative values to not trigger error reporting. + metric.accumulate_samples_signed(&glean, [1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Check that we got the right sum of samples. + assert_eq!(snapshot.sum, 6); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * kb)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&1]); + assert_eq!(1, snapshot.values[&2]); + assert_eq!(1, snapshot.values[&3]); + + // No errors should be reported. 
+ assert!(test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + .is_err()); + } + + #[test] + fn the_accumulate_samples_api_correctly_handles_negative_values() { + let (glean, _t) = new_glean(None); + + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 10, + HistogramType::Exponential, + ); + + // Accumulate the samples. + metric.accumulate_samples_signed(&glean, [-1, 1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Check that we got the right sum of samples. + assert_eq!(snapshot.sum, 6); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * kb)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&1]); + assert_eq!(1, snapshot.values[&2]); + assert_eq!(1, snapshot.values[&3]); + + // 1 error should be reported. 
+ assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + ); + } + + #[test] + fn json_snapshotting_works() { + let (glean, _t) = new_glean(None); + let metric = CustomDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + 1, + 100, + 10, + HistogramType::Exponential, + ); + + metric.accumulate_samples_signed(&glean, vec![50]); + + let snapshot = metric.test_get_value_as_json_string(&glean, "store1"); + assert!(snapshot.is_some()); + } +} diff --git a/third_party/rust/glean-core/tests/datetime.rs b/third_party/rust/glean-core/tests/datetime.rs new file mode 100644 index 0000000000..b67d01c3a3 --- /dev/null +++ b/third_party/rust/glean-core/tests/datetime.rs @@ -0,0 +1,187 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use chrono::prelude::*; +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{CommonMetricData, Lifetime}; + +// SKIPPED from glean-ac: datetime deserializer should correctly parse integers +// This test doesn't really apply to rkv + +#[test] +fn datetime_serializer_should_correctly_serialize_datetime() { + let expected_value = "1983-04-13T12:09+00:00"; + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. 
+ tempdir = dir; + + let metric = DatetimeMetric::new( + CommonMetricData { + name: "datetime_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }, + TimeUnit::Minute, + ); + + // `1983-04-13T12:09:14.274+00:00` will be truncated to Minute resolution. + let dt = FixedOffset::east(0) + .ymd(1983, 4, 13) + .and_hms_milli(12, 9, 14, 274); + metric.set(&glean, Some(dt)); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"datetime": {"telemetry.datetime_metric": expected_value}}), + snapshot + ); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"datetime": {"telemetry.datetime_metric": expected_value}}), + snapshot + ); + } +} + +#[test] +fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = DatetimeMetric::new( + CommonMetricData { + name: "datetime_metric".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + // `1983-04-13T12:09:14.001560274+00:00` is kept at full Nanosecond resolution (no truncation). 
+ let dt = FixedOffset::east(0) + .ymd(1983, 4, 13) + .and_hms_nano(12, 9, 14, 1_560_274); + metric.set(&glean, Some(dt)); + + for store_name in store_names { + assert_eq!( + "1983-04-13T12:09:14.001560274+00:00", + metric + .test_get_value_as_string(&glean, &store_name) + .unwrap() + ); + } +} + +// SKIPPED from glean-ac: getSnapshot() returns null if nothing is recorded in the store +// This test doesn't really apply to rkv + +// SKIPPED from glean-ac: getSnapshot() correctly clears the stores +// This test doesn't really apply to rkv + +#[test] +fn test_that_truncation_works() { + let (glean, _t) = new_glean(None); + + // `1985-07-03T12:09:14.001560274+01:00` + let high_res_datetime = FixedOffset::east(3600) + .ymd(1985, 7, 3) + .and_hms_nano(12, 9, 14, 1_560_274); + let store_name = "store1"; + + // Create a helper struct for defining the truncation cases. + struct TestCase { + case_name: &'static str, + desired_resolution: TimeUnit, + expected_result: &'static str, + } + + // Define the single test cases. + let test_cases = vec![ + TestCase { + case_name: "nano", + desired_resolution: TimeUnit::Nanosecond, + expected_result: "1985-07-03T12:09:14.001560274+01:00", + }, + TestCase { + case_name: "micro", + desired_resolution: TimeUnit::Microsecond, + expected_result: "1985-07-03T12:09:14.001560+01:00", + }, + TestCase { + case_name: "milli", + desired_resolution: TimeUnit::Millisecond, + expected_result: "1985-07-03T12:09:14.001+01:00", + }, + TestCase { + case_name: "second", + desired_resolution: TimeUnit::Second, + expected_result: "1985-07-03T12:09:14+01:00", + }, + TestCase { + case_name: "minute", + desired_resolution: TimeUnit::Minute, + expected_result: "1985-07-03T12:09+01:00", + }, + TestCase { + case_name: "hour", + desired_resolution: TimeUnit::Hour, + expected_result: "1985-07-03T12+01:00", + }, + TestCase { + case_name: "day", + desired_resolution: TimeUnit::Day, + expected_result: "1985-07-03+01:00", + }, + ]; + + // Execute them all. 
+ for t in test_cases { + let metric = DatetimeMetric::new( + CommonMetricData { + name: format!("datetime_metric_{}", t.case_name), + category: "telemetry".into(), + send_in_pings: vec![store_name.into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }, + t.desired_resolution, + ); + metric.set(&glean, Some(high_res_datetime)); + + assert_eq!( + t.expected_result, + metric + .test_get_value_as_string(&glean, &store_name) + .unwrap() + ); + } +} diff --git a/third_party/rust/glean-core/tests/event.rs b/third_party/rust/glean-core/tests/event.rs new file mode 100644 index 0000000000..14cf0d0c8c --- /dev/null +++ b/third_party/rust/glean-core/tests/event.rs @@ -0,0 +1,290 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use std::collections::HashMap; +use std::fs; + +use glean_core::metrics::*; +use glean_core::{CommonMetricData, Lifetime}; + +#[test] +fn record_properly_records_without_optional_arguments() { + let store_names = vec!["store1".into(), "store2".into()]; + + let (glean, _t) = new_glean(None); + + let metric = EventMetric::new( + CommonMetricData { + name: "test_event_no_optional".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + vec![], + ); + + metric.record(&glean, 1000, None); + + for store_name in store_names { + let events = metric.test_get_value(&glean, &store_name).unwrap(); + assert_eq!(1, events.len()); + assert_eq!("telemetry", events[0].category); + assert_eq!("test_event_no_optional", events[0].name); + assert!(events[0].extra.is_none()); + } +} + +#[test] +fn record_properly_records_with_optional_arguments() { + let (glean, _t) = new_glean(None); + + let store_names = vec!["store1".into(), "store2".into()]; + + 
let metric = EventMetric::new( + CommonMetricData { + name: "test_event_no_optional".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + vec!["key1".into(), "key2".into()], + ); + + let extra: HashMap<i32, String> = [(0, "value1".into()), (1, "value2".into())] + .iter() + .cloned() + .collect(); + + metric.record(&glean, 1000, extra); + + for store_name in store_names { + let events = metric.test_get_value(&glean, &store_name).unwrap(); + let event = events[0].clone(); + assert_eq!(1, events.len()); + assert_eq!("telemetry", event.category); + assert_eq!("test_event_no_optional", event.name); + let extra = event.extra.unwrap(); + assert_eq!(2, extra.len()); + assert_eq!("value1", extra["key1"]); + assert_eq!("value2", extra["key2"]); + } +} + +// SKIPPED record() computes the correct time between events +// Timing is now handled in the language-specific part. + +#[test] +fn snapshot_returns_none_if_nothing_is_recorded_in_the_store() { + let (glean, _t) = new_glean(None); + + assert!(glean + .event_storage() + .snapshot_as_json("store1", false) + .is_none()) +} + +#[test] +fn snapshot_correctly_clears_the_stores() { + let (glean, _t) = new_glean(None); + + let store_names = vec!["store1".into(), "store2".into()]; + + let metric = EventMetric::new( + CommonMetricData { + name: "test_event_clear".into(), + category: "telemetry".into(), + send_in_pings: store_names, + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + vec![], + ); + + metric.record(&glean, 1000, None); + + let snapshot = glean.event_storage().snapshot_as_json("store1", true); + assert!(snapshot.is_some()); + + assert!(glean + .event_storage() + .snapshot_as_json("store1", false) + .is_none()); + + let files: Vec<fs::DirEntry> = fs::read_dir(&glean.event_storage().path) + .unwrap() + .filter_map(|x| x.ok()) + .collect(); + assert_eq!(1, files.len()); + assert_eq!("store2", 
files[0].file_name()); + + let snapshot2 = glean.event_storage().snapshot_as_json("store2", false); + for s in vec![snapshot, snapshot2] { + assert!(s.is_some()); + let s = s.unwrap(); + assert_eq!(1, s.as_array().unwrap().len()); + assert_eq!("telemetry", s[0]["category"]); + assert_eq!("test_event_clear", s[0]["name"]); + println!("{:?}", s[0].get("extra")); + assert!(s[0].get("extra").is_none()); + } +} + +// SKIPPED: Events are serialized in the correct JSON format (no extra) +// SKIPPED: Events are serialized in the correct JSON format (with extra) +// This test won't work as-is since Rust doesn't maintain the insertion order in +// a JSON object, therefore you can't check the JSON output directly against a +// string. This check is redundant with other tests, anyway, and checking against +// the schema is much more useful. + +#[test] +fn test_sending_of_event_ping_when_it_fills_up() { + let (mut glean, _t) = new_glean(None); + + let store_names: Vec<String> = vec!["events".into()]; + + for store_name in &store_names { + glean.register_ping_type(&PingType::new(store_name.clone(), true, false, vec![])); + } + + let click = EventMetric::new( + CommonMetricData { + name: "click".into(), + category: "ui".into(), + send_in_pings: store_names, + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + vec!["test_event_number".into()], + ); + + // We send 510 events. 
We expect to get the first 500 in the ping and 10 + // remaining afterward + for i in 0..510 { + let mut extra: HashMap<i32, String> = HashMap::new(); + extra.insert(0, i.to_string()); + click.record(&glean, i, extra); + } + + assert_eq!(10, click.test_get_value(&glean, "events").unwrap().len()); + + let (url, json, _) = &get_queued_pings(glean.get_data_path()).unwrap()[0]; + assert!(url.starts_with(format!("/submit/{}/events/", glean.get_application_id()).as_str())); + assert_eq!(500, json["events"].as_array().unwrap().len()); + assert_eq!( + "max_capacity", + json["ping_info"].as_object().unwrap()["reason"] + .as_str() + .unwrap() + ); + + for i in 0..500 { + let event = &json["events"].as_array().unwrap()[i]; + assert_eq!(i.to_string(), event["extra"]["test_event_number"]); + } + + let snapshot = glean + .event_storage() + .snapshot_as_json("events", false) + .unwrap(); + assert_eq!(10, snapshot.as_array().unwrap().len()); + for i in 0..10 { + let event = &snapshot.as_array().unwrap()[i]; + assert_eq!((i + 500).to_string(), event["extra"]["test_event_number"]); + } +} + +#[test] +fn extra_keys_must_be_recorded_and_truncated_if_needed() { + let (glean, _t) = new_glean(None); + + let store_names: Vec<String> = vec!["store1".into()]; + + let test_event = EventMetric::new( + CommonMetricData { + name: "testEvent".into(), + category: "ui".into(), + send_in_pings: store_names, + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + vec!["extra1".into(), "truncatedExtra".into()], + ); + + let test_value = "LeanGleanByFrank"; + let mut extra: HashMap<i32, String> = HashMap::new(); + extra.insert(0, test_value.to_string()); + extra.insert(1, test_value.to_string().repeat(10)); + + test_event.record(&glean, 0, extra); + + let snapshot = glean + .event_storage() + .snapshot_as_json("store1", false) + .unwrap(); + assert_eq!(1, snapshot.as_array().unwrap().len()); + let event = &snapshot.as_array().unwrap()[0]; + assert_eq!("ui", event["category"]); + 
assert_eq!("testEvent", event["name"]); + assert_eq!(2, event["extra"].as_object().unwrap().len()); + assert_eq!(test_value, event["extra"]["extra1"]); + assert_eq!( + test_value.to_string().repeat(10)[0..100], + event["extra"]["truncatedExtra"] + ); +} + +#[test] +fn snapshot_sorts_the_timestamps() { + let (glean, _t) = new_glean(None); + + let metric = EventMetric::new( + CommonMetricData { + name: "test_event_clear".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + vec![], + ); + + metric.record(&glean, 1000, None); + metric.record(&glean, 100, None); + metric.record(&glean, 10000, None); + + let snapshot = glean + .event_storage() + .snapshot_as_json("store1", true) + .unwrap(); + + assert_eq!( + 0, + snapshot.as_array().unwrap()[0]["timestamp"] + .as_i64() + .unwrap() + ); + assert_eq!( + 900, + snapshot.as_array().unwrap()[1]["timestamp"] + .as_i64() + .unwrap() + ); + assert_eq!( + 9900, + snapshot.as_array().unwrap()[2]["timestamp"] + .as_i64() + .unwrap() + ); +} diff --git a/third_party/rust/glean-core/tests/jwe.rs b/third_party/rust/glean-core/tests/jwe.rs new file mode 100644 index 0000000000..d6ddef4872 --- /dev/null +++ b/third_party/rust/glean-core/tests/jwe.rs @@ -0,0 +1,113 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{CommonMetricData, Lifetime}; + +const HEADER: &str = "eyJhbGciOiJSU0EtT0FFUCIsImVuYyI6IkEyNTZHQ00ifQ"; +const KEY: &str = "OKOawDo13gRp2ojaHV7LFpZcgV7T6DVZKTyKOMTYUmKoTCVJRgckCL9kiMT03JGeipsEdY3mx_etLbbWSrFr05kLzcSr4qKAq7YN7e9jwQRb23nfa6c9d-StnImGyFDbSv04uVuxIp5Zms1gNxKKK2Da14B8S4rzVRltdYwam_lDp5XnZAYpQdb76FdIKLaVmqgfwX7XWRxv2322i-vDxRfqNzo_tETKzpVLzfiwQyeyPGLBIO56YJ7eObdv0je81860ppamavo35UgoRdbYaBcoh9QcfylQr66oc6vFWXRcZ_ZT2LawVCWTIy3brGPi6UklfCpIMfIjf7iGdXKHzg"; +const INIT_VECTOR: &str = "48V1_ALb6US04U3b"; +const CIPHER_TEXT: &str = + "5eym8TW_c8SuK0ltJ3rpYIzOeDQz7TALvtu6UG9oMo4vpzs9tX_EFShS8iB7j6jiSdiwkIr3ajwQzaBtQD_A"; +const AUTH_TAG: &str = "XFBoMYUZodetZdvTiFvSkQ"; +const JWE: &str = "eyJhbGciOiJSU0EtT0FFUCIsImVuYyI6IkEyNTZHQ00ifQ.OKOawDo13gRp2ojaHV7LFpZcgV7T6DVZKTyKOMTYUmKoTCVJRgckCL9kiMT03JGeipsEdY3mx_etLbbWSrFr05kLzcSr4qKAq7YN7e9jwQRb23nfa6c9d-StnImGyFDbSv04uVuxIp5Zms1gNxKKK2Da14B8S4rzVRltdYwam_lDp5XnZAYpQdb76FdIKLaVmqgfwX7XWRxv2322i-vDxRfqNzo_tETKzpVLzfiwQyeyPGLBIO56YJ7eObdv0je81860ppamavo35UgoRdbYaBcoh9QcfylQr66oc6vFWXRcZ_ZT2LawVCWTIy3brGPi6UklfCpIMfIjf7iGdXKHzg.48V1_ALb6US04U3b.5eym8TW_c8SuK0ltJ3rpYIzOeDQz7TALvtu6UG9oMo4vpzs9tX_EFShS8iB7j6jiSdiwkIr3ajwQzaBtQD_A.XFBoMYUZodetZdvTiFvSkQ"; + +#[test] +fn jwe_metric_is_generated_and_stored() { + let (glean, _t) = new_glean(None); + + let metric = JweMetric::new(CommonMetricData { + name: "jwe_metric".into(), + category: "local".into(), + send_in_pings: vec!["core".into()], + ..Default::default() + }); + + metric.set_with_compact_representation(&glean, JWE); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "core", false) + .unwrap(); + + assert_eq!( + json!({"jwe": {"local.jwe_metric": metric.test_get_value(&glean, "core") }}), + snapshot + ); +} + +#[test] +fn set_properly_sets_the_value_in_all_stores() { + let (glean, _t) = 
new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = JweMetric::new(CommonMetricData { + name: "jwe_metric".into(), + category: "local".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set_with_compact_representation(&glean, JWE); + + // Check that the data was correctly set in each store. + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, false) + .unwrap(); + + assert_eq!( + json!({"jwe": {"local.jwe_metric": metric.test_get_value(&glean, &store_name) }}), + snapshot + ); + } +} + +#[test] +fn get_test_value_returns_the_period_delimited_string() { + let (glean, _t) = new_glean(None); + + let metric = JweMetric::new(CommonMetricData { + name: "jwe_metric".into(), + category: "local".into(), + send_in_pings: vec!["core".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set_with_compact_representation(&glean, JWE); + + assert_eq!(metric.test_get_value(&glean, "core").unwrap(), JWE); +} + +#[test] +fn get_test_value_as_json_string_returns_the_expected_repr() { + let (glean, _t) = new_glean(None); + + let metric = JweMetric::new(CommonMetricData { + name: "jwe_metric".into(), + category: "local".into(), + send_in_pings: vec!["core".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set_with_compact_representation(&glean, JWE); + + let expected_json = format!("{{\"header\":\"{}\",\"key\":\"{}\",\"init_vector\":\"{}\",\"cipher_text\":\"{}\",\"auth_tag\":\"{}\"}}", HEADER, KEY, INIT_VECTOR, CIPHER_TEXT, AUTH_TAG); + assert_eq!( + metric + .test_get_value_as_json_string(&glean, "core") + .unwrap(), + expected_json + ); +} diff --git a/third_party/rust/glean-core/tests/labeled.rs b/third_party/rust/glean-core/tests/labeled.rs new file mode 100644 index 0000000000..386a86d521 --- 
/dev/null +++ b/third_party/rust/glean-core/tests/labeled.rs @@ -0,0 +1,395 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{CommonMetricData, Lifetime}; + +#[test] +fn can_create_labeled_counter_metric() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + Some(vec!["label1".into()]), + ); + + let metric = labeled.get("label1"); + metric.add(&glean, 1); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_counter": { + "telemetry.labeled_metric": { "label1": 1 } + } + }), + snapshot + ); +} + +#[test] +fn can_create_labeled_string_metric() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + StringMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + Some(vec!["label1".into()]), + ); + + let metric = labeled.get("label1"); + metric.set(&glean, "text"); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_string": { + "telemetry.labeled_metric": { "label1": "text" } + } + }), + snapshot + ); +} + +#[test] +fn can_create_labeled_bool_metric() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + BooleanMetric::new(CommonMetricData { + name: 
"labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + Some(vec!["label1".into()]), + ); + + let metric = labeled.get("label1"); + metric.set(&glean, true); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_boolean": { + "telemetry.labeled_metric": { "label1": true } + } + }), + snapshot + ); +} + +#[test] +fn can_use_multiple_labels() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + None, + ); + + let metric = labeled.get("label1"); + metric.add(&glean, 1); + + let metric = labeled.get("label2"); + metric.add(&glean, 2); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_counter": { + "telemetry.labeled_metric": { + "label1": 1, + "label2": 2, + } + } + }), + snapshot + ); +} + +#[test] +fn labels_are_checked_against_static_list() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + Some(vec!["label1".into(), "label2".into()]), + ); + + let metric = labeled.get("label1"); + metric.add(&glean, 1); + + let metric = labeled.get("label2"); + metric.add(&glean, 2); + + // All non-registered labels get mapped to the `__other__` label + let metric = labeled.get("label3"); + metric.add(&glean, 3); + let metric = labeled.get("label4"); + metric.add(&glean, 4); + + let snapshot = StorageManager + 
.snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_counter": { + "telemetry.labeled_metric": { + "label1": 1, + "label2": 2, + "__other__": 7, + } + } + }), + snapshot + ); +} + +#[test] +fn dynamic_labels_too_long() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + None, + ); + + let metric = labeled.get("this_string_has_more_than_thirty_characters"); + metric.add(&glean, 1); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_counter": { + "glean.error.invalid_label": { "telemetry.labeled_metric": 1 }, + "telemetry.labeled_metric": { + "__other__": 1, + } + } + }), + snapshot + ); +} + +#[test] +fn dynamic_labels_regex_mismatch() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + None, + ); + + let labels_not_validating = vec![ + "notSnakeCase", + "", + "with/slash", + "1.not_fine", + "this.$isnotfine", + "-.not_fine", + "this.is_not_fine.2", + ]; + let num_non_validating = labels_not_validating.len(); + + for label in &labels_not_validating { + labeled.get(label).add(&glean, 1); + } + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_counter": { + "glean.error.invalid_label": { "telemetry.labeled_metric": num_non_validating }, + "telemetry.labeled_metric": { + "__other__": num_non_validating, + } + } + }), + snapshot + ); +} + +#[test] +fn 
dynamic_labels_regex_allowed() { + let (glean, _t) = new_glean(None); + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + None, + ); + + let labels_validating = vec![ + "this.is.fine", + "this_is_fine_too", + "this.is_still_fine", + "thisisfine", + "_.is_fine", + "this.is-fine", + "this-is-fine", + ]; + + for label in &labels_validating { + labeled.get(label).add(&glean, 1); + } + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({ + "labeled_counter": { + "telemetry.labeled_metric": { + "this.is.fine": 1, + "this_is_fine_too": 1, + "this.is_still_fine": 1, + "thisisfine": 1, + "_.is_fine": 1, + "this.is-fine": 1, + "this-is-fine": 1 + } + } + }), + snapshot + ); +} + +#[test] +fn seen_labels_get_reloaded_from_disk() { + let (mut tempdir, _) = tempdir(); + + let (glean, dir) = new_glean(Some(tempdir)); + tempdir = dir; + + let labeled = LabeledMetric::new( + CounterMetric::new(CommonMetricData { + name: "labeled_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }), + None, + ); + + // Store some data into labeled metrics + { + // Set the maximum number of labels + for i in 1..=16 { + let label = format!("label{}", i); + labeled.get(&label).add(&glean, i); + } + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", false) + .unwrap(); + + // Check that the data is there + for i in 1..=16 { + let label = format!("label{}", i); + assert_eq!( + i, + snapshot["labeled_counter"]["telemetry.labeled_metric"][&label] + ); + } + + drop(glean); + } + + // Force a reload + { + let (glean, _) = new_glean(Some(tempdir)); + + // Try to store another label + 
labeled.get("new_label").add(&glean, 40); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", false) + .unwrap(); + + // Check that the old data is still there + for i in 1..=16 { + let label = format!("label{}", i); + assert_eq!( + i, + snapshot["labeled_counter"]["telemetry.labeled_metric"][&label] + ); + } + + // The new label lands in the __other__ bucket, due to too many labels + assert_eq!( + 40, + snapshot["labeled_counter"]["telemetry.labeled_metric"]["__other__"] + ); + } +} diff --git a/third_party/rust/glean-core/tests/memory_distribution.rs b/third_party/rust/glean-core/tests/memory_distribution.rs new file mode 100644 index 0000000000..8c7c620fa8 --- /dev/null +++ b/third_party/rust/glean-core/tests/memory_distribution.rs @@ -0,0 +1,193 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// Tests ported from glean-ac + +#[test] +fn serializer_should_correctly_serialize_memory_distribution() { + let (mut tempdir, _) = tempdir(); + + let memory_unit = MemoryUnit::Kilobyte; + let kb = 1024; + + { + let (glean, dir) = new_glean(Some(tempdir)); + tempdir = dir; + + let metric = MemoryDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + memory_unit, + ); + + metric.accumulate(&glean, 100_000); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + assert_eq!(snapshot.sum, 100_000 * kb); + } + + // Make a new Glean 
instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!(100_000 * kb), + snapshot["memory_distribution"]["telemetry.distribution"]["sum"] + ); + } +} + +#[test] +fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = MemoryDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + MemoryUnit::Byte, + ); + + metric.accumulate(&glean, 100_000); + + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!(100_000), + snapshot["memory_distribution"]["telemetry.distribution"]["sum"] + ); + assert_eq!( + json!(1), + snapshot["memory_distribution"]["telemetry.distribution"]["values"]["96785"] + ); + } +} + +// SKIPPED from glean-ac: memory distributions must not accumulate negative values +// This test doesn't apply to Rust, because we're using unsigned integers. + +#[test] +fn the_accumulate_samples_api_correctly_stores_memory_values() { + let (glean, _t) = new_glean(None); + + let metric = MemoryDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + MemoryUnit::Kilobyte, + ); + + // Accumulate the samples. We intentionally do not report + // negative values to not trigger error reporting. 
+ metric.accumulate_samples_signed(&glean, [1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + let kb = 1024; + + // Check that we got the right sum of samples. + assert_eq!(snapshot.sum, 6 * kb); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * kb)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&1023]); + assert_eq!(1, snapshot.values[&2047]); + assert_eq!(1, snapshot.values[&3024]); + + // No errors should be reported. + assert!(test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + .is_err()); +} + +#[test] +fn the_accumulate_samples_api_correctly_handles_negative_values() { + let (glean, _t) = new_glean(None); + + let metric = MemoryDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + MemoryUnit::Kilobyte, + ); + + // Accumulate the samples. + metric.accumulate_samples_signed(&glean, [-1, 1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + let kb = 1024; + + // Check that we got the right sum of samples. + assert_eq!(snapshot.sum, 6 * kb); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * kb)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&1023]); + assert_eq!(1, snapshot.values[&2047]); + assert_eq!(1, snapshot.values[&3024]); + + // 1 error should be reported. 
+ assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + ); +} diff --git a/third_party/rust/glean-core/tests/metrics.rs b/third_party/rust/glean-core/tests/metrics.rs new file mode 100644 index 0000000000..3e906fe90e --- /dev/null +++ b/third_party/rust/glean-core/tests/metrics.rs @@ -0,0 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use glean_core::metrics::*; +use glean_core::CommonMetricData; + +#[test] +fn stores_strings() { + let (glean, _t) = new_glean(None); + let metric = StringMetric::new(CommonMetricData::new("local", "string", "baseline")); + + assert_eq!(None, metric.test_get_value(&glean, "baseline")); + + metric.set(&glean, "telemetry"); + assert_eq!( + "telemetry", + metric.test_get_value(&glean, "baseline").unwrap() + ); +} + +#[test] +fn stores_counters() { + let (glean, _t) = new_glean(None); + let metric = CounterMetric::new(CommonMetricData::new("local", "counter", "baseline")); + + assert_eq!(None, metric.test_get_value(&glean, "baseline")); + + metric.add(&glean, 1); + assert_eq!(1, metric.test_get_value(&glean, "baseline").unwrap()); + + metric.add(&glean, 2); + assert_eq!(3, metric.test_get_value(&glean, "baseline").unwrap()); +} diff --git a/third_party/rust/glean-core/tests/ping.rs b/third_party/rust/glean-core/tests/ping.rs new file mode 100644 index 0000000000..765297aea5 --- /dev/null +++ b/third_party/rust/glean-core/tests/ping.rs @@ -0,0 +1,103 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use glean_core::metrics::*; +use glean_core::CommonMetricData; + +#[test] +fn write_ping_to_disk() { + let (mut glean, _temp) = new_glean(None); + + let ping = PingType::new("metrics", true, false, vec![]); + glean.register_ping_type(&ping); + + // We need to store a metric as an empty ping is not stored. + let counter = CounterMetric::new(CommonMetricData { + name: "counter".into(), + category: "local".into(), + send_in_pings: vec!["metrics".into()], + ..Default::default() + }); + counter.add(&glean, 1); + + assert!(ping.submit(&glean, None).unwrap()); + + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); +} + +#[test] +fn disabling_upload_clears_pending_pings() { + let (mut glean, _) = new_glean(None); + + let ping = PingType::new("metrics", true, false, vec![]); + glean.register_ping_type(&ping); + + // We need to store a metric as an empty ping is not stored. + let counter = CounterMetric::new(CommonMetricData { + name: "counter".into(), + category: "local".into(), + send_in_pings: vec!["metrics".into()], + ..Default::default() + }); + + counter.add(&glean, 1); + assert!(ping.submit(&glean, None).unwrap()); + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); + // At this point no deletion_request ping should exist + // (that is: its directory should not exist at all) + assert!(get_deletion_pings(glean.get_data_path()).is_err()); + + glean.set_upload_enabled(false); + assert_eq!(0, get_queued_pings(glean.get_data_path()).unwrap().len()); + // Disabling upload generates a deletion ping + assert_eq!(1, get_deletion_pings(glean.get_data_path()).unwrap().len()); + + glean.set_upload_enabled(true); + assert_eq!(0, get_queued_pings(glean.get_data_path()).unwrap().len()); + + counter.add(&glean, 1); + assert!(ping.submit(&glean, None).unwrap()); + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); +} + +#[test] +fn 
deletion_request_only_when_toggled_from_on_to_off() 
{ + let (mut glean, _) = new_glean(None); + + // Disabling upload generates a deletion ping + glean.set_upload_enabled(false); + assert_eq!(1, get_deletion_pings(glean.get_data_path()).unwrap().len()); + + // Re-setting it to `false` should not generate an additional ping. + // As we didn't clear the pending ping, that's the only one that sticks around. + glean.set_upload_enabled(false); + assert_eq!(1, get_deletion_pings(glean.get_data_path()).unwrap().len()); + + // Toggling back to true won't generate a ping either. + glean.set_upload_enabled(true); + assert_eq!(1, get_deletion_pings(glean.get_data_path()).unwrap().len()); +} + +#[test] +fn empty_pings_with_flag_are_sent() { + let (mut glean, _) = new_glean(None); + + let ping1 = PingType::new("custom-ping1", true, true, vec![]); + glean.register_ping_type(&ping1); + let ping2 = PingType::new("custom-ping2", true, false, vec![]); + glean.register_ping_type(&ping2); + + // No data is stored in either of the custom pings + + // Sending this should succeed. + assert_eq!(true, ping1.submit(&glean, None).unwrap()); + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); + + // Sending this should fail. + assert_eq!(false, ping2.submit(&glean, None).unwrap()); + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); +} diff --git a/third_party/rust/glean-core/tests/ping_maker.rs b/third_party/rust/glean-core/tests/ping_maker.rs new file mode 100644 index 0000000000..436e38e711 --- /dev/null +++ b/third_party/rust/glean-core/tests/ping_maker.rs @@ -0,0 +1,210 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use glean_core::metrics::*; +use glean_core::ping::PingMaker; +use glean_core::{CommonMetricData, Glean, Lifetime}; + +fn set_up_basic_ping() -> (Glean, PingMaker, PingType, tempfile::TempDir) { + let (tempdir, _) = tempdir(); + let (mut glean, t) = new_glean(Some(tempdir)); + let ping_maker = PingMaker::new(); + let ping_type = PingType::new("store1", true, false, vec![]); + glean.register_ping_type(&ping_type); + + // Record something, so the ping will have data + let metric = BooleanMetric::new(CommonMetricData { + name: "boolean_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + metric.set(&glean, true); + + (glean, ping_maker, ping_type, t) +} + +#[test] +fn ping_info_must_contain_a_nonempty_start_and_end_time() { + let (glean, ping_maker, ping_type, _t) = set_up_basic_ping(); + + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let ping_info = content["ping_info"].as_object().unwrap(); + + let start_time_str = ping_info["start_time"].as_str().unwrap(); + let start_time_date = iso8601_to_chrono(&iso8601::datetime(start_time_str).unwrap()); + + let end_time_str = ping_info["end_time"].as_str().unwrap(); + let end_time_date = iso8601_to_chrono(&iso8601::datetime(end_time_str).unwrap()); + + assert!(start_time_date <= end_time_date); +} + +#[test] +fn get_ping_info_must_report_all_the_required_fields() { + let (glean, ping_maker, ping_type, _t) = set_up_basic_ping(); + + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let ping_info = content["ping_info"].as_object().unwrap(); + + assert!(ping_info.get("start_time").is_some()); + assert!(ping_info.get("end_time").is_some()); + assert!(ping_info.get("seq").is_some()); +} + +#[test] +fn get_client_info_must_report_all_the_available_data() { + let (glean, ping_maker, ping_type, _t) = set_up_basic_ping(); + + let content 
= ping_maker.collect(&glean, &ping_type, None).unwrap(); + let client_info = content["client_info"].as_object().unwrap(); + + client_info["telemetry_sdk_build"].as_str().unwrap(); +} + +// SKIPPED from glean-ac: collect() must report a valid ping with the data from the engines +// This test doesn't really make sense with rkv + +#[test] +fn collect_must_report_none_when_no_data_is_stored() { + // NOTE: This is a behavior change from glean-ac which returned an empty + // string in this case. As this is an implementation detail and not part of + // the public API, it's safe to change this. + + let (mut glean, ping_maker, ping_type, _t) = set_up_basic_ping(); + + let unknown_ping_type = PingType::new("unknown", true, false, vec![]); + glean.register_ping_type(&unknown_ping_type); + + assert!(ping_maker + .collect(&glean, &unknown_ping_type, None) + .is_none()); +} + +#[test] +fn seq_number_must_be_sequential() { + let (glean, ping_maker, _ping_type, _t) = set_up_basic_ping(); + + let metric = BooleanMetric::new(CommonMetricData { + name: "boolean_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store2".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + metric.set(&glean, true); + + for i in 0..=1 { + for ping_name in ["store1", "store2"].iter() { + let ping_type = PingType::new(*ping_name, true, false, vec![]); + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let seq_num = content["ping_info"]["seq"].as_i64().unwrap(); + // Ensure sequence numbers in different stores are independent of + // each other + assert_eq!(i, seq_num); + } + } + + // Test that ping sequence numbers increase independently. 
+ { + let ping_type = PingType::new("store1", true, false, vec![]); + + // 3rd ping of store1 + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let seq_num = content["ping_info"]["seq"].as_i64().unwrap(); + assert_eq!(2, seq_num); + + // 4th ping of store1 + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let seq_num = content["ping_info"]["seq"].as_i64().unwrap(); + assert_eq!(3, seq_num); + } + + { + let ping_type = PingType::new("store2", true, false, vec![]); + + // 3rd ping of store2 + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let seq_num = content["ping_info"]["seq"].as_i64().unwrap(); + assert_eq!(2, seq_num); + } + + { + let ping_type = PingType::new("store1", true, false, vec![]); + + // 5th ping of store1 + let content = ping_maker.collect(&glean, &ping_type, None).unwrap(); + let seq_num = content["ping_info"]["seq"].as_i64().unwrap(); + assert_eq!(4, seq_num); + } +} + +#[test] +fn clear_pending_pings() { + let (mut glean, _) = new_glean(None); + let ping_maker = PingMaker::new(); + let ping_type = PingType::new("store1", true, false, vec![]); + glean.register_ping_type(&ping_type); + + // Record something, so the ping will have data + let metric = BooleanMetric::new(CommonMetricData { + name: "boolean_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + metric.set(&glean, true); + + assert!(glean.submit_ping(&ping_type, None).is_ok()); + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); + + assert!(ping_maker + .clear_pending_pings(glean.get_data_path()) + .is_ok()); + assert_eq!(0, get_queued_pings(glean.get_data_path()).unwrap().len()); +} + +#[test] +fn no_pings_submitted_if_upload_disabled() { + // Regression test, bug 1603571 + + let (mut glean, _) = new_glean(None); + let ping_type = PingType::new("store1", true, true, vec![]); + 
glean.register_ping_type(&ping_type); + + assert!(glean.submit_ping(&ping_type, None).is_ok()); + assert_eq!(1, get_queued_pings(glean.get_data_path()).unwrap().len()); + + // Disable upload, then try to submit + glean.set_upload_enabled(false); + + assert!(glean.submit_ping(&ping_type, None).is_ok()); + assert_eq!(0, get_queued_pings(glean.get_data_path()).unwrap().len()); + + // Test again through the direct call + assert!(ping_type.submit(&glean, None).is_ok()); + assert_eq!(0, get_queued_pings(glean.get_data_path()).unwrap().len()); +} + +#[test] +fn metadata_is_correctly_added_when_necessary() { + let (mut glean, _) = new_glean(None); + glean.set_debug_view_tag("valid-tag"); + let ping_type = PingType::new("store1", true, true, vec![]); + glean.register_ping_type(&ping_type); + + assert!(glean.submit_ping(&ping_type, None).is_ok()); + + let (_, _, metadata) = &get_queued_pings(glean.get_data_path()).unwrap()[0]; + let headers = metadata.as_ref().unwrap().get("headers").unwrap(); + assert_eq!(headers.get("X-Debug-ID").unwrap(), "valid-tag"); +} diff --git a/third_party/rust/glean-core/tests/quantity.rs b/third_party/rust/glean-core/tests/quantity.rs new file mode 100644 index 0000000000..644281521f --- /dev/null +++ b/third_party/rust/glean-core/tests/quantity.rs @@ -0,0 +1,118 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// Tests ported from glean-ac + +// SKIPPED from glean-ac: quantity deserializer should correctly parse integers +// This test doesn't really apply to rkv + +#[test] +fn quantity_serializer_should_correctly_serialize_quantities() { + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. + tempdir = dir; + + let metric = QuantityMetric::new(CommonMetricData { + name: "quantity_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + metric.set(&glean, 1); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"quantity": {"telemetry.quantity_metric": 1}}), + snapshot + ); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"quantity": {"telemetry.quantity_metric": 1}}), + snapshot + ); + } +} + +#[test] +fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = QuantityMetric::new(CommonMetricData { + name: "quantity_metric".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set(&glean, 1); + + for 
store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!({"quantity": {"telemetry.quantity_metric": 1}}), + snapshot + ); + } +} + +// SKIPPED from glean-ac: quantities are serialized in the correct JSON format +// Completely redundant with other tests. + +#[test] +fn quantities_must_not_set_when_passed_negative() { + let (glean, _t) = new_glean(None); + + let metric = QuantityMetric::new(CommonMetricData { + name: "quantity_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Application, + ..Default::default() + }); + + // Attempt to set the quantity to a negative value + metric.set(&glean, -1); + // Check that nothing was recorded + assert!(metric.test_get_value(&glean, "store1").is_none()); + + // Make sure that the errors have been recorded + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None) + ); +} diff --git a/third_party/rust/glean-core/tests/storage.rs b/third_party/rust/glean-core/tests/storage.rs new file mode 100644 index 0000000000..9ffab11f0c --- /dev/null +++ b/third_party/rust/glean-core/tests/storage.rs @@ -0,0 +1,105 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{CommonMetricData, Lifetime}; + +#[test] +fn snapshot_returns_none_if_nothing_is_recorded_in_the_store() { + let (glean, _t) = new_glean(None); + assert!(StorageManager + .snapshot(glean.storage(), "unknown_store", true) + .is_none()) +} + +#[test] +fn can_snapshot() { + let (glean, _t) = new_glean(None); + + let local_metric = StringMetric::new(CommonMetricData { + name: "can_snapshot_local_metric".into(), + category: "local".into(), + send_in_pings: vec!["store".into()], + ..Default::default() + }); + + local_metric.set(&glean, "snapshot 42"); + + assert!(StorageManager + .snapshot(glean.storage(), "store", true) + .is_some()) +} + +#[test] +fn snapshot_correctly_clears_the_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = CounterMetric::new(CommonMetricData { + name: "metric".into(), + category: "telemetry".into(), + send_in_pings: store_names, + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.add(&glean, 1); + + // Get the snapshot from "store1" and clear it. + let snapshot = StorageManager.snapshot(glean.storage(), "store1", true); + assert!(snapshot.is_some()); + // Check that getting a new snapshot for "store1" returns an empty store. + assert!(StorageManager + .snapshot(glean.storage(), "store1", false) + .is_none()); + // Check that we get the right data from both stores. Clearing "store1" must + // not clear "store2" as well. 
+ let snapshot2 = StorageManager.snapshot(glean.storage(), "store2", true); + assert!(snapshot2.is_some()); +} + +#[test] +fn storage_is_thread_safe() { + use std::sync::{Arc, Barrier, Mutex}; + use std::thread; + + let (glean, _t) = new_glean(None); + let glean = Arc::new(Mutex::new(glean)); + + let threadsafe_metric = CounterMetric::new(CommonMetricData { + name: "threadsafe".into(), + category: "global".into(), + send_in_pings: vec!["core".into(), "metrics".into()], + ..Default::default() + }); + let threadsafe_metric = Arc::new(threadsafe_metric); + + let barrier = Arc::new(Barrier::new(2)); + let c = barrier.clone(); + let threadsafe_metric_clone = threadsafe_metric.clone(); + let glean_clone = glean.clone(); + let child = thread::spawn(move || { + threadsafe_metric_clone.add(&*glean_clone.lock().unwrap(), 1); + c.wait(); + threadsafe_metric_clone.add(&*glean_clone.lock().unwrap(), 1); + }); + + threadsafe_metric.add(&*glean.lock().unwrap(), 1); + barrier.wait(); + threadsafe_metric.add(&*glean.lock().unwrap(), 1); + + child.join().unwrap(); + + let snapshot = StorageManager + .snapshot_as_json(glean.lock().unwrap().storage(), "core", true) + .unwrap(); + assert_eq!(json!({"counter": { "global.threadsafe": 4 }}), snapshot); +} diff --git a/third_party/rust/glean-core/tests/string.rs b/third_party/rust/glean-core/tests/string.rs new file mode 100644 index 0000000000..f5a1858cd7 --- /dev/null +++ b/third_party/rust/glean-core/tests/string.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// SKIPPED from glean-ac: string deserializer should correctly parse integers +// This test doesn't really apply to rkv + +#[test] +fn string_serializer_should_correctly_serialize_strings() { + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. + tempdir = dir; + + let metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + metric.set(&glean, "test_string_value"); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"string": {"telemetry.string_metric": "test_string_value"}}), + snapshot + ); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"string": {"telemetry.string_metric": "test_string_value"}}), + snapshot + ); + } +} + +#[test] +fn set_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set(&glean, "test_string_value"); + + 
// Check that the data was correctly set in each store. + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!({"string": {"telemetry.string_metric": "test_string_value"}}), + snapshot + ); + } +} + +// SKIPPED from glean-ac: strings are serialized in the correct JSON format +// Completely redundant with other tests. + +#[test] +fn long_string_values_are_truncated() { + let (glean, _t) = new_glean(None); + + let metric = StringMetric::new(CommonMetricData { + name: "string_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + let test_sting = "01234567890".repeat(20); + metric.set(&glean, test_sting.clone()); + + // Check that the stored value was truncated to its first 100 bytes + assert_eq!( + test_sting[..100], + metric.test_get_value(&glean, "store1").unwrap() + ); + + // Make sure that the errors have been recorded + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow, None) + ); +} diff --git a/third_party/rust/glean-core/tests/string_list.rs b/third_party/rust/glean-core/tests/string_list.rs new file mode 100644 index 0000000000..e2355d5df5 --- /dev/null +++ b/third_party/rust/glean-core/tests/string_list.rs @@ -0,0 +1,249 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, CommonMetricData, ErrorType, Lifetime}; + +#[test] +fn list_can_store_multiple_items() { + let (glean, _t) = new_glean(None); + + let list: StringListMetric = StringListMetric::new(CommonMetricData { + name: "list".into(), + category: "local".into(), + send_in_pings: vec!["core".into()], + ..Default::default() + }); + + list.add(&glean, "first"); + assert_eq!(list.test_get_value(&glean, "core").unwrap(), vec!["first"]); + + list.add(&glean, "second"); + assert_eq!( + list.test_get_value(&glean, "core").unwrap(), + vec!["first", "second"] + ); + + list.set(&glean, vec!["third".into()]); + assert_eq!(list.test_get_value(&glean, "core").unwrap(), vec!["third"]); + + list.add(&glean, "fourth"); + assert_eq!( + list.test_get_value(&glean, "core").unwrap(), + vec!["third", "fourth"] + ); +} + +#[test] +fn stringlist_serializer_should_correctly_serialize_stringlists() { + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. 
+ tempdir = dir; + + let metric = StringListMetric::new(CommonMetricData { + name: "string_list_metric".into(), + category: "telemetry.test".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + metric.set(&glean, vec!["test_string_1".into(), "test_string_2".into()]); + } + + { + let (glean, _) = new_glean(Some(tempdir)); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"string_list": {"telemetry.test.string_list_metric": ["test_string_1", "test_string_2"]}}), + snapshot + ); + } +} + +#[test] +fn set_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let metric = StringListMetric::new(CommonMetricData { + name: "string_list_metric".into(), + category: "telemetry.test".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set(&glean, vec!["test_string_1".into(), "test_string_2".into()]); + + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!({"string_list": {"telemetry.test.string_list_metric": ["test_string_1", "test_string_2"]}}), + snapshot + ); + } +} + +#[test] +fn long_string_values_are_truncated() { + let (glean, _t) = new_glean(None); + + let metric = StringListMetric::new(CommonMetricData { + name: "string_list_metric".into(), + category: "telemetry.test".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + let test_string = "0123456789".repeat(20); + metric.add(&glean, test_string.clone()); + + // Ensure the string was truncated to the proper length. 
+ assert_eq!( + vec![test_string[..50].to_string()], + metric.test_get_value(&glean, "store1").unwrap() + ); + + // Ensure the error has been recorded. + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow, None) + ); + + metric.set(&glean, vec![test_string.clone()]); + + // Ensure the string was truncated to the proper length. + assert_eq!( + vec![test_string[..50].to_string()], + metric.test_get_value(&glean, "store1").unwrap() + ); + + // Ensure the error has been recorded. + assert_eq!( + Ok(2), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidOverflow, None) + ); +} + +#[test] +fn disabled_string_lists_dont_record() { + let (glean, _t) = new_glean(None); + + let metric = StringListMetric::new(CommonMetricData { + name: "string_list_metric".into(), + category: "telemetry.test".into(), + send_in_pings: vec!["store1".into()], + disabled: true, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.add(&glean, "test_string".repeat(20)); + + // Ensure the string was not added. + assert_eq!(None, metric.test_get_value(&glean, "store1")); + + metric.set(&glean, vec!["test_string_2".repeat(20)]); + + // Ensure the stringlist was not set. + assert_eq!(None, metric.test_get_value(&glean, "store1")); + + // Ensure no error was recorded. 
+ assert!( + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None).is_err() + ); +} + +#[test] +fn string_lists_dont_exceed_max_items() { + let (glean, _t) = new_glean(None); + + let metric = StringListMetric::new(CommonMetricData { + name: "string_list_metric".into(), + category: "telemetry.test".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + for _n in 1..21 { + metric.add(&glean, "test_string"); + } + + let expected: Vec<String> = "test_string " + .repeat(20) + .split_whitespace() + .map(|s| s.to_string()) + .collect(); + assert_eq!(expected, metric.test_get_value(&glean, "store1").unwrap()); + + // Ensure the 21st string wasn't added. + metric.add(&glean, "test_string"); + assert_eq!(expected, metric.test_get_value(&glean, "store1").unwrap()); + + // Ensure we recorded the error. + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None) + ); + + // Try to set it to a list that's too long. Ensure it cuts off at 20 elements. + let too_many: Vec<String> = "test_string " + .repeat(21) + .split_whitespace() + .map(|s| s.to_string()) + .collect(); + metric.set(&glean, too_many); + assert_eq!(expected, metric.test_get_value(&glean, "store1").unwrap()); + + assert_eq!( + Ok(2), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None) + ); +} + +#[test] +fn set_does_not_record_error_when_receiving_empty_list() { + let (glean, _t) = new_glean(None); + + let metric = StringListMetric::new(CommonMetricData { + name: "string_list_metric".into(), + category: "telemetry.test".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set(&glean, vec![]); + + // Ensure the empty list was added + assert_eq!(Some(vec![]), metric.test_get_value(&glean, "store1")); + + // Ensure we didn't record an error. 
+ assert!( + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None).is_err() + ); +} diff --git a/third_party/rust/glean-core/tests/timespan.rs b/third_party/rust/glean-core/tests/timespan.rs new file mode 100644 index 0000000000..60855729af --- /dev/null +++ b/third_party/rust/glean-core/tests/timespan.rs @@ -0,0 +1,353 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::time::Duration; + +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// Tests ported from glean-ac + +#[test] +fn serializer_should_correctly_serialize_timespans() { + let (mut tempdir, _) = tempdir(); + + let duration = 60; + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. 
+ tempdir = dir; + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + metric.set_start(&glean, 0); + metric.set_stop(&glean, duration); + + let val = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + assert_eq!(duration, val, "Recorded timespan should be positive."); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!({"timespan": {"telemetry.timespan_metric": { "value": duration, "time_unit": "nanosecond" }}}), + snapshot + ); + } +} + +#[test] +fn single_elapsed_time_must_be_recorded() { + let (glean, _t) = new_glean(None); + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let duration = 60; + + metric.set_start(&glean, 0); + metric.set_stop(&glean, duration); + + let val = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + assert_eq!(duration, val, "Recorded timespan should be positive."); +} + +// SKIPPED from glean-ac: multiple elapsed times must be correctly accumulated. +// replaced by below after API change. 
+ +#[test] +fn second_timer_run_is_skipped() { + let (glean, _t) = new_glean(None); + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let duration = 60; + metric.set_start(&glean, 0); + metric.set_stop(&glean, duration); + + // No error should be recorded here: we had no prior value stored. + assert!( + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidState, None).is_err() + ); + + let first_value = metric.test_get_value(&glean, "store1").unwrap(); + assert_eq!(duration, first_value); + + metric.set_start(&glean, 0); + metric.set_stop(&glean, duration * 2); + + let second_value = metric.test_get_value(&glean, "store1").unwrap(); + assert_eq!(second_value, first_value); + + // Make sure that the error has been recorded: we had a stored value, the + // new measurement was dropped. 
+ assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidState, None) + ); +} + +#[test] +fn recorded_time_conforms_to_resolution() { + let (glean, _t) = new_glean(None); + + let mut ns_metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_ns".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let mut minute_metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_m".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Minute, + ); + + let duration = 60; + ns_metric.set_start(&glean, 0); + ns_metric.set_stop(&glean, duration); + + let ns_value = ns_metric.test_get_value(&glean, "store1").unwrap(); + assert_eq!(duration, ns_value); + + // 1 minute in nanoseconds + let duration_minute = 60 * 1_000_000_000; + minute_metric.set_start(&glean, 0); + minute_metric.set_stop(&glean, duration_minute); + + let minute_value = minute_metric.test_get_value(&glean, "store1").unwrap(); + assert_eq!(1, minute_value); +} + +// SKIPPED from glean-ac: accumulated short-lived timespans should not be discarded + +#[test] +fn cancel_does_not_store() { + let (glean, _t) = new_glean(None); + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + metric.set_start(&glean, 0); + metric.cancel(); + + assert_eq!(None, metric.test_get_value(&glean, "store1")); +} + +#[test] +fn nothing_stored_before_stop() { + let (glean, _t) = new_glean(None); + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: 
"telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let duration = 60; + + metric.set_start(&glean, 0); + + assert_eq!(None, metric.test_get_value(&glean, "store1")); + + metric.set_stop(&glean, duration); + assert_eq!(duration, metric.test_get_value(&glean, "store1").unwrap()); +} + +#[test] +fn set_raw_time() { + let (glean, _t) = new_glean(None); + + let metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let time = Duration::from_secs(1); + metric.set_raw(&glean, time); + + let time_in_ns = time.as_nanos() as u64; + assert_eq!(Some(time_in_ns), metric.test_get_value(&glean, "store1")); +} + +#[test] +fn set_raw_time_does_nothing_when_timer_running() { + let (glean, _t) = new_glean(None); + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let time = Duration::from_secs(42); + + metric.set_start(&glean, 0); + metric.set_raw(&glean, time); + metric.set_stop(&glean, 60); + + // We expect the start/stop value, not the raw value. 
+ assert_eq!(Some(60), metric.test_get_value(&glean, "store1")); + + // Make sure that the error has been recorded + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidState, None) + ); +} + +#[test] +fn timespan_is_not_tracked_across_upload_toggle() { + let (mut glean, _t) = new_glean(None); + + let mut metric = TimespanMetric::new( + CommonMetricData { + name: "timespan_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + // Timer is started. + metric.set_start(&glean, 0); + // User disables telemetry upload. + glean.set_upload_enabled(false); + // App code eventually stops the timer. + // We should clear internal state as upload is disabled. + metric.set_stop(&glean, 40); + + // App code eventually starts the timer again. + // Upload is disabled, so this should not have any effect. + metric.set_start(&glean, 100); + // User enables telemetry upload again. + glean.set_upload_enabled(true); + // App code eventually stops the timer. + // None should be running. + metric.set_stop(&glean, 200); + + // Nothing should have been recorded. + assert_eq!(None, metric.test_get_value(&glean, "store1")); + + // Make sure that the error has been recorded + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidState, None) + ); +} + +#[test] +fn time_cannot_go_backwards() { + let (glean, _t) = new_glean(None); + + let mut metric: TimespanMetric = TimespanMetric::new( + CommonMetricData { + name: "raw_timespan".into(), + category: "test".into(), + send_in_pings: vec!["test1".into()], + ..Default::default() + }, + TimeUnit::Millisecond, + ); + + // Time cannot go backwards. 
+ metric.set_start(&glean, 10); + metric.set_stop(&glean, 0); + assert!(metric.test_get_value(&glean, "test1").is_none()); + assert_eq!( + Ok(1), + test_get_num_recorded_errors(&glean, metric.meta(), ErrorType::InvalidValue, None), + ); +} diff --git a/third_party/rust/glean-core/tests/timing_distribution.rs b/third_party/rust/glean-core/tests/timing_distribution.rs new file mode 100644 index 0000000000..e338fc70c0 --- /dev/null +++ b/third_party/rust/glean-core/tests/timing_distribution.rs @@ -0,0 +1,336 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use std::time::Duration; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{test_get_num_recorded_errors, ErrorType}; +use glean_core::{CommonMetricData, Lifetime}; + +// Tests ported from glean-ac + +#[test] +fn serializer_should_correctly_serialize_timing_distribution() { + let (mut tempdir, _) = tempdir(); + + let duration = 60; + let time_unit = TimeUnit::Nanosecond; + + { + let (glean, dir) = new_glean(Some(tempdir)); + tempdir = dir; + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + time_unit, + ); + + let id = metric.set_start(0); + metric.set_stop_and_accumulate(&glean, id, duration); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + assert_eq!(snapshot.sum, duration); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = 
StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + + assert_eq!( + json!(duration), + snapshot["timing_distribution"]["telemetry.distribution"]["sum"] + ); + } +} + +#[test] +fn set_value_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + + let duration = 1; + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let id = metric.set_start(0); + metric.set_stop_and_accumulate(&glean, id, duration); + + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!(duration), + snapshot["timing_distribution"]["telemetry.distribution"]["sum"] + ); + assert_eq!( + json!(1), + snapshot["timing_distribution"]["telemetry.distribution"]["values"]["1"] + ); + } +} + +#[test] +fn timing_distributions_must_not_accumulate_negative_values() { + let (glean, _t) = new_glean(None); + + let duration = 60; + let time_unit = TimeUnit::Nanosecond; + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + time_unit, + ); + + // Flip around the timestamps, this should result in a negative value which should be + // discarded. 
+ let id = metric.set_start(duration); + metric.set_stop_and_accumulate(&glean, id, 0); + + assert!(metric.test_get_value(&glean, "store1").is_none()); + + // Make sure that the errors have been recorded + assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + ); +} + +#[test] +fn the_accumulate_samples_api_correctly_stores_timing_values() { + let (glean, _t) = new_glean(None); + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Second, + ); + + // Accumulate the samples. We intentionally do not report + // negative values to not trigger error reporting. + metric.accumulate_samples_signed(&glean, [1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + let seconds_to_nanos = 1000 * 1000 * 1000; + + // Check that we got the right sum and number of samples. + assert_eq!(snapshot.sum, 6 * seconds_to_nanos); + + // We should get a sample in 3 buckets. + // These numbers are a bit magic, but they correspond to + // `hist.sample_to_bucket_minimum(i * seconds_to_nanos)` for `i = 1..=3`. + assert_eq!(1, snapshot.values[&984_625_593]); + assert_eq!(1, snapshot.values[&1_969_251_187]); + assert_eq!(1, snapshot.values[&2_784_941_737]); + + // No errors should be reported. 
+ assert!(test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + .is_err()); +} + +#[test] +fn the_accumulate_samples_api_correctly_handles_negative_values() { + let (glean, _t) = new_glean(None); + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + // Accumulate the samples. + metric.accumulate_samples_signed(&glean, [-1, 1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Check that we got the right sum and number of samples. + assert_eq!(snapshot.sum, 6); + + // We should get a sample in each of the first 3 buckets. + assert_eq!(1, snapshot.values[&1]); + assert_eq!(1, snapshot.values[&2]); + assert_eq!(1, snapshot.values[&3]); + + // 1 error should be reported. + assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidValue, + Some("store1") + ) + ); +} + +#[test] +fn the_accumulate_samples_api_correctly_handles_overflowing_values() { + let (glean, _t) = new_glean(None); + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + // The MAX_SAMPLE_TIME is the same from `metrics/timing_distribution.rs`. + const MAX_SAMPLE_TIME: u64 = 1000 * 1000 * 1000 * 60 * 10; + let overflowing_val = MAX_SAMPLE_TIME as i64 + 1; + // Accumulate the samples. 
+ metric.accumulate_samples_signed(&glean, [overflowing_val, 1, 2, 3].to_vec()); + + let snapshot = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Overflowing values are truncated to MAX_SAMPLE_TIME and recorded. + assert_eq!(snapshot.sum, MAX_SAMPLE_TIME + 6); + + // We should get a sample in each of the first 3 buckets. + assert_eq!(1, snapshot.values[&1]); + assert_eq!(1, snapshot.values[&2]); + assert_eq!(1, snapshot.values[&3]); + + // 1 error should be reported. + assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidOverflow, + Some("store1") + ) + ); +} + +#[test] +fn large_nanoseconds_values() { + let (glean, _t) = new_glean(None); + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "distribution".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + let time = Duration::from_secs(10).as_nanos() as u64; + assert!(time > u64::from(u32::max_value())); + + let id = metric.set_start(0); + metric.set_stop_and_accumulate(&glean, id, time); + + let val = metric + .test_get_value(&glean, "store1") + .expect("Value should be stored"); + + // Check that we got the right sum and number of samples. + assert_eq!(val.sum, time); +} + +#[test] +fn stopping_non_existing_id_records_an_error() { + let (glean, _t) = new_glean(None); + + let mut metric = TimingDistributionMetric::new( + CommonMetricData { + name: "non_existing_id".into(), + category: "test".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }, + TimeUnit::Nanosecond, + ); + + metric.set_stop_and_accumulate(&glean, 3785, 60); + + // 1 error should be reported. 
+ assert_eq!( + Ok(1), + test_get_num_recorded_errors( + &glean, + metric.meta(), + ErrorType::InvalidState, + Some("store1") + ) + ); +} diff --git a/third_party/rust/glean-core/tests/uuid.rs b/third_party/rust/glean-core/tests/uuid.rs new file mode 100644 index 0000000000..1317790e6c --- /dev/null +++ b/third_party/rust/glean-core/tests/uuid.rs @@ -0,0 +1,114 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod common; +use crate::common::*; + +use serde_json::json; + +use glean_core::metrics::*; +use glean_core::storage::StorageManager; +use glean_core::{CommonMetricData, Lifetime}; + +#[test] +fn uuid_is_generated_and_stored() { + let (mut glean, _t) = new_glean(None); + + let uuid: UuidMetric = UuidMetric::new(CommonMetricData { + name: "uuid".into(), + category: "local".into(), + send_in_pings: vec!["core".into()], + ..Default::default() + }); + + uuid.generate_and_set(&glean); + let snapshot = glean.snapshot("core", false); + assert!( + snapshot.contains(r#""local.uuid": ""#), + format!("Snapshot 1: {}", snapshot) + ); + + uuid.generate_and_set(&glean); + let snapshot = glean.snapshot("core", false); + assert!( + snapshot.contains(r#""local.uuid": ""#), + format!("Snapshot 2: {}", snapshot) + ); +} + +#[test] +fn uuid_serializer_should_correctly_serialize_uuids() { + let value = uuid::Uuid::new_v4(); + + let (mut tempdir, _) = tempdir(); + + { + // We give tempdir to the `new_glean` function... + let (glean, dir) = new_glean(Some(tempdir)); + // And then we get it back once that function returns. 
+ tempdir = dir; + + let metric = UuidMetric::new(CommonMetricData { + name: "uuid_metric".into(), + category: "telemetry".into(), + send_in_pings: vec!["store1".into()], + disabled: false, + lifetime: Lifetime::User, + ..Default::default() + }); + + metric.set(&glean, value); + + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"uuid": {"telemetry.uuid_metric": value.to_string()}}), + snapshot + ); + } + + // Make a new Glean instance here, which should force reloading of the data from disk + // so we can ensure it persisted, because it has User lifetime + { + let (glean, _) = new_glean(Some(tempdir)); + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), "store1", true) + .unwrap(); + assert_eq!( + json!({"uuid": {"telemetry.uuid_metric": value.to_string()}}), + snapshot + ); + } +} + +#[test] +fn set_properly_sets_the_value_in_all_stores() { + let (glean, _t) = new_glean(None); + let store_names: Vec<String> = vec!["store1".into(), "store2".into()]; + let value = uuid::Uuid::new_v4(); + + let metric = UuidMetric::new(CommonMetricData { + name: "uuid_metric".into(), + category: "telemetry".into(), + send_in_pings: store_names.clone(), + disabled: false, + lifetime: Lifetime::Ping, + ..Default::default() + }); + + metric.set(&glean, value); + + // Check that the data was correctly set in each store. + for store_name in store_names { + let snapshot = StorageManager + .snapshot_as_json(glean.storage(), &store_name, true) + .unwrap(); + + assert_eq!( + json!({"uuid": {"telemetry.uuid_metric": value.to_string()}}), + snapshot + ); + } +} |