diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /browser/components/newtab/test/unit/lib/PersonalityProvider | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/newtab/test/unit/lib/PersonalityProvider')
6 files changed, 3063 insertions, 0 deletions
diff --git a/browser/components/newtab/test/unit/lib/PersonalityProvider/NaiveBayesTextTagger.test.js b/browser/components/newtab/test/unit/lib/PersonalityProvider/NaiveBayesTextTagger.test.js new file mode 100644 index 0000000000..0751cafb4f --- /dev/null +++ b/browser/components/newtab/test/unit/lib/PersonalityProvider/NaiveBayesTextTagger.test.js @@ -0,0 +1,95 @@ +import { NaiveBayesTextTagger } from "lib/PersonalityProvider/NaiveBayesTextTagger.jsm"; +import { + tokenize, + toksToTfIdfVector, +} from "lib/PersonalityProvider/Tokenize.jsm"; + +const EPSILON = 0.00001; + +describe("Naive Bayes Tagger", () => { + describe("#tag", () => { + let model = { + model_type: "nb", + positive_class_label: "military", + positive_class_id: 0, + positive_class_threshold_log_prob: -0.5108256237659907, + classes: [ + { + log_prior: -0.6881346387364013, + feature_log_probs: [ + -6.2149425847276, -6.829869141665873, -7.124856122235796, + -7.116661287797188, -6.694751331313906, -7.11798266787003, + -6.5094904366004185, -7.1639509366900604, -7.218981434452414, + -6.854842907887801, -7.080328841624584, + ], + }, + { + log_prior: -0.6981849745899025, + feature_log_probs: [ + -7.0575941199203465, -6.632333513597953, -7.382756370680115, + -7.1160793981275905, -8.467120918791892, -8.369201274990882, + -8.518506617006922, -7.015756380369387, -7.739036845511857, + -9.748294397894645, -3.9353548206941955, + ], + }, + ], + vocab_idfs: { + deal: [0, 5.5058519847862275], + easy: [1, 5.5058519847862275], + tanks: [2, 5.601162164590552], + sites: [3, 5.957837108529285], + care: [4, 5.957837108529285], + needs: [5, 5.824305715904762], + finally: [6, 5.706522680248379], + super: [7, 5.264689927969339], + heard: [8, 5.5058519847862275], + reached: [9, 5.957837108529285], + words: [10, 5.070533913528382], + }, + }; + let instance = new NaiveBayesTextTagger(model, toksToTfIdfVector); + + let testCases = [ + { + input: "Finally! Super easy care for your tanks!", + expected: { + label: "military", + logProb: -0.16299510296630082, + confident: true, + }, + }, + { + input: "heard", + expected: { + label: "military", + logProb: -0.4628170738373294, + confident: false, + }, + }, + { + input: "words", + expected: { + label: null, + logProb: -0.04258339303757985, + confident: false, + }, + }, + ]; + + let checkTag = tc => { + let actual = instance.tagTokens(tokenize(tc.input)); + it(`should tag ${tc.input} with ${tc.expected.label}`, () => { + assert.equal(tc.expected.label, actual.label); + }); + it(`should give ${tc.input} the correct probability`, () => { + let delta = Math.abs(tc.expected.logProb - actual.logProb); + assert.isTrue(delta <= EPSILON); + }); + }; + + // RELEASE THE TESTS! + for (let tc of testCases) { + checkTag(tc); + } + }); +}); diff --git a/browser/components/newtab/test/unit/lib/PersonalityProvider/NmfTextTagger.test.js b/browser/components/newtab/test/unit/lib/PersonalityProvider/NmfTextTagger.test.js new file mode 100644 index 0000000000..fb3abb1367 --- /dev/null +++ b/browser/components/newtab/test/unit/lib/PersonalityProvider/NmfTextTagger.test.js @@ -0,0 +1,479 @@ +import { NmfTextTagger } from "lib/PersonalityProvider/NmfTextTagger.jsm"; +import { + tokenize, + toksToTfIdfVector, +} from "lib/PersonalityProvider/Tokenize.jsm"; + +const EPSILON = 0.00001; + +describe("NMF Tagger", () => { + describe("#tag", () => { + // The numbers in this model were pulled from existing trained model. + let model = { + document_topic: { + environment: [ + 0.05313956429537541, 0.07314019377743895, 0.03247190024863182, + 0.016189529772591395, 0.003812317145412572, 0.03863075834647775, + 0.007495425135831521, 0.005100298003919777, 0.005245622179405364, + 0.036196010766427554, 0.02189970342121833, 0.03514130992119014, + 0.001248114096050196, 0.0030908722594824665, 0.0023874256586350626, + 0.008533674814792993, 0.0009424690250135675, 0.01603124888144218, + 0.00752822798092765, 0.0039046678154748796, 0.03521776907836766, + 0.00614546613169027, 0.0008272200196643818, 0.01405638079154697, + 0.001990670259485496, 0.002803666919676377, 0.013841677883061631, + 0.004093362693745272, 0.009310678536276432, 0.006158920150866703, + 0.006821027337091937, 0.002712031105462971, 0.009093298611644996, + 0.014642160500331744, 0.0067239941045715386, 0.007150418784462898, + 0.0064652818600521265, 0.0006735690394489199, 0.02063188588742841, + 0.003213083349614106, 0.0031998068360970093, 0.00264520606931871, + 0.008854824468146531, 0.0024170562884908786, 0.0013705390639746128, + 0.0030575940757273288, 0.010417378215688392, 0.002356164040132228, + 0.0026710154645455007, 0.0007295327370144145, 0.0585307418954327, + 0.0037987763460599574, 0.003199095437138493, 0.004368800434950577, + 0.005087168372751965, 0.0011100904433965942, 0.01700096791869979, + 0.01929226435023826, 0.010536397909643058, 0.001734999985783697, + 0.003852807194017686, 0.007916805773686475, 0.028375307444815964, + 0.0012422599635274355, 0.0009298594944844238, 0.02095410849846837, + 0.0017269844428419192, 0.002152880993141985, 0.0030226616228192387, + 0.004804812297400959, 0.0012383636748462198, 0.006991278216261148, + 0.0013747035300597538, 0.002041541234639563, 0.012076270996247411, + 0.006643837514421182, 0.003974012776560734, 0.015794539051705442, + 0.007601190171659186, 0.016474925942594837, 0.002729423078513777, + 0.007635146179880609, 0.013457547041824648, 0.0007592338429017099, + 0.002947096673767141, 0.006371935735541048, 0.003356178481568716, + 0.00451933490245723, 0.0019006306992329104, 0.013048046603391707, + 0.023541628496101297, 0.027659066125377194, 0.002312727786055524, + 0.0014189157259186062, 0.01963766030236683, 0.0026014761547439634, + 0.002333697870992923, 0.003401734295211338, 0.002522073778255918, + 0.0015769783084977752, + ], + space: [ + 0.045976774394786174, 0.04386532305052323, 0.03346748817597193, + 0.008498345884036708, 0.005802390890667938, 0.0017673346473868704, + 0.00468037374691276, 0.0036807899985757367, 0.0034951488381868424, + 0.015073756869093244, 0.006784747891785806, 0.03069702365741547, + 0.004945214461908244, 0.002527030239506901, 0.0012201743197690308, + 0.010191409658936534, 0.0013882500616525532, 0.014559679471816162, + 0.005308140956577744, 0.002067005832569046, 0.006092496689239475, + 0.0029308442356851265, 0.0006407392160713908, 0.01669972147417425, + 0.0018920321527190246, 0.002436089537269062, 0.05542174181989591, + 0.006448761215865303, 0.012804154851567844, 0.014553974971946687, + 0.004927456148063145, 0.006085620881900181, 0.011626122370522652, + 0.002994267915422563, 0.0038291031528493898, 0.006987917175322377, + 0.00719289436611732, 0.0008398926158042337, 0.019068654506361523, + 0.004453895285397824, 0.00401164781243836, 0.0031309255764704544, + 0.013210118660087334, 0.0015542151889036313, 0.0013951089590218057, + 0.002790924761398501, 0.008739250167366135, 0.0027834569638271025, + 0.09198161284531065, 0.0019488047187835441, 0.001739971582806101, + 0.005113637251322287, 0.12140493794373561, 0.005535368890812829, + 0.004198222617607059, 0.0010670629105233682, 0.005298717616708989, + 0.0048291586850982855, 0.005140125537186181, 0.0011663683373124493, + 0.0024499638218810943, 0.012532772497286819, 0.0015564613278042862, + 0.0012252899339204029, 0.0005095187051357676, 0.0035442657060978655, + 0.014030578705118285, 0.0017653534252553718, 0.004026729875153457, + 0.004002067082856801, 0.00809773970333208, 0.017160384509220625, + 0.002981945110677171, 0.0018338446554387704, 0.0031886913904107484, + 0.004654622711785796, 0.0053886727821435415, 0.009023511029300392, + 0.005246967669202147, 0.022806469628558337, 0.0035142224878495355, + 0.006793295047927272, 0.017396620747821886, 0.000922278971300957, + 0.001695889413253992, 0.007015197552957029, 0.003908581792868586, + 0.010136260994789877, 0.0032880552208979508, 0.0039712539426523625, + 0.009672046620728448, 0.007290428293346, 0.0017814796852793386, + 0.0005388988974780036, 0.013936726486762537, 0.003427738251710856, + 0.002206664729558829, 0.05072392472622557, 0.004424158921356747, + 0.0003680061331891622, + ], + biology: [ + 0.054433533850037796, 0.039689474154513994, 0.027661000660240884, + 0.021655563357213067, 0.007862624595639219, 0.006280655377019006, + 0.013407714984668861, 0.004038592819712647, 0.009652765217013826, + 0.0011353987945632667, 0.00925298156804724, 0.004870163054917538, + 0.04911204317171355, 0.006921538451191124, 0.004003624507234068, + 0.016600722822360296, 0.002179735905957767, 0.010801493818182368, + 0.00918922860910538, 0.022115576350545514, 0.0027720850555002148, + 0.003290714340925284, 0.0006359939927595049, 0.020564054347194806, + 0.019590591011010666, 0.0029008397180383077, 0.030414664509122412, + 0.002864704837438281, 0.030933936414333993, 0.00222576969791357, + 0.007077232390623289, 0.005876547862506722, 0.016917705934608753, + 0.016466207380001166, 0.006648808144677746, 0.017876914915160164, + 0.008216930648675583, 0.0026813239798232098, 0.012171904585413245, + 0.012319763594831614, 0.003909608203628946, 0.003205613981613637, + 0.027729523430009183, 0.0019938396819227074, 0.002752482544417343, + 0.0016746657427111145, 0.019564250521109314, 0.027250898086440583, + 0.000954251437229793, 0.0020431321836649734, 0.0014636128217840221, + 0.006821766389705783, 0.003272989792090916, 0.011086677363737012, + 0.0044279892365732595, 0.0029213721398486203, 0.013081117655947345, + 0.012102962176204816, 0.0029165848047082825, 0.002363073972325097, + 0.0028567640089643695, 0.013692951578614878, 0.0013189478722657382, + 0.0030662419379415885, 0.001688218039583749, 0.0007806438728749603, + 0.025458033834110355, 0.009584308792578437, 0.0033243840056188263, + 0.0068361098488461045, 0.005178034666939756, 0.006831575853694424, + 0.010170774789130092, 0.004639315532453418, 0.00655511046953238, + 0.005661100806175219, 0.006238755352678196, 0.023282136482285103, + 0.007790828526461584, 0.011840304456780202, 0.0021953903460442225, + 0.011205225479328193, 0.01665869590158306, 0.0009257333679666402, + 0.0032380769616003604, 0.007379754534437712, 0.01804771060116468, + 0.02540492978451049, 0.0027900782593570507, 0.0029721824342474694, + 0.005666888959879564, 0.003629523931553047, 0.0017838703067849428, + 0.004996486217852931, 0.006086510142627035, 0.0023570031997685236, + 0.002718397814380002, 0.003908858478916721, 0.02080129902865465, + 0.005591305783253238, + ], + }, + topic_word: [ + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.003173633134427233, 0.0, 0.0, + 0.0019409914586816176, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 5.135548639746091e-5, 0.0, 0.0, 0.0, + 0.00015384770766669982, + ], + [ + 0.0, 0.0, 0.0005001441880557176, 0.0, 0.0, 0.0012069823147301646, + 0.02401141538644239, 8.831990149479376e-5, 0.001813504147854849, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0003577161362340021, 0.0005744157863408606, + 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.002662246533243532, 0.0, 0.0, + 0.0008394369973758684, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 4.768637450522633e-5, 0.0, 0.0, 0.0, 0.0, 0.0010421065429755969, + 0.0, 0.0, 2.3210938729937306e-5, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0006034363278588148, + 0.001690622339085902, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.004257728522853072, 0.0, 0.0, 0.0, 0.0], + [ + 0.0007238839225620208, 0.0, 0.0, 0.0, 0.0, 0.0009507496006759083, + 0.0012635532859311572, 0.0, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.2699264109324263e-5, + 0.00032868342552128994, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0011157667743487598, 0.001278875789622101, + 9.011724853181247e-6, 0.0, 3.22069766200917e-5, 0.004124963644732435, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00011961487736485771], + [0.0, 0.0, 0.0, 5.734703813314615e-5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.0340264022466226e-5, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.00039701897786057513, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.19635202968946042, 0.0, 0.0008873887898279083, 0.0, + 0.0, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 1.552973162326247e-5, 0.0, + 0.002284331845105356, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.005561738919282601, 0.0, 0.0, 0.0, 0.010700323065082812, + 0.0, 0.0005795117202094265, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0005085828329663487, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.029261090049475084, 0.0020864946050332834, + 0.0018513709831557076, 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0008328286790309667, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0013227647245223537, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0024010554774254685, 5.357245317969706e-5, 0.0, + 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0014484032312145462, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0012081428144960678, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.000616488580813398, 0.0, 0.0, 0.0017954524796671627, 0.0, + 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0006660554263924299, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0011891151421092303, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0024885434472066534, 0.0, + 0.0010165824086743897, 0.0, 0.0, + ], + [ + 0.0, 5.692292246819767e-5, 0.0, 0.0, 0.001006289633741549, 0.0, 0.0, + 0.001897882990870404, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00010646854330751878, 0.0, + 0.0013480243353754932, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0002608785715957589, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0010620422134845085, 0.0, 0.0, + 0.0002032215308376943, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0008928062238389307, 0.0, 0.0, + 5.727265080002417e-5, 0.0, + ], + [ + 0.0, 0.0, 0.06061253593083364, 0.0, 0.02739898181912798, 0.0, 0.0, + 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0014338134220455178, 0.0, + 0.0011276871850520397, 0.002840121913315777, + ], + [0.0008014293374641945, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.000345858724152025, 0.013078498367906305, 0.0, + 0.002815596608197659, 0.0, 0.0, 0.0030778986683343023, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0010177321509216356, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.00015333347872060042, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0009655934464519347, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0008542046515290346, 0.0, 0.0, + 0.00016472517230317488, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0007759590139787148, + 0.0037535348789227703, 0.0007205740927611773, + ], + [ + 0.0, 0.0, 0.0010313963595627862, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0069665132800572115, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0006880323929924655, 9.207429290830475e-5, + 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0008404475484102756, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.00016603822882009137, 0.0, 0.0, 0.0, + 0.0004386724451378034, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.003971386830918022, 0.0, 0.0, 0.0, 0.0], + [0.000983926199078037, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.001299108775819868, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.16326515307916822, 0.0, 0.0, 0.0, 0.0, 0.0028677496385613155, + 0.023677620702293598, 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 5.737710913345495e-6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0002081792662367579, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0002840163488982256, + ], + [0.0, 0.0, 0.0, 0.0, 0.0005021534925351664, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.001057424953719077, 0.0, + 0.003578658690485632, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.00022950619982206556, + 0.0018791783657735252, 0.0008530683004027156, 4.5513911743540586e-5, + 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0045523319463242765, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0006160628426134845, 0.0, 0.0023393152617350653, + 0.0, 0.0, 0.0012979890699731222, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.003391399407584813, 0.0, 0.0, 0.000719659722017165, 0.0, + 0.004722518573572638, 0.002758841738663124, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.002127862313876461, 0.0, 0.005031998155190167, + 0.0, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.00055401373160389, 0.0, 0.0, 0.000333325450244618, + 0.0017824446558959168, 0.0011398506826041158, 0.0, + 0.0006366915431430632, + ], + [ + 0.0, 0.21687336139378274, 0.0, 0.0, 0.0, 0.0030345303266644387, 0.0, + 0.0, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0012637173523723526, 0.0, + 0.0010158476831041915, 0.0035425832276585615, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0015451984659512325, 0.019909953764629045, + 0.0013484737840911303, 0.0033472098053086113, 0.0016951819626954759, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00015923419851654453, 0.0, + 0.0024056492047359367, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.01305313280419075, + 0.00014197157780982973, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.000746430999979358, 0.0, + 0.0010041202546700189, 0.004557016648181857, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00021372865758801545, + 0.00025925151316940747, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001658746582791234, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.00973640859923001, 0.0012404719999980969, + 0.0006365355864806626, 0.0008291013715577852, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.001473459191608214, 0.0, 0.0, + 0.0009195459918865811, 0.002012929485852207, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0005850456523130979, 0.0, + 0.00014396718214395852, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0011858302272740567, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0046803403116507545, 0.002083219444498354, 0.0, + 0.0, 0.0, 0.006104495765365948, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.005456944646675863, 0.0, + 0.00011428354610339084, 0.0, 0.0, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0013384597578988894, 0.0, 0.0, 0.0, 0.0], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0018450592044551373, 0.0, + 0.005182965872305058, 0.0, 0.0, + ], + [ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0003041074021307749, 0.0, + 0.0020827735275448823, 0.0, 0.0008494429669380388, + ], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + vocab_idfs: { + blood: [0, 5.0948820521571045], + earth: [1, 4.2248041634380815], + rocket: [2, 5.666668375712782], + brain: [3, 4.616846251214104], + mars: [4, 6.226284163648205], + nothing: [5, 5.270772718620769], + nada: [6, 4.815297189937943], + star: [7, 6.38880309314598], + zilch: [8, 5.889811927026992], + soil: [9, 7.14257489552236], + }, + }; + + let instance = new NmfTextTagger(model, toksToTfIdfVector); + + let testCases = [ + { + input: "blood is in the brain", + expected: { + environment: 0.00037336337061919943, + space: 0.0003307690554984028, + biology: 0.0026549079818439627, + }, + }, + + { + input: "rocket to the star", + expected: { + environment: 0.0002855180592590448, + space: 0.004006242743506598, + biology: 0.0003094182371360131, + }, + }, + { + input: "rocket to the star mars", + expected: { + environment: 0.0004180326651780644, + space: 0.003844259295376754, + biology: 0.0003135623817729136, + }, + }, + { + input: "rocket rocket rocket", + expected: { + environment: 0.00033052002469507015, + space: 0.007519787053895712, + biology: 0.00031862864995569246, + }, + }, + { + input: "nothing nada rocket", + expected: { + environment: 0.0008597524218029812, + space: 0.0035401031629944506, + biology: 0.000950627767326667, + }, + }, + { + input: "rocket", + expected: { + environment: 0.00033052002469507015, + space: 0.007519787053895712, + biology: 0.00031862864995569246, + }, + }, + { + input: "this sentence is out of vocabulary", + expected: { + environment: 0.0, + space: 0.0, + biology: 0.0, + }, + }, + { + input: "this sentence is out of vocabulary except for rocket", + expected: { + environment: 0.00033052002469507015, + space: 0.007519787053895712, + biology: 0.00031862864995569246, + }, + }, + ]; + + let checkTag = tc => { + let actual = instance.tagTokens(tokenize(tc.input)); + it(`should score ${tc.input} correctly`, () => { + Object.keys(actual).forEach(tag => { + let delta = Math.abs(tc.expected[tag] - actual[tag]); + assert.isTrue(delta <= EPSILON); + }); + }); + }; + + // RELEASE THE TESTS! + for (let tc of testCases) { + checkTag(tc); + } + }); +}); diff --git a/browser/components/newtab/test/unit/lib/PersonalityProvider/PersonalityProvider.test.js b/browser/components/newtab/test/unit/lib/PersonalityProvider/PersonalityProvider.test.js new file mode 100644 index 0000000000..833a9d5a7c --- /dev/null +++ b/browser/components/newtab/test/unit/lib/PersonalityProvider/PersonalityProvider.test.js @@ -0,0 +1,356 @@ +import { GlobalOverrider } from "test/unit/utils"; +import { PersonalityProvider } from "lib/PersonalityProvider/PersonalityProvider.jsm"; + +describe("Personality Provider", () => { + let instance; + let RemoteSettingsStub; + let RemoteSettingsOnStub; + let RemoteSettingsOffStub; + let RemoteSettingsGetStub; + let sandbox; + let globals; + let baseURLStub; + + beforeEach(() => { + sandbox = sinon.createSandbox(); + globals = new GlobalOverrider(); + + RemoteSettingsOnStub = sandbox.stub().returns(); + RemoteSettingsOffStub = sandbox.stub().returns(); + RemoteSettingsGetStub = sandbox.stub().returns([]); + + RemoteSettingsStub = name => ({ + get: RemoteSettingsGetStub, + on: RemoteSettingsOnStub, + off: RemoteSettingsOffStub, + }); + + sinon.spy(global, "BasePromiseWorker"); + sinon.spy(global.BasePromiseWorker.prototype, "post"); + + baseURLStub = "https://baseattachmentsurl"; + global.fetch = async server => ({ + ok: true, + json: async () => { + if (server === "bogus://foo/") { + return { capabilities: { attachments: { base_url: baseURLStub } } }; + } + return {}; + }, + }); + globals.set("RemoteSettings", RemoteSettingsStub); + + instance = new PersonalityProvider(); + instance.interestConfig = { + history_item_builder: "history_item_builder", + history_required_fields: ["a", "b", "c"], + interest_finalizer: "interest_finalizer", + item_to_rank_builder: "item_to_rank_builder", + item_ranker: "item_ranker", + interest_combiner: "interest_combiner", + }; + }); + afterEach(() => { + sinon.restore(); + sandbox.restore(); + globals.restore(); + }); + describe("#personalityProviderWorker", () => { + it("should create a new promise worker on first call", async () => { + const { personalityProviderWorker } = instance; + assert.calledOnce(global.BasePromiseWorker); + assert.isDefined(personalityProviderWorker); + }); + it("should cache _personalityProviderWorker on first call", async () => { + instance._personalityProviderWorker = null; + const { personalityProviderWorker } = instance; + assert.isDefined(instance._personalityProviderWorker); + assert.isDefined(personalityProviderWorker); + }); + it("should use old promise worker on second call", async () => { + let { personalityProviderWorker } = instance; + personalityProviderWorker = instance.personalityProviderWorker; + assert.calledOnce(global.BasePromiseWorker); + assert.isDefined(personalityProviderWorker); + }); + }); + describe("#_getBaseAttachmentsURL", () => { + it("should return a fresh value", async () => { + await instance._getBaseAttachmentsURL(); + assert.equal(instance._baseAttachmentsURL, baseURLStub); + }); + it("should return a cached value", async () => { + const cachedURL = "cached"; + instance._baseAttachmentsURL = cachedURL; + await instance._getBaseAttachmentsURL(); + assert.equal(instance._baseAttachmentsURL, cachedURL); + }); + }); + describe("#setup", () => { + it("should setup two sync attachments", () => { + sinon.spy(instance, "setupSyncAttachment"); + instance.setup(); + assert.calledTwice(instance.setupSyncAttachment); + }); + }); + describe("#teardown", () => { + it("should teardown two sync attachments", () => { + sinon.spy(instance, "teardownSyncAttachment"); + instance.teardown(); + assert.calledTwice(instance.teardownSyncAttachment); + }); + it("should terminate worker", () => { + const terminateStub = sandbox.stub().returns(); + instance._personalityProviderWorker = { + terminate: terminateStub, + }; + instance.teardown(); + assert.calledOnce(terminateStub); + }); + }); + describe("#setupSyncAttachment", () => { + it("should call remote settings on twice for setupSyncAttachment", () => { + assert.calledTwice(RemoteSettingsOnStub); + }); + }); + describe("#teardownSyncAttachment", () => { + it("should call remote settings off for teardownSyncAttachment", () => { + instance.teardownSyncAttachment(); + assert.calledOnce(RemoteSettingsOffStub); + }); + }); + describe("#onSync", () => { + it("should call worker onSync", () => { + instance.onSync(); + assert.calledWith(global.BasePromiseWorker.prototype.post, "onSync"); + }); + }); + describe("#getAttachment", () => { + it("should call worker onSync", () => { + instance.getAttachment(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "getAttachment" + ); + }); + }); + describe("#getRecipe", () => { + it("should call worker getRecipe and remote settings get", async () => { + RemoteSettingsGetStub = sandbox.stub().returns([ + { + key: 1, + }, + ]); + sinon.spy(instance, "getAttachment"); + RemoteSettingsStub = name => ({ + get: RemoteSettingsGetStub, + on: RemoteSettingsOnStub, + off: RemoteSettingsOffStub, + }); + globals.set("RemoteSettings", RemoteSettingsStub); + + const result = await instance.getRecipe(); + assert.calledOnce(RemoteSettingsGetStub); + assert.calledOnce(instance.getAttachment); + assert.equal(result.recordKey, 1); + }); + }); + describe("#fetchHistory", () => { + it("should return a history object for fetchHistory", async () => { + const history = await instance.fetchHistory(["requiredColumn"], 1, 1); + assert.equal( + history.sql, + `SELECT url, title, visit_count, frecency, last_visit_date, description\n FROM moz_places\n WHERE last_visit_date >= 1000000\n AND last_visit_date < 1000000 AND IFNULL(requiredColumn, '') <> '' LIMIT 30000` + ); + assert.equal(history.options.columns.length, 1); + assert.equal(Object.keys(history.options.params).length, 0); + }); + }); + describe("#getHistory", () => { + it("should return an empty array", async () => { + instance.interestConfig = { + history_required_fields: [], + }; + const result = await instance.getHistory(); + assert.equal(result.length, 0); + }); + it("should call fetchHistory", async () => { + sinon.spy(instance, "fetchHistory"); + await instance.getHistory(); + }); + }); + describe("#setBaseAttachmentsURL", () => { + it("should call worker setBaseAttachmentsURL", async () => { + await instance.setBaseAttachmentsURL(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "setBaseAttachmentsURL" + ); + }); + }); + describe("#setInterestConfig", () => { + it("should call worker setInterestConfig", async () => { + await instance.setInterestConfig(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "setInterestConfig" + ); + }); + }); + describe("#setInterestVector", () => { + it("should call worker setInterestVector", async () => { + await instance.setInterestVector(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "setInterestVector" + ); + }); + }); + describe("#fetchModels", () => { + it("should call worker fetchModels and remote settings get", async () => { + await instance.fetchModels(); + assert.calledOnce(RemoteSettingsGetStub); + assert.calledWith(global.BasePromiseWorker.prototype.post, "fetchModels"); + }); + }); + describe("#generateTaggers", () => { + it("should call worker generateTaggers", async () => { + await instance.generateTaggers(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "generateTaggers" + ); + }); + }); + describe("#generateRecipeExecutor", () => { + it("should call worker generateRecipeExecutor", async () => { + await instance.generateRecipeExecutor(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "generateRecipeExecutor" + ); + }); + }); + describe("#createInterestVector", () => { + it("should call worker createInterestVector", async () => { + await instance.createInterestVector(); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "createInterestVector" + ); + }); + }); + describe("#init", () => { + it("should return early if setInterestConfig fails", async () => { + sandbox.stub(instance, "setBaseAttachmentsURL").returns(); + sandbox.stub(instance, "setInterestConfig").returns(); + instance.interestConfig = null; + const callback = globals.sandbox.stub(); + await instance.init(callback); + assert.notCalled(callback); + }); + it("should return early if fetchModels fails", async () => { + sandbox.stub(instance, "setBaseAttachmentsURL").returns(); + sandbox.stub(instance, "setInterestConfig").returns(); + sandbox.stub(instance, "fetchModels").resolves({ + ok: false, + }); + const callback = globals.sandbox.stub(); + await instance.init(callback); + assert.notCalled(callback); + }); + it("should return early if createInterestVector fails", async () => { + sandbox.stub(instance, "setBaseAttachmentsURL").returns(); + sandbox.stub(instance, "setInterestConfig").returns(); + sandbox.stub(instance, "fetchModels").resolves({ + ok: true, + }); + sandbox.stub(instance, "generateRecipeExecutor").resolves({ + ok: true, + }); + sandbox.stub(instance, "createInterestVector").resolves({ + ok: false, + }); + const callback = globals.sandbox.stub(); + await instance.init(callback); + assert.notCalled(callback); + }); + it("should call callback on successful init", async () => { + sandbox.stub(instance, "setBaseAttachmentsURL").returns(); + sandbox.stub(instance, "setInterestConfig").returns(); + sandbox.stub(instance, "fetchModels").resolves({ + ok: true, + }); + sandbox.stub(instance, "generateRecipeExecutor").resolves({ + ok: true, + }); + sandbox.stub(instance, "createInterestVector").resolves({ + ok: true, + }); + sandbox.stub(instance, "setInterestVector").resolves(); + const callback = globals.sandbox.stub(); + await instance.init(callback); + assert.calledOnce(callback); + assert.isTrue(instance.initialized); + }); + it("should do generic init stuff when calling init with no cache", async () => { + sandbox.stub(instance, "setBaseAttachmentsURL").returns(); + sandbox.stub(instance, "setInterestConfig").returns(); + sandbox.stub(instance, "fetchModels").resolves({ + ok: true, + }); + sandbox.stub(instance, "generateRecipeExecutor").resolves({ + ok: true, + }); + sandbox.stub(instance, "createInterestVector").resolves({ + ok: true, + interestVector: "interestVector", + }); + sandbox.stub(instance, "setInterestVector").resolves(); + await instance.init(); + assert.calledOnce(instance.setBaseAttachmentsURL); + assert.calledOnce(instance.setInterestConfig); + assert.calledOnce(instance.fetchModels); + assert.calledOnce(instance.generateRecipeExecutor); + assert.calledOnce(instance.createInterestVector); + assert.calledOnce(instance.setInterestVector); + }); + }); + describe("#calculateItemRelevanceScore", () => { + it("should return score for uninitialized provider", async () => { + instance.initialized = false; + assert.equal( + await instance.calculateItemRelevanceScore({ item_score: 2 }), + 2 + ); + }); + it("should return score for initialized provider", async () => { + instance.initialized = true; + + instance._personalityProviderWorker = { + post: (postName, [item]) => ({ + rankingVector: { score: item.item_score }, + }), + }; + + assert.equal( + await instance.calculateItemRelevanceScore({ item_score: 2 }), + 2 + ); + }); + it("should post calculateItemRelevanceScore to PersonalityProviderWorker", async () => { + instance.initialized = true; + await instance.calculateItemRelevanceScore({ item_score: 2 }); + assert.calledWith( + global.BasePromiseWorker.prototype.post, + "calculateItemRelevanceScore" + ); + }); + }); + describe("#getScores", () => { + it("should return correct data for getScores", () => { + const scores = instance.getScores(); + assert.isDefined(scores.interestConfig); + }); + }); +}); diff --git a/browser/components/newtab/test/unit/lib/PersonalityProvider/PersonalityProviderWorkerClass.test.js b/browser/components/newtab/test/unit/lib/PersonalityProvider/PersonalityProviderWorkerClass.test.js new file mode 100644 index 0000000000..6dd483ae70 --- /dev/null +++ b/browser/components/newtab/test/unit/lib/PersonalityProvider/PersonalityProviderWorkerClass.test.js @@ -0,0 +1,456 @@ +import { GlobalOverrider } from "test/unit/utils"; +import { PersonalityProviderWorker } from "lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm"; +import { + tokenize, + toksToTfIdfVector, +} from "lib/PersonalityProvider/Tokenize.jsm"; +import { RecipeExecutor } from "lib/PersonalityProvider/RecipeExecutor.jsm"; +import { NmfTextTagger } from "lib/PersonalityProvider/NmfTextTagger.jsm"; +import { NaiveBayesTextTagger } from "lib/PersonalityProvider/NaiveBayesTextTagger.jsm"; + +describe("Personality Provider Worker Class", () => { + let instance; + let globals; + let sandbox; + + beforeEach(() => { + sandbox = sinon.createSandbox(); + globals = new GlobalOverrider(); + globals.set("tokenize", tokenize); + globals.set("toksToTfIdfVector", toksToTfIdfVector); + globals.set("NaiveBayesTextTagger", NaiveBayesTextTagger); + globals.set("NmfTextTagger", NmfTextTagger); + globals.set("RecipeExecutor", RecipeExecutor); + instance = new PersonalityProviderWorker(); + + // mock the RecipeExecutor + instance.recipeExecutor = { + executeRecipe: (item, recipe) => { + if (recipe === "history_item_builder") { + if (item.title === "fail") { + return null; + } + return { + title: item.title, + score: item.frecency, + type: "history_item", + }; + } else if (recipe === "interest_finalizer") { + return { + title: item.title, + score: item.score * 100, + type: "interest_vector", + }; + } else if (recipe === "item_to_rank_builder") { + if (item.title === "fail") { + return null; + } + return { + item_title: item.title, + item_score: item.score, + type: "item_to_rank", + }; + } else if (recipe === "item_ranker") { + if (item.title === "fail" || item.item_title === "fail") { + return null; + } + return { + title: item.title, + score: item.item_score * item.score, + type: "ranked_item", + }; + } + return null; + }, + executeCombinerRecipe: (item1, item2, recipe) => { + if (recipe === "interest_combiner") { + if ( + item1.title === "combiner_fail" || + item2.title === "combiner_fail" + ) { + return null; + } + if (item1.type === undefined) { + item1.type = "combined_iv"; + } + if (item1.score === undefined) { + item1.score = 0; + } + return { type: item1.type, score: item1.score + item2.score }; + } + return null; + }, + }; + + instance.interestConfig = { + history_item_builder: "history_item_builder", + history_required_fields: ["a", "b", "c"], + interest_finalizer: "interest_finalizer", + item_to_rank_builder: "item_to_rank_builder", + item_ranker: "item_ranker", + interest_combiner: "interest_combiner", + }; + }); + afterEach(() => { + sinon.restore(); + sandbox.restore(); + globals.restore(); + }); + describe("#setBaseAttachmentsURL", () => { + it("should set baseAttachmentsURL", () => { + instance.setBaseAttachmentsURL("url"); + assert.equal(instance.baseAttachmentsURL, "url"); + }); + }); + describe("#setInterestConfig", () => { + it("should set interestConfig", () => { + instance.setInterestConfig("config"); + assert.equal(instance.interestConfig, "config"); + }); + }); + describe("#setInterestVector", () => { + it("should set interestVector", () => { + instance.setInterestVector("vector"); + assert.equal(instance.interestVector, "vector"); + }); + }); + describe("#onSync", async () => { + it("should sync remote settings collection from onSync", async () => { + sinon.stub(instance, "deleteAttachment").resolves(); + sinon.stub(instance, "maybeDownloadAttachment").resolves(); + + instance.onSync({ + data: { + created: ["create-1", "create-2"], + updated: [ + { old: "update-old-1", new: "update-new-1" }, + { old: "update-old-2", new: "update-new-2" }, + ], + deleted: ["delete-2", "delete-1"], + }, + }); + + assert(instance.maybeDownloadAttachment.withArgs("create-1").calledOnce); + assert(instance.maybeDownloadAttachment.withArgs("create-2").calledOnce); + assert( + instance.maybeDownloadAttachment.withArgs("update-new-1").calledOnce + ); + assert( + instance.maybeDownloadAttachment.withArgs("update-new-2").calledOnce + ); + + assert(instance.deleteAttachment.withArgs("delete-1").calledOnce); + assert(instance.deleteAttachment.withArgs("delete-2").calledOnce); + assert(instance.deleteAttachment.withArgs("update-old-1").calledOnce); + assert(instance.deleteAttachment.withArgs("update-old-2").calledOnce); + }); + }); + describe("#maybeDownloadAttachment", () => { + it("should attempt _downloadAttachment three times for maybeDownloadAttachment", async () => { + let existsStub; + let statStub; + let attachmentStub; + sinon.stub(instance, "_downloadAttachment").resolves(); + const makeDirStub = globals.sandbox + .stub(global.IOUtils, "makeDirectory") + .resolves(); + + existsStub = globals.sandbox + .stub(global.IOUtils, "exists") + .resolves(true); + + statStub = globals.sandbox + .stub(global.IOUtils, "stat") + .resolves({ size: "1" }); + + attachmentStub = { + attachment: { + filename: "file", + size: "1", + // This hash matches the hash generated from the empty Uint8Array returned by the IOUtils.read stub. + hash: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + }; + + await instance.maybeDownloadAttachment(attachmentStub); + assert.calledWith(makeDirStub, "personality-provider"); + assert.calledOnce(existsStub); + assert.calledOnce(statStub); + assert.notCalled(instance._downloadAttachment); + + existsStub.resetHistory(); + statStub.resetHistory(); + instance._downloadAttachment.resetHistory(); + + attachmentStub = { + attachment: { + filename: "file", + size: "2", + }, + }; + + await instance.maybeDownloadAttachment(attachmentStub); + assert.calledThrice(existsStub); + assert.calledThrice(statStub); + assert.calledThrice(instance._downloadAttachment); + + existsStub.resetHistory(); + statStub.resetHistory(); + instance._downloadAttachment.resetHistory(); + + attachmentStub = { + attachment: { + filename: "file", + size: "1", + // Bogus hash to trigger an update. + hash: "1234", + }, + }; + + await instance.maybeDownloadAttachment(attachmentStub); + assert.calledThrice(existsStub); + assert.calledThrice(statStub); + assert.calledThrice(instance._downloadAttachment); + }); + }); + describe("#_downloadAttachment", () => { + beforeEach(() => { + globals.set("Uint8Array", class Uint8Array {}); + }); + it("should write a file from _downloadAttachment", async () => { + globals.set( + "XMLHttpRequest", + class { + constructor() { + this.status = 200; + this.response = "response!"; + } + open() {} + setRequestHeader() {} + send() {} + } + ); + + const ioutilsWriteStub = globals.sandbox + .stub(global.IOUtils, "write") + .resolves(); + + await instance._downloadAttachment({ + attachment: { location: "location", filename: "filename" }, + }); + + const writeArgs = ioutilsWriteStub.firstCall.args; + assert.equal(writeArgs[0], "filename"); + assert.equal(writeArgs[2].tmpPath, "filename.tmp"); + }); + it("should call console.error from _downloadAttachment if not valid response", async () => { + globals.set( + "XMLHttpRequest", + class { + constructor() { + this.status = 0; + this.response = "response!"; + } + open() {} + setRequestHeader() {} + send() {} + } + ); + + const consoleErrorStub = globals.sandbox + .stub(console, "error") + .resolves(); + + await instance._downloadAttachment({ + attachment: { location: "location", filename: "filename" }, + }); + + assert.calledOnce(consoleErrorStub); + }); + }); + describe("#deleteAttachment", () => { + it("should remove attachments when calling deleteAttachment", async () => { + const makeDirStub = globals.sandbox + .stub(global.IOUtils, "makeDirectory") + .resolves(); + const removeStub = globals.sandbox + .stub(global.IOUtils, "remove") + .resolves(); + await instance.deleteAttachment({ attachment: { filename: "filename" } }); + assert.calledOnce(makeDirStub); + assert.calledTwice(removeStub); + assert.calledWith(removeStub.firstCall, "filename", { + ignoreAbsent: true, + }); + assert.calledWith(removeStub.secondCall, "personality-provider", { + ignoreAbsent: true, + }); + }); + }); + describe("#getAttachment", () => { + it("should return JSON when calling getAttachment", async () => { + sinon.stub(instance, "maybeDownloadAttachment").resolves(); + const readJSONStub = globals.sandbox + .stub(global.IOUtils, "readJSON") + .resolves({}); + const record = { attachment: { filename: "filename" } }; + let returnValue = await instance.getAttachment(record); + + assert.calledOnce(readJSONStub); + assert.calledWith(readJSONStub, "filename"); + assert.calledOnce(instance.maybeDownloadAttachment); + assert.calledWith(instance.maybeDownloadAttachment, record); + assert.deepEqual(returnValue, {}); + + readJSONStub.restore(); + globals.sandbox.stub(global.IOUtils, "readJSON").throws("foo"); + const consoleErrorStub = globals.sandbox + .stub(console, "error") + .resolves(); + returnValue = await instance.getAttachment(record); + + assert.calledOnce(consoleErrorStub); + assert.deepEqual(returnValue, {}); + }); + }); + describe("#fetchModels", () => { + it("should return ok true", async () => { + sinon.stub(instance, "getAttachment").resolves(); + const result = await instance.fetchModels([{ key: 1234 }]); + assert.isTrue(result.ok); + assert.deepEqual(instance.models, [{ recordKey: 1234 }]); + }); + it("should return ok false", async () => { + sinon.stub(instance, "getAttachment").resolves(); + const result = await instance.fetchModels([]); + assert.isTrue(!result.ok); + }); + }); + describe("#generateTaggers", () => { + it("should generate taggers from modelKeys", () => { + const modelKeys = ["nb_model_sports", "nmf_model_sports"]; + + instance.models = [ + { recordKey: "nb_model_sports", model_type: "nb" }, + { + recordKey: "nmf_model_sports", + model_type: "nmf", + parent_tag: "nmf_sports_parent_tag", + }, + ]; + + instance.generateTaggers(modelKeys); + assert.equal(instance.taggers.nbTaggers.length, 1); + assert.equal(Object.keys(instance.taggers.nmfTaggers).length, 1); + }); + it("should skip any models not in modelKeys", () => { + const modelKeys = ["nb_model_sports"]; + + instance.models = [ + { recordKey: "nb_model_sports", model_type: "nb" }, + { + recordKey: "nmf_model_sports", + model_type: "nmf", + parent_tag: "nmf_sports_parent_tag", + }, + ]; + + instance.generateTaggers(modelKeys); + assert.equal(instance.taggers.nbTaggers.length, 1); + assert.equal(Object.keys(instance.taggers.nmfTaggers).length, 0); + }); + it("should skip any models not defined", () => { + const modelKeys = ["nb_model_sports", "nmf_model_sports"]; + + instance.models = [{ recordKey: "nb_model_sports", model_type: "nb" }]; + instance.generateTaggers(modelKeys); + assert.equal(instance.taggers.nbTaggers.length, 1); + assert.equal(Object.keys(instance.taggers.nmfTaggers).length, 0); + }); + }); + describe("#generateRecipeExecutor", () => { + it("should generate a recipeExecutor", () => { + instance.recipeExecutor = null; + instance.taggers = {}; + instance.generateRecipeExecutor(); + assert.isNotNull(instance.recipeExecutor); + }); + }); + describe("#createInterestVector", () => { + let mockHistory = []; + beforeEach(() => { + mockHistory = [ + { + title: "automotive", + description: "something about automotive", + url: "http://example.com/automotive", + frecency: 10, + }, + { + title: "fashion", + description: "something about fashion", + url: "http://example.com/fashion", + frecency: 5, + }, + { + title: "tech", + description: "something about tech", + url: "http://example.com/tech", + frecency: 1, + }, + ]; + }); + it("should gracefully handle history entries that fail", () => { + mockHistory.push({ title: "fail" }); + assert.isNotNull(instance.createInterestVector(mockHistory)); + }); + + it("should fail if the combiner fails", () => { + mockHistory.push({ title: "combiner_fail", frecency: 111 }); + let actual = instance.createInterestVector(mockHistory); + assert.isNull(actual); + }); + + it("should process history, combine, and finalize", () => { + let actual = instance.createInterestVector(mockHistory); + assert.equal(actual.interestVector.score, 1600); + }); + }); + describe("#calculateItemRelevanceScore", () => { + it("should return null for busted item", () => { + assert.equal( + instance.calculateItemRelevanceScore({ title: "fail" }), + null + ); + }); + it("should return null for a busted ranking", () => { + instance.interestVector = { title: "fail", score: 10 }; + assert.equal( + instance.calculateItemRelevanceScore({ title: "some item", score: 6 }), + null + ); + }); + it("should return a score, and not change with interestVector", () => { + instance.interestVector = { score: 10 }; + assert.equal( + instance.calculateItemRelevanceScore({ score: 2 }).rankingVector.score, + 20 + ); + assert.deepEqual(instance.interestVector, { score: 10 }); + }); + it("should use defined personalization_models if available", () => { + instance.interestVector = { score: 10 }; + const item = { + score: 2, + personalization_models: { + entertainment: 1, + }, + }; + assert.equal( + instance.calculateItemRelevanceScore(item).scorableItem.item_tags + .entertainment, + 1 + ); + }); + }); +}); diff --git a/browser/components/newtab/test/unit/lib/PersonalityProvider/RecipeExecutor.test.js b/browser/components/newtab/test/unit/lib/PersonalityProvider/RecipeExecutor.test.js new file mode 100644 index 0000000000..82a1f2b77a --- /dev/null +++ b/browser/components/newtab/test/unit/lib/PersonalityProvider/RecipeExecutor.test.js @@ -0,0 +1,1543 @@ +import { RecipeExecutor } from "lib/PersonalityProvider/RecipeExecutor.jsm"; +import { tokenize } from "lib/PersonalityProvider/Tokenize.jsm"; + +class MockTagger { + constructor(mode, tagScoreMap) { + this.mode = mode; + this.tagScoreMap = tagScoreMap; + } + tagTokens(tokens) { + if (this.mode === "nb") { + // eslint-disable-next-line prefer-destructuring + let tag = Object.keys(this.tagScoreMap)[0]; + // eslint-disable-next-line prefer-destructuring + let prob = this.tagScoreMap[tag]; + let conf = prob >= 0.85; + return { + label: tag, + logProb: Math.log(prob), + confident: conf, + }; + } + return this.tagScoreMap; + } + tag(text) { + return this.tagTokens([text]); + } +} + +describe("RecipeExecutor", () => { + let makeItem = () => { + let x = { + lhs: 2, + one: 1, + two: 2, + three: 3, + foo: "FOO", + bar: "BAR", + baz: ["one", "two", "three"], + qux: 42, + text: "This Is A_sentence.", + url: "http://www.wonder.example.com/dir1/dir2a-dir2b/dir3+4?key1&key2=val2&key3&%26amp=%3D3+4", + url2: "http://wonder.example.com/dir1/dir2a-dir2b/dir3+4?key1&key2=val2&key3&%26amp=%3D3+4", + map: { + c: 3, + a: 1, + b: 2, + }, + map2: { + b: 2, + c: 3, + d: 4, + }, + arr1: [2, 3, 4], + arr2: [3, 4, 5], + long: [3, 4, 5, 6, 7], + tags: { + a: { + aa: 0.1, + ab: 0.2, + ac: 0.3, + }, + b: { + ba: 4, + bb: 5, + bc: 6, + }, + }, + bogus: { + a: { + aa: "0.1", + ab: "0.2", + ac: "0.3", + }, + b: { + ba: "4", + bb: "5", + bc: "6", + }, + }, + zero: { + a: 0, + b: 0, + }, + zaro: [0, 0], + }; + return x; + }; + + let EPSILON = 0.00001; + + let instance = new RecipeExecutor( + [ + new MockTagger("nb", { tag1: 0.7 }), + new MockTagger("nb", { tag2: 0.86 }), + new MockTagger("nb", { tag3: 0.9 }), + new MockTagger("nb", { tag5: 0.9 }), + ], + { + tag1: new MockTagger("nmf", { + tag11: 0.9, + tag12: 0.8, + tag13: 0.7, + }), + tag2: new MockTagger("nmf", { + tag21: 0.8, + tag22: 0.7, + tag23: 0.6, + }), + tag3: new MockTagger("nmf", { + tag31: 0.7, + tag32: 0.6, + tag33: 0.5, + }), + tag4: new MockTagger("nmf", { tag41: 0.99 }), + }, + tokenize + ); + let item = null; + + beforeEach(() => { + item = makeItem(); + }); + + describe("#_assembleText", () => { + it("should simply copy a single string", () => { + assert.equal(instance._assembleText(item, ["foo"]), "FOO"); + }); + it("should append some strings with a space", () => { + assert.equal(instance._assembleText(item, ["foo", "bar"]), "FOO BAR"); + }); + it("should give an empty string for a missing field", () => { + assert.equal(instance._assembleText(item, ["missing"]), ""); + }); + it("should not double space an interior missing field", () => { + assert.equal( + instance._assembleText(item, ["foo", "missing", "bar"]), + "FOO BAR" + ); + }); + it("should splice in an array of strings", () => { + assert.equal( + instance._assembleText(item, ["foo", "baz", "bar"]), + "FOO one two three BAR" + ); + }); + it("should handle numbers", () => { + assert.equal( + instance._assembleText(item, ["foo", "qux", "bar"]), + "FOO 42 BAR" + ); + }); + }); + + describe("#naiveBayesTag", () => { + it("should understand NaiveBayesTextTagger", () => { + item = instance.naiveBayesTag(item, { fields: ["text"] }); + assert.isTrue("nb_tags" in item); + assert.isTrue(!("tag1" in item.nb_tags)); + assert.equal(item.nb_tags.tag2, 0.86); + assert.equal(item.nb_tags.tag3, 0.9); + assert.equal(item.nb_tags.tag5, 0.9); + assert.isTrue("nb_tokens" in item); + assert.deepEqual(item.nb_tokens, ["this", "is", "a", "sentence"]); + assert.isTrue("nb_tags_extended" in item); + assert.isTrue(!("tag1" in item.nb_tags_extended)); + assert.deepEqual(item.nb_tags_extended.tag2, { + label: "tag2", + logProb: Math.log(0.86), + confident: true, + }); + assert.deepEqual(item.nb_tags_extended.tag3, { + label: "tag3", + logProb: Math.log(0.9), + confident: true, + }); + assert.deepEqual(item.nb_tags_extended.tag5, { + label: "tag5", + logProb: Math.log(0.9), + confident: true, + }); + assert.isTrue("nb_tokens" in item); + assert.deepEqual(item.nb_tokens, ["this", "is", "a", "sentence"]); + }); + }); + + describe("#conditionallyNmfTag", () => { + it("should do nothing if it's not nb tagged", () => { + item = instance.conditionallyNmfTag(item, {}); + assert.equal(item, null); + }); + it("should populate nmf tags for the nb tags", () => { + item = instance.naiveBayesTag(item, { fields: ["text"] }); + item = instance.conditionallyNmfTag(item, {}); + assert.isTrue("nb_tags" in item); + assert.deepEqual(item.nmf_tags, { + tag2: { + tag21: 0.8, + tag22: 0.7, + tag23: 0.6, + }, + tag3: { + tag31: 0.7, + tag32: 0.6, + tag33: 0.5, + }, + }); + assert.deepEqual(item.nmf_tags_parent, { + tag21: "tag2", + tag22: "tag2", + tag23: "tag2", + tag31: "tag3", + tag32: "tag3", + tag33: "tag3", + }); + }); + it("should not populate nmf tags for things that were not nb tagged", () => { + item = instance.naiveBayesTag(item, { fields: ["text"] }); + item = instance.conditionallyNmfTag(item, {}); + assert.isTrue("nmf_tags" in item); + assert.isTrue(!("tag4" in item.nmf_tags)); + assert.isTrue("nmf_tags_parent" in item); + assert.isTrue(!("tag4" in item.nmf_tags_parent)); + }); + }); + + describe("#acceptItemByFieldValue", () => { + it("should implement ==", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "==", + rhsValue: 2, + }) !== null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "==", + rhsValue: 3, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "==", + rhsField: "two", + }) !== null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "==", + rhsField: "three", + }) === null + ); + }); + it("should implement !=", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "!=", + rhsValue: 2, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "!=", + rhsValue: 3, + }) !== null + ); + }); + it("should implement < ", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "<", + rhsValue: 1, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "<", + rhsValue: 2, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "<", + rhsValue: 3, + }) !== null + ); + }); + it("should implement <= ", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "<=", + rhsValue: 1, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "<=", + rhsValue: 2, + }) !== null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "<=", + rhsValue: 3, + }) !== null + ); + }); + it("should implement > ", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: ">", + rhsValue: 1, + }) !== null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: ">", + rhsValue: 2, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: ">", + rhsValue: 3, + }) === null + ); + }); + it("should implement >= ", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: ">=", + rhsValue: 1, + }) !== null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: ">=", + rhsValue: 2, + }) !== null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: ">=", + rhsValue: 3, + }) === null + ); + }); + it("should skip items with missing fields", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "no-left", + op: "==", + rhsValue: 1, + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "==", + rhsField: "no-right", + }) === null + ); + assert.isTrue( + instance.acceptItemByFieldValue(item, { field: "lhs", op: "==" }) === + null + ); + }); + it("should skip items with bogus operators", () => { + assert.isTrue( + instance.acceptItemByFieldValue(item, { + field: "lhs", + op: "bogus", + rhsField: "two", + }) === null + ); + }); + }); + + describe("#tokenizeUrl", () => { + it("should strip the leading www from a url", () => { + item = instance.tokenizeUrl(item, { field: "url", dest: "url_toks" }); + assert.deepEqual( + [ + "wonder", + "example", + "com", + "dir1", + "dir2a", + "dir2b", + "dir3", + "4", + "key1", + "key2", + "val2", + "key3", + "amp", + "3", + "4", + ], + item.url_toks + ); + }); + it("should tokenize the not strip the leading non-wwww token from a url", () => { + item = instance.tokenizeUrl(item, { field: "url2", dest: "url_toks" }); + assert.deepEqual( + [ + "wonder", + "example", + "com", + "dir1", + "dir2a", + "dir2b", + "dir3", + "4", + "key1", + "key2", + "val2", + "key3", + "amp", + "3", + "4", + ], + item.url_toks + ); + }); + it("should error for a missing url", () => { + item = instance.tokenizeUrl(item, { field: "missing", dest: "url_toks" }); + assert.equal(item, null); + }); + }); + + describe("#getUrlDomain", () => { + it("should get only the hostname skipping the www", () => { + item = instance.getUrlDomain(item, { field: "url", dest: "url_domain" }); + assert.isTrue("url_domain" in item); + assert.deepEqual("wonder.example.com", item.url_domain); + }); + it("should get only the hostname", () => { + item = instance.getUrlDomain(item, { field: "url2", dest: "url_domain" }); + assert.isTrue("url_domain" in item); + assert.deepEqual("wonder.example.com", item.url_domain); + }); + it("should get the hostname and 2 levels of directories", () => { + item = instance.getUrlDomain(item, { + field: "url", + path_length: 2, + dest: "url_plus_2", + }); + assert.isTrue("url_plus_2" in item); + assert.deepEqual("wonder.example.com/dir1/dir2a-dir2b", item.url_plus_2); + }); + it("should error for a missing url", () => { + item = instance.getUrlDomain(item, { + field: "missing", + dest: "url_domain", + }); + assert.equal(item, null); + }); + }); + + describe("#tokenizeField", () => { + it("should tokenize the field", () => { + item = instance.tokenizeField(item, { field: "text", dest: "toks" }); + assert.isTrue("toks" in item); + assert.deepEqual(["this", "is", "a", "sentence"], item.toks); + }); + it("should error for a missing field", () => { + item = instance.tokenizeField(item, { field: "missing", dest: "toks" }); + assert.equal(item, null); + }); + it("should error for a broken config", () => { + item = instance.tokenizeField(item, {}); + assert.equal(item, null); + }); + }); + + describe("#_typeOf", () => { + it("should know this is a map", () => { + assert.equal(instance._typeOf({}), "map"); + }); + it("should know this is an array", () => { + assert.equal(instance._typeOf([]), "array"); + }); + it("should know this is a string", () => { + assert.equal(instance._typeOf("blah"), "string"); + }); + it("should know this is a boolean", () => { + assert.equal(instance._typeOf(true), "boolean"); + }); + + it("should know this is a null", () => { + assert.equal(instance._typeOf(null), "null"); + }); + }); + + describe("#_lookupScalar", () => { + it("should return the constant", () => { + assert.equal(instance._lookupScalar({}, 1, 0), 1); + }); + it("should return the default", () => { + assert.equal(instance._lookupScalar({}, "blah", 42), 42); + }); + it("should return the field's value", () => { + assert.equal(instance._lookupScalar({ blah: 11 }, "blah", 42), 11); + }); + }); + + describe("#copyValue", () => { + it("should copy values", () => { + item = instance.copyValue(item, { src: "one", dest: "again" }); + assert.isTrue("again" in item); + assert.equal(item.again, 1); + item.one = 100; + assert.equal(item.one, 100); + assert.equal(item.again, 1); + }); + it("should handle maps corrects", () => { + item = instance.copyValue(item, { src: "map", dest: "again" }); + assert.deepEqual(item.again, { a: 1, b: 2, c: 3 }); + item.map.c = 100; + assert.deepEqual(item.again, { a: 1, b: 2, c: 3 }); + item.map = 342; + assert.deepEqual(item.again, { a: 1, b: 2, c: 3 }); + }); + it("should error for a missing field", () => { + item = instance.copyValue(item, { src: "missing", dest: "toks" }); + assert.equal(item, null); + }); + }); + + describe("#keepTopK", () => { + it("should keep the 2 smallest", () => { + item = instance.keepTopK(item, { field: "map", k: 2, descending: false }); + assert.equal(Object.keys(item.map).length, 2); + assert.isTrue("a" in item.map); + assert.equal(item.map.a, 1); + assert.isTrue("b" in item.map); + assert.equal(item.map.b, 2); + assert.isTrue(!("c" in item.map)); + }); + it("should keep the 2 largest", () => { + item = instance.keepTopK(item, { field: "map", k: 2, descending: true }); + assert.equal(Object.keys(item.map).length, 2); + assert.isTrue(!("a" in item.map)); + assert.isTrue("b" in item.map); + assert.equal(item.map.b, 2); + assert.isTrue("c" in item.map); + assert.equal(item.map.c, 3); + }); + it("should still keep the 2 largest", () => { + item = instance.keepTopK(item, { field: "map", k: 2 }); + assert.equal(Object.keys(item.map).length, 2); + assert.isTrue(!("a" in item.map)); + assert.isTrue("b" in item.map); + assert.equal(item.map.b, 2); + assert.isTrue("c" in item.map); + assert.equal(item.map.c, 3); + }); + it("should promote up nested fields", () => { + item = instance.keepTopK(item, { field: "tags", k: 2 }); + assert.equal(Object.keys(item.tags).length, 2); + assert.deepEqual(item.tags, { bb: 5, bc: 6 }); + }); + it("should error for a missing field", () => { + item = instance.keepTopK(item, { field: "missing", k: 3 }); + assert.equal(item, null); + }); + }); + + describe("#scalarMultiply", () => { + it("should use constants", () => { + item = instance.scalarMultiply(item, { field: "map", k: 2 }); + assert.equal(item.map.a, 2); + assert.equal(item.map.b, 4); + assert.equal(item.map.c, 6); + }); + it("should use fields", () => { + item = instance.scalarMultiply(item, { field: "map", k: "three" }); + assert.equal(item.map.a, 3); + assert.equal(item.map.b, 6); + assert.equal(item.map.c, 9); + }); + it("should use default", () => { + item = instance.scalarMultiply(item, { + field: "map", + k: "missing", + dfault: 4, + }); + assert.equal(item.map.a, 4); + assert.equal(item.map.b, 8); + assert.equal(item.map.c, 12); + }); + it("should error for a missing field", () => { + item = instance.scalarMultiply(item, { field: "missing", k: 3 }); + assert.equal(item, null); + }); + it("should multiply numbers", () => { + item = instance.scalarMultiply(item, { field: "lhs", k: 2 }); + assert.equal(item.lhs, 4); + }); + it("should multiply arrays", () => { + item = instance.scalarMultiply(item, { field: "arr1", k: 2 }); + assert.deepEqual(item.arr1, [4, 6, 8]); + }); + it("should should error on strings", () => { + item = instance.scalarMultiply(item, { field: "foo", k: 2 }); + assert.equal(item, null); + }); + }); + + describe("#elementwiseMultiply", () => { + it("should handle maps", () => { + item = instance.elementwiseMultiply(item, { + left: "tags", + right: "map2", + }); + assert.deepEqual(item.tags, { + a: { aa: 0, ab: 0, ac: 0 }, + b: { ba: 8, bb: 10, bc: 12 }, + }); + }); + it("should handle arrays of same length", () => { + item = instance.elementwiseMultiply(item, { + left: "arr1", + right: "arr2", + }); + assert.deepEqual(item.arr1, [6, 12, 20]); + }); + it("should error for arrays of different lengths", () => { + item = instance.elementwiseMultiply(item, { + left: "arr1", + right: "long", + }); + assert.equal(item, null); + }); + it("should error for a missing left", () => { + item = instance.elementwiseMultiply(item, { + left: "missing", + right: "arr2", + }); + assert.equal(item, null); + }); + it("should error for a missing right", () => { + item = instance.elementwiseMultiply(item, { + left: "arr1", + right: "missing", + }); + assert.equal(item, null); + }); + it("should handle numbers", () => { + item = instance.elementwiseMultiply(item, { + left: "three", + right: "two", + }); + assert.equal(item.three, 6); + }); + it("should error for mismatched types", () => { + item = instance.elementwiseMultiply(item, { left: "arr1", right: "two" }); + assert.equal(item, null); + }); + it("should error for strings", () => { + item = instance.elementwiseMultiply(item, { left: "foo", right: "bar" }); + assert.equal(item, null); + }); + }); + + describe("#vectorMultiply", () => { + it("should calculate dot products from maps", () => { + item = instance.vectorMultiply(item, { + left: "map", + right: "map2", + dest: "dot", + }); + assert.equal(item.dot, 13); + }); + it("should calculate dot products from arrays", () => { + item = instance.vectorMultiply(item, { + left: "arr1", + right: "arr2", + dest: "dot", + }); + assert.equal(item.dot, 38); + }); + it("should error for arrays of different lengths", () => { + item = instance.vectorMultiply(item, { left: "arr1", right: "long" }); + assert.equal(item, null); + }); + it("should error for a missing left", () => { + item = instance.vectorMultiply(item, { left: "missing", right: "arr2" }); + assert.equal(item, null); + }); + it("should error for a missing right", () => { + item = instance.vectorMultiply(item, { left: "arr1", right: "missing" }); + assert.equal(item, null); + }); + it("should error for mismatched types", () => { + item = instance.vectorMultiply(item, { left: "arr1", right: "two" }); + assert.equal(item, null); + }); + it("should error for strings", () => { + item = instance.vectorMultiply(item, { left: "foo", right: "bar" }); + assert.equal(item, null); + }); + }); + + describe("#scalarAdd", () => { + it("should error for a missing field", () => { + item = instance.scalarAdd(item, { field: "missing", k: 10 }); + assert.equal(item, null); + }); + it("should error for strings", () => { + item = instance.scalarAdd(item, { field: "foo", k: 10 }); + assert.equal(item, null); + }); + it("should work for numbers", () => { + item = instance.scalarAdd(item, { field: "one", k: 10 }); + assert.equal(item.one, 11); + }); + it("should add a constant to every cell on a map", () => { + item = instance.scalarAdd(item, { field: "map", k: 10 }); + assert.deepEqual(item.map, { a: 11, b: 12, c: 13 }); + }); + it("should add a value from a field to every cell on a map", () => { + item = instance.scalarAdd(item, { field: "map", k: "qux" }); + assert.deepEqual(item.map, { a: 43, b: 44, c: 45 }); + }); + it("should add a constant to every cell on an array", () => { + item = instance.scalarAdd(item, { field: "arr1", k: 10 }); + assert.deepEqual(item.arr1, [12, 13, 14]); + }); + }); + + describe("#vectorAdd", () => { + it("should calculate add vectors from maps", () => { + item = instance.vectorAdd(item, { left: "map", right: "map2" }); + assert.equal(Object.keys(item.map).length, 4); + assert.isTrue("a" in item.map); + assert.equal(item.map.a, 1); + assert.isTrue("b" in item.map); + assert.equal(item.map.b, 4); + assert.isTrue("c" in item.map); + assert.equal(item.map.c, 6); + assert.isTrue("d" in item.map); + assert.equal(item.map.d, 4); + }); + it("should work for missing left", () => { + item = instance.vectorAdd(item, { left: "missing", right: "arr2" }); + assert.deepEqual(item.missing, [3, 4, 5]); + }); + it("should error for missing right", () => { + item = instance.vectorAdd(item, { left: "arr2", right: "missing" }); + assert.equal(item, null); + }); + it("should error error for strings", () => { + item = instance.vectorAdd(item, { left: "foo", right: "bar" }); + assert.equal(item, null); + }); + it("should error for different types", () => { + item = instance.vectorAdd(item, { left: "arr2", right: "map" }); + assert.equal(item, null); + }); + it("should calculate add vectors from arrays", () => { + item = instance.vectorAdd(item, { left: "arr1", right: "arr2" }); + assert.deepEqual(item.arr1, [5, 7, 9]); + }); + it("should abort on different sized arrays", () => { + item = instance.vectorAdd(item, { left: "arr1", right: "long" }); + assert.equal(item, null); + }); + it("should calculate add vectors from arrays", () => { + item = instance.vectorAdd(item, { left: "arr1", right: "arr2" }); + assert.deepEqual(item.arr1, [5, 7, 9]); + }); + }); + + describe("#makeBoolean", () => { + it("should error for missing field", () => { + item = instance.makeBoolean(item, { field: "missing", threshold: 2 }); + assert.equal(item, null); + }); + it("should 0/1 a map", () => { + item = instance.makeBoolean(item, { field: "map", threshold: 2 }); + assert.deepEqual(item.map, { a: 0, b: 0, c: 1 }); + }); + it("should a map of all 1s", () => { + item = instance.makeBoolean(item, { field: "map" }); + assert.deepEqual(item.map, { a: 1, b: 1, c: 1 }); + }); + it("should -1/1 a map", () => { + item = instance.makeBoolean(item, { + field: "map", + threshold: 2, + keep_negative: true, + }); + assert.deepEqual(item.map, { a: -1, b: -1, c: 1 }); + }); + it("should work an array", () => { + item = instance.makeBoolean(item, { field: "arr1", threshold: 3 }); + assert.deepEqual(item.arr1, [0, 0, 1]); + }); + it("should -1/1 an array", () => { + item = instance.makeBoolean(item, { + field: "arr1", + threshold: 3, + keep_negative: true, + }); + assert.deepEqual(item.arr1, [-1, -1, 1]); + }); + it("should 1 a high number", () => { + item = instance.makeBoolean(item, { field: "qux", threshold: 3 }); + assert.equal(item.qux, 1); + }); + it("should 0 a low number", () => { + item = instance.makeBoolean(item, { field: "qux", threshold: 70 }); + assert.equal(item.qux, 0); + }); + it("should -1 a low number", () => { + item = instance.makeBoolean(item, { + field: "qux", + threshold: 83, + keep_negative: true, + }); + assert.equal(item.qux, -1); + }); + it("should fail a string", () => { + item = instance.makeBoolean(item, { field: "foo", threshold: 3 }); + assert.equal(item, null); + }); + }); + + describe("#allowFields", () => { + it("should filter the keys out of a map", () => { + item = instance.allowFields(item, { + fields: ["foo", "missing", "bar"], + }); + assert.deepEqual(item, { foo: "FOO", bar: "BAR" }); + }); + }); + + describe("#filterByValue", () => { + it("should fail on missing field", () => { + item = instance.filterByValue(item, { field: "missing", threshold: 2 }); + assert.equal(item, null); + }); + it("should filter the keys out of a map", () => { + item = instance.filterByValue(item, { field: "map", threshold: 2 }); + assert.deepEqual(item.map, { c: 3 }); + }); + }); + + describe("#l2Normalize", () => { + it("should fail on missing field", () => { + item = instance.l2Normalize(item, { field: "missing" }); + assert.equal(item, null); + }); + it("should L2 normalize an array", () => { + item = instance.l2Normalize(item, { field: "arr1" }); + assert.deepEqual( + item.arr1, + [0.3713906763541037, 0.5570860145311556, 0.7427813527082074] + ); + }); + it("should L2 normalize a map", () => { + item = instance.l2Normalize(item, { field: "map" }); + assert.deepEqual(item.map, { + a: 0.2672612419124244, + b: 0.5345224838248488, + c: 0.8017837257372732, + }); + }); + it("should fail a string", () => { + item = instance.l2Normalize(item, { field: "foo" }); + assert.equal(item, null); + }); + it("should not bomb on a zero vector", () => { + item = instance.l2Normalize(item, { field: "zero" }); + assert.deepEqual(item.zero, { a: 0, b: 0 }); + item = instance.l2Normalize(item, { field: "zaro" }); + assert.deepEqual(item.zaro, [0, 0]); + }); + }); + + describe("#probNormalize", () => { + it("should fail on missing field", () => { + item = instance.probNormalize(item, { field: "missing" }); + assert.equal(item, null); + }); + it("should normalize an array to sum to 1", () => { + item = instance.probNormalize(item, { field: "arr1" }); + assert.deepEqual( + item.arr1, + [0.2222222222222222, 0.3333333333333333, 0.4444444444444444] + ); + }); + it("should normalize a map to sum to 1", () => { + item = instance.probNormalize(item, { field: "map" }); + assert.equal(Object.keys(item.map).length, 3); + assert.isTrue("a" in item.map); + assert.isTrue(Math.abs(item.map.a - 0.16667) <= EPSILON); + assert.isTrue("b" in item.map); + assert.isTrue(Math.abs(item.map.b - 0.33333) <= EPSILON); + assert.isTrue("c" in item.map); + assert.isTrue(Math.abs(item.map.c - 0.5) <= EPSILON); + }); + it("should fail a string", () => { + item = instance.probNormalize(item, { field: "foo" }); + assert.equal(item, null); + }); + it("should not bomb on a zero vector", () => { + item = instance.probNormalize(item, { field: "zero" }); + assert.deepEqual(item.zero, { a: 0, b: 0 }); + item = instance.probNormalize(item, { field: "zaro" }); + assert.deepEqual(item.zaro, [0, 0]); + }); + }); + + describe("#scalarMultiplyTag", () => { + it("should fail on missing field", () => { + item = instance.scalarMultiplyTag(item, { field: "missing", k: 3 }); + assert.equal(item, null); + }); + it("should scalar multiply a nested map", () => { + item = instance.scalarMultiplyTag(item, { + field: "tags", + k: 3, + log_scale: false, + }); + assert.isTrue(Math.abs(item.tags.a.aa - 0.3) <= EPSILON); + assert.isTrue(Math.abs(item.tags.a.ab - 0.6) <= EPSILON); + assert.isTrue(Math.abs(item.tags.a.ac - 0.9) <= EPSILON); + assert.isTrue(Math.abs(item.tags.b.ba - 12) <= EPSILON); + assert.isTrue(Math.abs(item.tags.b.bb - 15) <= EPSILON); + assert.isTrue(Math.abs(item.tags.b.bc - 18) <= EPSILON); + }); + it("should scalar multiply a nested map with logrithms", () => { + item = instance.scalarMultiplyTag(item, { + field: "tags", + k: 3, + log_scale: true, + }); + assert.isTrue( + Math.abs(item.tags.a.aa - Math.log(0.1 + 0.000001) * 3) <= EPSILON + ); + assert.isTrue( + Math.abs(item.tags.a.ab - Math.log(0.2 + 0.000001) * 3) <= EPSILON + ); + assert.isTrue( + Math.abs(item.tags.a.ac - Math.log(0.3 + 0.000001) * 3) <= EPSILON + ); + assert.isTrue( + Math.abs(item.tags.b.ba - Math.log(4.0 + 0.000001) * 3) <= EPSILON + ); + assert.isTrue( + Math.abs(item.tags.b.bb - Math.log(5.0 + 0.000001) * 3) <= EPSILON + ); + assert.isTrue( + Math.abs(item.tags.b.bc - Math.log(6.0 + 0.000001) * 3) <= EPSILON + ); + }); + it("should fail a string", () => { + item = instance.scalarMultiplyTag(item, { field: "foo", k: 3 }); + assert.equal(item, null); + }); + }); + + describe("#setDefault", () => { + it("should store a missing value", () => { + item = instance.setDefault(item, { field: "missing", value: 1111 }); + assert.equal(item.missing, 1111); + }); + it("should not overwrite an existing value", () => { + item = instance.setDefault(item, { field: "lhs", value: 1111 }); + assert.equal(item.lhs, 2); + }); + it("should store a complex value", () => { + item = instance.setDefault(item, { field: "missing", value: { a: 1 } }); + assert.deepEqual(item.missing, { a: 1 }); + }); + }); + + describe("#lookupValue", () => { + it("should promote a value", () => { + item = instance.lookupValue(item, { + haystack: "map", + needle: "c", + dest: "ccc", + }); + assert.equal(item.ccc, 3); + }); + it("should handle a missing haystack", () => { + item = instance.lookupValue(item, { + haystack: "missing", + needle: "c", + dest: "ccc", + }); + assert.isTrue(!("ccc" in item)); + }); + it("should handle a missing needle", () => { + item = instance.lookupValue(item, { + haystack: "map", + needle: "missing", + dest: "ccc", + }); + assert.isTrue(!("ccc" in item)); + }); + }); + + describe("#copyToMap", () => { + it("should copy a value to a map", () => { + item = instance.copyToMap(item, { + src: "qux", + dest_map: "map", + dest_key: "zzz", + }); + assert.isTrue("zzz" in item.map); + assert.equal(item.map.zzz, item.qux); + }); + it("should create a new map to hold the key", () => { + item = instance.copyToMap(item, { + src: "qux", + dest_map: "missing", + dest_key: "zzz", + }); + assert.equal(Object.keys(item.missing).length, 1); + assert.equal(item.missing.zzz, item.qux); + }); + it("should not create an empty map if the src is missing", () => { + item = instance.copyToMap(item, { + src: "missing", + dest_map: "no_map", + dest_key: "zzz", + }); + assert.isTrue(!("no_map" in item)); + }); + }); + + describe("#applySoftmaxTags", () => { + it("should error on missing field", () => { + item = instance.applySoftmaxTags(item, { field: "missing" }); + assert.equal(item, null); + }); + it("should error on nonmaps", () => { + item = instance.applySoftmaxTags(item, { field: "arr1" }); + assert.equal(item, null); + }); + it("should error on unnested maps", () => { + item = instance.applySoftmaxTags(item, { field: "map" }); + assert.equal(item, null); + }); + it("should error on wrong nested maps", () => { + item = instance.applySoftmaxTags(item, { field: "bogus" }); + assert.equal(item, null); + }); + it("should apply softmax across the subtags", () => { + item = instance.applySoftmaxTags(item, { field: "tags" }); + assert.isTrue("a" in item.tags); + assert.isTrue("aa" in item.tags.a); + assert.isTrue("ab" in item.tags.a); + assert.isTrue("ac" in item.tags.a); + assert.isTrue(Math.abs(item.tags.a.aa - 0.30061) <= EPSILON); + assert.isTrue(Math.abs(item.tags.a.ab - 0.33222) <= EPSILON); + assert.isTrue(Math.abs(item.tags.a.ac - 0.36717) <= EPSILON); + + assert.isTrue("b" in item.tags); + assert.isTrue("ba" in item.tags.b); + assert.isTrue("bb" in item.tags.b); + assert.isTrue("bc" in item.tags.b); + assert.isTrue(Math.abs(item.tags.b.ba - 0.09003) <= EPSILON); + assert.isTrue(Math.abs(item.tags.b.bb - 0.24473) <= EPSILON); + assert.isTrue(Math.abs(item.tags.b.bc - 0.66524) <= EPSILON); + }); + }); + + describe("#combinerAdd", () => { + it("should do nothing when right field is missing", () => { + let right = makeItem(); + let combined = instance.combinerAdd(item, right, { field: "missing" }); + assert.deepEqual(combined, item); + }); + it("should handle missing left maps", () => { + let right = makeItem(); + right.missingmap = { a: 5, b: -1, c: 3 }; + let combined = instance.combinerAdd(item, right, { field: "missingmap" }); + assert.deepEqual(combined.missingmap, { a: 5, b: -1, c: 3 }); + }); + it("should add equal sized maps", () => { + let right = makeItem(); + let combined = instance.combinerAdd(item, right, { field: "map" }); + assert.deepEqual(combined.map, { a: 2, b: 4, c: 6 }); + }); + it("should add long map to short map", () => { + let right = makeItem(); + right.map.d = 999; + let combined = instance.combinerAdd(item, right, { field: "map" }); + assert.deepEqual(combined.map, { a: 2, b: 4, c: 6, d: 999 }); + }); + it("should add short map to long map", () => { + let right = makeItem(); + item.map.d = 999; + let combined = instance.combinerAdd(item, right, { field: "map" }); + assert.deepEqual(combined.map, { a: 2, b: 4, c: 6, d: 999 }); + }); + it("should add equal sized arrays", () => { + let right = makeItem(); + let combined = instance.combinerAdd(item, right, { field: "arr1" }); + assert.deepEqual(combined.arr1, [4, 6, 8]); + }); + it("should handle missing left arrays", () => { + let right = makeItem(); + right.missingarray = [5, 1, 4]; + let combined = instance.combinerAdd(item, right, { + field: "missingarray", + }); + assert.deepEqual(combined.missingarray, [5, 1, 4]); + }); + it("should add long array to short array", () => { + let right = makeItem(); + right.arr1 = [2, 3, 4, 12]; + let combined = instance.combinerAdd(item, right, { field: "arr1" }); + assert.deepEqual(combined.arr1, [4, 6, 8, 12]); + }); + it("should add short array to long array", () => { + let right = makeItem(); + item.arr1 = [2, 3, 4, 12]; + let combined = instance.combinerAdd(item, right, { field: "arr1" }); + assert.deepEqual(combined.arr1, [4, 6, 8, 12]); + }); + it("should handle missing left number", () => { + let right = makeItem(); + right.missingnumber = 999; + let combined = instance.combinerAdd(item, right, { + field: "missingnumber", + }); + assert.deepEqual(combined.missingnumber, 999); + }); + it("should add numbers", () => { + let right = makeItem(); + let combined = instance.combinerAdd(item, right, { field: "lhs" }); + assert.equal(combined.lhs, 4); + }); + it("should error on missing left, and right is a string", () => { + let right = makeItem(); + right.error = "error"; + let combined = instance.combinerAdd(item, right, { field: "error" }); + assert.equal(combined, null); + }); + it("should error on left string", () => { + let right = makeItem(); + let combined = instance.combinerAdd(item, right, { field: "foo" }); + assert.equal(combined, null); + }); + it("should error on mismatch types", () => { + let right = makeItem(); + right.lhs = [1, 2, 3]; + let combined = instance.combinerAdd(item, right, { field: "lhs" }); + assert.equal(combined, null); + }); + }); + + describe("#combinerMax", () => { + it("should do nothing when right field is missing", () => { + let right = makeItem(); + let combined = instance.combinerMax(item, right, { field: "missing" }); + assert.deepEqual(combined, item); + }); + it("should handle missing left maps", () => { + let right = makeItem(); + right.missingmap = { a: 5, b: -1, c: 3 }; + let combined = instance.combinerMax(item, right, { field: "missingmap" }); + assert.deepEqual(combined.missingmap, { a: 5, b: -1, c: 3 }); + }); + it("should handle equal sized maps", () => { + let right = makeItem(); + right.map = { a: 5, b: -1, c: 3 }; + let combined = instance.combinerMax(item, right, { field: "map" }); + assert.deepEqual(combined.map, { a: 5, b: 2, c: 3 }); + }); + it("should handle short map to long map", () => { + let right = makeItem(); + right.map = { a: 5, b: -1, c: 3, d: 999 }; + let combined = instance.combinerMax(item, right, { field: "map" }); + assert.deepEqual(combined.map, { a: 5, b: 2, c: 3, d: 999 }); + }); + it("should handle long map to short map", () => { + let right = makeItem(); + right.map = { a: 5, b: -1, c: 3 }; + item.map.d = 999; + let combined = instance.combinerMax(item, right, { field: "map" }); + assert.deepEqual(combined.map, { a: 5, b: 2, c: 3, d: 999 }); + }); + it("should handle equal sized arrays", () => { + let right = makeItem(); + right.arr1 = [5, 1, 4]; + let combined = instance.combinerMax(item, right, { field: "arr1" }); + assert.deepEqual(combined.arr1, [5, 3, 4]); + }); + it("should handle missing left arrays", () => { + let right = makeItem(); + right.missingarray = [5, 1, 4]; + let combined = instance.combinerMax(item, right, { + field: "missingarray", + }); + assert.deepEqual(combined.missingarray, [5, 1, 4]); + }); + it("should handle short array to long array", () => { + let right = makeItem(); + right.arr1 = [5, 1, 4, 7]; + let combined = instance.combinerMax(item, right, { field: "arr1" }); + assert.deepEqual(combined.arr1, [5, 3, 4, 7]); + }); + it("should handle long array to short array", () => { + let right = makeItem(); + right.arr1 = [5, 1, 4]; + item.arr1.push(7); + let combined = instance.combinerMax(item, right, { field: "arr1" }); + assert.deepEqual(combined.arr1, [5, 3, 4, 7]); + }); + it("should handle missing left number", () => { + let right = makeItem(); + right.missingnumber = 999; + let combined = instance.combinerMax(item, right, { + field: "missingnumber", + }); + assert.deepEqual(combined.missingnumber, 999); + }); + it("should handle big number", () => { + let right = makeItem(); + right.lhs = 99; + let combined = instance.combinerMax(item, right, { field: "lhs" }); + assert.equal(combined.lhs, 99); + }); + it("should handle small number", () => { + let right = makeItem(); + item.lhs = 99; + let combined = instance.combinerMax(item, right, { field: "lhs" }); + assert.equal(combined.lhs, 99); + }); + it("should error on missing left, and right is a string", () => { + let right = makeItem(); + right.error = "error"; + let combined = instance.combinerMax(item, right, { field: "error" }); + assert.equal(combined, null); + }); + it("should error on left string", () => { + let right = makeItem(); + let combined = instance.combinerMax(item, right, { field: "foo" }); + assert.equal(combined, null); + }); + it("should error on mismatch types", () => { + let right = makeItem(); + right.lhs = [1, 2, 3]; + let combined = instance.combinerMax(item, right, { field: "lhs" }); + assert.equal(combined, null); + }); + }); + + describe("#combinerCollectValues", () => { + it("should error on bogus operation", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "missing", + }); + assert.equal(combined, null); + }); + it("should sum when missing left", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "sum", + }); + assert.deepEqual(combined.combined_map, { + "maseratiusa.com/maserati": 41, + }); + }); + it("should sum when missing right", () => { + let right = makeItem(); + item.combined_map = { fake: 42 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "sum", + }); + assert.deepEqual(combined.combined_map, { fake: 42 }); + }); + it("should sum when both", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + item.combined_map = { fake: 42, "maseratiusa.com/maserati": 41 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "sum", + }); + assert.deepEqual(combined.combined_map, { + fake: 42, + "maseratiusa.com/maserati": 82, + }); + }); + + it("should max when missing left", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "max", + }); + assert.deepEqual(combined.combined_map, { + "maseratiusa.com/maserati": 41, + }); + }); + it("should max when missing right", () => { + let right = makeItem(); + item.combined_map = { fake: 42 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "max", + }); + assert.deepEqual(combined.combined_map, { fake: 42 }); + }); + it("should max when both (right)", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 99; + item.combined_map = { fake: 42, "maseratiusa.com/maserati": 41 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "max", + }); + assert.deepEqual(combined.combined_map, { + fake: 42, + "maseratiusa.com/maserati": 99, + }); + }); + it("should max when both (left)", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = -99; + item.combined_map = { fake: 42, "maseratiusa.com/maserati": 41 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "max", + }); + assert.deepEqual(combined.combined_map, { + fake: 42, + "maseratiusa.com/maserati": 41, + }); + }); + + it("should overwrite when missing left", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "overwrite", + }); + assert.deepEqual(combined.combined_map, { + "maseratiusa.com/maserati": 41, + }); + }); + it("should overwrite when missing right", () => { + let right = makeItem(); + item.combined_map = { fake: 42 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "overwrite", + }); + assert.deepEqual(combined.combined_map, { fake: 42 }); + }); + it("should overwrite when both", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + item.combined_map = { fake: 42, "maseratiusa.com/maserati": 77 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "overwrite", + }); + assert.deepEqual(combined.combined_map, { + fake: 42, + "maseratiusa.com/maserati": 41, + }); + }); + + it("should count when missing left", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "count", + }); + assert.deepEqual(combined.combined_map, { + "maseratiusa.com/maserati": 1, + }); + }); + it("should count when missing right", () => { + let right = makeItem(); + item.combined_map = { fake: 42 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "count", + }); + assert.deepEqual(combined.combined_map, { fake: 42 }); + }); + it("should count when both", () => { + let right = makeItem(); + right.url_domain = "maseratiusa.com/maserati"; + right.time = 41; + item.combined_map = { fake: 42, "maseratiusa.com/maserati": 1 }; + let combined = instance.combinerCollectValues(item, right, { + left_field: "combined_map", + right_key_field: "url_domain", + right_value_field: "time", + operation: "count", + }); + assert.deepEqual(combined.combined_map, { + fake: 42, + "maseratiusa.com/maserati": 2, + }); + }); + }); + + describe("#executeRecipe", () => { + it("should handle working steps", () => { + let final = instance.executeRecipe({}, [ + { function: "set_default", field: "foo", value: 1 }, + { function: "set_default", field: "bar", value: 10 }, + ]); + assert.equal(final.foo, 1); + assert.equal(final.bar, 10); + }); + it("should handle unknown steps", () => { + let final = instance.executeRecipe({}, [ + { function: "set_default", field: "foo", value: 1 }, + { function: "missing" }, + { function: "set_default", field: "bar", value: 10 }, + ]); + assert.equal(final, null); + }); + it("should handle erroring steps", () => { + let final = instance.executeRecipe({}, [ + { function: "set_default", field: "foo", value: 1 }, + { + function: "accept_item_by_field_value", + field: "missing", + op: "invalid", + rhsField: "moot", + rhsValue: "m00t", + }, + { function: "set_default", field: "bar", value: 10 }, + ]); + assert.equal(final, null); + }); + }); + + describe("#executeCombinerRecipe", () => { + it("should handle working steps", () => { + let final = instance.executeCombinerRecipe( + { foo: 1, bar: 10 }, + { foo: 1, bar: 10 }, + [ + { function: "combiner_add", field: "foo" }, + { function: "combiner_add", field: "bar" }, + ] + ); + assert.equal(final.foo, 2); + assert.equal(final.bar, 20); + }); + it("should handle unknown steps", () => { + let final = instance.executeCombinerRecipe( + { foo: 1, bar: 10 }, + { foo: 1, bar: 10 }, + [ + { function: "combiner_add", field: "foo" }, + { function: "missing" }, + { function: "combiner_add", field: "bar" }, + ] + ); + assert.equal(final, null); + }); + it("should handle erroring steps", () => { + let final = instance.executeCombinerRecipe( + { foo: 1, bar: 10, baz: 0 }, + { foo: 1, bar: 10, baz: "hundred" }, + [ + { function: "combiner_add", field: "foo" }, + { function: "combiner_add", field: "baz" }, + { function: "combiner_add", field: "bar" }, + ] + ); + assert.equal(final, null); + }); + }); +}); diff --git a/browser/components/newtab/test/unit/lib/PersonalityProvider/Tokenize.test.js b/browser/components/newtab/test/unit/lib/PersonalityProvider/Tokenize.test.js new file mode 100644 index 0000000000..8503c2903b --- /dev/null +++ b/browser/components/newtab/test/unit/lib/PersonalityProvider/Tokenize.test.js @@ -0,0 +1,134 @@ +import { + tokenize, + toksToTfIdfVector, +} from "lib/PersonalityProvider/Tokenize.jsm"; + +const EPSILON = 0.00001; + +describe("TF-IDF Term Vectorizer", () => { + describe("#tokenize", () => { + let testCases = [ + { input: "HELLO there", expected: ["hello", "there"] }, + { input: "blah,,,blah,blah", expected: ["blah", "blah", "blah"] }, + { + input: "Call Jenny: 967-5309", + expected: ["call", "jenny", "967", "5309"], + }, + { + input: "Yo(what)[[hello]]{{jim}}}bob{1:2:1+2=$3", + expected: [ + "yo", + "what", + "hello", + "jim", + "bob", + "1", + "2", + "1", + "2", + "3", + ], + }, + { input: "čÄfė 80's", expected: ["čäfė", "80", "s"] }, + { input: "我知道很多东西。", expected: ["我知道很多东西"] }, + ]; + let checkTokenization = tc => { + it(`${tc.input} should tokenize to ${tc.expected}`, () => { + assert.deepEqual(tc.expected, tokenize(tc.input)); + }); + }; + + for (let i = 0; i < testCases.length; i++) { + checkTokenization(testCases[i]); + } + }); + + describe("#tfidf", () => { + let vocab_idfs = { + deal: [221, 5.5058519847862275], + easy: [269, 5.5058519847862275], + tanks: [867, 5.601162164590552], + sites: [792, 5.957837108529285], + care: [153, 5.957837108529285], + needs: [596, 5.824305715904762], + finally: [334, 5.706522680248379], + }; + let testCases = [ + { + input: "Finally! Easy care for your tanks!", + expected: { + finally: [334, 0.5009816295853761], + easy: [269, 0.48336453811728713], + care: [153, 0.5230447876368227], + tanks: [867, 0.49173191907236774], + }, + }, + { + input: "Easy easy EASY", + expected: { easy: [269, 1.0] }, + }, + { + input: "Easy easy care", + expected: { + easy: [269, 0.8795205218806832], + care: [153, 0.4758609582543317], + }, + }, + { + input: "easy care", + expected: { + easy: [269, 0.6786999710383944], + care: [153, 0.7344156515982504], + }, + }, + { + input: "这个空间故意留空。", + expected: { + /* This space is left intentionally blank. */ + }, + }, + ]; + let checkTokenGeneration = tc => { + describe(`${tc.input} should have only vocabulary tokens`, () => { + let actual = toksToTfIdfVector(tokenize(tc.input), vocab_idfs); + + it(`${tc.input} should generate exactly ${Object.keys( + tc.expected + )}`, () => { + let seen = {}; + Object.keys(actual).forEach(actualTok => { + assert.isTrue(actualTok in tc.expected); + seen[actualTok] = true; + }); + Object.keys(tc.expected).forEach(expectedTok => { + assert.isTrue(expectedTok in seen); + }); + }); + + it(`${tc.input} should have the correct token ids`, () => { + Object.keys(actual).forEach(actualTok => { + assert.equal(tc.expected[actualTok][0], actual[actualTok][0]); + }); + }); + }); + }; + + let checkTfIdfVector = tc => { + let actual = toksToTfIdfVector(tokenize(tc.input), vocab_idfs); + it(`${tc.input} should have the correct tf-idf`, () => { + Object.keys(actual).forEach(actualTok => { + let delta = Math.abs( + tc.expected[actualTok][1] - actual[actualTok][1] + ); + assert.isTrue(delta <= EPSILON); + }); + }); + }; + + // run the tests + for (let i = 0; i < testCases.length; i++) { + checkTokenGeneration(testCases[i]); + checkTfIdfVector(testCases[i]); + } + }); +}); |