diff options
Diffstat (limited to 'bin/text_cat/LM')
-rw-r--r-- | bin/text_cat/LM/english.lm | 400 | ||||
-rw-r--r-- | bin/text_cat/LM/german.lm | 400 |
2 files changed, 800 insertions, 0 deletions
diff --git a/bin/text_cat/LM/english.lm b/bin/text_cat/LM/english.lm new file mode 100644 index 000000000..ab71632c6 --- /dev/null +++ b/bin/text_cat/LM/english.lm @@ -0,0 +1,400 @@ +_ 20326 +e 6617 +t 4843 +o 3834 +n 3653 +i 3602 +a 3433 +s 2945 +r 2921 +h 2507 +e_ 2000 +d 1816 +_t 1785 +c 1639 +l 1635 +th 1535 +he 1351 +_th 1333 +u 1309 +f 1253 +m 1175 +p 1151 +_a 1145 +the 1142 +_the 1060 +s_ 978 +er 968 +_o 967 +he_ 928 +d_ 888 +t_ 885 +the_ 844 +_the_ 843 +on 842 +in 817 +y 783 +n_ 773 +b 761 +re 754 +, 734 +,_ 732 +an 732 +g 728 +w 718 +_i 707 +en 676 +f_ 599 +y_ 595 +of 594 +_of 592 +es 589 +ti 587 +v 580 +_of_ 575 +of_ 575 +nd 568 +at 549 +r_ 540 +_w 534 +it 522 +ed 496 +_p 494 +nt 485 +_c 462 +o_ 457 +io 450 +_an 439 +te 432 +or 425 +_b 418 +nd_ 407 +to 406 +st 402 +is 401 +_s 396 +_in 389 +ion 385 +and 385 +de 384 +ve 382 +ha 375 +ar 366 +_m 361 +and_ 360 +_and 360 +_and_ 358 +se 353 +_to 347 +me 346 +to_ 344 +ed_ 339 +. 330 +be 329 +_f 329 +._ 329 +_to_ 320 +co 317 +ic 316 +ns 308 +al 307 +le 304 +ou 304 +ce 293 +ent 279 +l_ 278 +_co 277 +tio 275 +on_ 274 +_d 274 +tion 268 +ri 266 +_e 264 +ng 253 +hi 251 +er_ 249 +ea 246 +as 245 +_be 242 +pe 242 +h_ 234 +_r 232 +ec 227 +ch 223 +ro 222 +ct 220 +_h 219 +pr 217 +in_ 217 +ne 214 +ll 214 +rt 213 +s,_ 210 +s, 210 +li 209 +ra 208 +T 207 +wh 204 +a_ 203 +ac 201 +_wh 199 +_n 196 +ts 196 +di 196 +es_ 195 +si 194 +re_ 193 +at_ 192 +nc 192 +ie 190 +_a_ 188 +_in_ 185 +ing 184 +us 182 +_re 182 +g_ 179 +ng_ 178 +op 178 +con 177 +tha 175 +_l 174 +_tha 174 +ver 173 +ma 173 +ion_ 171 +_con 171 +ci 170 +ons 170 +_it 170 +po 169 +ere 168 +is_ 167 +ta 167 +la 166 +_pr 165 +fo 164 +ho 164 +ir 162 +ss 161 +men 160 +be_ 160 +un 159 +ty 159 +_be_ 158 +ing_ 157 +om 156 +ot 156 +hat 155 +ly 155 +_g 155 +em 153 +_T 151 +rs 150 +mo 148 +ch_ 148 +wi 147 +we 147 +ad 147 +ts_ 145 +res 143 +_wi 143 +I 143 +hat_ 142 +ei 141 +ly_ 141 +ni 140 +os 140 +ca 139 +ur 139 +A 138 +ut 138 +that 138 +_that 137 +ati 137 +_fo 137 +st_ 137 +il 136 +or_ 136 +for 136 +pa 136 +ul 135 +ate 135 +ter 134 +it_ 134 +nt_ 133 +that_ 132 +_ha 129 +al_ 128 +el 128 +as_ 127 +ll_ 127 +_ma 125 +no 124 +ment 124 +an_ 124 +tion_ 122 +su 122 +bl 122 +_de 122 +nce 120 +pl 120 +fe 119 +tr 118 +so 118 +int 115 +ov 114 +e, 114 +e,_ 114 +_u 113 +ent_ 113 +Th 113 +her 113 +j 112 +atio 112 +ation 112 +_Th 111 +le_ 110 +ai 110 +_it_ 110 +_on 110 +_for 109 +ect 109 +k 109 +hic 108 +est 108 +der 107 +tu 107 +na 106 +_by_ 106 +by_ 106 +E 106 +by 106 +_by 106 +ve_ 106 +_di 106 +en_ 104 +vi 104 +m_ 103 +_whi 102 +iv 102 +whi 102 +ns_ 102 +_A 101 +ich 100 +ge 100 +pro 99 +ess 99 +_whic 99 +ers 99 +hich 99 +ce_ 99 +which 99 +whic 99 +all 98 +ove 98 +_is 98 +ich_ 97 +ee 97 +hich_ 97 +n,_ 96 +n, 96 +im 95 +ir_ 94 +hei 94 +ions 94 +sti 94 +se_ 94 +per 93 +The 93 +_pa 93 +heir 93 +id 93 +eir 93 +eir_ 93 +ig 93 +heir_ 93 +_no 93 +ev 93 +era 92 +_int 92 +ted 91 +_The 91 +ies 91 +art 91 +thei 90 +_ar 90 +_thei 90 +their 90 +_pro 90 +et 89 +_pe 88 +_mo 88 +ther 88 +x 87 +gh 87 +S 87 +_is_ 87 +ol 87 +ty_ 87 +_I 86 +nde 86 +am 86 +rn 86 +nte 86 +mp 85 +_su 84 +_we 84 +par 84 +_v 84 +pu 82 +his 82 +ow 82 +mi 82 +go 81 +N 81 +ue 81 +ple 81 +ep 80 +ab 80 +;_ 80 +; 80 +ex 80 +ain 80 +over 80 +_un 79 +q 79 +qu 79 +pp 79 +ith 79 +ry 79 +_as 79 +ber 79 +ub 78 +av 78 +uc 78 +s._ 77 +s. 77 +enc 77 +are 77 +iti 77 +gr 76 +his_ 76 +ua 76 +part 76 +ff 75 +eve 75 +O 75 +rea 74 +ous 74 +ia 74 +The_ 73 +ag 73 +mb 73 +_go 73 +fa 72 +on,_ 72 +ern 72 +t,_ 72 +on, 72 +t, 72 +_me 71 diff --git a/bin/text_cat/LM/german.lm b/bin/text_cat/LM/german.lm new file mode 100644 index 000000000..6f14f51ef --- /dev/null +++ b/bin/text_cat/LM/german.lm @@ -0,0 +1,400 @@ +_ 31586 +e 15008 +n 9058 +i 7299 +r 6830 +t 5662 +s 5348 +a 4618 +h 4176 +d 4011 +er 3415 +en 3412 +u 3341 +l 3266 +n_ 2848 +c 2636 +ch 2460 +g 2407 +o 2376 +e_ 2208 +r_ 2128 +m 2077 +_d 1948 +de 1831 +en_ 1786 +ei 1718 +er_ 1570 +in 1568 +te 1505 +ie 1505 +b 1458 +t_ 1425 +f 1306 +k 1176 +ge 1144 +s_ 1137 +un 1113 +, 1104 +,_ 1099 +w 1099 +z 1060 +nd 1039 +he 1004 +st 989 +_s 952 +_de 949 +. 909 +_e 906 +ne 906 +der 880 +._ 847 +be 841 +es 829 +ic 796 +_a 791 +ie_ 779 +is 769 +ich 763 +an 755 +re 749 +di 732 +ein 730 +se 730 +" 720 +ng 709 +_i 706 +sc 683 +sch 681 +it 673 +der_ 652 +h_ 651 +ch_ 642 +S 630 +le 609 +p 609 +ä 607 +ü 603 +au 603 +v 602 +che 599 +_w 596 +d_ 585 +die 576 +_di 572 +m_ 562 +_die 559 +el 548 +_S 540 +_der 529 +li 527 +_der_ 523 +si 515 +al 514 +ns 507 +on 501 +or 495 +ti 490 +ten 487 +ht 486 +die_ 485 +_die_ 483 +D 479 +rt 478 +nd_ 476 +_u 470 +nt 468 +A 466 +in_ 464 +den 461 +cht 447 +und 443 +me 440 +_z 429 +ung 426 +ll 423 +_un 421 +_ei 419 +_n 415 +hr 412 +ine 412 +_A 408 +_ein 405 +ar 404 +ra 403 +_v 400 +_g 400 +as 395 +zu 392 +et 389 +em 385 +_D 380 +eine 376 +gen 376 +g_ 376 +da 368 +we 366 +K 365 +lt 360 +B 354 +_" 353 +nde 349 +ni 347 +und_ 345 +E 345 +ur 345 +_m 342 +ri 341 +ha 340 +eh 339 +ten_ 338 +es_ 336 +_K 336 +_und 335 +ig 335 +_b 335 +hen 334 +_und_ 332 +_au 329 +_B 327 +_da 325 +_zu 324 +_in 322 +at 321 +us 318 +wi 307 +n, 305 +n,_ 304 +nn 304 +te_ 301 +eit 301 +_h 300 +ter 299 +M 298 +n. 295 +ß 294 +ng_ 289 +sche 289 +- 283 +rs 282 +den_ 282 +_si 280 +G 280 +im 278 +_ge 277 +chen 276 +rd 273 +_E 273 +n._ 270 +icht 270 +rn 268 +uf 267 +isch 264 +isc 264 +nen 263 +_in_ 262 +_M 260 +_er 257 +ich_ 255 +ac 253 +lic 252 +_G 252 +ber 252 +la 251 +vo 251 +eb 250 +ke 249 +F 248 +as_ 248 +hen_ 248 +ach 245 +en, 244 +ung_ 243 +lich 243 +ste 243 +en,_ 243 +_k 241 +ben 241 +_f 241 +en. 241 +_be 239 +it_ 239 +L 238 +_se 237 +mi 236 +ve 236 +na 236 +on_ 236 +P 235 +ss 234 +ist 234 +ö 234 +ht_ 233 +ru 233 +st_ 229 +_F 229 +ts 227 +ab 226 +W 226 +ol 225 +_eine 225 +hi 225 +so 224 +em_ 223 +"_ 223 +ren 222 +en._ 221 +chen_ 221 +R 221 +ta 221 +ere 220 +ische 219 +ers 218 +ert 217 +_P 217 +tr 217 +ed 215 +ze 215 +eg 215 +ens 215 +ür 213 +ah 212 +_vo 212 +ne_ 211 +cht_ 210 +uc 209 +_wi 209 +nge 208 +lle 208 +fe 207 +_L 207 +ver 206 +hl 205 +V 204 +ma 203 +wa 203 +auf 201 +H 198 +_W 195 +T 195 +nte 193 +uch 193 +l_ 192 +sei 192 +nen_ 190 +u_ 189 +_den 189 +_al 189 +_V 188 +t. 188 +lte 187 +ut 186 +ent 184 +sich 183 +sic 183 +il 183 +ier 182 +am 181 +gen_ 180 +sen 179 +fü 178 +um 178 +t._ 177 +f_ 174 +he_ 174 +ner 174 +nst 174 +ls 174 +_sei 173 +ro 173 +ir 173 +ebe 173 +mm 173 +ag 172 +ern 169 +t,_ 169 +t, 169 +eu 169 +ft 168 +icht_ 167 +hre 167 +Be 166 +nz 165 +nder 165 +_T 164 +_den_ 164 +iche 163 +tt 163 +zu_ 162 +and 162 +J 161 +rde 160 +rei 160 +_we 159 +_H 159 +ige 159 +_Be 158 +rte 157 +hei 156 +das 155 +aus 155 +che_ 154 +_das 154 +_zu_ 154 +tz 154 +_ni 153 +das_ 153 +_R 153 +N 153 +des 153 +_ve 153 +_J 152 +I 152 +_das_ 152 +men 151 +_so 151 +_ver 151 +_auf 150 +ine_ 150 +_ha 150 +rg 149 +ind 148 +eben 148 +kt 147 +mit 147 +_an 147 +her 146 +Ge 146 +Sc 145 +_sich 145 +U 145 +Sch 145 +_sic 145 +end 145 +Di 144 +abe 143 +ck 143 +sse 142 +ür_ 142 +ell 142 +ik 141 +o_ 141 +nic 141 +nich 141 +sa 141 +_fü 140 +hn 140 +zi 140 +no 140 +nicht 140 +im_ 139 +von_ 139 +von 139 +_nic 139 +_nich 139 +eine_ 139 +oc 138 +wei 138 +io 138 +schen 138 +gt 138 |