summaryrefslogtreecommitdiffstats
path: root/bin/text_cat/LM
diff options
context:
space:
mode:
Diffstat (limited to 'bin/text_cat/LM')
-rw-r--r--bin/text_cat/LM/english.lm400
-rw-r--r--bin/text_cat/LM/german.lm400
2 files changed, 800 insertions, 0 deletions
diff --git a/bin/text_cat/LM/english.lm b/bin/text_cat/LM/english.lm
new file mode 100644
index 000000000..ab71632c6
--- /dev/null
+++ b/bin/text_cat/LM/english.lm
@@ -0,0 +1,400 @@
+_ 20326
+e 6617
+t 4843
+o 3834
+n 3653
+i 3602
+a 3433
+s 2945
+r 2921
+h 2507
+e_ 2000
+d 1816
+_t 1785
+c 1639
+l 1635
+th 1535
+he 1351
+_th 1333
+u 1309
+f 1253
+m 1175
+p 1151
+_a 1145
+the 1142
+_the 1060
+s_ 978
+er 968
+_o 967
+he_ 928
+d_ 888
+t_ 885
+the_ 844
+_the_ 843
+on 842
+in 817
+y 783
+n_ 773
+b 761
+re 754
+, 734
+,_ 732
+an 732
+g 728
+w 718
+_i 707
+en 676
+f_ 599
+y_ 595
+of 594
+_of 592
+es 589
+ti 587
+v 580
+_of_ 575
+of_ 575
+nd 568
+at 549
+r_ 540
+_w 534
+it 522
+ed 496
+_p 494
+nt 485
+_c 462
+o_ 457
+io 450
+_an 439
+te 432
+or 425
+_b 418
+nd_ 407
+to 406
+st 402
+is 401
+_s 396
+_in 389
+ion 385
+and 385
+de 384
+ve 382
+ha 375
+ar 366
+_m 361
+and_ 360
+_and 360
+_and_ 358
+se 353
+_to 347
+me 346
+to_ 344
+ed_ 339
+. 330
+be 329
+_f 329
+._ 329
+_to_ 320
+co 317
+ic 316
+ns 308
+al 307
+le 304
+ou 304
+ce 293
+ent 279
+l_ 278
+_co 277
+tio 275
+on_ 274
+_d 274
+tion 268
+ri 266
+_e 264
+ng 253
+hi 251
+er_ 249
+ea 246
+as 245
+_be 242
+pe 242
+h_ 234
+_r 232
+ec 227
+ch 223
+ro 222
+ct 220
+_h 219
+pr 217
+in_ 217
+ne 214
+ll 214
+rt 213
+s,_ 210
+s, 210
+li 209
+ra 208
+T 207
+wh 204
+a_ 203
+ac 201
+_wh 199
+_n 196
+ts 196
+di 196
+es_ 195
+si 194
+re_ 193
+at_ 192
+nc 192
+ie 190
+_a_ 188
+_in_ 185
+ing 184
+us 182
+_re 182
+g_ 179
+ng_ 178
+op 178
+con 177
+tha 175
+_l 174
+_tha 174
+ver 173
+ma 173
+ion_ 171
+_con 171
+ci 170
+ons 170
+_it 170
+po 169
+ere 168
+is_ 167
+ta 167
+la 166
+_pr 165
+fo 164
+ho 164
+ir 162
+ss 161
+men 160
+be_ 160
+un 159
+ty 159
+_be_ 158
+ing_ 157
+om 156
+ot 156
+hat 155
+ly 155
+_g 155
+em 153
+_T 151
+rs 150
+mo 148
+ch_ 148
+wi 147
+we 147
+ad 147
+ts_ 145
+res 143
+_wi 143
+I 143
+hat_ 142
+ei 141
+ly_ 141
+ni 140
+os 140
+ca 139
+ur 139
+A 138
+ut 138
+that 138
+_that 137
+ati 137
+_fo 137
+st_ 137
+il 136
+or_ 136
+for 136
+pa 136
+ul 135
+ate 135
+ter 134
+it_ 134
+nt_ 133
+that_ 132
+_ha 129
+al_ 128
+el 128
+as_ 127
+ll_ 127
+_ma 125
+no 124
+ment 124
+an_ 124
+tion_ 122
+su 122
+bl 122
+_de 122
+nce 120
+pl 120
+fe 119
+tr 118
+so 118
+int 115
+ov 114
+e, 114
+e,_ 114
+_u 113
+ent_ 113
+Th 113
+her 113
+j 112
+atio 112
+ation 112
+_Th 111
+le_ 110
+ai 110
+_it_ 110
+_on 110
+_for 109
+ect 109
+k 109
+hic 108
+est 108
+der 107
+tu 107
+na 106
+_by_ 106
+by_ 106
+E 106
+by 106
+_by 106
+ve_ 106
+_di 106
+en_ 104
+vi 104
+m_ 103
+_whi 102
+iv 102
+whi 102
+ns_ 102
+_A 101
+ich 100
+ge 100
+pro 99
+ess 99
+_whic 99
+ers 99
+hich 99
+ce_ 99
+which 99
+whic 99
+all 98
+ove 98
+_is 98
+ich_ 97
+ee 97
+hich_ 97
+n,_ 96
+n, 96
+im 95
+ir_ 94
+hei 94
+ions 94
+sti 94
+se_ 94
+per 93
+The 93
+_pa 93
+heir 93
+id 93
+eir 93
+eir_ 93
+ig 93
+heir_ 93
+_no 93
+ev 93
+era 92
+_int 92
+ted 91
+_The 91
+ies 91
+art 91
+thei 90
+_ar 90
+_thei 90
+their 90
+_pro 90
+et 89
+_pe 88
+_mo 88
+ther 88
+x 87
+gh 87
+S 87
+_is_ 87
+ol 87
+ty_ 87
+_I 86
+nde 86
+am 86
+rn 86
+nte 86
+mp 85
+_su 84
+_we 84
+par 84
+_v 84
+pu 82
+his 82
+ow 82
+mi 82
+go 81
+N 81
+ue 81
+ple 81
+ep 80
+ab 80
+;_ 80
+; 80
+ex 80
+ain 80
+over 80
+_un 79
+q 79
+qu 79
+pp 79
+ith 79
+ry 79
+_as 79
+ber 79
+ub 78
+av 78
+uc 78
+s._ 77
+s. 77
+enc 77
+are 77
+iti 77
+gr 76
+his_ 76
+ua 76
+part 76
+ff 75
+eve 75
+O 75
+rea 74
+ous 74
+ia 74
+The_ 73
+ag 73
+mb 73
+_go 73
+fa 72
+on,_ 72
+ern 72
+t,_ 72
+on, 72
+t, 72
+_me 71
diff --git a/bin/text_cat/LM/german.lm b/bin/text_cat/LM/german.lm
new file mode 100644
index 000000000..6f14f51ef
--- /dev/null
+++ b/bin/text_cat/LM/german.lm
@@ -0,0 +1,400 @@
+_ 31586
+e 15008
+n 9058
+i 7299
+r 6830
+t 5662
+s 5348
+a 4618
+h 4176
+d 4011
+er 3415
+en 3412
+u 3341
+l 3266
+n_ 2848
+c 2636
+ch 2460
+g 2407
+o 2376
+e_ 2208
+r_ 2128
+m 2077
+_d 1948
+de 1831
+en_ 1786
+ei 1718
+er_ 1570
+in 1568
+te 1505
+ie 1505
+b 1458
+t_ 1425
+f 1306
+k 1176
+ge 1144
+s_ 1137
+un 1113
+, 1104
+,_ 1099
+w 1099
+z 1060
+nd 1039
+he 1004
+st 989
+_s 952
+_de 949
+. 909
+_e 906
+ne 906
+der 880
+._ 847
+be 841
+es 829
+ic 796
+_a 791
+ie_ 779
+is 769
+ich 763
+an 755
+re 749
+di 732
+ein 730
+se 730
+" 720
+ng 709
+_i 706
+sc 683
+sch 681
+it 673
+der_ 652
+h_ 651
+ch_ 642
+S 630
+le 609
+p 609
+ä 607
+ü 603
+au 603
+v 602
+che 599
+_w 596
+d_ 585
+die 576
+_di 572
+m_ 562
+_die 559
+el 548
+_S 540
+_der 529
+li 527
+_der_ 523
+si 515
+al 514
+ns 507
+on 501
+or 495
+ti 490
+ten 487
+ht 486
+die_ 485
+_die_ 483
+D 479
+rt 478
+nd_ 476
+_u 470
+nt 468
+A 466
+in_ 464
+den 461
+cht 447
+und 443
+me 440
+_z 429
+ung 426
+ll 423
+_un 421
+_ei 419
+_n 415
+hr 412
+ine 412
+_A 408
+_ein 405
+ar 404
+ra 403
+_v 400
+_g 400
+as 395
+zu 392
+et 389
+em 385
+_D 380
+eine 376
+gen 376
+g_ 376
+da 368
+we 366
+K 365
+lt 360
+B 354
+_" 353
+nde 349
+ni 347
+und_ 345
+E 345
+ur 345
+_m 342
+ri 341
+ha 340
+eh 339
+ten_ 338
+es_ 336
+_K 336
+_und 335
+ig 335
+_b 335
+hen 334
+_und_ 332
+_au 329
+_B 327
+_da 325
+_zu 324
+_in 322
+at 321
+us 318
+wi 307
+n, 305
+n,_ 304
+nn 304
+te_ 301
+eit 301
+_h 300
+ter 299
+M 298
+n. 295
+ß 294
+ng_ 289
+sche 289
+- 283
+rs 282
+den_ 282
+_si 280
+G 280
+im 278
+_ge 277
+chen 276
+rd 273
+_E 273
+n._ 270
+icht 270
+rn 268
+uf 267
+isch 264
+isc 264
+nen 263
+_in_ 262
+_M 260
+_er 257
+ich_ 255
+ac 253
+lic 252
+_G 252
+ber 252
+la 251
+vo 251
+eb 250
+ke 249
+F 248
+as_ 248
+hen_ 248
+ach 245
+en, 244
+ung_ 243
+lich 243
+ste 243
+en,_ 243
+_k 241
+ben 241
+_f 241
+en. 241
+_be 239
+it_ 239
+L 238
+_se 237
+mi 236
+ve 236
+na 236
+on_ 236
+P 235
+ss 234
+ist 234
+ö 234
+ht_ 233
+ru 233
+st_ 229
+_F 229
+ts 227
+ab 226
+W 226
+ol 225
+_eine 225
+hi 225
+so 224
+em_ 223
+"_ 223
+ren 222
+en._ 221
+chen_ 221
+R 221
+ta 221
+ere 220
+ische 219
+ers 218
+ert 217
+_P 217
+tr 217
+ed 215
+ze 215
+eg 215
+ens 215
+ür 213
+ah 212
+_vo 212
+ne_ 211
+cht_ 210
+uc 209
+_wi 209
+nge 208
+lle 208
+fe 207
+_L 207
+ver 206
+hl 205
+V 204
+ma 203
+wa 203
+auf 201
+H 198
+_W 195
+T 195
+nte 193
+uch 193
+l_ 192
+sei 192
+nen_ 190
+u_ 189
+_den 189
+_al 189
+_V 188
+t. 188
+lte 187
+ut 186
+ent 184
+sich 183
+sic 183
+il 183
+ier 182
+am 181
+gen_ 180
+sen 179
+fü 178
+um 178
+t._ 177
+f_ 174
+he_ 174
+ner 174
+nst 174
+ls 174
+_sei 173
+ro 173
+ir 173
+ebe 173
+mm 173
+ag 172
+ern 169
+t,_ 169
+t, 169
+eu 169
+ft 168
+icht_ 167
+hre 167
+Be 166
+nz 165
+nder 165
+_T 164
+_den_ 164
+iche 163
+tt 163
+zu_ 162
+and 162
+J 161
+rde 160
+rei 160
+_we 159
+_H 159
+ige 159
+_Be 158
+rte 157
+hei 156
+das 155
+aus 155
+che_ 154
+_das 154
+_zu_ 154
+tz 154
+_ni 153
+das_ 153
+_R 153
+N 153
+des 153
+_ve 153
+_J 152
+I 152
+_das_ 152
+men 151
+_so 151
+_ver 151
+_auf 150
+ine_ 150
+_ha 150
+rg 149
+ind 148
+eben 148
+kt 147
+mit 147
+_an 147
+her 146
+Ge 146
+Sc 145
+_sich 145
+U 145
+Sch 145
+_sic 145
+end 145
+Di 144
+abe 143
+ck 143
+sse 142
+ür_ 142
+ell 142
+ik 141
+o_ 141
+nic 141
+nich 141
+sa 141
+_fü 140
+hn 140
+zi 140
+no 140
+nicht 140
+im_ 139
+von_ 139
+von 139
+_nic 139
+_nich 139
+eine_ 139
+oc 138
+wei 138
+io 138
+schen 138
+gt 138