summaryrefslogtreecommitdiffstats
path: root/utils/sa_trivial_convert.lua
blob: 2ea53bed1edb903d7ef3b1f121dfff05c819d2be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
local fun = require "fun"
local rspamd_logger = require "rspamd_logger"
local util = require "rspamd_util"
local lua_util = require "lua_util"
local rspamd_regexp = require "rspamd_regexp"
local ucl = require "ucl"

-- Shared converter state, filled while the input files are parsed:
--   complicated: lines kept verbatim because they cannot be converted
--   rules:       converted rules, keyed by rule type and then by symbol
--                (header rules have an extra level keyed by header name)
--   scores:      symbol name -> score taken from `score` lines
local complicated, rules, scores = {}, {}, {}

--- Join the words that follow the first `start` entries with single spaces.
-- @param words sequence of strings
-- @param start number of leading words to skip
-- @return the remaining words joined by " " (empty string if none remain)
local function words_to_re(words, start)
  local rest = {}

  for idx = start + 1, #words do
    rest[#rest + 1] = words[idx]
  end

  return table.concat(rest, " ")
end

--- Collect every match of a Lua pattern in a string.
-- @param str string to scan
-- @param delim Lua pattern describing one token; defaults to '[^%s]+',
--   i.e. runs of non-whitespace characters
-- @return array of the matched tokens, in order of appearance
local function split(str, delim)
  local pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end

--- Parse the header specification of an SA `header` rule.
-- The specification is a `|`-separated list of header names, each optionally
-- followed by `:`-separated modifiers (`addr`, `name`, `raw`, `case`).
-- On return `cur_rule['header']` holds one parameter table per accepted
-- header and `cur_rule['ordinary']` tells whether the rule is a plain regexp
-- over exactly one unmodified header.
-- @param hline header specification, e.g. "Subject" or "From:addr|To:addr"
-- @param cur_rule rule table being built; modified in place
local function handle_header_def(hline, cur_rule)
  -- Build a function returning the given field of every address parsed
  -- from a header value
  local function address_field_extractor(field)
    return function(str)
      local addr_parsed = util.parse_mail_address(str)
      local ret = {}
      if addr_parsed then
        for _,elt in ipairs(addr_parsed) do
          if elt[field] then
            table.insert(ret, elt[field])
          end
        end
      end

      return ret
    end
  end

  --Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  for _,h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table per header: sharing one table would make every entry
      -- of hdr_params describe the last header only
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      -- Everything after the header name is a modifier
      for i = 2, #args do
        local func = args[i]

        if func == 'addr' or func == 'name' then
          cur_param['function'] = address_field_extractor(func)
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' then
        -- Special case for spamassassin
        ordinary = false
      elseif cur_param['header'] == 'ToCc' then
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Set the result once, after all headers were seen. Requiring exactly one
  -- accepted header also covers an empty specification, which must never be
  -- treated as ordinary.
  cur_rule['ordinary'] = ordinary and #hdr_params == 1
  cur_rule['header'] = hdr_params
end

--- Parse one SpamAssassin configuration file and classify every line.
-- Plain regexp rules (`header`, `body`, `rawbody`, `full`, `uri`) are stored
-- in the file-level `rules` table, `score` lines are stored in `scores`, and
-- every other line is appended (trimmed, with `=~`/`!~` spacing normalised)
-- to `complicated` so it can be written out unchanged.
-- NOTE(review): `score` lines are always consumed here, so the score of a
-- rule that ends up in `complicated` is not written back with it - confirm
-- whether that is intended.
-- @param f open file handle; only `f:lines()` is used
local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false

  -- Remember a line that cannot be converted
  local function keep_line(l)
    table.insert(complicated, l)
  end

  -- Store the rule being built in `rules` and start a fresh one
  local function insert_cur_rule()
    local rule = cur_rule
    -- Reset the parser state first, so a rejected rule cannot leak its
    -- fields or its `valid` flag into the next one
    cur_rule = {}
    valid_rule = false

    if not rule.type or not rule['symbol'] then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
      return
    end

    if not rules[rule.type] then
      rules[rule.type] = {}
    end

    local target = rules[rule.type]

    if rule.type == 'header' then
      -- Header rules are grouped by the name of their (single) header
      local first = rule.header and rule.header[1]
      local hname = first and first.header
      if not hname then
        rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
        return
      end
      if not target[hname] then
        target[hname] = {}
      end
      target = target[hname]
    end

    target[rule['symbol']] = rule
  end

  -- Flush the previous rule (if it was complete) before starting a new one
  local function flush_pending()
    if valid_rule then
      insert_cur_rule()
    end
  end

  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end

    return 0
  end

  -- Shared handler for `body`, `rawbody` and `full`: <kind> SYMBOL /regexp/
  local function handle_content_rule(words, l, rule_type, raw)
    cur_rule['symbol'] = words[2]

    local first_char = words[3] and string.sub(words[3], 1, 1)
    if first_char ~= '/' and first_char ~= 'm' then
      -- might be function
      keep_line(l)
      return
    end

    cur_rule['type'] = rule_type
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if raw then
      cur_rule['raw'] = true
    end

    if cur_rule['re'] then
      valid_rule = true
    else
      -- Keep rules we cannot compile instead of silently dropping them
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep_line(l)
    end
  end

  local skip_to_endif = false
  local if_nested = 0

  -- Handle a single input line; returning early acts as `continue`
  local function process_line(l)
    l = lua_util.rspamd_str_trim(l)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    -- Unbalanced if/endif
    if if_nested < 0 then if_nested = 0 end

    if skip_to_endif then
      -- Inside a conditional block: copy everything through untouched
      if string.match(l, '^endif') then
        if_nested = if_nested - 1

        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(l, '^if') then
        if_nested = if_nested + 1
      elseif string.match(l, '^else') then
        -- Else counterpart for if
        skip_to_endif = false
      end
      keep_line(l)
      return
    end

    -- Conditional directives are kept exactly once; returning here prevents
    -- the generic fallback below from appending the same line a second time
    if string.match(l, '^if') then
      -- ifplugin, if !plugin(...) and any other if are all skipped
      skip_to_endif = true
      if_nested = if_nested + 1
      keep_line(l)
      return
    elseif string.match(l, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
      keep_line(l)
      return
    elseif string.match(l, '^endif') then
      if_nested = if_nested - 1
      keep_line(l)
      return
    end

    -- Skip comments
    local words = fun.totable(fun.take_while(
      function(w) return string.sub(w, 1, 1) ~= '#' end,
      fun.filter(function(w)
          return w ~= "" end,
      fun.iter(split(l)))))

    local kind = words[1]

    if kind == "header" then
      -- header SYMBOL Header =~ /regexp/
      flush_pending()

      local op = words[4]
      if op ~= '=~' and op ~= '!~' then
        keep_line(l)
        return
      end

      cur_rule['type'] = 'header'
      cur_rule['symbol'] = words[2]

      if op == '!~' then
        -- Negated matches are not converted
        keep_line(l)
        return
      end

      cur_rule['re_expr'] = words_to_re(words, 4)
      local unset_comp = string.find(cur_rule['re_expr'], '%s+%[if%-unset:')
      if unset_comp then
        keep_line(l)
        return
      end

      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

      if not cur_rule['re'] then
        rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
          cur_rule['re_expr'], cur_rule['symbol'])
        keep_line(l)
        return
      end

      handle_header_def(words[3], cur_rule)
      if not cur_rule['ordinary'] then
        keep_line(l)
        return
      end

      valid_rule = true
    elseif kind == "body" then
      -- body SYMBOL /regexp/
      flush_pending()
      handle_content_rule(words, l, 'sabody', false)
    elseif kind == "rawbody" then
      -- rawbody SYMBOL /regexp/
      flush_pending()
      handle_content_rule(words, l, 'sarawbody', false)
    elseif kind == "full" then
      -- full SYMBOL /regexp/
      flush_pending()
      -- Stored under the 'message' type key
      handle_content_rule(words, l, 'message', true)
    elseif kind == "uri" then
      -- uri SYMBOL /regexp/
      flush_pending()
      cur_rule['type'] = 'uri'
      cur_rule['symbol'] = words[2]
      cur_rule['re_expr'] = words_to_re(words, 2)
      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
      if cur_rule['re'] and cur_rule['symbol'] then
        valid_rule = true
      else
        keep_line(l)
      end
    elseif kind == "meta" then
      -- meta SYMBOL expression
      flush_pending()
      keep_line(l)
    elseif kind == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif kind == "score" and words[2] then
      scores[words[2]] = parse_score(words)
    else
      -- Anything else, including a `score` line without a symbol
      keep_line(l)
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  -- The last rule of the file has no following rule to flush it
  flush_pending()
end

-- Parse every file named on the command line
for _,matched in ipairs(arg) do
  local f, err = io.open(matched, "r")
  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    -- Release the handle as soon as the file is parsed
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1: %2", matched, err)
  end
end

-- Generated multimap configuration: one entry per written map file
local multimap_conf = {}

--- Write one regexp map file and register it in `multimap_conf`.
-- Each map line has the form `/<regexp>/ <symbol>:<score>`; symbols without
-- a known score get 0.
-- @param what rule type: 'sabody', 'sarawbody', 'full' (or its alias
--   'message'), 'uri' or 'header'
-- @param syms table mapping symbol name -> rule (only `rule.re` is used)
-- @param hdr header name; used only when `what` is 'header'
local function handle_rule(what, syms, hdr)
  local mtype
  local filter
  local fname
  local header
  local sym = what:upper()
  if what == 'sabody' then
    mtype = 'content'
    fname = 'body_re.map'
    filter = 'oneline'
  elseif what == 'sarawbody' then
    fname = 'raw_body_re.map'
    mtype = 'content'
    filter = 'rawtext'
  elseif what == 'full' or what == 'message' then
    -- `full` rules are stored under the 'message' type by the parser, so
    -- both spellings must be accepted or these rules are lost
    fname = 'full_re.map'
    mtype = 'content'
    filter = 'full'
    sym = 'FULL'
  elseif what == 'uri' then
    fname = 'uri_re.map'
    mtype = 'url'
    filter = 'full'
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local re_file, err = io.open(fname, 'w')
  if not re_file then
    rspamd_logger.errx('cannot open %s for writing: %s', fname, err)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  for k,r in pairs(syms) do
    local score = scores[k] or 0.0
    re_file:write(string.format('/%s/ %s:%f\n', tostring(r.re), k, score))
    table.insert(conf.symbols, k)
  end

  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end

-- Emit one map per rule type; header rules get one map per header name
for rule_type, bucket in pairs(rules) do
  if rule_type ~= 'header' then
    handle_rule(rule_type, bucket)
  else
    for header_name, header_rules in pairs(bucket) do
      handle_rule(rule_type, header_rules, header_name)
    end
  end
end

-- Write the generated multimap configuration.
-- assert() turns a failed open into an error that carries the file name and
-- the reason, instead of an "attempt to index a nil value" crash.
local out = ucl.to_format(multimap_conf, 'ucl')
local mmap_conf = assert(io.open('auto_multimap.conf', 'w'))
mmap_conf:write(out)
mmap_conf:close()
rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')

-- Write every line that could not be converted, skipping whitespace-only ones
local sa_remain = assert(io.open('auto_sa.conf', 'w'))
for _,l in ipairs(complicated) do
  if not string.match(l, '^%s+$') then
    sa_remain:write(l, '\n')
  end
end
sa_remain:close()
rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')