summaryrefslogtreecommitdiffstats
path: root/src/go/collectors/go.d.plugin/modules/weblog/parser.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/go/collectors/go.d.plugin/modules/weblog/parser.go167
1 files changed, 167 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/weblog/parser.go b/src/go/collectors/go.d.plugin/modules/weblog/parser.go
new file mode 100644
index 000000000..b152e4129
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/weblog/parser.go
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package weblog
+
+import (
+ "errors"
+ "fmt"
+ "regexp"
+ "strings"
+
+ "github.com/netdata/netdata/go/go.d.plugin/pkg/logs"
+)
+
+/*
+Default apache log format:
+ - "%v:%p %h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" vhost_combined
+ - "%h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" combined
+ - "%h %l %u %t \"%r\" %>s %O" common
+ - "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" %I %O" Combined I/O (https://httpd.apache.org/docs/2.4/mod/mod_logio.html)
+
+Default nginx log format:
+ - '$remote_addr - $remote_user [$time_local] '
+ '"$request" $status $body_bytes_sent '
+ '"$http_referer" "$http_user_agent"' combined
+
+Netdata recommends:
+ Nginx:
+ - '$remote_addr - $remote_user [$time_local] '
+ '"$request" $status $body_bytes_sent '
+ '$request_length $request_time $upstream_response_time '
+ '"$http_referer" "$http_user_agent"'
+
+ Apache:
+ - "%h %l %u %t \"%r\" %>s %B %I %D \"%{Referer}i\" \"%{User-Agent}i\""
+*/
+
+var (
+ csvCommon = ` $remote_addr - - [$time_local] "$request" $status $body_bytes_sent`
+ csvCustom1 = ` $remote_addr - - [$time_local] "$request" $status $body_bytes_sent $request_length $request_time`
+ csvCustom2 = ` $remote_addr - - [$time_local] "$request" $status $body_bytes_sent $request_length $request_time $upstream_response_time`
+ csvCustom3 = ` $remote_addr - - [$time_local] "$request" $status $body_bytes_sent - - $request_length $request_time`
+ csvCustom4 = ` $remote_addr - - [$time_local] "$request" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time`
+ csvVhostCommon = `$host:$server_port $remote_addr - - [$time_local] "$request" $status $body_bytes_sent`
+ csvVhostCustom1 = `$host:$server_port $remote_addr - - [$time_local] "$request" $status $body_bytes_sent $request_length $request_time`
+ csvVhostCustom2 = `$host:$server_port $remote_addr - - [$time_local] "$request" $status $body_bytes_sent $request_length $request_time $upstream_response_time`
+ csvVhostCustom3 = `$host:$server_port $remote_addr - - [$time_local] "$request" $status $body_bytes_sent - - $request_length $request_time`
+ csvVhostCustom4 = `$host:$server_port $remote_addr - - [$time_local] "$request" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time`
+
+ guessOrder = []string{
+ csvVhostCustom4,
+ csvVhostCustom3,
+ csvVhostCustom2,
+ csvVhostCustom1,
+ csvVhostCommon,
+ csvCustom4,
+ csvCustom3,
+ csvCustom2,
+ csvCustom1,
+ csvCommon,
+ }
+)
+
+func cleanCSVFormat(format string) string { return strings.Join(strings.Fields(format), " ") }
+func cleanApacheLogFormat(format string) string { return strings.ReplaceAll(format, `\`, "") }
+
+const (
+ typeAuto = "auto"
+)
+
+var (
+ reLTSV = regexp.MustCompile(`^[a-zA-Z0-9]+:[^\t]*(\t[a-zA-Z0-9]+:[^\t]*)*$`)
+ reJSON = regexp.MustCompile(`^[[:space:]]*{.*}[[:space:]]*$`)
+)
+
+func (w *WebLog) newParser(record []byte) (logs.Parser, error) {
+ if w.ParserConfig.LogType == typeAuto {
+ w.Debugf("log_type is %s, will try format auto-detection", typeAuto)
+ if len(record) == 0 {
+ return nil, fmt.Errorf("empty line, can't auto-detect format (%s)", w.file.CurrentFilename())
+ }
+ return w.guessParser(record)
+ }
+
+ w.ParserConfig.CSV.Format = cleanApacheLogFormat(w.ParserConfig.CSV.Format)
+ w.Debugf("log_type is %s, skipping auto-detection", w.ParserConfig.LogType)
+ switch w.ParserConfig.LogType {
+ case logs.TypeCSV:
+ w.Debugf("config: %+v", w.ParserConfig.CSV)
+ case logs.TypeLTSV:
+ w.Debugf("config: %+v", w.ParserConfig.LogType)
+ case logs.TypeRegExp:
+ w.Debugf("config: %+v", w.ParserConfig.RegExp)
+ case logs.TypeJSON:
+ w.Debugf("config: %+v", w.ParserConfig.JSON)
+ }
+ return logs.NewParser(w.ParserConfig, w.file)
+}
+
+func (w *WebLog) guessParser(record []byte) (logs.Parser, error) {
+ w.Debug("starting log type auto-detection")
+ if reLTSV.Match(record) {
+ w.Debug("log type is LTSV")
+ return logs.NewLTSVParser(w.ParserConfig.LTSV, w.file)
+ }
+ if reJSON.Match(record) {
+ w.Debug("log type is JSON")
+ return logs.NewJSONParser(w.ParserConfig.JSON, w.file)
+ }
+ w.Debug("log type is CSV")
+ return w.guessCSVParser(record)
+}
+
+func (w *WebLog) guessCSVParser(record []byte) (logs.Parser, error) {
+ w.Debug("starting csv log format auto-detection")
+ w.Debugf("config: %+v", w.ParserConfig.CSV)
+ for _, format := range guessOrder {
+ format = cleanCSVFormat(format)
+ cfg := w.ParserConfig.CSV
+ cfg.Format = format
+
+ w.Debugf("trying format: '%s'", format)
+ parser, err := logs.NewCSVParser(cfg, w.file)
+ if err != nil {
+ return nil, err
+ }
+
+ line := newEmptyLogLine()
+ if err := parser.Parse(record, line); err != nil {
+ w.Debug("parse: ", err)
+ continue
+ }
+
+ if err = line.verify(); err != nil {
+ w.Debug("verify: ", err)
+ continue
+ }
+ return parser, nil
+ }
+ return nil, errors.New("cannot auto-detect log format, use custom log format")
+}
+
+func checkCSVFormatField(field string) (newName string, offset int, valid bool) {
+ if isTimeField(field) {
+ return "", 1, false
+ }
+ if !isFieldValid(field) {
+ return "", 0, false
+ }
+ // remove `$` and `%` to have same field names with regexp parser,
+ // these symbols aren't allowed in sub exp names
+ return field[1:], 0, true
+}
+
+func isTimeField(field string) bool {
+ return field == "[$time_local]" || field == "$time_local" || field == "%t"
+}
+
+func isFieldValid(field string) bool {
+ return len(field) > 1 && (isNginxField(field) || isApacheField(field))
+}
+func isNginxField(field string) bool {
+ return strings.HasPrefix(field, "$")
+}
+
+func isApacheField(field string) bool {
+ return strings.HasPrefix(field, "%")
+}