diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 11:19:16 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:53:24 +0000 |
commit | b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 (patch) | |
tree | d4d31289c39fc00da064a825df13a0b98ce95b10 /src/go/collectors/go.d.plugin/modules/weblog/logline.go | |
parent | Adding upstream version 1.44.3. (diff) | |
download | netdata-upstream.tar.xz netdata-upstream.zip |
Adding upstream version 1.46.3.upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/go/collectors/go.d.plugin/modules/weblog/logline.go | 617 |
1 files changed, 617 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/weblog/logline.go b/src/go/collectors/go.d.plugin/modules/weblog/logline.go new file mode 100644 index 000000000..5a69593b9 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/weblog/logline.go @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package weblog + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "strings" +) + +// TODO: it is not clear how to handle "-", current handling is not good +// In general it is: +// - If a field is unused in a particular entry dash "-" marks the omitted field. +// In addition to that "-" is used as zero value in: +// - apache: %b '-' when no bytes are sent. +// +// Log Format: +// - CLF: https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format +// - ELF: https://www.w3.org/TR/WD-logfile.html +// - Apache CLF: https://httpd.apache.org/docs/trunk/logs.html#common + +// Variables: +// - nginx: http://nginx.org/en/docs/varindex.html +// - apache: http://httpd.apache.org/docs/current/mod/mod_log_config.html#logformat +// - IIS: https://learn.microsoft.com/en-us/windows/win32/http/w3c-logging + +/* +| nginx | apache | description | +|-------------------------|-----------|-----------------------------------------------| +| $host ($http_host) | %v | Name of the server which accepted a request. +| $server_port | %p | Port of the server which accepted a request. +| $scheme | - | Request scheme. "http" or "https". +| $remote_addr | %a (%h) | Client address. +| $request | %r | Full original request line. The line is "$request_method $request_uri $server_protocol". +| $request_method | %m | Request method. Usually "GET" or "POST". +| $request_uri | %U | Full original request URI. +| $server_protocol | %H | Request protocol. Usually "HTTP/1.0", "HTTP/1.1", or "HTTP/2.0". +| $status | %s (%>s) | Response status code. +| $request_length | %I | Bytes received from a client, including request and headers. +| $bytes_sent | %O | Bytes sent to a client, including request and headers. +| $body_bytes_sent | %B (%b) | Bytes sent to a client, not counting the response header. +| $request_time | %D | Request processing time. +| $upstream_response_time | - | Time spent on receiving the response from the upstream server. +| $ssl_protocol | - | Protocol of an established SSL connection. +| $ssl_cipher | - | String of ciphers used for an established SSL connection. +*/ + +var ( + errEmptyLine = errors.New("empty line") + errBadVhost = errors.New("bad vhost") + errBadVhostPort = errors.New("bad vhost with port") + errBadPort = errors.New("bad port") + errBadReqScheme = errors.New("bad req scheme") + errBadReqClient = errors.New("bad req client") + errBadRequest = errors.New("bad request") + errBadReqMethod = errors.New("bad req method") + errBadReqURL = errors.New("bad req url") + errBadReqProto = errors.New("bad req protocol") + errBadReqSize = errors.New("bad req size") + errBadRespCode = errors.New("bad resp status code") + errBadRespSize = errors.New("bad resp size") + errBadReqProcTime = errors.New("bad req processing time") + errBadUpsRespTime = errors.New("bad upstream resp time") + errBadSSLProto = errors.New("bad ssl protocol") + errBadSSLCipherSuite = errors.New("bad ssl cipher suite") +) + +func newEmptyLogLine() *logLine { + var l logLine + l.custom.fields = make(map[string]struct{}) + l.custom.values = make([]customValue, 0, 20) + l.reset() + return &l +} + +type ( + logLine struct { + web + custom custom + } + web struct { + vhost string + port string + reqScheme string + reqClient string + reqMethod string + reqURL string + reqProto string + reqSize int + reqProcTime float64 + respCode int + respSize int + upsRespTime float64 + sslProto string + sslCipherSuite string + } + custom struct { + fields map[string]struct{} + values []customValue + } + customValue struct { + name string + value string + } +) + +func (l *logLine) Assign(field string, value string) (err error) { + if value == "" { + return + } + + switch field { + case "host", "http_host", "v": + err = l.assignVhost(value) + case "server_port", "p": + err = l.assignPort(value) + case "host:$server_port", "v:%p": + err = l.assignVhostWithPort(value) + case "scheme": + err = l.assignReqScheme(value) + case "remote_addr", "a", "h": + err = l.assignReqClient(value) + case "request", "r": + err = l.assignRequest(value) + case "request_method", "m": + err = l.assignReqMethod(value) + case "request_uri", "U": + err = l.assignReqURL(value) + case "server_protocol", "H": + err = l.assignReqProto(value) + case "status", "s", ">s": + err = l.assignRespCode(value) + case "request_length", "I": + err = l.assignReqSize(value) + case "bytes_sent", "body_bytes_sent", "b", "O", "B": + err = l.assignRespSize(value) + case "request_time", "D": + err = l.assignReqProcTime(value) + case "upstream_response_time": + err = l.assignUpsRespTime(value) + case "ssl_protocol": + err = l.assignSSLProto(value) + case "ssl_cipher": + err = l.assignSSLCipherSuite(value) + default: + err = l.assignCustom(field, value) + } + if err != nil { + err = fmt.Errorf("assign '%s': %w", field, err) + } + return err +} + +const hyphen = "-" + +func (l *logLine) assignVhost(vhost string) error { + if vhost == hyphen { + return nil + } + // nginx $host and $http_host returns ipv6 in [], apache not + if idx := strings.IndexByte(vhost, ']'); idx > 0 { + vhost = vhost[1:idx] + } + l.vhost = vhost + return nil +} + +func (l *logLine) assignPort(port string) error { + if port == hyphen { + return nil + } + if !isPortValid(port) { + return fmt.Errorf("assign '%s' : %w", port, errBadPort) + } + l.port = port + return nil +} + +func (l *logLine) assignVhostWithPort(vhostPort string) error { + if vhostPort == hyphen { + return nil + } + idx := strings.LastIndexByte(vhostPort, ':') + if idx == -1 { + return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort) + } + if err := l.assignPort(vhostPort[idx+1:]); err != nil { + return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort) + } + if err := l.assignVhost(vhostPort[0:idx]); err != nil { + return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort) + } + return nil +} + +func (l *logLine) assignReqScheme(scheme string) error { + if scheme == hyphen { + return nil + } + if !isSchemeValid(scheme) { + return fmt.Errorf("assign '%s' : %w", scheme, errBadReqScheme) + } + l.reqScheme = scheme + return nil +} + +func (l *logLine) assignReqClient(client string) error { + if client == hyphen { + return nil + } + l.reqClient = client + return nil +} + +func (l *logLine) assignRequest(request string) error { + if request == hyphen { + return nil + } + var first, last int + if first = strings.IndexByte(request, ' '); first < 0 { + return fmt.Errorf("assign '%s': %w", request, errBadRequest) + } + if last = strings.LastIndexByte(request, ' '); first == last { + return fmt.Errorf("assign '%s': %w", request, errBadRequest) + } + proto := request[last+1:] + url := request[first+1 : last] + method := request[0:first] + if err := l.assignReqMethod(method); err != nil { + return err + } + if err := l.assignReqURL(url); err != nil { + return err + } + return l.assignReqProto(proto) +} + +func (l *logLine) assignReqMethod(method string) error { + if method == hyphen { + return nil + } + if !isReqMethodValid(method) { + return fmt.Errorf("assign '%s' : %w", method, errBadReqMethod) + } + l.reqMethod = method + return nil +} + +func (l *logLine) assignReqURL(url string) error { + if url == hyphen { + return nil + } + if isEmptyString(url) { + return fmt.Errorf("assign '%s' : %w", url, errBadReqURL) + } + l.reqURL = url + return nil +} + +func (l *logLine) assignReqProto(proto string) error { + if proto == hyphen { + return nil + } + if !isReqProtoValid(proto) { + return fmt.Errorf("assign '%s': %w", proto, errBadReqProto) + } + l.reqProto = proto[5:] + return nil +} + +func (l *logLine) assignRespCode(status string) error { + if status == hyphen { + return nil + } + v, err := strconv.Atoi(status) + if err != nil || !isRespCodeValid(v) { + return fmt.Errorf("assign '%s': %w", status, errBadRespCode) + } + l.respCode = v + return nil +} + +func (l *logLine) assignReqSize(size string) error { + // apache: can be "-" according web_log py regexp. + if size == hyphen { + l.reqSize = 0 + return nil + } + v, err := strconv.Atoi(size) + if err != nil || !isSizeValid(v) { + return fmt.Errorf("assign '%s': %w", size, errBadReqSize) + } + l.reqSize = v + return nil +} + +func (l *logLine) assignRespSize(size string) error { + // apache: %b. In CLF format, i.e. a '-' rather than a 0 when no bytes are sent. + if size == hyphen { + l.respSize = 0 + return nil + } + v, err := strconv.Atoi(size) + if err != nil || !isSizeValid(v) { + return fmt.Errorf("assign '%s': %w", size, errBadRespSize) + } + l.respSize = v + return nil +} + +func (l *logLine) assignReqProcTime(time string) error { + if time == hyphen { + return nil + } + if time == "0.000" { + l.reqProcTime = 0 + return nil + } + v, err := strconv.ParseFloat(time, 64) + if err != nil || !isTimeValid(v) { + return fmt.Errorf("assign '%s': %w", time, errBadReqProcTime) + } + l.reqProcTime = v * timeMultiplier(time) + return nil +} + +func isUpstreamTimeSeparator(r rune) bool { return r == ',' || r == ':' } + +func (l *logLine) assignUpsRespTime(time string) error { + if time == hyphen { + return nil + } + + // the upstream response time string can contain multiple values, separated + // by commas (in case the request was handled by multiple servers), or colons + // (in case the request passed between multiple server groups via an internal redirect) + // the individual values should be summed up to obtain the correct amount of time + // the request spent in upstream + var sum float64 + for _, val := range strings.FieldsFunc(time, isUpstreamTimeSeparator) { + val = strings.TrimSpace(val) + v, err := strconv.ParseFloat(val, 64) + if err != nil || !isTimeValid(v) { + return fmt.Errorf("assign '%s': %w", time, errBadUpsRespTime) + } + + sum += v + } + + l.upsRespTime = sum * timeMultiplier(time) + return nil +} + +func (l *logLine) assignSSLProto(proto string) error { + if proto == hyphen { + return nil + } + if !isSSLProtoValid(proto) { + return fmt.Errorf("assign '%s': %w", proto, errBadSSLProto) + } + l.sslProto = proto + return nil +} + +func (l *logLine) assignSSLCipherSuite(cipher string) error { + if cipher == hyphen { + return nil + } + if strings.IndexByte(cipher, '-') <= 0 && strings.IndexByte(cipher, '_') <= 0 { + return fmt.Errorf("assign '%s': %w", cipher, errBadSSLCipherSuite) + } + l.sslCipherSuite = cipher + return nil +} + +func (l *logLine) assignCustom(field, value string) error { + if len(l.custom.fields) == 0 || value == hyphen { + return nil + } + if _, ok := l.custom.fields[field]; ok { + l.custom.values = append(l.custom.values, customValue{name: field, value: value}) + } + return nil +} + +func (l *logLine) verify() error { + if l.empty() { + return fmt.Errorf("verify: %w", errEmptyLine) + } + if l.hasRespCode() && !l.isRespCodeValid() { + return fmt.Errorf("verify '%d': %w", l.respCode, errBadRespCode) + } + if l.hasVhost() && !l.isVhostValid() { + return fmt.Errorf("verify '%s': %w", l.vhost, errBadVhost) + } + if l.hasPort() && !l.isPortValid() { + return fmt.Errorf("verify '%s': %w", l.port, errBadPort) + } + if l.hasReqScheme() && !l.isSchemeValid() { + return fmt.Errorf("verify '%s': %w", l.reqScheme, errBadReqScheme) + } + if l.hasReqClient() && !l.isClientValid() { + return fmt.Errorf("verify '%s': %w", l.reqClient, errBadReqClient) + } + if l.hasReqMethod() && !l.isMethodValid() { + return fmt.Errorf("verify '%s': %w", l.reqMethod, errBadReqMethod) + } + if l.hasReqURL() && !l.isURLValid() { + return fmt.Errorf("verify '%s': %w", l.reqURL, errBadReqURL) + } + if l.hasReqProto() && !l.isProtoValid() { + return fmt.Errorf("verify '%s': %w", l.reqProto, errBadReqProto) + } + if l.hasReqSize() && !l.isReqSizeValid() { + return fmt.Errorf("verify '%d': %w", l.reqSize, errBadReqSize) + } + if l.hasRespSize() && !l.isRespSizeValid() { + return fmt.Errorf("verify '%d': %w", l.respSize, errBadRespSize) + } + if l.hasReqProcTime() && !l.isReqProcTimeValid() { + return fmt.Errorf("verify '%f': %w", l.reqProcTime, errBadReqProcTime) + } + if l.hasUpsRespTime() && !l.isUpsRespTimeValid() { + return fmt.Errorf("verify '%f': %w", l.upsRespTime, errBadUpsRespTime) + } + if l.hasSSLProto() && !l.isSSLProtoValid() { + return fmt.Errorf("verify '%s': %w", l.sslProto, errBadSSLProto) + } + if l.hasSSLCipherSuite() && !l.isSSLCipherSuiteValid() { + return fmt.Errorf("verify '%s': %w", l.sslCipherSuite, errBadSSLCipherSuite) + } + return nil +} + +func (l *logLine) empty() bool { return !l.hasWebFields() && !l.hasCustomFields() } +func (l *logLine) hasCustomFields() bool { return len(l.custom.values) > 0 } +func (l *logLine) hasWebFields() bool { return l.web != emptyWebFields } +func (l *logLine) hasVhost() bool { return !isEmptyString(l.vhost) } +func (l *logLine) hasPort() bool { return !isEmptyString(l.port) } +func (l *logLine) hasReqScheme() bool { return !isEmptyString(l.reqScheme) } +func (l *logLine) hasReqClient() bool { return !isEmptyString(l.reqClient) } +func (l *logLine) hasReqMethod() bool { return !isEmptyString(l.reqMethod) } +func (l *logLine) hasReqURL() bool { return !isEmptyString(l.reqURL) } +func (l *logLine) hasReqProto() bool { return !isEmptyString(l.reqProto) } +func (l *logLine) hasRespCode() bool { return !isEmptyNumber(l.respCode) } +func (l *logLine) hasReqSize() bool { return !isEmptyNumber(l.reqSize) } +func (l *logLine) hasRespSize() bool { return !isEmptyNumber(l.respSize) } +func (l *logLine) hasReqProcTime() bool { return !isEmptyNumber(int(l.reqProcTime)) } +func (l *logLine) hasUpsRespTime() bool { return !isEmptyNumber(int(l.upsRespTime)) } +func (l *logLine) hasSSLProto() bool { return !isEmptyString(l.sslProto) } +func (l *logLine) hasSSLCipherSuite() bool { return !isEmptyString(l.sslCipherSuite) } +func (l *logLine) isVhostValid() bool { return reVhost.MatchString(l.vhost) } +func (l *logLine) isPortValid() bool { return isPortValid(l.port) } +func (l *logLine) isSchemeValid() bool { return isSchemeValid(l.reqScheme) } +func (l *logLine) isClientValid() bool { return reClient.MatchString(l.reqClient) } +func (l *logLine) isMethodValid() bool { return isReqMethodValid(l.reqMethod) } +func (l *logLine) isURLValid() bool { return !isEmptyString(l.reqURL) } +func (l *logLine) isProtoValid() bool { return isReqProtoVerValid(l.reqProto) } +func (l *logLine) isRespCodeValid() bool { return isRespCodeValid(l.respCode) } +func (l *logLine) isReqSizeValid() bool { return isSizeValid(l.reqSize) } +func (l *logLine) isRespSizeValid() bool { return isSizeValid(l.respSize) } +func (l *logLine) isReqProcTimeValid() bool { return isTimeValid(l.reqProcTime) } +func (l *logLine) isUpsRespTimeValid() bool { return isTimeValid(l.upsRespTime) } +func (l *logLine) isSSLProtoValid() bool { return isSSLProtoValid(l.sslProto) } +func (l *logLine) isSSLCipherSuiteValid() bool { return reCipherSuite.MatchString(l.sslCipherSuite) } + +func (l *logLine) reset() { + l.web = emptyWebFields + l.custom.values = l.custom.values[:0] +} + +var ( + // TODO: reClient doesn't work with %h when HostnameLookups is On. + reVhost = regexp.MustCompile(`^[a-zA-Z0-9-:.]+$`) + reClient = regexp.MustCompile(`^([\da-f:.]+|localhost)$`) + reCipherSuite = regexp.MustCompile(`^[A-Z0-9-_]+$`) // openssl -v +) + +var emptyWebFields = web{ + vhost: emptyString, + port: emptyString, + reqScheme: emptyString, + reqClient: emptyString, + reqMethod: emptyString, + reqURL: emptyString, + reqProto: emptyString, + reqSize: emptyNumber, + reqProcTime: emptyNumber, + respCode: emptyNumber, + respSize: emptyNumber, + upsRespTime: emptyNumber, + sslProto: emptyString, + sslCipherSuite: emptyString, +} + +const ( + emptyString = "__empty_string__" + emptyNumber = -9999 +) + +func isEmptyString(s string) bool { + return s == emptyString || s == "" +} + +func isEmptyNumber(n int) bool { + return n == emptyNumber +} + +func isReqMethodValid(method string) bool { + // https://www.iana.org/assignments/http-methods/http-methods.xhtml + switch method { + case "GET", + "ACL", + "BASELINE-CONTROL", + "BIND", + "CHECKIN", + "CHECKOUT", + "CONNECT", + "COPY", + "DELETE", + "HEAD", + "LABEL", + "LINK", + "LOCK", + "MERGE", + "MKACTIVITY", + "MKCALENDAR", + "MKCOL", + "MKREDIRECTREF", + "MKWORKSPACE", + "MOVE", + "OPTIONS", + "ORDERPATCH", + "PATCH", + "POST", + "PRI", + "PROPFIND", + "PROPPATCH", + "PURGE", // not a standardized HTTP method + "PUT", + "REBIND", + "REPORT", + "SEARCH", + "TRACE", + "UNBIND", + "UNCHECKOUT", + "UNLINK", + "UNLOCK", + "UPDATE", + "UPDATEREDIRECTREF": + return true + } + return false +} + +func isReqProtoValid(proto string) bool { + return len(proto) >= 6 && proto[:5] == "HTTP/" && isReqProtoVerValid(proto[5:]) +} + +func isReqProtoVerValid(version string) bool { + switch version { + case "1.1", "1", "1.0", "2", "2.0", "3", "3.0": + return true + } + return false +} + +func isPortValid(port string) bool { + v, err := strconv.Atoi(port) + return err == nil && v >= 80 && v <= 49151 +} + +func isSchemeValid(scheme string) bool { + return scheme == "http" || scheme == "https" +} + +func isRespCodeValid(code int) bool { + // rfc7231 + // Informational responses (100–199), + // Successful responses (200–299), + // Redirects (300–399), + // Client errors (400–499), + // Server errors (500–599). + return code >= 100 && code <= 600 +} + +func isSizeValid(size int) bool { + return size >= 0 +} + +func isTimeValid(time float64) bool { + return time >= 0 +} + +func isSSLProtoValid(proto string) bool { + if proto == "TLSv1.2" { + return true + } + switch proto { + case "TLSv1.3", "SSLv2", "SSLv3", "TLSv1", "TLSv1.1": + return true + } + return false +} + +func timeMultiplier(time string) float64 { + // TODO: Change code to detect and modify properly IIS time (in milliseconds) + // Convert to microseconds: + // - nginx time is in seconds with a milliseconds' resolution. + if strings.IndexByte(time, '.') > 0 { + return 1e6 + } + // - apache time is in microseconds. + return 1 +} |