summaryrefslogtreecommitdiffstats
path: root/src/go/collectors/go.d.plugin/modules/squidlog/logline.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/go/collectors/go.d.plugin/modules/squidlog/logline.go')
-rw-r--r--src/go/collectors/go.d.plugin/modules/squidlog/logline.go407
1 files changed, 407 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/squidlog/logline.go b/src/go/collectors/go.d.plugin/modules/squidlog/logline.go
new file mode 100644
index 000000000..e3d200eaf
--- /dev/null
+++ b/src/go/collectors/go.d.plugin/modules/squidlog/logline.go
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+package squidlog
+
+import (
+ "errors"
+ "fmt"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// https://wiki.squid-cache.org/Features/LogFormat
+// http://www.squid-cache.org/Doc/config/logformat/
+// https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes
+// https://www.websense.com/content/support/library/web/v773/wcg_help/squid.aspx
+
+/*
+4.6.1:
+logformat squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %[un %Sh/%<a %mt
+logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
+logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
+logformat referrer %ts.%03tu %>a %{Referer}>h %ru
+logformat useragent %>a [%tl] "%{User-Agent}>h"
+logformat icap_squid %ts.%03tu %6icap::tr %>A %icap::to/%03icap::Hs %icap::<st %icap::rm %icap::ru %un -/%icap::<A -
+*/
+
+/*
+Valid Capture Name: [A-Za-z0-9_]+
+// TODO: namings
+
+| local | squid format code | description |
+|-------------------------|-------------------|------------------------------------------------------------------------|
+| resp_time | %tr | Response time (milliseconds).
+| client_address | %>a | Client source IP address.
+| client_address | %>A | Client FQDN.
+| cache_code | %Ss | Squid request status (TCP_MISS etc).
+| http_code | %>Hs | The HTTP response status code from Content Gateway to client.
+| resp_size | %<st | Total size of reply sent to client (after adaptation).
+| req_method | %rm | Request method (GET/POST etc).
+| hier_code | %Sh | Squid hierarchy status (DEFAULT_PARENT etc).
+| server_address | %<a | Server IP address of the last server or peer connection.
+| server_address | %<A | Server FQDN or peer name.
+| mime_type | %mt | MIME content type.
+
+// Following needed to make default log format csv parsable
+| result_code | %Ss/%03>Hs | cache code and http code.
+| hierarchy | %Sh/%<a | hierarchy code and server address.
+
+Notes:
+- %<a: older versions of Squid would put the origin server hostname here.
+*/
+
+var (
+ errEmptyLine = errors.New("empty line")
+ errBadRespTime = errors.New("bad response time")
+ errBadClientAddr = errors.New("bad client address")
+ errBadCacheCode = errors.New("bad cache code")
+ errBadHTTPCode = errors.New("bad http code")
+ errBadRespSize = errors.New("bad response size")
+ errBadReqMethod = errors.New("bad request method")
+ errBadHierCode = errors.New("bad hier code")
+ errBadServerAddr = errors.New("bad server address")
+ errBadMimeType = errors.New("bad mime type")
+ errBadResultCode = errors.New("bad result code")
+ errBadHierarchy = errors.New("bad hierarchy")
+)
+
+func newEmptyLogLine() *logLine {
+ var l logLine
+ l.reset()
+ return &l
+}
+
+type (
+ logLine struct {
+ clientAddr string
+ serverAddr string
+
+ respTime int
+ respSize int
+ httpCode int
+
+ reqMethod string
+ mimeType string
+
+ cacheCode string
+ hierCode string
+ }
+)
+
+const (
+ fieldRespTime = "resp_time"
+ fieldClientAddr = "client_address"
+ fieldCacheCode = "cache_code"
+ fieldHTTPCode = "http_code"
+ fieldRespSize = "resp_size"
+ fieldReqMethod = "req_method"
+ fieldHierCode = "hier_code"
+ fieldServerAddr = "server_address"
+ fieldMimeType = "mime_type"
+ fieldResultCode = "result_code"
+ fieldHierarchy = "hierarchy"
+)
+
+func (l *logLine) Assign(field string, value string) (err error) {
+ if value == "" {
+ return
+ }
+
+ switch field {
+ case fieldRespTime:
+ err = l.assignRespTime(value)
+ case fieldClientAddr:
+ err = l.assignClientAddress(value)
+ case fieldCacheCode:
+ err = l.assignCacheCode(value)
+ case fieldHTTPCode:
+ err = l.assignHTTPCode(value)
+ case fieldRespSize:
+ err = l.assignRespSize(value)
+ case fieldReqMethod:
+ err = l.assignReqMethod(value)
+ case fieldHierCode:
+ err = l.assignHierCode(value)
+ case fieldMimeType:
+ err = l.assignMimeType(value)
+ case fieldServerAddr:
+ err = l.assignServerAddress(value)
+ case fieldResultCode:
+ err = l.assignResultCode(value)
+ case fieldHierarchy:
+ err = l.assignHierarchy(value)
+ }
+ return err
+}
+
+const hyphen = "-"
+
+func (l *logLine) assignRespTime(time string) error {
+ if time == hyphen {
+ return fmt.Errorf("assign '%s': %w", time, errBadRespTime)
+ }
+ v, err := strconv.Atoi(time)
+ if err != nil || !isRespTimeValid(v) {
+ return fmt.Errorf("assign '%s': %w", time, errBadRespTime)
+ }
+ l.respTime = v
+ return nil
+}
+
+func (l *logLine) assignClientAddress(address string) error {
+ if address == hyphen {
+ return fmt.Errorf("assign '%s': %w", address, errBadClientAddr)
+ }
+ l.clientAddr = address
+ return nil
+}
+
+func (l *logLine) assignCacheCode(code string) error {
+ if code == hyphen || !isCacheCodeValid(code) {
+ return fmt.Errorf("assign '%s': %w", code, errBadCacheCode)
+ }
+ l.cacheCode = code
+ return nil
+}
+
+func (l *logLine) assignHTTPCode(code string) error {
+ if code == hyphen {
+ return fmt.Errorf("assign '%s': %w", code, errBadHTTPCode)
+ }
+ v, err := strconv.Atoi(code)
+ if err != nil || !isHTTPCodeValid(v) {
+ return fmt.Errorf("assign '%s': %w", code, errBadHTTPCode)
+ }
+ l.httpCode = v
+ return nil
+}
+
+func (l *logLine) assignResultCode(code string) error {
+ i := strings.IndexByte(code, '/')
+ if i <= 0 {
+ return fmt.Errorf("assign '%s': %w", code, errBadResultCode)
+ }
+ if err := l.assignCacheCode(code[:i]); err != nil {
+ return err
+ }
+ return l.assignHTTPCode(code[i+1:])
+}
+
+func (l *logLine) assignRespSize(size string) error {
+ if size == hyphen {
+ return fmt.Errorf("assign '%s': %w", size, errBadRespSize)
+ }
+ v, err := strconv.Atoi(size)
+ if err != nil || !isRespSizeValid(v) {
+ return fmt.Errorf("assign '%s': %w", size, errBadRespSize)
+ }
+ l.respSize = v
+ return nil
+}
+
+func (l *logLine) assignReqMethod(method string) error {
+ if method == hyphen || !isReqMethodValid(method) {
+ return fmt.Errorf("assign '%s': %w", method, errBadReqMethod)
+ }
+ l.reqMethod = method
+ return nil
+}
+
+func (l *logLine) assignHierCode(code string) error {
+ if code == hyphen || !isHierCodeValid(code) {
+ return fmt.Errorf("assign '%s': %w", code, errBadHierCode)
+ }
+ l.hierCode = code
+ return nil
+}
+
+func (l *logLine) assignServerAddress(address string) error {
+ // Logged as "-" if there is no hierarchy information.
+ // For TCP HIT, TCP failures, cachemgr requests and all UDP requests, there is no hierarchy information.
+ if address == hyphen {
+ return nil
+ }
+ l.serverAddr = address
+ return nil
+}
+
+func (l *logLine) assignHierarchy(hierarchy string) error {
+ i := strings.IndexByte(hierarchy, '/')
+ if i <= 0 {
+ return fmt.Errorf("assign '%s': %w", hierarchy, errBadHierarchy)
+ }
+ if err := l.assignHierCode(hierarchy[:i]); err != nil {
+ return err
+ }
+ return l.assignServerAddress(hierarchy[i+1:])
+}
+
+func (l *logLine) assignMimeType(mime string) error {
+ // ICP exchanges usually don't have any content type, and thus are logged "-".
+ //Also, some weird replies have content types ":" or even empty ones.
+ if mime == hyphen || mime == ":" {
+ return nil
+ }
+ // format: type/subtype, type/subtype;parameter=value
+ i := strings.IndexByte(mime, '/')
+ if i <= 0 || !isMimeTypeValid(mime[:i]) {
+ return fmt.Errorf("assign '%s': %w", mime, errBadMimeType)
+ }
+ l.mimeType = mime[:i] // drop subtype
+ return nil
+}
+
+func (l logLine) verify() error {
+ if l.empty() {
+ return fmt.Errorf("verify: %w", errEmptyLine)
+ }
+ if l.hasRespTime() && !l.isRespTimeValid() {
+ return fmt.Errorf("verify '%d': %w", l.respTime, errBadRespTime)
+ }
+ if l.hasClientAddress() && !l.isClientAddressValid() {
+ return fmt.Errorf("verify '%s': %w", l.clientAddr, errBadClientAddr)
+ }
+ if l.hasCacheCode() && !l.isCacheCodeValid() {
+ return fmt.Errorf("verify '%s': %w", l.cacheCode, errBadCacheCode)
+ }
+ if l.hasHTTPCode() && !l.isHTTPCodeValid() {
+ return fmt.Errorf("verify '%d': %w", l.httpCode, errBadHTTPCode)
+ }
+ if l.hasRespSize() && !l.isRespSizeValid() {
+ return fmt.Errorf("verify '%d': %w", l.respSize, errBadRespSize)
+ }
+ if l.hasReqMethod() && !l.isReqMethodValid() {
+ return fmt.Errorf("verify '%s': %w", l.reqMethod, errBadReqMethod)
+ }
+ if l.hasHierCode() && !l.isHierCodeValid() {
+ return fmt.Errorf("verify '%s': %w", l.hierCode, errBadHierCode)
+ }
+ if l.hasServerAddress() && !l.isServerAddressValid() {
+ return fmt.Errorf("verify '%s': %w", l.serverAddr, errBadServerAddr)
+ }
+ if l.hasMimeType() && !l.isMimeTypeValid() {
+ return fmt.Errorf("verify '%s': %w", l.mimeType, errBadMimeType)
+ }
+ return nil
+}
+
+func (l logLine) empty() bool { return l == emptyLogLine }
+func (l logLine) hasRespTime() bool { return !isEmptyNumber(l.respTime) }
+func (l logLine) hasClientAddress() bool { return !isEmptyString(l.clientAddr) }
+func (l logLine) hasCacheCode() bool { return !isEmptyString(l.cacheCode) }
+func (l logLine) hasHTTPCode() bool { return !isEmptyNumber(l.httpCode) }
+func (l logLine) hasRespSize() bool { return !isEmptyNumber(l.respSize) }
+func (l logLine) hasReqMethod() bool { return !isEmptyString(l.reqMethod) }
+func (l logLine) hasHierCode() bool { return !isEmptyString(l.hierCode) }
+func (l logLine) hasServerAddress() bool { return !isEmptyString(l.serverAddr) }
+func (l logLine) hasMimeType() bool { return !isEmptyString(l.mimeType) }
+func (l logLine) isRespTimeValid() bool { return isRespTimeValid(l.respTime) }
+func (l logLine) isClientAddressValid() bool { return reAddress.MatchString(l.clientAddr) }
+func (l logLine) isCacheCodeValid() bool { return isCacheCodeValid(l.cacheCode) }
+func (l logLine) isHTTPCodeValid() bool { return isHTTPCodeValid(l.httpCode) }
+func (l logLine) isRespSizeValid() bool { return isRespSizeValid(l.respSize) }
+func (l logLine) isReqMethodValid() bool { return isReqMethodValid(l.reqMethod) }
+func (l logLine) isHierCodeValid() bool { return isHierCodeValid(l.hierCode) }
+func (l logLine) isServerAddressValid() bool { return reAddress.MatchString(l.serverAddr) }
+func (l logLine) isMimeTypeValid() bool { return isMimeTypeValid(l.mimeType) }
+
+func (l *logLine) reset() {
+ l.respTime = emptyNumber
+ l.clientAddr = emptyString
+ l.cacheCode = emptyString
+ l.httpCode = emptyNumber
+ l.respSize = emptyNumber
+ l.reqMethod = emptyString
+ l.hierCode = emptyString
+ l.serverAddr = emptyString
+ l.mimeType = emptyString
+}
+
+var emptyLogLine = *newEmptyLogLine()
+
+const (
+ emptyString = "__empty_string__"
+ emptyNumber = -9999
+)
+
+var (
+ // IPv4, IPv6, FQDN.
+ reAddress = regexp.MustCompile(`^(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3}|[a-f0-9:]{3,}|[a-zA-Z0-9-.]{3,})$`)
+)
+
+func isEmptyString(s string) bool {
+ return s == emptyString || s == ""
+}
+
+func isEmptyNumber(n int) bool {
+ return n == emptyNumber
+}
+
+func isRespTimeValid(time int) bool {
+ return time >= 0
+}
+
+// isCacheCodeValid does not guarantee cache result code is valid, but it is very likely.
+func isCacheCodeValid(code string) bool {
+ // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Squid_result_codes
+ if code == "NONE" {
+ return true
+ }
+ return len(code) > 5 && (code[:4] == "TCP_" || code[:4] == "UDP_")
+}
+
+func isHTTPCodeValid(code int) bool {
+ // https://wiki.squid-cache.org/SquidFaq/SquidLogs#HTTP_status_codes
+ return code == 0 || code >= 100 && code <= 603
+}
+
+func isRespSizeValid(size int) bool {
+ return size >= 0
+}
+
+func isReqMethodValid(method string) bool {
+ // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Request_methods
+ switch method {
+ case "GET",
+ "HEAD",
+ "POST",
+ "PUT",
+ "PATCH",
+ "DELETE",
+ "CONNECT",
+ "OPTIONS",
+ "TRACE",
+ "ICP_QUERY",
+ "PURGE",
+ "PROPFIND",
+ "PROPATCH",
+ "MKCOL",
+ "COPY",
+ "MOVE",
+ "LOCK",
+ "UNLOCK",
+ "NONE":
+ return true
+ }
+ return false
+}
+
+// isHierCodeValid does not guarantee hierarchy code is valid, but it is very likely.
+func isHierCodeValid(code string) bool {
+ // https://wiki.squid-cache.org/SquidFaq/SquidLogs#Hierarchy_Codes
+ return len(code) > 6 && code[:5] == "HIER_"
+}
+
+// isMimeTypeValid expects only mime type part.
+func isMimeTypeValid(mimeType string) bool {
+ // https://www.iana.org/assignments/media-types/media-types.xhtml
+ if mimeType == "text" {
+ return true
+ }
+ switch mimeType {
+ case "application", "audio", "font", "image", "message", "model", "multipart", "video":
+ return true
+ }
+ return false
+}