diff options
Diffstat (limited to 'admin/halog')
-rw-r--r-- | admin/halog/README | 4 | ||||
-rw-r--r-- | admin/halog/fgets2.c | 267 | ||||
-rw-r--r-- | admin/halog/halog.c | 1910 |
3 files changed, 2181 insertions, 0 deletions
diff --git a/admin/halog/README b/admin/halog/README new file mode 100644 index 0000000..ff1bb12 --- /dev/null +++ b/admin/halog/README @@ -0,0 +1,4 @@ +This needs to be built from the top makefile, for example : + + make admin/halog/halog + diff --git a/admin/halog/fgets2.c b/admin/halog/fgets2.c new file mode 100644 index 0000000..7fbe16b --- /dev/null +++ b/admin/halog/fgets2.c @@ -0,0 +1,267 @@ +/* + * fast fgets() replacement for log parsing + * + * Copyright 2000-2012 Willy Tarreau <w@1wt.eu> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * This function manages its own buffer and returns a pointer to that buffer + * in order to avoid expensive memory copies. It also checks for line breaks + * 32 or 64 bits at a time. It could be improved a lot using mmap() but we + * would not be allowed to replace trailing \n with zeroes and we would be + * limited to small log files on 32-bit machines. 
/*
 * fgets2.c: includes, tunables and word-at-a-time helpers.
 */

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

#ifndef FGETS2_BUFSIZE
#define FGETS2_BUFSIZE (256*1024)
#endif

/* memchr() is faster in glibc with SSE since commit 093ecf92998de2 */
#if defined(__x86_64__) && defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 15))
#define USE_MEMCHR
#endif

/* return non-zero if the integer contains at least one zero byte */
static inline __attribute__((unused)) unsigned int has_zero32(unsigned int x)
{
	unsigned int y;

	/* Principle: we want to perform 4 tests on one 32-bit int at once. For
	 * this, we have to simulate an SIMD instruction which we don't have by
	 * default. The principle is that a zero byte is the only one which
	 * will cause a 1 to appear on the upper bit of a byte/word/etc... when
	 * we subtract 1. So we can detect a zero byte if a one appears at any
	 * of the bits 7, 15, 23 or 31 where it was not. It takes only one
	 * instruction to test for the presence of any of these bits, but it is
	 * still complex to check for their initial absence. Thus, we'll
	 * proceed differently : we first save and clear only those bits, then
	 * we check in the final result if one of them is present and was not.
	 * The order of operations below is important to save registers and
	 * tests. The result is used as a boolean, so the last test must apply
	 * on the constant so that it can efficiently be inlined.
	 */
#if defined(__i386__)
	/* gcc on x86 loves copying registers over and over even on code that
	 * simple, so let's do it by hand to prevent it from doing so :-(
	 */
	asm("lea -0x01010101(%0),%1\n"
	    "not %0\n"
	    "and %1,%0\n"
	    : "=a" (x), "=r"(y)
	    : "0" (x)
	    );
	return x & 0x80808080;
#else
	y = x - 0x01010101; /* generate a carry */
	x = ~x & y;         /* clear the bits that were already set */
	return x & 0x80808080;
#endif
}

/* return non-zero if the argument contains at least one zero byte. See principle above. */
static inline __attribute__((unused)) unsigned long long has_zero64(unsigned long long x)
{
	unsigned long long y;

	y = x - 0x0101010101010101ULL; /* generate a carry */
	y &= ~x;                       /* clear the bits that were already set */
	return y & 0x8080808080808080ULL;
}

/* native-word variant: dispatches on the size of unsigned long */
static inline __attribute__((unused)) unsigned long has_zero(unsigned long x)
{
	return (sizeof(x) == 8) ? has_zero64(x) : has_zero32(x);
}

/* find a '\n' between <next> and <end>. Warning: may read slightly past <end>,
 * so the caller must keep readable padding after <end> (fgets2() reserves 68
 * bytes). If no '\n' is found, <end> is returned; the result is never larger
 * than <end>.
 */
static char *find_lf(char *next, char *end)
{
#if defined USE_MEMCHR
	/* some recent libc use platform-specific optimizations to provide more
	 * efficient byte search than below (eg: glibc 2.11 on x86_64).
	 */
	next = memchr(next, '\n', end - next);
	if (!next)
		next = end;
#else
	if (sizeof(long) == 4) { /* 32-bit system */
		/* this is a speed-up, we read 32 bits at once and check for an
		 * LF character there. We stop if found then continue one at a
		 * time.
		 */
		while (next < end && (((unsigned long)next) & 3) && *next != '\n')
			next++;

		/* Now next is multiple of 4 or equal to end. We know we can safely
		 * read up to 32 bytes past end if needed because they're allocated.
		 */
		while (next < end) {
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
				break;
			next += 4;
		}
	}
	else { /* 64-bit system */
		/* this is a speed-up, we read 64 bits at once and check for an
		 * LF character there. We stop if found then continue one at a
		 * time.
		 */
		if (next <= end) {
			/* max 3 bytes tested here */
			while ((((unsigned long)next) & 3) && *next != '\n')
				next++;

			/* maybe we can skip 4 more bytes */
			if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
				next += 4;
		}

		/* now next is multiple of 8 or equal to end. The distance is
		 * compared instead of computing <end>-68, which would form a
		 * pointer before the start of the buffer on short inputs.
		 */
		while ((end - next) >= 68) {
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
				break;
			next += 8;
		}

		/* maybe we can skip 4 more bytes */
		if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
			next += 4;
	}

	/* We finish if needed : if <next> is below <end>, it means we
	 * found an LF in one of the 4 following bytes.
	 */
	while (next < end) {
		if (*next == '\n')
			break;
		next++;
	}

	/* The word-sized probes may step past <end> when no LF sentinel sits
	 * at <end> (this happens on the very first call, on an empty buffer).
	 * Clamp so that the caller never resumes scanning beyond bytes that
	 * have not been examined yet.
	 */
	if (next > end)
		next = end;
#endif
	return next;
}

/* Return the next line read from <stream>, or NULL on end of input, on read
 * error, or when a single line does not fit in FGETS2_BUFSIZE bytes. The
 * returned pointer refers to an internal static buffer in which the trailing
 * '\n' was replaced with '\0'; it is only valid until the next call. Data is
 * read directly from the stream's file descriptor with read(), bypassing the
 * stdio buffer.
 */
const char *fgets2(FILE *stream)
{
	static char buffer[FGETS2_BUFSIZE + 68]; /* Note: +32 is enough on 32-bit systems */
	static char *end = buffer;
	static char *line = buffer;
	char *next;
	ssize_t ret;

	next = line;

	while (1) {
		next = find_lf(next, end);
		if (next < end) {
			const char *start = line;
			*next = '\0';
			line = next + 1;
			return start;
		}

		/* we found an incomplete line. First, let's move the
		 * remaining part of the buffer to the beginning, then
		 * try to complete the buffer with a new read. At this
		 * point <next> equals <end>.
		 */
		if (line > buffer) {
			if (end != line)
				memmove(buffer, line, end - line);
			end = buffer + (end - line);
			next = end;
			line = buffer;
		} else {
			/* the buffer is full and holds no LF: line too long */
			if (end == buffer + FGETS2_BUFSIZE)
				return NULL;
		}

		/* an interrupted read is not an end of file, retry it */
		do {
			ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
		} while (ret < 0 && errno == EINTR);

		if (ret <= 0) {
			if (end == line)
				return NULL;

			/* last line without a trailing LF */
			*end = '\0';
			end = line; /* ensure we stop next time */
			return line;
		}

		end += ret;
		*end = '\n'; /* make parser stop ASAP */
		/* search for '\n' again */
	}
}

#ifdef BENCHMARK
int main() {
	const char *p;
	unsigned int lines = 0;

	while ((p=fgets2(stdin)))
		lines++;
	printf("lines=%u\n", lines);
	return 0;
}
#endif

/* diff --git a/admin/halog/halog.c b/admin/halog/halog.c
 * new file mode 100644
 * index 0000000..45eec75
 * --- /dev/null
 * +++ b/admin/halog/halog.c
 * @@ -0,0 +1,1910 @@
 */
/*
 * haproxy log statistics reporter
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the
 */
License, or (at your option) any later version. + * + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <syslog.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <time.h> + +#include <haproxy/compiler.h> + +#include <import/eb32tree.h> +#include <import/eb64tree.h> +#include <import/ebistree.h> +#include <import/ebsttree.h> + +#define SOURCE_FIELD 5 +#define ACCEPT_FIELD 6 +#define SERVER_FIELD 8 +#define TIME_FIELD 9 +#define STATUS_FIELD 10 +#define BYTES_SENT_FIELD 11 +#define TERM_CODES_FIELD 14 +#define CONN_FIELD 15 +#define QUEUE_LEN_FIELD 16 +#define METH_FIELD 17 +#define URL_FIELD 18 +#define MAXLINE 16384 +#define QBITS 4 + +#define SEP(c) ((unsigned char)(c) <= ' ') +#define SKIP_CHAR(p,c) do { while (1) { int __c = (unsigned char)*p++; if (__c == c) break; if (__c <= ' ') { p--; break; } } } while (0) + +/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */ +static struct eb_root timers[5] = { + EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, + EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, +}; + +struct timer { + struct eb32_node node; + unsigned int count; +}; + +struct srv_st { + unsigned int st_cnt[6]; /* 0xx to 5xx */ + unsigned int nb_ct, nb_rt, nb_ok; + unsigned long long cum_ct, cum_rt; + struct ebmb_node node; + /* don't put anything else here, the server name will be there */ +}; + +struct url_stat { + union { + struct ebpt_node url; + struct eb64_node val; + } node; + char *url; + unsigned long long total_time; /* sum(all reqs' times) */ + unsigned long long total_time_ok; /* sum(all OK reqs' times) */ + unsigned long long total_bytes_sent; /* sum(all bytes sent) */ + unsigned int nb_err, nb_req; +}; + +#define FILT_COUNT_ONLY 0x01 +#define FILT_INVERT 0x02 +#define FILT_QUIET 0x04 +#define FILT_ERRORS_ONLY 0x08 +#define FILT_ACC_DELAY 0x10 +#define FILT_ACC_COUNT 0x20 +#define FILT_GRAPH_TIMERS 0x40 +#define FILT_PERCENTILE 0x80 +#define FILT_TIME_RESP 0x100 + +#define 
FILT_INVERT_ERRORS 0x200 +#define FILT_INVERT_TIME_RESP 0x400 + +#define FILT_COUNT_STATUS 0x800 +#define FILT_COUNT_SRV_STATUS 0x1000 +#define FILT_COUNT_TERM_CODES 0x2000 + +#define FILT_COUNT_URL_ONLY 0x004000 +#define FILT_COUNT_URL_COUNT 0x008000 +#define FILT_COUNT_URL_ERR 0x010000 +#define FILT_COUNT_URL_TTOT 0x020000 +#define FILT_COUNT_URL_TAVG 0x040000 +#define FILT_COUNT_URL_TTOTO 0x080000 +#define FILT_COUNT_URL_TAVGO 0x100000 + +#define FILT_HTTP_ONLY 0x200000 +#define FILT_TERM_CODE_NAME 0x400000 +#define FILT_INVERT_TERM_CODE_NAME 0x800000 + +#define FILT_HTTP_STATUS 0x1000000 +#define FILT_INVERT_HTTP_STATUS 0x2000000 +#define FILT_QUEUE_ONLY 0x4000000 +#define FILT_QUEUE_SRV_ONLY 0x8000000 + +#define FILT_COUNT_URL_BAVG 0x10000000 +#define FILT_COUNT_URL_BTOT 0x20000000 + +#define FILT_COUNT_URL_ANY (FILT_COUNT_URL_ONLY|FILT_COUNT_URL_COUNT|FILT_COUNT_URL_ERR| \ + FILT_COUNT_URL_TTOT|FILT_COUNT_URL_TAVG|FILT_COUNT_URL_TTOTO|FILT_COUNT_URL_TAVGO| \ + FILT_COUNT_URL_BAVG|FILT_COUNT_URL_BTOT) + +#define FILT_COUNT_COOK_CODES 0x40000000 +#define FILT_COUNT_IP_COUNT 0x80000000 + +#define FILT2_TIMESTAMP 0x01 +#define FILT2_PRESERVE_QUERY 0x02 +#define FILT2_EXTRACT_CAPTURE 0x04 + +unsigned int filter = 0; +unsigned int filter2 = 0; +unsigned int filter_invert = 0; +const char *line; +int linenum = 0; +int parse_err = 0; +int lines_out = 0; +int lines_max = -1; + +const char *fgets2(FILE *stream); + +void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr); +void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_count_status(const char 
*accept_field, const char *time_field, struct timer **tptr); +void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int, unsigned int); +void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr); + +void usage(FILE *output, const char *msg) +{ + fprintf(output, + "%s" + "Usage:\n" + " halog [-h|--help] for long help\n" + " halog [input_filters]* [modifiers]* [output_format] < log\n" + " inp = [-e|-E] [-H] [-Q|-QS] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n" + " [-hs|-HS [min][:[max]]] [-tcn|-TCN <termcode>] [-time [min][:[max]]]\n" + " mod = [-q] [-v] [-m <lines>] [-s <skipflds>] [-query]\n" + " out = {-c|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-hdr <block>:<field>|\n" + " -cc|-gt|-pct|-st|-tc|-srv|-ic}\n" + "\n", + msg ? msg : "" + ); +} + +void die(const char *msg) +{ + usage(stderr, msg); + exit(1); +} + +void help() +{ + usage(stdout, NULL); + printf( + "Input filters - several filters may be combined\n" + " -H only match lines containing HTTP logs (ignore TCP)\n" + " -E only match lines without any error (no 5xx status)\n" + " -e only match lines with errors (status 5xx or negative)\n" + " -rt|-RT <time> only match response times larger|smaller than <time>\n" + " -Q|-QS only match queued requests (any queue|server queue)\n" + " -tcn|-TCN <code> only match requests with/without termination code <code>\n" + " -hs|-HS <[min][:][max]> only match requests with HTTP status codes within/not\n" + " within min..max. Any of them may be omitted. 
Exact\n" + " code is checked for if no ':' is specified.\n" + " -time <[min][:max]> only match requests recorded between timestamps.\n" + " Any of them may be omitted.\n" + "Modifiers\n" + " -v invert the input filtering condition\n" + " -q don't report errors/warnings\n" + " -m <lines> limit output to the first <lines> lines\n" + " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n" + " you can also use -n to start from earlier then field %d\n" + " -query preserve the query string for per-URL (-u*) statistics\n" + "\n" + "Output format - only one may be used at a time\n" + " -c only report the number of lines that would have been printed\n" + " -pct output connect and response times percentiles\n" + " -st output number of requests per HTTP status code\n" + " -cc output number of requests per cookie code (2 chars)\n" + " -tc output number of requests per termination code (2 chars)\n" + " -srv output statistics per server (time, requests, errors)\n" + " -ic output statistics per ip count (time, requests, errors)\n" + " -u* output statistics per URL (time, requests, errors)\n" + " Additional characters indicate the output sorting key :\n" + " -u : by URL, -uc : request count, -ue : error count\n" + " -ua : average response time, -ut : average total time\n" + " -uao, -uto: average times computed on valid ('OK') requests\n" + " -uba, -ubt: average bytes returned, total bytes returned\n" + " -hdr output captured header at the given <block>:<field>\n", + (int)SOURCE_FIELD, (int)SOURCE_FIELD + ); + exit(0); +} + + +/* return pointer to first char not part of current field starting at <p>. */ + +#if defined(__i386__) +/* this one is always faster on 32-bits */ +static inline const char *field_stop(const char *p) +{ + asm( + /* Look for spaces */ + "4: \n\t" + "inc %0 \n\t" + "cmpb $0x20, -1(%0) \n\t" + "ja 4b \n\t" + "jz 3f \n\t" + + /* we only get there for control chars 0..31. 
Leave if we find '\0' */ + "cmpb $0x0, -1(%0) \n\t" + "jnz 4b \n\t" + + /* return %0-1 = position of the last char we checked */ + "3: \n\t" + "dec %0 \n\t" + : "=r" (p) + : "0" (p) + ); + return p; +} +#else +const char *field_stop(const char *p) +{ + unsigned char c; + + while (1) { + c = *(p++); + if (c > ' ') + continue; + if (c == ' ' || c == 0) + break; + } + return p - 1; +} +#endif + +/* return non-zero if the argument contains at least one zero byte. See principle above. */ +static inline __attribute__((unused)) unsigned long long has_zero64(unsigned long long x) +{ + unsigned long long y; + + y = x - 0x0101010101010101ULL; /* generate a carry */ + y &= ~x; /* clear the bits that were already set */ + return y & 0x8080808080808080ULL; +} + +/* return field <field> (starting from 1) in string <p>. Only consider + * contiguous spaces (or tabs) as one delimiter. May return pointer to + * last char if field is not found. Equivalent to awk '{print $field}'. + */ +const char *field_start(const char *p, int field) +{ +#ifndef PREFER_ASM + unsigned char c; + while (1) { + /* skip spaces */ + while (1) { + c = *(p++); + if (!c) /* end of line */ + return p-1; + if (c == ' ') + continue; + /* other char => new field */ + break; + } + + /* start of field */ + field--; + if (!field) + return p-1; + + /* skip this field */ + while (1) { +#if defined(HA_UNALIGNED_LE64) + unsigned long long l = *(unsigned long long *)p; + if (!has_zero64(l)) { + l ^= 0x2020202020202020; + l = has_zero64(l); + if (!l) { + p += 8; + continue; + } + /* there is at least one space, find it and + * skip it now. The lowest byte in <l> with + * a 0x80 is the right one, but checking for + * it remains slower than testing each byte, + * probably due to the numerous short fields. 
+ */ + while (*(p++) != ' ') + ; + break; + } +#endif + c = *(p++); + if (c == '\0') + return p - 1; + if (c == ' ') + break; + } + } +#else + /* This version works optimally on i386 and x86_64 but the code above + * shows similar performance. However, depending on the version of GCC + * used, inlining rules change and it may have difficulties to make + * efficient use of this code at other locations and could result in + * worse performance (eg: gcc 4.4). You may want to experience. + */ + asm( + /* skip spaces */ + "1: \n\t" + "inc %0 \n\t" + "cmpb $0x20, -1(%0) \n\t" + "ja 2f \n\t" + "jz 1b \n\t" + + /* we only get there for control chars 0..31. Leave if we find '\0' */ + "cmpb $0x0, -1(%0) \n\t" + "jz 3f \n\t" + + /* start of field at [%0-1]. Check if we need to skip more fields */ + "2: \n\t" + "dec %1 \n\t" + "jz 3f \n\t" + + /* Look for spaces */ + "4: \n\t" + "inc %0 \n\t" + "cmpb $0x20, -1(%0) \n\t" + "jz 1b \n\t" + "ja 4b \n\t" + + /* we only get there for control chars 0..31. Leave if we find '\0' */ + "cmpb $0x0, -1(%0) \n\t" + "jnz 4b \n\t" + + /* return %0-1 = position of the last char we checked */ + "3: \n\t" + "dec %0 \n\t" + : "=r" (p) + : "r" (field), "0" (p) + ); + return p; +#endif +} + +/* keep only the <bits> higher bits of <i> */ +static inline unsigned int quantify_u32(unsigned int i, int bits) +{ + int high; + + if (!bits) + return 0; + + if (i) + high = fls_auto(i); // 1 to 32 + else + high = 0; + + if (high <= bits) + return i; + + return i & ~((1 << (high - bits)) - 1); +} + +/* keep only the <bits> higher bits of the absolute value of <i>, as well as + * its sign. */ +static inline int quantify(int i, int bits) +{ + if (i >= 0) + return quantify_u32(i, bits); + else + return -quantify_u32(-i, bits); +} + +/* Insert timer value <v> into tree <r>. A pre-allocated node must be passed + * in <alloc>. It may be NULL, in which case the function will allocate it + * itself. It will be reset to NULL once consumed. 
The caller is responsible + * for freeing the node once not used anymore. The node where the value was + * inserted is returned. + */ +struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v) +{ + struct timer *t = *alloc; + struct eb32_node *n; + + if (!t) { + t = calloc(sizeof(*t), 1); + if (unlikely(!t)) { + fprintf(stderr, "%s: not enough memory\n", __FUNCTION__); + exit(1); + } + } + t->node.key = quantify(v, QBITS); // keep only the higher QBITS bits + + n = eb32i_insert(r, &t->node); + if (n == &t->node) + t = NULL; /* node inserted, will malloc next time */ + + *alloc = t; + return container_of(n, struct timer, node); +} + +/* Insert value value <v> into tree <r>. A pre-allocated node must be passed + * in <alloc>. It may be NULL, in which case the function will allocate it + * itself. It will be reset to NULL once consumed. The caller is responsible + * for freeing the node once not used anymore. The node where the value was + * inserted is returned. + */ +struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v) +{ + struct timer *t = *alloc; + struct eb32_node *n; + + if (!t) { + t = calloc(sizeof(*t), 1); + if (unlikely(!t)) { + fprintf(stderr, "%s: not enough memory\n", __FUNCTION__); + exit(1); + } + } + t->node.key = v; + + n = eb32i_insert(r, &t->node); + if (n == &t->node) + t = NULL; /* node inserted, will malloc next time */ + + *alloc = t; + return container_of(n, struct timer, node); +} + +int str2ic(const char *s) +{ + int i = 0; + int j, k; + + if (*s != '-') { + /* positive number */ + while (1) { + j = (*s++) - '0'; + k = i * 10; + if ((unsigned)j > 9) + break; + i = k + j; + } + } else { + /* negative number */ + s++; + while (1) { + j = (*s++) - '0'; + k = i * 10; + if ((unsigned)j > 9) + break; + i = k - j; + } + } + + return i; +} + + +/* Convert "[04/Dec/2008:09:49:40.555]" to an integer equivalent to the time of + * the day in milliseconds. It returns -1 for all unparsable values. 
/* Read a run of decimal digits starting at <*pp> and store its value into
 * <*out>. <*pp> is left just after the character which ended the run. The
 * returned value is that character minus '0', i.e. (unsigned char)(0 - '0')
 * when the run was ended by the end of the string.
 */
static inline unsigned char convert_date_digits(const char **pp, unsigned int *out)
{
	const char *cur = *pp;
	unsigned int acc = 0;
	unsigned char digit;

	for (;;) {
		digit = *(cur++) - '0';
		if (digit > 9)
			break;
		acc = acc * 10 + digit;
	}

	*pp = cur;
	*out = acc;
	return digit;
}

/* Convert "[04/Dec/2008:09:49:40.555]" to the time of the day expressed in
 * milliseconds. Everything up to the first ':' is skipped, then the hours,
 * minutes and seconds are read, followed by the milliseconds when the seconds
 * are followed by a '.'. Returns -1 when the string ends before the seconds
 * (or the milliseconds, when present) were terminated by another character.
 */
int convert_date(const char *field)
{
	const unsigned char eos = (unsigned char)(0 - '0');
	unsigned int hour, min, sec;
	unsigned int msec = 0;
	unsigned char stop;
	const char *cur;

	/* skip the date */
	cur = strchr(field, ':');
	if (!cur)
		return -1;
	cur++;

	/* hour + ':' */
	if (convert_date_digits(&cur, &hour) == eos)
		return -1;

	/* minute + ':' */
	if (convert_date_digits(&cur, &min) == eos)
		return -1;

	/* second + '.' or ']' */
	stop = convert_date_digits(&cur, &sec);
	if (stop == eos)
		return -1;

	/* if there's a '.', we have milliseconds, followed by ']' */
	if (stop == (unsigned char)('.' - '0') &&
	    convert_date_digits(&cur, &msec) == eos)
		return -1;

	return (((hour * 60) + min) * 60 + sec) * 1000 + msec;
}

/* Convert "[04/Dec/2008:09:49:40.555]" to an unix timestamp.
 * It returns -1 for all unparsable values. The parser
 * looks ugly but gcc emits far better code that way.
 */
+ */ +int convert_date_to_timestamp(const char *field) +{ + unsigned int d, mo, y, h, m, s; + unsigned char c; + const char *e; + time_t rawtime; + static struct tm * timeinfo; + static int last_res; + + d = mo = y = h = m = s = 0; + e = field; + + e++; // remove '[' + + /* day + '/' */ + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + d = d * 10 + c; + if (c == (unsigned char)(0 - '0')) + goto out_err; + } + + /* month + '/' */ + c = *(e++); + if (c =='F') { + mo = 2; + e = e+3; + } else if (c =='S') { + mo = 9; + e = e+3; + } else if (c =='O') { + mo = 10; + e = e+3; + } else if (c =='N') { + mo = 11; + e = e+3; + } else if (c == 'D') { + mo = 12; + e = e+3; + } else if (c == 'A') { + c = *(e++); + if (c == 'p') { + mo = 4; + e = e+2; + } else if (c == 'u') { + mo = 8; + e = e+2; + } else + goto out_err; + } else if (c == 'J') { + c = *(e++); + if (c == 'a') { + mo = 1; + e = e+2; + } else if (c == 'u') { + c = *(e++); + if (c == 'n') { + mo = 6; + e = e+1; + } else if (c == 'l') { + mo = 7; + e++; + } + } else + goto out_err; + } else if (c == 'M') { + e++; + c = *(e++); + if (c == 'r') { + mo = 3; + e = e+1; + } else if (c == 'y') { + mo = 5; + e = e+1; + } else + goto out_err; + } else + goto out_err; + + /* year + ':' */ + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + y = y * 10 + c; + if (c == (unsigned char)(0 - '0')) + goto out_err; + } + + /* hour + ':' */ + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + h = h * 10 + c; + } + if (c == (unsigned char)(0 - '0')) + goto out_err; + + /* minute + ':' */ + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + m = m * 10 + c; + } + if (c == (unsigned char)(0 - '0')) + goto out_err; + + /* second + '.' 
or ']' */ + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + s = s * 10 + c; + } + + if (likely(timeinfo)) { + if ((unsigned)timeinfo->tm_min == m && + (unsigned)timeinfo->tm_hour == h && + (unsigned)timeinfo->tm_mday == d && + (unsigned)timeinfo->tm_mon == mo - 1 && + (unsigned)timeinfo->tm_year == y - 1900) + return last_res + s; + } + else { + time(&rawtime); + timeinfo = localtime(&rawtime); + } + + timeinfo->tm_sec = 0; + timeinfo->tm_min = m; + timeinfo->tm_hour = h; + timeinfo->tm_mday = d; + timeinfo->tm_mon = mo - 1; + timeinfo->tm_year = y - 1900; + last_res = mktime(timeinfo); + + return last_res + s; + out_err: + return -1; +} + +void truncated_line(int linenum, const char *line) +{ + if (!(filter & FILT_QUIET)) + fprintf(stderr, "Truncated line %d: %s\n", linenum, line); +} + +int main(int argc, char **argv) +{ + const char *b, *p, *time_field, *accept_field, *source_field; + const char *filter_term_code_name = NULL; + const char *output_file = NULL; + int f, last; + struct timer *t = NULL; + struct eb32_node *n; + struct url_stat *ustat = NULL; + int val, test; + unsigned int uval; + unsigned int filter_acc_delay = 0, filter_acc_count = 0; + int filter_time_resp = 0; + int filt_http_status_low = 0, filt_http_status_high = 0; + unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0; + unsigned int filt2_capture_block = 0, filt2_capture_field = 0; + int skip_fields = 1; + + void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL; + + argc--; argv++; + while (argc > 0) { + if (*argv[0] != '-') + break; + + if (strcmp(argv[0], "-ad") == 0) { + if (argc < 2) die("missing option for -ad\n"); + argc--; argv++; + filter |= FILT_ACC_DELAY; + filter_acc_delay = atol(*argv); + } + else if (strcmp(argv[0], "-ac") == 0) { + if (argc < 2) die("missing option for -ac\n"); + argc--; argv++; + filter |= FILT_ACC_COUNT; + filter_acc_count = atol(*argv); + } + else if (strcmp(argv[0], "-rt") == 0) { + if (argc < 
2) die("missing option for -rt\n"); + argc--; argv++; + filter |= FILT_TIME_RESP; + filter_time_resp = atol(*argv); + } + else if (strcmp(argv[0], "-RT") == 0) { + if (argc < 2) die("missing option for -RT\n"); + argc--; argv++; + filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP; + filter_time_resp = atol(*argv); + } + else if (strcmp(argv[0], "-s") == 0) { + if (argc < 2) die("missing option for -s\n"); + argc--; argv++; + skip_fields = atol(*argv); + } + else if (strcmp(argv[0], "-m") == 0) { + if (argc < 2) die("missing option for -m\n"); + argc--; argv++; + lines_max = atol(*argv); + } + else if (strcmp(argv[0], "-e") == 0) + filter |= FILT_ERRORS_ONLY; + else if (strcmp(argv[0], "-E") == 0) + filter |= FILT_ERRORS_ONLY | FILT_INVERT_ERRORS; + else if (strcmp(argv[0], "-H") == 0) + filter |= FILT_HTTP_ONLY; + else if (strcmp(argv[0], "-Q") == 0) + filter |= FILT_QUEUE_ONLY; + else if (strcmp(argv[0], "-QS") == 0) + filter |= FILT_QUEUE_SRV_ONLY; + else if (strcmp(argv[0], "-c") == 0) + filter |= FILT_COUNT_ONLY; + else if (strcmp(argv[0], "-q") == 0) + filter |= FILT_QUIET; + else if (strcmp(argv[0], "-v") == 0) + filter_invert = !filter_invert; + else if (strcmp(argv[0], "-gt") == 0) + filter |= FILT_GRAPH_TIMERS; + else if (strcmp(argv[0], "-pct") == 0) + filter |= FILT_PERCENTILE; + else if (strcmp(argv[0], "-st") == 0) + filter |= FILT_COUNT_STATUS; + else if (strcmp(argv[0], "-srv") == 0) + filter |= FILT_COUNT_SRV_STATUS; + else if (strcmp(argv[0], "-cc") == 0) + filter |= FILT_COUNT_COOK_CODES; + else if (strcmp(argv[0], "-tc") == 0) + filter |= FILT_COUNT_TERM_CODES; + else if (strcmp(argv[0], "-tcn") == 0) { + if (argc < 2) die("missing option for -tcn\n"); + argc--; argv++; + filter |= FILT_TERM_CODE_NAME; + filter_term_code_name = *argv; + } + else if (strcmp(argv[0], "-TCN") == 0) { + if (argc < 2) die("missing option for -TCN\n"); + argc--; argv++; + filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME; + filter_term_code_name = *argv; + } + 
else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) { + char *sep, *str; + + if (argc < 2) die("missing option for -hs/-HS ([min]:[max])\n"); + filter |= FILT_HTTP_STATUS; + if (argv[0][1] == 'H') + filter |= FILT_INVERT_HTTP_STATUS; + + argc--; argv++; + str = *argv; + sep = strchr(str, ':'); /* [min]:[max] */ + if (!sep) + sep = str; /* make max point to min */ + else + *sep++ = 0; + filt_http_status_low = *str ? atol(str) : 0; + filt_http_status_high = *sep ? atol(sep) : 65535; + } + else if (strcmp(argv[0], "-time") == 0) { + char *sep, *str; + + if (argc < 2) die("missing option for -time ([min]:[max])\n"); + filter2 |= FILT2_TIMESTAMP; + + argc--; argv++; + str = *argv; + sep = strchr(str, ':'); /* [min]:[max] */ + filt2_timestamp_low = *str ? atol(str) : 0; + if (!sep) + filt2_timestamp_high = 0xFFFFFFFF; + else + filt2_timestamp_high = atol(++sep); + } + else if (strcmp(argv[0], "-u") == 0) + filter |= FILT_COUNT_URL_ONLY; + else if (strcmp(argv[0], "-uc") == 0) + filter |= FILT_COUNT_URL_COUNT; + else if (strcmp(argv[0], "-ue") == 0) + filter |= FILT_COUNT_URL_ERR; + else if (strcmp(argv[0], "-ua") == 0) + filter |= FILT_COUNT_URL_TAVG; + else if (strcmp(argv[0], "-ut") == 0) + filter |= FILT_COUNT_URL_TTOT; + else if (strcmp(argv[0], "-uao") == 0) + filter |= FILT_COUNT_URL_TAVGO; + else if (strcmp(argv[0], "-uto") == 0) + filter |= FILT_COUNT_URL_TTOTO; + else if (strcmp(argv[0], "-uba") == 0) + filter |= FILT_COUNT_URL_BAVG; + else if (strcmp(argv[0], "-ubt") == 0) + filter |= FILT_COUNT_URL_BTOT; + else if (strcmp(argv[0], "-query") == 0) + filter2 |= FILT2_PRESERVE_QUERY; + else if (strcmp(argv[0], "-ic") == 0) + filter |= FILT_COUNT_IP_COUNT; + else if (strcmp(argv[0], "-hdr") == 0) { + char *sep, *str; + + if (argc < 2) die("missing option for -hdr (<block>:<field>)\n"); + filter2 |= FILT2_EXTRACT_CAPTURE; + + argc--; argv++; + str = *argv; + sep = strchr(str, ':'); + if (!sep) + die("missing colon in -hdr (<block>:<field>)\n"); + 
else + *sep++ = 0; + + filt2_capture_block = *str ? atol(str) : 1; + filt2_capture_field = *sep ? atol(sep) : 1; + + if (filt2_capture_block < 1 || filt2_capture_field < 1) + die("block and field must be at least 1 for -hdr (<block>:<field>)\n"); + } + else if (strcmp(argv[0], "-o") == 0) { + if (output_file) + die("Fatal: output file name already specified.\n"); + if (argc < 2) + die("Fatal: missing output file name.\n"); + output_file = argv[1]; + } + else if (strcmp(argv[0], "-h") == 0 || strcmp(argv[0], "--help") == 0) + help(); + argc--; + argv++; + } + + if (!filter && !filter2) + die("No action specified.\n"); + + if (filter & FILT_ACC_COUNT && !filter_acc_count) + filter_acc_count=1; + + if (filter & FILT_ACC_DELAY && !filter_acc_delay) + filter_acc_delay = 1; + + + /* by default, all lines are printed */ + line_filter = filter_output_line; + if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) + line_filter = filter_accept_holes; + else if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE)) + line_filter = filter_graphs; + else if (filter & FILT_COUNT_STATUS) + line_filter = filter_count_status; + else if (filter & FILT_COUNT_COOK_CODES) + line_filter = filter_count_cook_codes; + else if (filter & FILT_COUNT_TERM_CODES) + line_filter = filter_count_term_codes; + else if (filter & FILT_COUNT_SRV_STATUS) + line_filter = filter_count_srv_status; + else if (filter & FILT_COUNT_URL_ANY) + line_filter = filter_count_url; + else if (filter & FILT_COUNT_ONLY) + line_filter = NULL; + +#if defined(POSIX_FADV_SEQUENTIAL) + /* around 20% performance improvement is observed on Linux with this + * on cold-cache. Surprisingly, WILLNEED is less performant. Don't + * use NOREUSE as it flushes the cache and prevents easy data + * manipulation on logs! 
+ */ + posix_fadvise(0, 0, 0, POSIX_FADV_SEQUENTIAL); +#endif + + if (!line_filter && /* FILT_COUNT_ONLY ( see above), and no input filter (see below) */ + !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME)) && + !(filter2 & (FILT2_TIMESTAMP))) { + /* read the whole file at once first, ignore it if inverted output */ + if (!filter_invert) + while ((lines_max < 0 || lines_out < lines_max) && fgets2(stdin) != NULL) + lines_out++; + + goto skip_filters; + } + + while ((line = fgets2(stdin)) != NULL) { + linenum++; + time_field = NULL; accept_field = NULL; + source_field = NULL; + + test = 1; + + /* for any line we process, we first ensure that there is a field + * looking like the accept date field (beginning with a '['). + */ + if (filter & FILT_COUNT_IP_COUNT) { + /* we need the IP first */ + source_field = field_start(line, SOURCE_FIELD + skip_fields); + accept_field = field_start(source_field, ACCEPT_FIELD - SOURCE_FIELD + 1); + } + else + accept_field = field_start(line, ACCEPT_FIELD + skip_fields); + + if (unlikely(*accept_field != '[')) { + parse_err++; + continue; + } + + /* the day of month field is begin 01 and 31 */ + if (accept_field[1] < '0' || accept_field[1] > '3') { + parse_err++; + continue; + } + + if (filter2 & FILT2_TIMESTAMP) { + uval = convert_date_to_timestamp(accept_field); + test &= (uval>=filt2_timestamp_low && uval<=filt2_timestamp_high) ; + } + + if (filter & FILT_HTTP_ONLY) { + /* only report lines with at least 4 timers */ + if (!time_field) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); + if (unlikely(!*time_field)) { + truncated_line(linenum, line); + continue; + } + } + + field_stop(time_field + 1); + /* we have field TIME_FIELD in [time_field]..[e-1] */ + p = time_field; + f = 0; + while (!SEP(*p)) { + if (++f == 4) + break; + SKIP_CHAR(p, '/'); + } + test &= (f >= 4); + } + + if (filter & FILT_TIME_RESP) { + int tps; + + /* 
only report lines with response times larger than filter_time_resp */ + if (!time_field) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); + if (unlikely(!*time_field)) { + truncated_line(linenum, line); + continue; + } + } + + field_stop(time_field + 1); + /* we have field TIME_FIELD in [time_field]..[e-1], let's check only the response time */ + + p = time_field; + f = 0; + while (!SEP(*p)) { + tps = str2ic(p); + if (tps < 0) { + tps = -1; + } + if (++f == 4) + break; + SKIP_CHAR(p, '/'); + } + + if (unlikely(f < 4)) { + parse_err++; + continue; + } + + test &= (tps >= filter_time_resp) ^ !!(filter & FILT_INVERT_TIME_RESP); + } + + if (filter & (FILT_ERRORS_ONLY | FILT_HTTP_STATUS)) { + /* Check both error codes (-1, 5xx) and status code ranges */ + if (time_field) + b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1); + + if (unlikely(!*b)) { + truncated_line(linenum, line); + continue; + } + + val = str2ic(b); + if (filter & FILT_ERRORS_ONLY) + test &= (val < 0 || (val >= 500 && val <= 599)) ^ !!(filter & FILT_INVERT_ERRORS); + + if (filter & FILT_HTTP_STATUS) + test &= (val >= filt_http_status_low && val <= filt_http_status_high) ^ !!(filter & FILT_INVERT_HTTP_STATUS); + } + + if (filter & (FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY)) { + /* Check if the server's queue is non-nul */ + if (time_field) + b = field_start(time_field, QUEUE_LEN_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, QUEUE_LEN_FIELD - ACCEPT_FIELD + 1); + + if (unlikely(!*b)) { + truncated_line(linenum, line); + continue; + } + + if (*b == '0') { + if (filter & FILT_QUEUE_SRV_ONLY) { + test = 0; + } + else { + do { + b++; + if (*b == '/') { + b++; + break; + } + } while (*b); + test &= ((unsigned char)(*b - '1') < 9); + } + } + } + + if (filter & FILT_TERM_CODE_NAME) { + /* only report corresponding termination code name */ + if (time_field) + b = field_start(time_field, 
TERM_CODES_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1); + + if (unlikely(!*b)) { + truncated_line(linenum, line); + continue; + } + + test &= (b[0] == filter_term_code_name[0] && b[1] == filter_term_code_name[1]) ^ !!(filter & FILT_INVERT_TERM_CODE_NAME); + } + + + test ^= filter_invert; + if (!test) + continue; + + /************** here we process inputs *******************/ + + if (line_filter) { + if (filter & FILT_COUNT_IP_COUNT) + filter_count_ip(source_field, accept_field, time_field, &t); + else if (filter2 & FILT2_EXTRACT_CAPTURE) + filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field); + else + line_filter(accept_field, time_field, &t); + } + else + lines_out++; /* FILT_COUNT_ONLY was used, so we're just counting lines */ + if (lines_max >= 0 && lines_out >= lines_max) + break; + } + + skip_filters: + /***************************************************** + * Here we've finished reading all input. Depending on the + * filters, we may still have some analysis to run on the + * collected data and to output data in a new format. + *************************************************** */ + + if (t) + free(t); + + if (filter & FILT_COUNT_ONLY) { + printf("%d\n", lines_out); + exit(0); + } + + if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) { + /* sort and count all timers. 
Output will look like this : + * <accept_date> <delta_ms from previous one> <nb entries> + */ + n = eb32_first(&timers[0]); + + if (n) + last = n->key; + while (n) { + unsigned int d, h, m, s, ms; + + t = container_of(n, struct timer, node); + h = n->key; + d = h - last; + last = h; + + if (d >= filter_acc_delay && t->count >= filter_acc_count) { + ms = h % 1000; h = h / 1000; + s = h % 60; h = h / 60; + m = h % 60; h = h / 60; + printf("%02u:%02u:%02u.%03u %d %u %u\n", h, m, s, ms, last, d, t->count); + lines_out++; + if (lines_max >= 0 && lines_out >= lines_max) + break; + } + n = eb32_next(n); + } + } + else if (filter & FILT_GRAPH_TIMERS) { + /* sort all timers */ + for (f = 0; f < 5; f++) { + struct eb32_node *n; + + n = eb32_first(&timers[f]); + while (n) { + int i; + double d; + int val; + + t = container_of(n, struct timer, node); + last = n->key; + val = t->count; + + i = (last < 0) ? -last : last; + i = fls_auto(i) - QBITS; + + if (i > 0) + d = val / (double)(1 << i); + else + d = val; + + if (d > 0.0) + printf("%d %d %f\n", f, last, d+1.0); + + n = eb32_next(n); + } + } + } + else if (filter & FILT_PERCENTILE) { + /* report timers by percentile : + * <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time> + * We don't count errs. 
+ */ + struct eb32_node *n[5]; + unsigned long cum[5]; + double step; + + if (!lines_out) + goto empty; + + for (f = 1; f < 5; f++) { + n[f] = eb32_first(&timers[f]); + cum[f] = container_of(n[f], struct timer, node)->count; + } + + for (step = 1; step <= 1000;) { + unsigned int thres = lines_out * (step / 1000.0); + + printf("%3.1f %u ", step/10.0, thres); + for (f = 1; f < 5; f++) { + struct eb32_node *next; + while (cum[f] < thres) { + /* need to find other keys */ + next = eb32_next(n[f]); + if (!next) + break; + n[f] = next; + cum[f] += container_of(next, struct timer, node)->count; + } + + /* value still within $step % of total */ + printf("%d ", n[f]->key); + } + putchar('\n'); + if (step >= 100 && step < 900) + step += 50; // jump 5% by 5% between those steps. + else if (step >= 20 && step < 980) + step += 10; + else + step += 1; + } + } + else if (filter & FILT_COUNT_STATUS) { + /* output all statuses in the form of <status> <occurrences> */ + n = eb32_first(&timers[0]); + while (n) { + t = container_of(n, struct timer, node); + printf("%d %u\n", n->key, t->count); + lines_out++; + if (lines_max >= 0 && lines_out >= lines_max) + break; + n = eb32_next(n); + } + } + else if (filter & FILT_COUNT_SRV_STATUS) { + struct ebmb_node *srv_node; + struct srv_st *srv; + + printf("#srv_name 1xx 2xx 3xx 4xx 5xx other tot_req req_ok pct_ok avg_ct avg_rt\n"); + + srv_node = ebmb_first(&timers[0]); + while (srv_node) { + int tot_rq; + + srv = container_of(srv_node, struct srv_st, node); + + tot_rq = 0; + for (f = 0; f <= 5; f++) + tot_rq += srv->st_cnt[f]; + + printf("%s %u %u %u %u %u %u %d %u %.1f %d %d\n", + srv_node->key, srv->st_cnt[1], srv->st_cnt[2], + srv->st_cnt[3], srv->st_cnt[4], srv->st_cnt[5], srv->st_cnt[0], + tot_rq, + srv->nb_ok, (double)srv->nb_ok * 100.0 / (tot_rq?tot_rq:1), + (int)(srv->cum_ct / (srv->nb_ct?srv->nb_ct:1)), (int)(srv->cum_rt / (srv->nb_rt?srv->nb_rt:1))); + srv_node = ebmb_next(srv_node); + lines_out++; + if (lines_max >= 0 && lines_out 
>= lines_max) + break; + } + } + else if (filter & (FILT_COUNT_TERM_CODES|FILT_COUNT_COOK_CODES)) { + /* output all statuses in the form of <code> <occurrences> */ + n = eb32_first(&timers[0]); + while (n) { + t = container_of(n, struct timer, node); + printf("%c%c %u\n", (n->key >> 8), (n->key) & 255, t->count); + lines_out++; + if (lines_max >= 0 && lines_out >= lines_max) + break; + n = eb32_next(n); + } + } + else if (filter & (FILT_COUNT_URL_ANY|FILT_COUNT_IP_COUNT)) { + struct eb_node *node, *next; + + if (!(filter & FILT_COUNT_URL_ONLY)) { + /* we have to sort on another criterion. We'll use timers[1] for the + * destination tree. + */ + + timers[1] = EB_ROOT; /* reconfigure to accept duplicates */ + for (node = eb_first(&timers[0]); node; node = next) { + next = eb_next(node); + eb_delete(node); + + ustat = container_of(node, struct url_stat, node.url.node); + + if (filter & (FILT_COUNT_URL_COUNT|FILT_COUNT_IP_COUNT)) + ustat->node.val.key = ustat->nb_req; + else if (filter & FILT_COUNT_URL_ERR) + ustat->node.val.key = ustat->nb_err; + else if (filter & FILT_COUNT_URL_TTOT) + ustat->node.val.key = ustat->total_time; + else if (filter & FILT_COUNT_URL_TAVG) + ustat->node.val.key = ustat->nb_req ? ustat->total_time / ustat->nb_req : 0; + else if (filter & FILT_COUNT_URL_TTOTO) + ustat->node.val.key = ustat->total_time_ok; + else if (filter & FILT_COUNT_URL_TAVGO) + ustat->node.val.key = (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0; + else if (filter & FILT_COUNT_URL_BAVG) + ustat->node.val.key = ustat->nb_req ? 
ustat->total_bytes_sent / ustat->nb_req : 0; + else if (filter & FILT_COUNT_URL_BTOT) + ustat->node.val.key = ustat->total_bytes_sent; + else + ustat->node.val.key = 0; + + eb64_insert(&timers[1], &ustat->node.val); + } + /* switch trees */ + timers[0] = timers[1]; + } + + if (FILT_COUNT_IP_COUNT) + printf("#req err ttot tavg oktot okavg bavg btot src\n"); + else + printf("#req err ttot tavg oktot okavg bavg btot url\n"); + + /* scan the tree in its reverse sorting order */ + node = eb_last(&timers[0]); + while (node) { + ustat = container_of(node, struct url_stat, node.url.node); + printf("%u %u %llu %llu %llu %llu %llu %llu %s\n", + ustat->nb_req, + ustat->nb_err, + ustat->total_time, + ustat->nb_req ? ustat->total_time / ustat->nb_req : 0, + ustat->total_time_ok, + (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0, + ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0, + ustat->total_bytes_sent, + ustat->url); + + node = eb_prev(node); + lines_out++; + if (lines_max >= 0 && lines_out >= lines_max) + break; + } + } + + empty: + if (!(filter & FILT_QUIET)) + fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n", + linenum, lines_out, parse_err); + exit(0); +} + +void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr) +{ + puts(line); + lines_out++; +} + +void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field) +{ + const char *e, *f; + + if (time_field) + e = field_start(time_field, METH_FIELD - TIME_FIELD + 1); + else + e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1); + + while (block-- > 0) { + /* Scan until the start of a capture block ('{') until the URL ('"'). */ + while ((*e != '"' && *e != '{') && *e) { + /* Note: some syslog servers escape quotes ! 
*/ + if (*e == '\\' && e[1] == '"') + break; + + e = field_start(e, 2); + } + + if (unlikely(!*e)) { + truncated_line(linenum, line); + return; + } + + /* We reached the URL, no more captures will follow. */ + if (*e != '{') { + puts(""); + lines_out++; + return; + } + + /* e points the the opening brace of the capture block. */ + + e++; + } + + /* We are in the first field of the selected capture block. */ + + while (--field > 0) { + while ((*e != '|' && *e != '}') && *e) + e++; + + if (unlikely(!*e)) { + truncated_line(linenum, line); + return; + } + + if (*e != '|') { + puts(""); + lines_out++; + return; + } + + /* e points to the pipe. */ + + e++; + } + + f = e; + + while ((*f != '|' && *f != '}') && *f) + f++; + + if (unlikely(!*f)) { + truncated_line(linenum, line); + return; + } + + fwrite(e, f - e, 1, stdout); + putchar('\n'); + lines_out++; +} + +void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct timer *t2; + int val; + + val = convert_date(accept_field); + if (unlikely(val < 0)) { + truncated_line(linenum, line); + return; + } + + t2 = insert_value(&timers[0], tptr, val); + t2->count++; + return; +} + +void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct timer *t2; + const char *b; + int val; + + if (time_field) + b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1); + + if (unlikely(!*b)) { + truncated_line(linenum, line); + return; + } + + val = str2ic(b); + + t2 = insert_value(&timers[0], tptr, val); + t2->count++; +} + +void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct timer *t2; + const char *b; + int val; + + if (time_field) + b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1); + + if (unlikely(!*b)) { + 
truncated_line(linenum, line); + return; + } + + val = 256 * b[2] + b[3]; + + t2 = insert_value(&timers[0], tptr, val); + t2->count++; +} + +void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct timer *t2; + const char *b; + int val; + + if (time_field) + b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1); + + if (unlikely(!*b)) { + truncated_line(linenum, line); + return; + } + + val = 256 * b[0] + b[1]; + + t2 = insert_value(&timers[0], tptr, val); + t2->count++; +} + +void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr) +{ + const char *b, *e, *p; + int f, err, array[5]; + struct ebmb_node *srv_node; + struct srv_st *srv; + int val; + + /* the server field is before the status field, so let's + * parse them in the proper order. + */ + b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1); + if (unlikely(!*b)) { + truncated_line(linenum, line); + return; + } + + e = field_stop(b + 1); /* we have the server name in [b]..[e-1] */ + + /* the chance that a server name already exists is extremely high, + * so let's perform a normal lookup first. 
+ */ + srv_node = ebst_lookup_len(&timers[0], b, e - b); + srv = container_of(srv_node, struct srv_st, node); + + if (!srv_node) { + /* server not yet in the tree, let's create it */ + srv = (void *)calloc(1, sizeof(struct srv_st) + e - b + 1); + srv_node = &srv->node; + memcpy(&srv_node->key, b, e - b); + srv_node->key[e - b] = '\0'; + ebst_insert(&timers[0], srv_node); + } + + /* let's collect the connect and response times */ + if (!time_field) { + time_field = field_start(e, TIME_FIELD - SERVER_FIELD); + if (unlikely(!*time_field)) { + truncated_line(linenum, line); + return; + } + } + + e = field_stop(time_field + 1); + /* we have field TIME_FIELD in [time_field]..[e-1] */ + + p = time_field; + err = 0; + f = 0; + while (!SEP(*p)) { + array[f] = str2ic(p); + if (array[f] < 0) { + array[f] = -1; + err = 1; + } + if (++f == 5) + break; + SKIP_CHAR(p, '/'); + } + + if (unlikely(f < 5)){ + parse_err++; + return; + } + + /* OK we have our timers in array[2,3] */ + if (!err) + srv->nb_ok++; + + if (array[2] >= 0) { + srv->cum_ct += array[2]; + srv->nb_ct++; + } + + if (array[3] >= 0) { + srv->cum_rt += array[3]; + srv->nb_rt++; + } + + /* we're interested in the 5 HTTP status classes (1xx ... 5xx), and + * the invalid ones which will be reported as 0. 
+ */ + b = field_start(e, STATUS_FIELD - TIME_FIELD); + if (unlikely(!*b)) { + truncated_line(linenum, line); + return; + } + + val = 0; + if (*b >= '1' && *b <= '5') + val = *b - '0'; + + srv->st_cnt[val]++; +} + +void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct url_stat *ustat = NULL; + struct ebpt_node *ebpt_old; + const char *b, *e; + int f, err, array[5]; + int val; + + /* let's collect the response time */ + if (!time_field) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line + if (unlikely(!*time_field)) { + truncated_line(linenum, line); + return; + } + } + + /* we have the field TIME_FIELD starting at <time_field>. We'll + * parse the 5 timers to detect errors, it takes avg 55 ns per line. + */ + e = time_field; err = 0; f = 0; + while (!SEP(*e)) { + array[f] = str2ic(e); + if (array[f] < 0) { + array[f] = -1; + err = 1; + } + if (++f == 5) + break; + SKIP_CHAR(e, '/'); + } + if (f < 5) { + parse_err++; + return; + } + + /* OK we have our timers in array[3], and err is >0 if at + * least one -1 was seen. <e> points to the first char of + * the last timer. Let's prepare a new node with that. + */ + if (unlikely(!ustat)) + ustat = calloc(1, sizeof(*ustat)); + + ustat->nb_err = err; + ustat->nb_req = 1; + + /* use array[4] = total time in case of error */ + ustat->total_time = (array[3] >= 0) ? array[3] : array[4]; + ustat->total_time_ok = (array[3] >= 0) ? array[3] : 0; + + e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1); + val = str2ic(e); + ustat->total_bytes_sent = val; + + /* the line may be truncated because of a bad request or anything like this, + * without a method. Also, if it does not begin with an quote, let's skip to + * the next field because it's a capture. Let's fall back to the "method" itself + * if there's nothing else. 
+ */ + e = field_start(e, METH_FIELD - BYTES_SENT_FIELD + 1); + while (*e != '"' && *e) { + /* Note: some syslog servers escape quotes ! */ + if (*e == '\\' && e[1] == '"') + break; + e = field_start(e, 2); + } + + if (unlikely(!*e)) { + truncated_line(linenum, line); + free(ustat); + return; + } + + b = field_start(e, URL_FIELD - METH_FIELD + 1); // avg 40 ns per line + if (!*b) + b = e; + + /* stop at end of field or first ';' or '?', takes avg 64 ns per line */ + e = b; + do { + if (*e == ' '|| + (!(filter2 & FILT2_PRESERVE_QUERY) && (*e == '?' || *e == ';'))) { + *(char *)e = 0; + break; + } + e++; + } while (*e); + + /* now instead of copying the URL for a simple lookup, we'll link + * to it from the node we're trying to insert. If it returns a + * different value, it was already there. Otherwise we just have + * to dynamically realloc an entry using strdup(). + */ + ustat->node.url.key = (char *)b; + ebpt_old = ebis_insert(&timers[0], &ustat->node.url); + + if (ebpt_old != &ustat->node.url) { + struct url_stat *ustat_old; + /* node was already there, let's update previous one */ + ustat_old = container_of(ebpt_old, struct url_stat, node.url); + ustat_old->nb_req ++; + ustat_old->nb_err += ustat->nb_err; + ustat_old->total_time += ustat->total_time; + ustat_old->total_time_ok += ustat->total_time_ok; + ustat_old->total_bytes_sent += ustat->total_bytes_sent; + } else { + ustat->url = ustat->node.url.key = strdup(ustat->node.url.key); + ustat = NULL; /* node was used */ + } +} + +void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct url_stat *ustat = NULL; + struct ebpt_node *ebpt_old; + const char *b, *e; + int f, err, array[5]; + int val; + + /* let's collect the response time */ + if (!time_field) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line + if (unlikely(!*time_field)) { + truncated_line(linenum, line); + return; + } + } + + /* we 
have the field TIME_FIELD starting at <time_field>. We'll + * parse the 5 timers to detect errors, it takes avg 55 ns per line. + */ + e = time_field; err = 0; f = 0; + while (!SEP(*e)) { + if (f == 0 || f == 4) { + array[f] = str2ic(e); + if (array[f] < 0) { + array[f] = -1; + err = 1; + } + } + if (++f == 5) + break; + SKIP_CHAR(e, '/'); + } + if (f < 5) { + parse_err++; + return; + } + + /* OK we have our timers in array[0], and err is >0 if at + * least one -1 was seen. <e> points to the first char of + * the last timer. Let's prepare a new node with that. + */ + if (unlikely(!ustat)) + ustat = calloc(1, sizeof(*ustat)); + + ustat->nb_err = err; + ustat->nb_req = 1; + + /* use array[4] = total time in case of error */ + ustat->total_time = (array[0] >= 0) ? array[0] : array[4]; + ustat->total_time_ok = (array[0] >= 0) ? array[0] : 0; + + e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1); + val = str2ic(e); + ustat->total_bytes_sent = val; + + /* the source might be IPv4 or IPv6, so we always strip the port by + * removing the last colon. + */ + b = source_field; + e = field_stop(b + 1); + while (e > b && e[-1] != ':') + e--; + *(char *)(e - 1) = '\0'; + + /* now instead of copying the src for a simple lookup, we'll link + * to it from the node we're trying to insert. If it returns a + * different value, it was already there. Otherwise we just have + * to dynamically realloc an entry using strdup(). We're using the + * <url> field of the node to store the source address. 
+ */ + ustat->node.url.key = (char *)b; + ebpt_old = ebis_insert(&timers[0], &ustat->node.url); + + if (ebpt_old != &ustat->node.url) { + struct url_stat *ustat_old; + /* node was already there, let's update previous one */ + ustat_old = container_of(ebpt_old, struct url_stat, node.url); + ustat_old->nb_req ++; + ustat_old->nb_err += ustat->nb_err; + ustat_old->total_time += ustat->total_time; + ustat_old->total_time_ok += ustat->total_time_ok; + ustat_old->total_bytes_sent += ustat->total_bytes_sent; + } else { + ustat->url = ustat->node.url.key = strdup(ustat->node.url.key); + ustat = NULL; /* node was used */ + } +} + +void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr) +{ + struct timer *t2; + const char *p; + int f, err, array[5]; + + if (!time_field) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); + if (unlikely(!*time_field)) { + truncated_line(linenum, line); + return; + } + } + + field_stop(time_field + 1); + /* we have field TIME_FIELD in [time_field]..[e-1] */ + + p = time_field; + err = 0; + f = 0; + while (!SEP(*p)) { + array[f] = str2ic(p); + if (array[f] < 0) { + array[f] = -1; + err = 1; + } + if (++f == 5) + break; + SKIP_CHAR(p, '/'); + } + + if (unlikely(f < 5)) { + parse_err++; + return; + } + + /* if we find at least one negative time, we count one error + * with a time equal to the total session time. This will + * emphasize quantum timing effects associated to known + * timeouts. Note that on some buggy machines, it is possible + * that the total time is negative, hence the reason to reset + * it. 
+ */ + + if (filter & FILT_GRAPH_TIMERS) { + if (err) { + if (array[4] < 0) + array[4] = -1; + t2 = insert_timer(&timers[0], tptr, array[4]); // total time + t2->count++; + } else { + int v; + + t2 = insert_timer(&timers[1], tptr, array[0]); t2->count++; // req + t2 = insert_timer(&timers[2], tptr, array[2]); t2->count++; // conn + t2 = insert_timer(&timers[3], tptr, array[3]); t2->count++; // resp + + v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time + if (v < 0 && !(filter & FILT_QUIET)) + fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n", + line, array[0], array[1], array[2], array[3], array[4], v); + t2 = insert_timer(&timers[4], tptr, v); t2->count++; + lines_out++; + } + } else { /* percentile */ + if (err) { + if (array[4] < 0) + array[4] = -1; + t2 = insert_value(&timers[0], tptr, array[4]); // total time + t2->count++; + } else { + int v; + + t2 = insert_value(&timers[1], tptr, array[0]); t2->count++; // req + t2 = insert_value(&timers[2], tptr, array[2]); t2->count++; // conn + t2 = insert_value(&timers[3], tptr, array[3]); t2->count++; // resp + + v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time + if (v < 0 && !(filter & FILT_QUIET)) + fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n", + line, array[0], array[1], array[2], array[3], array[4], v); + t2 = insert_value(&timers[4], tptr, v); t2->count++; + lines_out++; + } + } +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ |