diff options
Diffstat (limited to '')
-rw-r--r-- | tools/src/qsgeo.c | 607 |
1 files changed, 607 insertions, 0 deletions
diff --git a/tools/src/qsgeo.c b/tools/src/qsgeo.c new file mode 100644 index 0000000..0a46628 --- /dev/null +++ b/tools/src/qsgeo.c @@ -0,0 +1,607 @@ +/* -*-mode: c; indent-tabs-mode: nil; c-basic-offset: 2; -*- + */ +/** + * Utilities for the quality of service module mod_qos. + * + * qsgeo.c: resolves the country codes of IP addresses + * + * See http://mod-qos.sourceforge.net/ for further + * details. + * + * Copyright (C) 2023 Pascal Buchbinder + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +static const char revision[] = "$Id: qsgeo.c 2654 2022-05-13 09:12:42Z pbuchbinder $"; + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <time.h> +#include <sys/time.h> +#include <regex.h> + +/* apr */ +#include <apr.h> +#include <apr_strings.h> +#include <apr_file_io.h> +#include <apr_time.h> +#include <apr_lib.h> +#include <apr_portable.h> +#include <apr_support.h> +#include <apr_base64.h> + +#include "qs_util.h" + +#define MAX_REG_MATCH 10 + +// "3758096128","3758096383","AU" +#define QS_GEO_PATTERN "\"([0-9]+)\",\"([0-9]+)\",\"([A-Z0-9]{2}|-)\"" +// "3758096128","3758096383","AU","Australia" +#define QS_GEO_PATTERN_D "\"([0-9]+)\",\"([0-9]+)\",\"([A-Z0-9]{2})\",\"(.*)\"" +// "192.83.198.0","192.83.198.255","3226715648","3226715903","AU","Australia" +#define QS_GEO_PATTERN_EXT "\"[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\",\"[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\",\"([0-9]+)\",\"([0-9]+)\",\"([A-Z0-9]{2})\"" +// 182.12.34.23 +#define IPPATTERN "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})[\"'\x0d\x0a, ]+" +#define IPPATTERN2 "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})[\"'\x0d\x0a,; ]+" + +static int m_inject = 0; +static int m_verbose = 0; + +typedef struct { + unsigned long start; + char *c; +} qos_inj_t; + +static const qos_inj_t m_inj[] = { + { 167772160, "\"10.0.0.0\",\"10.255.255.255\",\"167772160\",\"184549375\",\"PV\",\"private network\"" }, + { 2130706432, "\"127.0.0.0\",\"127.255.255.255\",\"2130706432\",\"2147483647\",\"LO\",\"local loopback\"" }, + { 2886729728, "\"172.16.0.0\",\"172.31.255.255\",\"2886729728\",\"2887778303\",\"PV\",\"private network\"" }, + { 3232235520, "\"192.168.0.0\",\"192.168.255.255\",\"3232235520\",\"3232301055\",\"PV\",\"private network\"" }, + { 0, NULL } +}; + +typedef struct { + unsigned long start; + unsigned long end; + char country[3]; + char c[500]; +} qos_geo_t; + +typedef struct { + int num; + char *c; +} qos_geo_stat_t; + +static int qos_is_num(const char *num) { + int i = 0; + while(num[i]) { + if(!isdigit(num[i])) { + return 0; + } + i++; + } + return 1; +} + +/** + * Converts an IPv4 address string to it's numeric value. + * w.x.y.z results in 16777216*w + 65536*x + 256*y + z + * + * @param pool To make a copy of the address to parse + * @param ip + * @return The address or 0 on error + */ +static unsigned long qos_geo_str2long(apr_pool_t *pool, const char *ip) { + char *p; + char *i = apr_pstrdup(pool, ip); + unsigned long addr = 0; + + p = strchr(i, '.'); + if(!p) return 0; + p[0] = '\0'; + if(!qos_is_num(i)) return 0; + addr += (atol(i) * 16777216); + i = p; + i++; + + p = strchr(i, '.'); + if(!p) return 0; + p[0] = '\0'; + if(!qos_is_num(i)) return 0; + addr += (atol(i) * 65536); + i = p; + i++; + + p = strchr(i, '.'); + if(!p) return 0; + p[0] = '\0'; + if(!qos_is_num(i)) return 0; + addr += (atol(i) * 256); + i = p; + i++; + + if(!qos_is_num(i)) return 0; + addr += (atol(i)); + + return addr; +} + +static void qos_geo_long2str(char *buf, unsigned long ip) { + int a,b,c,d; + a = ip % 256; + ip = ip / 256; + b = ip % 256; + ip = ip / 256; + c = ip % 256; + ip = ip / 256; + d = ip % 256; + sprintf(buf, "%d.%d.%d.%d", d, c, b, a); +} + +/** + * Usage message (text or manpage format). + */ +static void usage(const char *cmd, int man) { + if(man) { + //.TH [name of program] [section number] [center footer] [left footer] [center header] + printf(".TH %s 1 \"%s\" \"mod_qos utilities %s\" \"%s man page\"\n", qs_CMD(cmd), man_date, + man_version, cmd); + } + printf("\n"); + if(man) { + printf(".SH NAME\n"); + } + qs_man_print(man, "%s - an utility to lookup a client's country code.\n", cmd); + printf("\n"); + if(man) { + printf(".SH SYNOPSIS\n"); + } + qs_man_print(man, "%s%s -d <path> [-l] [-s] [-ip <ip>]\n", man ? "" : "Usage: ", cmd); + printf("\n"); + if(man) { + printf(".SH DESCRIPTION\n"); + } else { + printf("Summary\n"); + } + qs_man_print(man, "Use this utility to resolve the country codes of IP addresses\n"); + qs_man_print(man, "within existing log files. The utility reads the log file data\n"); + qs_man_print(man, "from stdin and writes them, with the injected country code, to\n"); + qs_man_print(man, "stdout.\n"); + printf("\n"); + if(man) { + printf(".SH OPTIONS\n"); + } else { + printf("Options\n"); + } + if(man) printf("\n.TP\n"); + qs_man_print(man, " -d <path>\n"); + if(man) printf("\n"); + qs_man_print(man, " Specifies the path to the geographical database files (CSV\n"); + qs_man_print(man, " file containing IP address ranges and country codes).\n"); + if(man) printf("\n.TP\n"); + qs_man_print(man, " -s\n"); + if(man) printf("\n"); + qs_man_print(man, " Writes a summary of the requests per country only.\n"); + if(man) printf("\n.TP\n"); + qs_man_print(man, " -l\n"); + if(man) printf("\n"); + qs_man_print(man, " Writes the database to stdout (ignoring stdin) inserting\n"); + qs_man_print(man, " local (127.*) and private (10.*, 172.16*, 192.168.*)\n"); + qs_man_print(man, " network addresses.\n"); + if(man) printf("\n.TP\n"); + qs_man_print(man, " -ip <ip>\n"); + if(man) printf("\n"); + qs_man_print(man, " Resolves a single IP address instead of processing a log file.\n"); + printf("\n"); + if(man) { + printf(".SH EXAMPLE\n"); + printf("Reading the file access.log and adding the country code to the IP address field:\n"); + printf("\n"); + } else { + printf("Example reading the file access.log and adding the country code to\n"); + printf("the IP address field:\n"); + } + qs_man_println(man, " cat access.log | %s -d GeoIPCountryWhois.csv\n", cmd); + printf("\n"); + if(man) { + printf("Reading the file access.log and showing a summary only:\n"); + printf("\n"); + } else { + printf("Example reading the file access.log and showing a summary only:\n"); + } + qs_man_println(man, " cat access.log | %s -d GeoIPCountryWhois.csv -s\n", cmd); + printf("\n"); + if(man) { + printf("Resolving a single IP address:\n"); + printf("\n"); + } else { + printf("Example resolving a single IP address:\n"); + } + qs_man_println(man, " %s -d GeoIPCountryWhois.csv -ip 192.84.12.23\n", cmd); + printf("\n"); + if(man) { + printf(".SH SEE ALSO\n"); + printf("qsdt(1), qsexec(1), qsfilter2(1), qsgrep(1), qshead(1), qslog(1), qslogger(1), qspng(1), qsre(1), qsrespeed(1), qsrotate(1), qssign(1), qstail(1)\n"); + printf(".SH AUTHOR\n"); + printf("Pascal Buchbinder, http://mod-qos.sourceforge.net/\n"); + } else { + printf("See http://mod-qos.sourceforge.net/ for further details.\n"); + } + if(man) { + exit(0); + } else { + exit(1); + } +} + +/** + * Comperator to search entries using bsearch. + */ +static int qos_geo_comp(const void *_pA, const void *_pB) { + unsigned long *pA = (unsigned long *)_pA; + qos_geo_t *pB = (qos_geo_t *)_pB; + unsigned long search = *pA; + if((search >= pB->start) && (search <= pB->end)) return 0; + if(search > pB->start) return 1; + if(search < pB->start) return -1; + return -1; // error +} + +/** + * Loads the (sorted) CSV file into the memory. + * + * @param pool + * @param db Path to the db file + * @param size Returns the size f the db (elements in the array) + * @param msg Error message if something got wrong + * @return Array with all entries from the CSV file (or NULL on error) + */ +static qos_geo_t *qos_loadgeo(apr_pool_t *pool, const char *db, int *size, char **msg, int *errors) { + regmatch_t ma[MAX_REG_MATCH]; + regex_t preg; + regex_t pregd; + regex_t pregext; + qos_geo_t *geo = NULL; + qos_geo_t *g = NULL; + qos_geo_t *last = NULL; + int lines = 0; + char line[HUGE_STRING_LEN]; + char buf[HUGE_STRING_LEN]; + FILE *file; + const qos_inj_t *inj = m_inj; + *size = 0; + if(regcomp(&preg, QS_GEO_PATTERN, REG_EXTENDED)) { + // internal error + *msg = apr_pstrdup(pool, "failed to compile regular expression "QS_GEO_PATTERN); + (*errors)++; + return NULL; + } + if(regcomp(&pregd, QS_GEO_PATTERN_D, REG_EXTENDED)) { + // internal error + *msg = apr_pstrdup(pool, "failed to compile regular expression "QS_GEO_PATTERN_D); + (*errors)++; + return NULL; + } + if(regcomp(&pregext, QS_GEO_PATTERN_EXT, REG_EXTENDED)) { + // internal error + *msg = apr_pstrdup(pool, "failed to compile regular expression "QS_GEO_PATTERN_EXT); + (*errors)++; + return NULL; + } + file = fopen(db, "r"); + if(!file) { + (*errors)++; + return NULL; + } + while(fgets(line, sizeof(line), file) != NULL) { + if(strlen(line) > 0) { + if(regexec(&preg, line, 0, NULL, 0) == 0) { + lines++; + } else { + *msg = apr_psprintf(pool, "invalid entry in database: '%s'", line); + (*errors)++; + if(m_verbose) { + char *p = *msg; + while(p[0]) { + if(p[0] < 32) { + p[0] = '.'; + } + p++; + } + fprintf(stderr, "line %d: %s\n", lines, *msg); + } + } + } + } + *size = lines; + geo = apr_pcalloc(pool, sizeof(qos_geo_t) * lines); + g = geo; + fseek(file, 0, SEEK_SET); + lines = 0; + while(fgets(line, sizeof(line), file) != NULL) { + lines++; + if(strlen(line) > 0) { + int plus = 0; + if(m_inject) { + strcpy(buf, line); + } + if(regexec(&pregd, line, MAX_REG_MATCH, ma, 0) == 0) { + plus = 1; + } + if(plus || regexec(&preg, line, MAX_REG_MATCH, ma, 0) == 0) { + int missingAddr = 0; + if(regexec(&pregext, line, 0, NULL, 0) != 0) { + missingAddr = 1; + } + line[ma[1].rm_eo] = '\0'; + line[ma[2].rm_eo] = '\0'; + line[ma[3].rm_eo] = '\0'; + g->start = atoll(&line[ma[1].rm_so]); + g->end = atoll(&line[ma[2].rm_so]); + g->c[0] = '\0'; + if(m_inject) { + if(inj->start && (g->start > inj->start)) { + while(inj->start && (g->start > inj->start)) { + printf("%s\n", inj->c); + inj++; + } + } else if(g->start != inj->start) { + if(missingAddr) { + /* some databases do not include IP address + representation (but number only) */ + char bs[128]; + char be[128]; + qos_geo_long2str(bs, g->start); + qos_geo_long2str(be, g->end); + printf("\"%s\",\"%s\",%s", bs, be, buf); + } + } + if(!missingAddr) { + printf("%s", buf); + } + } + strncpy(g->country, &line[ma[3].rm_so], 2); + if(last) { + if(g->start < last->start) { + *msg = apr_psprintf(pool, "wrong order/lines not sorted (line %d)", lines); + (*errors)++; + if(m_verbose) { + fprintf(stderr, "line %d: wrong order/lines not sorted\n", lines); + } + } + } + if(plus) { + line[ma[4].rm_eo] = '\0'; + strncpy(g->c, &line[ma[4].rm_so], 500); + } + last = g; + g++; + } + } + } + fclose(file); + return geo; +} + +int main(int argc, const char * const argv[]) { + int errors = 0; + int rc; + int stat = 0; + const char *ip = NULL; + char *msg = NULL; + qos_geo_t *geo; + int size; + const char *db = NULL; + apr_table_t *entries; + apr_pool_t *pool; + const char *cmd = strrchr(argv[0], '/'); + apr_app_initialize(&argc, &argv, NULL); + apr_pool_create(&pool, NULL); + entries = apr_table_make(pool, 100); + + if(cmd == NULL) { + cmd = (char *)argv[0]; + } else { + cmd++; + } + + argc--; + argv++; + while(argc >= 1) { + if(strcmp(*argv, "-d") == 0) { + if (--argc >= 1) { + db = *(++argv); + } + } else if(strcmp(*argv, "-ip") == 0) { + if (--argc >= 1) { + ip = *(++argv); + } + } else if(strcmp(*argv, "-s") == 0) { + stat = 1; + } else if(strcmp(*argv, "-l") == 0) { + m_inject = 1; + } else if(strcmp(*argv, "-v") == 0) { + m_verbose = 1; + } else if(strcmp(*argv,"-h") == 0) { + usage(cmd, 0); + } else if(strcmp(*argv,"--help") == 0) { + usage(cmd, 0); + } else if(strcmp(*argv,"-?") == 0) { + usage(cmd, 0); + } else if(strcmp(*argv,"--man") == 0) { + usage(cmd, 1); + } else { + usage(cmd, 0); + } + argc--; + argv++; + } + + if(db == NULL) { + usage(cmd, 0); + } + + rc = nice(10); + if(rc == -1) { + fprintf(stderr, "ERROR, failed to change nice value: %s\n", strerror(errno)); + } + + geo = qos_loadgeo(pool, db, &size, &msg, &errors); + if(geo == NULL || msg != NULL) { + if(msg) { + char *p = msg; + while(p[0]) { + if(p[0] < 32) { + p[0] = '.'; + } + p++; + } + } + fprintf(stderr, "failed to load database: %s (total %d errors)\n", + msg ? msg : "-", errors); + exit(1); + } + if(m_inject) { + exit(0); + } + + if(ip) { + qos_geo_t *pB; + unsigned long search = qos_geo_str2long(pool, ip); + printf("search %lu: ", search); + pB = bsearch(&search, + geo, + size, + sizeof(qos_geo_t), + qos_geo_comp); + if(pB) { + printf("%s\n", pB->country); + } else { + printf("n/a\n"); + } + return 0; + } + + // start reading from stdin + { + char prev; + qos_geo_t *pB; + apr_pool_t *tmp; + char *line = calloc(1, MAX_LINE_BUFFER+1); + regex_t preg; + regex_t preg2; + regmatch_t ma[MAX_REG_MATCH]; + apr_pool_create(&tmp, NULL); + if(regcomp(&preg, IPPATTERN, REG_EXTENDED)) { + exit(1); + } + regcomp(&preg2, IPPATTERN2, REG_EXTENDED); + while(fgets(line, MAX_LINE_BUFFER, stdin) != NULL) { + int match = regexec(&preg, line, MAX_REG_MATCH, ma, 0); + if(match != 0) { + char *dx = strchr(line, ';'); + if(dx && ((dx - line) <= 15)) { + // file starts probably with <ip>; => a qslog -pc file? + match = regexec(&preg2, line, MAX_REG_MATCH, ma, 0); + } + } + if(match == 0) { + unsigned long search; + prev = line[ma[1].rm_eo]; + line[ma[1].rm_eo] = '\0'; + search = qos_geo_str2long(tmp, &line[ma[1].rm_so]); + apr_pool_clear(tmp); + pB = bsearch(&search, + geo, + size, + sizeof(qos_geo_t), + qos_geo_comp); + if(stat) { + /* creates a single statistic entry for each country (used to collect + requests per source country) */ + if(pB) { + qos_geo_stat_t *s = (qos_geo_stat_t *)apr_table_get(entries, pB->country); + if(s == NULL) { + s = apr_pcalloc(pool, sizeof(qos_geo_stat_t)); + s->num = 0; + s->c = pB->c; + apr_table_addn(entries, apr_pstrdup(pool, pB->country), (char *)s); + } + s->num++; + } + } else { + /* modifies each log line inserting the country code + */ + char cr = prev; + char delw[2]; + char delx[2]; + delw[1] = '\0'; + delw[0] = ' '; + delx[1] = '\0'; + delx[0] = ' '; + if(line[ma[1].rm_eo+1] == ' ') { + delx[0] = '\0'; + } + if(line[ma[1].rm_eo+1] == ';') { + delx[0] = ';'; + } + if(prev <= CR) { + prev = ' '; + } + if(prev == ' ') { + delw[0] = '\0'; + } + if(prev == ';') { + delw[0] = '\0'; + delx[0] = ';'; + } + if(pB) { + printf("%s%c%s%s%s%s", line, prev, + delw, + pB->country, + delx, + &line[ma[1].rm_eo+1]); + } else { + printf("%s%c%s--%s%s", line, prev, + delw, + delx, + &line[ma[1].rm_eo+1]); + } + if(cr <= CR) { + printf("\n"); + } + } + } else { + printf("%s", line); + } + fflush(stdout); + } + if(stat) { + int i; + apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(entries)->elts; + for(i = 0; i < apr_table_elts(entries)->nelts; i++) { + qos_geo_stat_t *s = (qos_geo_stat_t *)entry[i].val; + printf("%7.d %s %s\n", s->num, entry[i].key, s->c ? s->c : ""); + } + } + } + return 0; +} |