summaryrefslogtreecommitdiffstats
path: root/tools/src/qsgeo.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/src/qsgeo.c')
-rw-r--r--tools/src/qsgeo.c607
1 files changed, 607 insertions, 0 deletions
diff --git a/tools/src/qsgeo.c b/tools/src/qsgeo.c
new file mode 100644
index 0000000..0a46628
--- /dev/null
+++ b/tools/src/qsgeo.c
@@ -0,0 +1,607 @@
+/* -*-mode: c; indent-tabs-mode: nil; c-basic-offset: 2; -*-
+ */
+/**
+ * Utilities for the quality of service module mod_qos.
+ *
+ * qsgeo.c: resolves the country codes of IP addresses
+ *
+ * See http://mod-qos.sourceforge.net/ for further
+ * details.
+ *
+ * Copyright (C) 2023 Pascal Buchbinder
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/* Source revision identifier ($Id$ keyword string); it is not referenced
+ * anywhere else in this file. */
+static const char revision[] = "$Id: qsgeo.c 2654 2022-05-13 09:12:42Z pbuchbinder $";
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <sys/time.h>
+#include <regex.h>
+
+/* apr */
+#include <apr.h>
+#include <apr_strings.h>
+#include <apr_file_io.h>
+#include <apr_time.h>
+#include <apr_lib.h>
+#include <apr_portable.h>
+#include <apr_support.h>
+#include <apr_base64.h>
+
+#include "qs_util.h"
+
+/* Size of the regmatch_t arrays passed to regexec() in this file. */
+#define MAX_REG_MATCH 10
+
+/* Database line formats (POSIX extended regular expressions). The capture
+ * groups are: 1 = range start, 2 = range end, 3 = country code and, for
+ * QS_GEO_PATTERN_D only, 4 = country name/description. */
+// "3758096128","3758096383","AU"
+#define QS_GEO_PATTERN "\"([0-9]+)\",\"([0-9]+)\",\"([A-Z0-9]{2}|-)\""
+// "3758096128","3758096383","AU","Australia"
+#define QS_GEO_PATTERN_D "\"([0-9]+)\",\"([0-9]+)\",\"([A-Z0-9]{2})\",\"(.*)\""
+// "192.83.198.0","192.83.198.255","3226715648","3226715903","AU","Australia"
+#define QS_GEO_PATTERN_EXT "\"[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\",\"[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\",\"([0-9]+)\",\"([0-9]+)\",\"([A-Z0-9]{2})\""
+/* IPv4 address within a log line; the address must be followed by at least
+ * one delimiter character. IPPATTERN2 additionally accepts ';' as delimiter
+ * and is only tried when IPPATTERN did not match and a ';' occurs within
+ * the first characters of the line. */
+// 182.12.34.23
+#define IPPATTERN "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})[\"'\x0d\x0a, ]+"
+#define IPPATTERN2 "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})[\"'\x0d\x0a,; ]+"
+
+/* Set by option -l: the database is echoed to stdout while it is loaded,
+ * with the m_inj entries inserted. */
+static int m_inject = 0;
+/* Set by option -v: invalid or unsorted database lines are reported on
+ * stderr while loading. */
+static int m_verbose = 0;
+
+/**
+ * Database line which is inserted by option -l.
+ */
+typedef struct {
+  unsigned long start; /* numeric start address of the range (0 terminates the table) */
+  const char *c;       /* complete CSV line to print (string literal, read-only) */
+} qos_inj_t;
+
+/* Local and private network ranges, ordered by ascending start address.
+ * The table ends with an entry whose start is 0 and whose line is NULL. */
+static const qos_inj_t m_inj[] = {
+  { 167772160UL, "\"10.0.0.0\",\"10.255.255.255\",\"167772160\",\"184549375\",\"PV\",\"private network\"" },
+  { 2130706432UL, "\"127.0.0.0\",\"127.255.255.255\",\"2130706432\",\"2147483647\",\"LO\",\"local loopback\"" },
+  { 2886729728UL, "\"172.16.0.0\",\"172.31.255.255\",\"2886729728\",\"2887778303\",\"PV\",\"private network\"" },
+  { 3232235520UL, "\"192.168.0.0\",\"192.168.255.255\",\"3232235520\",\"3232301055\",\"PV\",\"private network\"" },
+  { 0, NULL }
+};
+
+/* One database entry (an address range and its country) as stored in the
+ * array built by qos_loadgeo(). */
+typedef struct {
+ unsigned long start; /* first address of the range (numeric value of CSV column "start") */
+ unsigned long end; /* last address of the range; the comparator treats it as inclusive */
+ char country[3]; /* country code: at most two characters plus terminator */
+ char c[500]; /* optional country name/description; empty if the line has none */
+} qos_geo_t;
+
+/* Per country statistics entry used by the summary mode (option -s). */
+typedef struct {
+ int num; /* number of input lines whose address resolved to this country */
+ char *c; /* points to the description (qos_geo_t.c) of the first entry seen */
+} qos_geo_stat_t;
+
+/**
+ * Tells whether a string consists of decimal digits only.
+ *
+ * @param num NUL terminated string to check
+ * @return 1 if the string is not empty and every character is a digit,
+ *         0 otherwise (this includes the empty string)
+ */
+static int qos_is_num(const char *num) {
+  const unsigned char *p = (const unsigned char *)num;
+  if(p[0] == '\0') {
+    /* an empty string (e.g. the gap in "1..2") is not a number */
+    return 0;
+  }
+  while(p[0]) {
+    /* isdigit() is only defined for unsigned char values (and EOF),
+       hence the string is read as unsigned char */
+    if(!isdigit(p[0])) {
+      return 0;
+    }
+    p++;
+  }
+  return 1;
+}
+
+/**
+ * Converts an IPv4 address string to its numeric value.
+ * w.x.y.z results in 16777216*w + 65536*x + 256*y + z
+ *
+ * The string must consist of exactly four decimal components separated
+ * by dots. Each component needs at least one digit and must not exceed
+ * 255; anything else is rejected.
+ *
+ * @param pool Not used (the string is parsed in place without making
+ *             a copy); kept to leave the signature unchanged
+ * @param ip Address to parse
+ * @return The address or 0 on error (0.0.0.0 can therefore not be
+ *         distinguished from an error)
+ */
+static unsigned long qos_geo_str2long(apr_pool_t *pool, const char *ip) {
+  unsigned long addr = 0;
+  const char *p = ip;
+  int octet;
+
+  (void)pool;
+  if(ip == NULL) return 0;
+
+  for(octet = 0; octet < 4; octet++) {
+    unsigned long value = 0;
+    int digits = 0;
+    while(isdigit((unsigned char)p[0])) {
+      value = (value * 10) + (unsigned long)(p[0] - '0');
+      /* checked per digit, so that "value" can't overflow either */
+      if(value > 255) return 0;
+      digits++;
+      p++;
+    }
+    if(digits == 0) return 0;
+    if(octet < 3) {
+      /* the first three components are followed by a dot */
+      if(p[0] != '.') return 0;
+      p++;
+    }
+    addr = (addr * 256) + value;
+  }
+  /* nothing may follow the fourth component */
+  if(p[0] != '\0') return 0;
+
+  return addr;
+}
+
+/**
+ * Writes the dotted decimal representation (w.x.y.z) of a numeric
+ * IPv4 address.
+ *
+ * @param buf Buffer to write the string to (needs room for up to
+ *            16 bytes including the terminator)
+ * @param ip Numeric address; only the lowest four bytes are used
+ */
+static void qos_geo_long2str(char *buf, unsigned long ip) {
+  int octet[4];
+  int idx;
+  /* peel off the bytes, least significant one first */
+  for(idx = 3; idx >= 0; idx--) {
+    octet[idx] = ip % 256;
+    ip /= 256;
+  }
+  sprintf(buf, "%d.%d.%d.%d", octet[0], octet[1], octet[2], octet[3]);
+}
+
+/**
+ * Usage message (text or manpage format).
+ *
+ * Prints the help text to stdout and terminates the process; this
+ * function never returns. qs_CMD, qs_man_print, qs_man_println, man_date
+ * and man_version are not defined in this file (presumably they are
+ * provided by qs_util.h).
+ *
+ * @param cmd Name of the program as shown within the text
+ * @param man Non-zero to emit manpage (troff) markup and exit with
+ *            status 0, zero to emit plain text and exit with status 1
+ */
+static void usage(const char *cmd, int man) {
+ if(man) {
+ //.TH [name of program] [section number] [center footer] [left footer] [center header]
+ printf(".TH %s 1 \"%s\" \"mod_qos utilities %s\" \"%s man page\"\n", qs_CMD(cmd), man_date,
+ man_version, cmd);
+ }
+ printf("\n");
+ if(man) {
+ printf(".SH NAME\n");
+ }
+ qs_man_print(man, "%s - an utility to lookup a client's country code.\n", cmd);
+ printf("\n");
+ if(man) {
+ printf(".SH SYNOPSIS\n");
+ }
+ qs_man_print(man, "%s%s -d <path> [-l] [-s] [-ip <ip>]\n", man ? "" : "Usage: ", cmd);
+ printf("\n");
+ if(man) {
+ printf(".SH DESCRIPTION\n");
+ } else {
+ printf("Summary\n");
+ }
+ qs_man_print(man, "Use this utility to resolve the country codes of IP addresses\n");
+ qs_man_print(man, "within existing log files. The utility reads the log file data\n");
+ qs_man_print(man, "from stdin and writes them, with the injected country code, to\n");
+ qs_man_print(man, "stdout.\n");
+ printf("\n");
+ if(man) {
+ printf(".SH OPTIONS\n");
+ } else {
+ printf("Options\n");
+ }
+ // each option: ".TP" paragraph macro (manpage only), option name, description
+ if(man) printf("\n.TP\n");
+ qs_man_print(man, " -d <path>\n");
+ if(man) printf("\n");
+ qs_man_print(man, " Specifies the path to the geographical database files (CSV\n");
+ qs_man_print(man, " file containing IP address ranges and country codes).\n");
+ if(man) printf("\n.TP\n");
+ qs_man_print(man, " -s\n");
+ if(man) printf("\n");
+ qs_man_print(man, " Writes a summary of the requests per country only.\n");
+ if(man) printf("\n.TP\n");
+ qs_man_print(man, " -l\n");
+ if(man) printf("\n");
+ qs_man_print(man, " Writes the database to stdout (ignoring stdin) inserting\n");
+ qs_man_print(man, " local (127.*) and private (10.*, 172.16*, 192.168.*)\n");
+ qs_man_print(man, " network addresses.\n");
+ if(man) printf("\n.TP\n");
+ qs_man_print(man, " -ip <ip>\n");
+ if(man) printf("\n");
+ qs_man_print(man, " Resolves a single IP address instead of processing a log file.\n");
+ printf("\n");
+ // NOTE(review): option -v (accepted by main) is not mentioned in this text
+ if(man) {
+ printf(".SH EXAMPLE\n");
+ printf("Reading the file access.log and adding the country code to the IP address field:\n");
+ printf("\n");
+ } else {
+ printf("Example reading the file access.log and adding the country code to\n");
+ printf("the IP address field:\n");
+ }
+ qs_man_println(man, " cat access.log | %s -d GeoIPCountryWhois.csv\n", cmd);
+ printf("\n");
+ if(man) {
+ printf("Reading the file access.log and showing a summary only:\n");
+ printf("\n");
+ } else {
+ printf("Example reading the file access.log and showing a summary only:\n");
+ }
+ qs_man_println(man, " cat access.log | %s -d GeoIPCountryWhois.csv -s\n", cmd);
+ printf("\n");
+ if(man) {
+ printf("Resolving a single IP address:\n");
+ printf("\n");
+ } else {
+ printf("Example resolving a single IP address:\n");
+ }
+ qs_man_println(man, " %s -d GeoIPCountryWhois.csv -ip 192.84.12.23\n", cmd);
+ printf("\n");
+ if(man) {
+ printf(".SH SEE ALSO\n");
+ printf("qsdt(1), qsexec(1), qsfilter2(1), qsgrep(1), qshead(1), qslog(1), qslogger(1), qspng(1), qsre(1), qsrespeed(1), qsrotate(1), qssign(1), qstail(1)\n");
+ printf(".SH AUTHOR\n");
+ printf("Pascal Buchbinder, http://mod-qos.sourceforge.net/\n");
+ } else {
+ printf("See http://mod-qos.sourceforge.net/ for further details.\n");
+ }
+ // generating the manpage is a regular operation (status 0), the plain
+ // usage text is shown for -h and for wrong arguments alike (status 1)
+ if(man) {
+ exit(0);
+ } else {
+ exit(1);
+ }
+}
+
+/**
+ * Comparator to search entries using bsearch.
+ *
+ * @param _pA Key: pointer to the numeric address (unsigned long)
+ * @param _pB Element: pointer to the database entry (qos_geo_t)
+ * @return 0 if the address is within the range start..end (inclusive) of
+ *         the entry, 1 if it is not but greater than the range's start,
+ *         -1 in any other case
+ */
+static int qos_geo_comp(const void *_pA, const void *_pB) {
+  const unsigned long key = *(const unsigned long *)_pA;
+  const qos_geo_t *entry = (const qos_geo_t *)_pB;
+  if((key >= entry->start) && (key <= entry->end)) {
+    return 0;
+  }
+  /* key == start is only reached for an inconsistent entry (end < start)
+     and is, like a smaller key, answered with -1 */
+  return (key > entry->start) ? 1 : -1;
+}
+
+/**
+ * Loads the (sorted) CSV file into the memory.
+ *
+ * The file is read twice: the first pass counts the valid lines (and
+ * reports the invalid ones), the second pass fills the array. If
+ * m_inject is set, the second pass also writes the database to stdout
+ * and inserts the entries of m_inj.
+ *
+ * @param pool Used to allocate the array and the error messages
+ * @param db Path to the db file
+ * @param size Returns the size of the db (elements in the array)
+ * @param msg Error message if something went wrong (not modified if no
+ *            problem has been detected). Invalid or unsorted lines set
+ *            the message although the array is still returned.
+ * @param errors Counter which is incremented for every problem detected
+ * @return Array with all entries from the CSV file (or NULL on error)
+ */
+static qos_geo_t *qos_loadgeo(apr_pool_t *pool, const char *db, int *size, char **msg, int *errors) {
+  regmatch_t ma[MAX_REG_MATCH];
+  regex_t preg;
+  regex_t pregd;
+  regex_t pregext;
+  qos_geo_t *geo = NULL;
+  qos_geo_t *g = NULL;
+  qos_geo_t *last = NULL;
+  int lines = 0;
+  int lineno = 0;
+  char line[HUGE_STRING_LEN];
+  char buf[HUGE_STRING_LEN];
+  FILE *file;
+  const qos_inj_t *inj = m_inj;
+  *size = 0;
+  if(regcomp(&preg, QS_GEO_PATTERN, REG_EXTENDED)) {
+    // internal error
+    *msg = apr_pstrdup(pool, "failed to compile regular expression "QS_GEO_PATTERN);
+    (*errors)++;
+    return NULL;
+  }
+  if(regcomp(&pregd, QS_GEO_PATTERN_D, REG_EXTENDED)) {
+    // internal error
+    *msg = apr_pstrdup(pool, "failed to compile regular expression "QS_GEO_PATTERN_D);
+    (*errors)++;
+    regfree(&preg);
+    return NULL;
+  }
+  if(regcomp(&pregext, QS_GEO_PATTERN_EXT, REG_EXTENDED)) {
+    // internal error
+    *msg = apr_pstrdup(pool, "failed to compile regular expression "QS_GEO_PATTERN_EXT);
+    (*errors)++;
+    regfree(&pregd);
+    regfree(&preg);
+    return NULL;
+  }
+  file = fopen(db, "r");
+  if(!file) {
+    /* tell the caller why there is no database */
+    *msg = apr_psprintf(pool, "could not open '%s': %s", db, strerror(errno));
+    (*errors)++;
+    regfree(&pregext);
+    regfree(&pregd);
+    regfree(&preg);
+    return NULL;
+  }
+
+  /* first pass: count the valid lines to determine the array's size */
+  while(fgets(line, sizeof(line), file) != NULL) {
+    lineno++;
+    if(strlen(line) > 0) {
+      if(regexec(&preg, line, 0, NULL, 0) == 0) {
+        lines++;
+      } else {
+        *msg = apr_psprintf(pool, "invalid entry in database: '%s'", line);
+        (*errors)++;
+        if(m_verbose) {
+          /* replace control characters (such as the line end) */
+          char *p = *msg;
+          while(p[0]) {
+            if(p[0] < 32) {
+              p[0] = '.';
+            }
+            p++;
+          }
+          /* line number within the file (not the number of valid lines) */
+          fprintf(stderr, "line %d: %s\n", lineno, *msg);
+        }
+      }
+    }
+  }
+  *size = lines;
+  geo = apr_pcalloc(pool, sizeof(qos_geo_t) * lines);
+  g = geo;
+  fseek(file, 0, SEEK_SET);
+
+  /* second pass: store the entries */
+  lines = 0;
+  while(fgets(line, sizeof(line), file) != NULL) {
+    int plus = 0;
+    int missingAddr = 0;
+    lines++;
+    if(strlen(line) == 0) {
+      continue;
+    }
+    if(m_inject) {
+      /* the line gets modified below, keep a copy to print it */
+      strcpy(buf, line);
+    }
+    if(regexec(&pregd, line, MAX_REG_MATCH, ma, 0) == 0) {
+      /* line includes the description (fourth match) */
+      plus = 1;
+    } else if(regexec(&preg, line, MAX_REG_MATCH, ma, 0) != 0) {
+      /* invalid line (has been reported by the first pass) */
+      continue;
+    }
+    if((g - geo) >= *size) {
+      /* more valid lines than counted by the first pass (file has been
+         modified in the meantime): never write beyond the array */
+      *msg = apr_psprintf(pool, "database has been modified while loading (line %d)", lines);
+      (*errors)++;
+      break;
+    }
+    if(regexec(&pregext, line, 0, NULL, 0) != 0) {
+      missingAddr = 1;
+    }
+    line[ma[1].rm_eo] = '\0';
+    line[ma[2].rm_eo] = '\0';
+    line[ma[3].rm_eo] = '\0';
+    /* the patterns ensure digits only; strtoul has, unlike atoll, a
+       defined result if the number is out of range */
+    g->start = strtoul(&line[ma[1].rm_so], NULL, 10);
+    g->end = strtoul(&line[ma[2].rm_so], NULL, 10);
+    g->c[0] = '\0';
+    if(m_inject) {
+      if(inj->start && (g->start > inj->start)) {
+        while(inj->start && (g->start > inj->start)) {
+          printf("%s\n", inj->c);
+          inj++;
+        }
+        /* NOTE(review): if entries have been inserted here, a line
+           without address representation is not written at all (only
+           the branch below would print it) - confirm if intended */
+      } else if(g->start != inj->start) {
+        if(missingAddr) {
+          /* some databases do not include IP address
+             representation (but number only) */
+          char bs[128];
+          char be[128];
+          qos_geo_long2str(bs, g->start);
+          qos_geo_long2str(be, g->end);
+          printf("\"%s\",\"%s\",%s", bs, be, buf);
+        }
+      }
+      if(!missingAddr) {
+        printf("%s", buf);
+      }
+    }
+    /* country has been zeroed by apr_pcalloc and stays terminated */
+    strncpy(g->country, &line[ma[3].rm_so], 2);
+    if(last) {
+      if(g->start < last->start) {
+        *msg = apr_psprintf(pool, "wrong order/lines not sorted (line %d)", lines);
+        (*errors)++;
+        if(m_verbose) {
+          fprintf(stderr, "line %d: wrong order/lines not sorted\n", lines);
+        }
+      }
+    }
+    if(plus) {
+      line[ma[4].rm_eo] = '\0';
+      /* a description may be longer than the field: truncate it and
+         ensure that the string is terminated */
+      strncpy(g->c, &line[ma[4].rm_so], sizeof(g->c) - 1);
+      g->c[sizeof(g->c) - 1] = '\0';
+    }
+    last = g;
+    g++;
+  }
+  fclose(file);
+  regfree(&pregext);
+  regfree(&pregd);
+  regfree(&preg);
+  return geo;
+}
+
+/**
+ * Program entry point.
+ *
+ * Parses the command line and loads the database (-d). Then either
+ * - terminates, if the database has been written to stdout (-l),
+ * - resolves a single address (-ip), or
+ * - reads log lines from stdin and writes them to stdout with the country
+ *   code inserted after the first IP address found, resp. writes only a
+ *   summary per country at the end of the input (-s).
+ *
+ * @return 0 on success; the process exits with status 1 if the database
+ *         could not be loaded (or contains errors) or if a required
+ *         resource is not available
+ */
+int main(int argc, const char * const argv[]) {
+  int errors = 0;
+  int rc;
+  int stat = 0;
+  const char *ip = NULL;
+  char *msg = NULL;
+  qos_geo_t *geo;
+  int size;
+  const char *db = NULL;
+  apr_table_t *entries;
+  apr_pool_t *pool;
+  const char *cmd = strrchr(argv[0], '/');
+  apr_app_initialize(&argc, &argv, NULL);
+  apr_pool_create(&pool, NULL);
+  entries = apr_table_make(pool, 100);
+
+  /* program name without its path */
+  if(cmd == NULL) {
+    cmd = argv[0];
+  } else {
+    cmd++;
+  }
+
+  argc--;
+  argv++;
+  while(argc >= 1) {
+    if(strcmp(*argv, "-d") == 0) {
+      if (--argc >= 1) {
+        db = *(++argv);
+      } else {
+        /* option requires a value */
+        usage(cmd, 0);
+      }
+    } else if(strcmp(*argv, "-ip") == 0) {
+      if (--argc >= 1) {
+        ip = *(++argv);
+      } else {
+        /* option requires a value */
+        usage(cmd, 0);
+      }
+    } else if(strcmp(*argv, "-s") == 0) {
+      stat = 1;
+    } else if(strcmp(*argv, "-l") == 0) {
+      m_inject = 1;
+    } else if(strcmp(*argv, "-v") == 0) {
+      m_verbose = 1;
+    } else if(strcmp(*argv,"-h") == 0) {
+      usage(cmd, 0);
+    } else if(strcmp(*argv,"--help") == 0) {
+      usage(cmd, 0);
+    } else if(strcmp(*argv,"-?") == 0) {
+      usage(cmd, 0);
+    } else if(strcmp(*argv,"--man") == 0) {
+      usage(cmd, 1);
+    } else {
+      usage(cmd, 0);
+    }
+    argc--;
+    argv++;
+  }
+
+  if(db == NULL) {
+    usage(cmd, 0);
+  }
+
+  /* nice() returns the new nice value, which may be -1 as well: only
+     errno tells whether the call has failed */
+  errno = 0;
+  rc = nice(10);
+  if(rc == -1 && errno != 0) {
+    fprintf(stderr, "ERROR, failed to change nice value: %s\n", strerror(errno));
+  }
+
+  geo = qos_loadgeo(pool, db, &size, &msg, &errors);
+  if(geo == NULL || msg != NULL) {
+    if(msg) {
+      /* replace control characters (such as the line end) */
+      char *p = msg;
+      while(p[0]) {
+        if(p[0] < 32) {
+          p[0] = '.';
+        }
+        p++;
+      }
+    }
+    fprintf(stderr, "failed to load database: %s (total %d errors)\n",
+            msg ? msg : "-", errors);
+    exit(1);
+  }
+  if(m_inject) {
+    /* the database has been written to stdout while loading it */
+    exit(0);
+  }
+
+  if(ip) {
+    qos_geo_t *pB;
+    unsigned long search = qos_geo_str2long(pool, ip);
+    printf("search %lu: ", search);
+    pB = bsearch(&search,
+                 geo,
+                 size,
+                 sizeof(qos_geo_t),
+                 qos_geo_comp);
+    if(pB) {
+      printf("%s\n", pB->country);
+    } else {
+      printf("n/a\n");
+    }
+    return 0;
+  }
+
+  // start reading from stdin
+  {
+    char prev;
+    qos_geo_t *pB;
+    apr_pool_t *tmp;
+    char *line = calloc(1, MAX_LINE_BUFFER+1);
+    regex_t preg;
+    regex_t preg2;
+    regmatch_t ma[MAX_REG_MATCH];
+    if(line == NULL) {
+      fprintf(stderr, "ERROR, failed to allocate the line buffer\n");
+      exit(1);
+    }
+    apr_pool_create(&tmp, NULL);
+    if(regcomp(&preg, IPPATTERN, REG_EXTENDED)) {
+      exit(1);
+    }
+    if(regcomp(&preg2, IPPATTERN2, REG_EXTENDED)) {
+      exit(1);
+    }
+    while(fgets(line, MAX_LINE_BUFFER, stdin) != NULL) {
+      int match = regexec(&preg, line, MAX_REG_MATCH, ma, 0);
+      if(match != 0) {
+        char *dx = strchr(line, ';');
+        if(dx && ((dx - line) <= 15)) {
+          // file starts probably with <ip>; => a qslog -pc file?
+          match = regexec(&preg2, line, MAX_REG_MATCH, ma, 0);
+        }
+      }
+      if(match == 0) {
+        unsigned long search;
+        /* cut the line behind the address (the first delimiter is
+           remembered to restore it when writing the line) */
+        prev = line[ma[1].rm_eo];
+        line[ma[1].rm_eo] = '\0';
+        search = qos_geo_str2long(tmp, &line[ma[1].rm_so]);
+        apr_pool_clear(tmp);
+        pB = bsearch(&search,
+                     geo,
+                     size,
+                     sizeof(qos_geo_t),
+                     qos_geo_comp);
+        if(stat) {
+          /* creates a single statistic entry for each country (used to collect
+             requests per source country) */
+          if(pB) {
+            /* the table's values are pointers to qos_geo_stat_t (not strings) */
+            qos_geo_stat_t *s = (qos_geo_stat_t *)apr_table_get(entries, pB->country);
+            if(s == NULL) {
+              s = apr_pcalloc(pool, sizeof(qos_geo_stat_t));
+              s->num = 0;
+              s->c = pB->c;
+              apr_table_addn(entries, apr_pstrdup(pool, pB->country), (char *)s);
+            }
+            s->num++;
+          }
+        } else {
+          /* modifies each log line inserting the country code
+           */
+          char cr = prev;
+          char delw[2];
+          char delx[2];
+          /* delw/delx: delimiter written before/after the country code */
+          delw[1] = '\0';
+          delw[0] = ' ';
+          delx[1] = '\0';
+          delx[0] = ' ';
+          if(line[ma[1].rm_eo+1] == ' ') {
+            delx[0] = '\0';
+          }
+          if(line[ma[1].rm_eo+1] == ';') {
+            delx[0] = ';';
+          }
+          if(prev <= CR) {
+            /* address has been followed by the line end */
+            prev = ' ';
+          }
+          if(prev == ' ') {
+            delw[0] = '\0';
+          }
+          if(prev == ';') {
+            delw[0] = '\0';
+            delx[0] = ';';
+          }
+          if(pB) {
+            printf("%s%c%s%s%s%s", line, prev,
+                   delw,
+                   pB->country,
+                   delx,
+                   &line[ma[1].rm_eo+1]);
+          } else {
+            printf("%s%c%s--%s%s", line, prev,
+                   delw,
+                   delx,
+                   &line[ma[1].rm_eo+1]);
+          }
+          if(cr <= CR) {
+            /* restore the line end which has been replaced above */
+            printf("\n");
+          }
+        }
+      } else {
+        /* no address found: the line is passed through as it is */
+        printf("%s", line);
+      }
+      fflush(stdout);
+    }
+    if(stat) {
+      int i;
+      apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(entries)->elts;
+      for(i = 0; i < apr_table_elts(entries)->nelts; i++) {
+        qos_geo_stat_t *s = (qos_geo_stat_t *)entry[i].val;
+        printf("%7.d %s %s\n", s->num, entry[i].key, s->c ? s->c : "");
+      }
+    }
+    regfree(&preg2);
+    regfree(&preg);
+    free(line);
+  }
+  return 0;
+}