summaryrefslogtreecommitdiffstats
path: root/debian/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 12:31:56 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 12:31:56 +0000
commit0f8bda193d0dc185420c6196d20fa1d63e8f090f (patch)
tree0252e34cbe37904122165c69df9793761a502985 /debian/src
parentAdding upstream version 1.6.12. (diff)
downloadgeoip-debian.tar.xz
geoip-debian.zip
Adding debian version 1.6.12-10.debian/1.6.12-10debian
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'debian/src')
-rw-r--r--debian/src/countryInfo.txt303
-rw-r--r--debian/src/geoip-asn-csv-to-dat.cpp897
-rw-r--r--debian/src/geoip-csv-to-dat.cpp1507
-rwxr-xr-xdebian/src/geolite2-to-legacy-csv.sh99
-rwxr-xr-xdebian/src/v4-to-v6-layout.pl31
5 files changed, 2837 insertions, 0 deletions
diff --git a/debian/src/countryInfo.txt b/debian/src/countryInfo.txt
new file mode 100644
index 0000000..30db029
--- /dev/null
+++ b/debian/src/countryInfo.txt
@@ -0,0 +1,303 @@
+# GeoNames.org Country Information
+# ================================
+#
+#
+# CountryCodes:
+# ============
+#
+# The official ISO country code for the United Kingdom is 'GB'. The code 'UK' is reserved.
+#
+# A list of dependent countries is available here:
+# https://spreadsheets.google.com/ccc?key=pJpyPy-J5JSNhe7F_KxwiCA&hl=en
+#
+#
+# The country code XK temporarily stands for Kosovo:
+# http://geonames.wordpress.com/2010/03/08/xk-country-code-for-kosovo/
+#
+#
+# CS (Serbia and Montenegro) with geonameId = 8505033 no longer exists.
+# AN (the Netherlands Antilles) with geonameId = 8505032 was dissolved on 10 October 2010.
+#
+#
+# Currencies :
+# ============
+#
+# A number of territories are not included in ISO 4217, because their currencies are not independent currencies per se,
+# but variants of another currency. These currencies are:
+#
+# 1. FO : Faroese krona (1:1 pegged to the Danish krone)
+# 2. GG : Guernsey pound (1:1 pegged to the pound sterling)
+# 3. JE : Jersey pound (1:1 pegged to the pound sterling)
+# 4. IM : Isle of Man pound (1:1 pegged to the pound sterling)
+# 5. TV : Tuvaluan dollar (1:1 pegged to the Australian dollar).
+# 6. CK : Cook Islands dollar (1:1 pegged to the New Zealand dollar).
+#
+# The following non-ISO codes are, however, sometimes used: GGP for the Guernsey pound,
+# JEP for the Jersey pound and IMP for the Isle of Man pound (http://en.wikipedia.org/wiki/ISO_4217)
+#
+#
+# A list of currency symbols is available here : http://forum.geonames.org/gforum/posts/list/437.page
+# another list with fractional units is here: http://forum.geonames.org/gforum/posts/list/1961.page
+#
+#
+# Languages :
+# ===========
+#
+# The column 'languages' lists the languages spoken in a country ordered by the number of speakers. The language code is a 'locale'
+# where any two-letter primary-tag is an ISO-639 language abbreviation and any two-letter initial subtag is an ISO-3166 country code.
+#
+# Example : es-AR is the Spanish variant spoken in Argentina.
+#
+#ISO ISO3 ISO-Numeric fips Country Capital Area(in sq km) Population Continent tld CurrencyCode CurrencyName Phone Postal Code Format Postal Code Regex Languages geonameid neighbours EquivalentFipsCode
+AD AND 020 AN Andorra Andorra la Vella 468 84000 EU .ad EUR Euro 376 AD### ^(?:AD)*(\d{3})$ ca 3041565 ES,FR
+AE ARE 784 AE United Arab Emirates Abu Dhabi 82880 4975593 AS .ae AED Dirham 971 ar-AE,fa,en,hi,ur 290557 SA,OM
+AF AFG 004 AF Afghanistan Kabul 647500 29121286 AS .af AFN Afghani 93 fa-AF,ps,uz-AF,tk 1149361 TM,CN,IR,TJ,PK,UZ
+AG ATG 028 AC Antigua and Barbuda St. John's 443 86754 NA .ag XCD Dollar +1-268 en-AG 3576396
+AI AIA 660 AV Anguilla The Valley 102 13254 NA .ai XCD Dollar +1-264 en-AI 3573511
+AL ALB 008 AL Albania Tirana 28748 2986952 EU .al ALL Lek 355 #### ^(\d{4})$ sq,el 783754 MK,GR,ME,RS,XK
+AM ARM 051 AM Armenia Yerevan 29800 2968000 AS .am AMD Dram 374 ###### ^(\d{6})$ hy 174982 GE,IR,AZ,TR
+AO AGO 024 AO Angola Luanda 1246700 13068161 AF .ao AOA Kwanza 244 pt-AO 3351879 CD,NA,ZM,CG
+AQ ATA 010 AY Antarctica 14000000 0 AN .aq 6697173
+AR ARG 032 AR Argentina Buenos Aires 2766890 41343201 SA .ar ARS Peso 54 @####@@@ ^[A-Z]?\d{4}[A-Z]{0,3}$ es-AR,en,it,de,fr,gn 3865483 CL,BO,UY,PY,BR
+AS ASM 016 AQ American Samoa Pago Pago 199 57881 OC .as USD Dollar +1-684 #####-#### 96799 en-AS,sm,to 5880801
+AT AUT 040 AU Austria Vienna 83858 8205000 EU .at EUR Euro 43 #### ^(\d{4})$ de-AT,hr,hu,sl 2782113 CH,DE,HU,SK,CZ,IT,SI,LI
+AU AUS 036 AS Australia Canberra 7686850 21515754 OC .au AUD Dollar 61 #### ^(\d{4})$ en-AU 2077456
+AW ABW 533 AA Aruba Oranjestad 193 71566 NA .aw AWG Guilder 297 nl-AW,pap,es,en 3577279
+AX ALA 248 Aland Islands Mariehamn 1580 26711 EU .ax EUR Euro +358-18 ##### ^(?:FI)*(\d{5})$ sv-AX 661882 FI
+AZ AZE 031 AJ Azerbaijan Baku 86600 8303512 AS .az AZN Manat 994 AZ #### ^(?:AZ)*(\d{4})$ az,ru,hy 587116 GE,IR,AM,TR,RU
+BA BIH 070 BK Bosnia and Herzegovina Sarajevo 51129 4590000 EU .ba BAM Marka 387 ##### ^(\d{5})$ bs,hr-BA,sr-BA 3277605 HR,ME,RS
+BB BRB 052 BB Barbados Bridgetown 431 285653 NA .bb BBD Dollar +1-246 BB##### ^(?:BB)*(\d{5})$ en-BB 3374084
+BD BGD 050 BG Bangladesh Dhaka 144000 156118464 AS .bd BDT Taka 880 #### ^(\d{4})$ bn-BD,en 1210997 MM,IN
+BE BEL 056 BE Belgium Brussels 30510 10403000 EU .be EUR Euro 32 #### ^(\d{4})$ nl-BE,fr-BE,de-BE 2802361 DE,NL,LU,FR
+BF BFA 854 UV Burkina Faso Ouagadougou 274200 16241811 AF .bf XOF Franc 226 fr-BF,mos 2361809 NE,BJ,GH,CI,TG,ML
+BG BGR 100 BU Bulgaria Sofia 110910 7148785 EU .bg BGN Lev 359 #### ^(\d{4})$ bg,tr-BG,rom 732800 MK,GR,RO,TR,RS
+BH BHR 048 BA Bahrain Manama 665 738004 AS .bh BHD Dinar 973 ####|### ^(\d{3}\d?)$ ar-BH,en,fa,ur 290291
+BI BDI 108 BY Burundi Bujumbura 27830 9863117 AF .bi BIF Franc 257 fr-BI,rn 433561 TZ,CD,RW
+BJ BEN 204 BN Benin Porto-Novo 112620 9056010 AF .bj XOF Franc 229 fr-BJ 2395170 NE,TG,BF,NG
+BL BLM 652 TB Saint Barthelemy Gustavia 21 8450 NA .gp EUR Euro 590 ##### ^(\d{5})$ fr 3578476
+BM BMU 060 BD Bermuda Hamilton 53 65365 NA .bm BMD Dollar +1-441 @@ ## ^([A-Z]{2}\d{2})$ en-BM,pt 3573345
+BN BRN 096 BX Brunei Bandar Seri Begawan 5770 395027 AS .bn BND Dollar 673 @@#### ^([A-Z]{2}\d{4})$ ms-BN,en-BN 1820814 MY
+BO BOL 068 BL Bolivia Sucre 1098580 9947418 SA .bo BOB Boliviano 591 es-BO,qu,ay 3923057 PE,CL,PY,BR,AR
+BQ BES 535 Bonaire, Saint Eustatius and Saba 328 18012 NA .bq USD Dollar 599 nl,pap,en 7626844
+BR BRA 076 BR Brazil Brasilia 8511965 201103330 SA .br BRL Real 55 #####-### ^\d{5}-\d{3}$ pt-BR,es,en,fr 3469034 SR,PE,BO,UY,GY,PY,GF,VE,CO,AR
+BS BHS 044 BF Bahamas Nassau 13940 301790 NA .bs BSD Dollar +1-242 en-BS 3572887
+BT BTN 064 BT Bhutan Thimphu 47000 699847 AS .bt BTN Ngultrum 975 dz 1252634 CN,IN
+BV BVT 074 BV Bouvet Island 49 0 AN .bv NOK Krone 3371123
+BW BWA 072 BC Botswana Gaborone 600370 2029307 AF .bw BWP Pula 267 en-BW,tn-BW 933860 ZW,ZA,NA
+BY BLR 112 BO Belarus Minsk 207600 9685000 EU .by BYN Belarusian ruble 375 ###### ^(\d{6})$ be,ru 630336 PL,LT,UA,RU,LV
+BZ BLZ 084 BH Belize Belmopan 22966 314522 NA .bz BZD Dollar 501 en-BZ,es 3582678 GT,MX
+CA CAN 124 CA Canada Ottawa 9984670 33679000 NA .ca CAD Dollar 1 @#@ #@# ^([ABCEGHJKLMNPRSTVXY]\d[ABCEGHJKLMNPRSTVWXYZ]) ?(\d[ABCEGHJKLMNPRSTVWXYZ]\d)$ en-CA,fr-CA,iu 6251999 US
+CC CCK 166 CK Cocos Islands West Island 14 628 AS .cc AUD Dollar 61 ms-CC,en 1547376
+CD COD 180 CG Democratic Republic of the Congo Kinshasa 2345410 70916439 AF .cd CDF Franc 243 fr-CD,ln,ktu,kg,sw,lua 203312 TZ,CF,SS,RW,ZM,BI,UG,CG,AO
+CF CAF 140 CT Central African Republic Bangui 622984 4844927 AF .cf XAF Franc 236 fr-CF,sg,ln,kg 239880 TD,SD,CD,SS,CM,CG
+CG COG 178 CF Republic of the Congo Brazzaville 342000 3039126 AF .cg XAF Franc 242 fr-CG,kg,ln-CG 2260494 CF,GA,CD,CM,AO
+CH CHE 756 SZ Switzerland Bern 41290 8484100 EU .ch CHF Franc 41 #### ^(\d{4})$ de-CH,fr-CH,it-CH,rm 2658434 DE,IT,LI,FR,AT
+CI CIV 384 IV Ivory Coast Yamoussoukro 322460 21058798 AF .ci XOF Franc 225 fr-CI 2287781 LR,GH,GN,BF,ML
+CK COK 184 CW Cook Islands Avarua 240 21388 OC .ck NZD Dollar 682 en-CK,mi 1899402
+CL CHL 152 CI Chile Santiago 756950 16746491 SA .cl CLP Peso 56 ####### ^(\d{7})$ es-CL 3895114 PE,BO,AR
+CM CMR 120 CM Cameroon Yaounde 475440 19294149 AF .cm XAF Franc 237 en-CM,fr-CM 2233387 TD,CF,GA,GQ,CG,NG
+CN CHN 156 CH China Beijing 9596960 1330044000 AS .cn CNY Yuan Renminbi 86 ###### ^(\d{6})$ zh-CN,yue,wuu,dta,ug,za 1814991 LA,BT,TJ,KZ,MN,AF,NP,MM,KG,PK,KP,RU,VN,IN
+CO COL 170 CO Colombia Bogota 1138910 47790000 SA .co COP Peso 57 ###### ^(\d{6})$ es-CO 3686110 EC,PE,PA,BR,VE
+CR CRI 188 CS Costa Rica San Jose 51100 4516220 NA .cr CRC Colon 506 ##### ^(\d{5})$ es-CR,en 3624060 PA,NI
+CU CUB 192 CU Cuba Havana 110860 11423000 NA .cu CUP Peso 53 CP ##### ^(?:CP)*(\d{5})$ es-CU,pap 3562981 US
+CV CPV 132 CV Cabo Verde Praia 4033 508659 AF .cv CVE Escudo 238 #### ^(\d{4})$ pt-CV 3374766
+CW CUW 531 UC Curacao Willemstad 444 141766 NA .cw ANG Guilder 599 nl,pap 7626836
+CX CXR 162 KT Christmas Island Flying Fish Cove 135 1500 OC .cx AUD Dollar 61 #### ^(\d{4})$ en,zh,ms-CC 2078138
+CY CYP 196 CY Cyprus Nicosia 9250 1102677 EU .cy EUR Euro 357 #### ^(\d{4})$ el-CY,tr-CY,en 146669
+CZ CZE 203 EZ Czechia Prague 78866 10476000 EU .cz CZK Koruna 420 ### ## ^\d{3}\s?\d{2}$ cs,sk 3077311 PL,DE,SK,AT
+DE DEU 276 GM Germany Berlin 357021 81802257 EU .de EUR Euro 49 ##### ^(\d{5})$ de 2921044 CH,PL,NL,DK,BE,CZ,LU,FR,AT
+DJ DJI 262 DJ Djibouti Djibouti 23000 740528 AF .dj DJF Franc 253 fr-DJ,ar,so-DJ,aa 223816 ER,ET,SO
+DK DNK 208 DA Denmark Copenhagen 43094 5484000 EU .dk DKK Krone 45 #### ^(\d{4})$ da-DK,en,fo,de-DK 2623032 DE
+DM DMA 212 DO Dominica Roseau 754 72813 NA .dm XCD Dollar +1-767 en-DM 3575830
+DO DOM 214 DR Dominican Republic Santo Domingo 48730 9823821 NA .do DOP Peso +1-809 and 1-829 ##### ^(\d{5})$ es-DO 3508796 HT
+DZ DZA 012 AG Algeria Algiers 2381740 34586184 AF .dz DZD Dinar 213 ##### ^(\d{5})$ ar-DZ 2589581 NE,EH,LY,MR,TN,MA,ML
+EC ECU 218 EC Ecuador Quito 283560 14790608 SA .ec USD Dollar 593 @####@ ^([a-zA-Z]\d{4}[a-zA-Z])$ es-EC 3658394 PE,CO
+EE EST 233 EN Estonia Tallinn 45226 1291170 EU .ee EUR Euro 372 ##### ^(\d{5})$ et,ru 453733 RU,LV
+EG EGY 818 EG Egypt Cairo 1001450 80471869 AF .eg EGP Pound 20 ##### ^(\d{5})$ ar-EG,en,fr 357994 LY,SD,IL,PS
+EH ESH 732 WI Western Sahara El-Aaiun 266000 273008 AF .eh MAD Dirham 212 ar,mey 2461445 DZ,MR,MA
+ER ERI 232 ER Eritrea Asmara 121320 5792984 AF .er ERN Nakfa 291 aa-ER,ar,tig,kun,ti-ER 338010 ET,SD,DJ
+ES ESP 724 SP Spain Madrid 504782 46505963 EU .es EUR Euro 34 ##### ^(\d{5})$ es-ES,ca,gl,eu,oc 2510769 AD,PT,GI,FR,MA
+ET ETH 231 ET Ethiopia Addis Ababa 1127127 88013491 AF .et ETB Birr 251 #### ^(\d{4})$ am,en-ET,om-ET,ti-ET,so-ET,sid 337996 ER,KE,SD,SS,SO,DJ
+FI FIN 246 FI Finland Helsinki 337030 5244000 EU .fi EUR Euro 358 ##### ^(?:FI)*(\d{5})$ fi-FI,sv-FI,smn 660013 NO,RU,SE
+FJ FJI 242 FJ Fiji Suva 18270 875983 OC .fj FJD Dollar 679 en-FJ,fj 2205218
+FK FLK 238 FK Falkland Islands Stanley 12173 2638 SA .fk FKP Pound 500 en-FK 3474414
+FM FSM 583 FM Micronesia Palikir 702 107708 OC .fm USD Dollar 691 ##### ^(\d{5})$ en-FM,chk,pon,yap,kos,uli,woe,nkr,kpg 2081918
+FO FRO 234 FO Faroe Islands Torshavn 1399 48228 EU .fo DKK Krone 298 ### ^(?:FO)*(\d{3})$ fo,da-FO 2622320
+FR FRA 250 FR France Paris 547030 64768389 EU .fr EUR Euro 33 ##### ^(\d{5})$ fr-FR,frp,br,co,ca,eu,oc 3017382 CH,DE,BE,LU,IT,AD,MC,ES
+GA GAB 266 GB Gabon Libreville 267667 1545255 AF .ga XAF Franc 241 fr-GA 2400553 CM,GQ,CG
+GB GBR 826 UK United Kingdom London 244820 62348447 EU .uk GBP Pound 44 @# #@@|@## #@@|@@# #@@|@@## #@@|@#@ #@@|@@#@ #@@|GIR0AA ^([Gg][Ii][Rr] 0[Aa]{2})|((([A-Za-z][0-9]{1,2})|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})|(([A-Za-z][0-9][A-Za-z])|([A-Za-z][A-Ha-hJ-Yj-y][0-9]?[A-Za-z])))) [0-9][A-Za-z]{2})$ en-GB,cy-GB,gd 2635167 IE
+GD GRD 308 GJ Grenada St. George's 344 107818 NA .gd XCD Dollar +1-473 en-GD 3580239
+GE GEO 268 GG Georgia Tbilisi 69700 4630000 AS .ge GEL Lari 995 #### ^(\d{4})$ ka,ru,hy,az 614540 AM,AZ,TR,RU
+GF GUF 254 FG French Guiana Cayenne 91000 195506 SA .gf EUR Euro 594 ##### ^((97|98)3\d{2})$ fr-GF 3381670 SR,BR
+GG GGY 831 GK Guernsey St Peter Port 78 65228 EU .gg GBP Pound +44-1481 @# #@@|@## #@@|@@# #@@|@@## #@@|@#@ #@@|@@#@ #@@|GIR0AA ^((?:(?:[A-PR-UWYZ][A-HK-Y]\d[ABEHMNPRV-Y0-9]|[A-PR-UWYZ]\d[A-HJKPS-UW0-9])\s\d[ABD-HJLNP-UW-Z]{2})|GIR\s?0AA)$ en,nrf 3042362
+GH GHA 288 GH Ghana Accra 239460 24339838 AF .gh GHS Cedi 233 en-GH,ak,ee,tw 2300660 CI,TG,BF
+GI GIB 292 GI Gibraltar Gibraltar 6.5 27884 EU .gi GIP Pound 350 en-GI,es,it,pt 2411586 ES
+GL GRL 304 GL Greenland Nuuk 2166086 56375 NA .gl DKK Krone 299 #### ^(\d{4})$ kl,da-GL,en 3425505
+GM GMB 270 GA Gambia Banjul 11300 1593256 AF .gm GMD Dalasi 220 en-GM,mnk,wof,wo,ff 2413451 SN
+GN GIN 324 GV Guinea Conakry 245857 10324025 AF .gn GNF Franc 224 fr-GN 2420477 LR,SN,SL,CI,GW,ML
+GP GLP 312 GP Guadeloupe Basse-Terre 1780 443000 NA .gp EUR Euro 590 ##### ^((97|98)\d{3})$ fr-GP 3579143
+GQ GNQ 226 EK Equatorial Guinea Malabo 28051 1014999 AF .gq XAF Franc 240 es-GQ,fr 2309096 GA,CM
+GR GRC 300 GR Greece Athens 131940 11000000 EU .gr EUR Euro 30 ### ## ^(\d{5})$ el-GR,en,fr 390903 AL,MK,TR,BG
+GS SGS 239 SX South Georgia and the South Sandwich Islands Grytviken 3903 30 AN .gs GBP Pound en 3474415
+GT GTM 320 GT Guatemala Guatemala City 108890 13550440 NA .gt GTQ Quetzal 502 ##### ^(\d{5})$ es-GT 3595528 MX,HN,BZ,SV
+GU GUM 316 GQ Guam Hagatna 549 159358 OC .gu USD Dollar +1-671 969## ^(969\d{2})$ en-GU,ch-GU 4043988
+GW GNB 624 PU Guinea-Bissau Bissau 36120 1565126 AF .gw XOF Franc 245 #### ^(\d{4})$ pt-GW,pov 2372248 SN,GN
+GY GUY 328 GY Guyana Georgetown 214970 748486 SA .gy GYD Dollar 592 en-GY 3378535 SR,BR,VE
+HK HKG 344 HK Hong Kong Hong Kong 1092 6898686 AS .hk HKD Dollar 852 zh-HK,yue,zh,en 1819730
+HM HMD 334 HM Heard Island and McDonald Islands 412 0 AN .hm AUD Dollar 1547314
+HN HND 340 HO Honduras Tegucigalpa 112090 7989415 NA .hn HNL Lempira 504 @@#### ^([A-Z]{2}\d{4})$ es-HN,cab,miq 3608932 GT,NI,SV
+HR HRV 191 HR Croatia Zagreb 56542 4284889 EU .hr HRK Kuna 385 ##### ^(?:HR)*(\d{5})$ hr-HR,sr 3202326 HU,SI,BA,ME,RS
+HT HTI 332 HA Haiti Port-au-Prince 27750 9648924 NA .ht HTG Gourde 509 HT#### ^(?:HT)*(\d{4})$ ht,fr-HT 3723988 DO
+HU HUN 348 HU Hungary Budapest 93030 9982000 EU .hu HUF Forint 36 #### ^(\d{4})$ hu-HU 719819 SK,SI,RO,UA,HR,AT,RS
+ID IDN 360 ID Indonesia Jakarta 1919440 242968342 AS .id IDR Rupiah 62 ##### ^(\d{5})$ id,en,nl,jv 1643084 PG,TL,MY
+IE IRL 372 EI Ireland Dublin 70280 4622917 EU .ie EUR Euro 353 @@@ @@@@ ^[A-Z]\d{2}$|^[A-Z]{3}[A-Z]{4}$ en-IE,ga-IE 2963597 GB
+IL ISR 376 IS Israel Jerusalem 20770 7353985 AS .il ILS Shekel 972 ####### ^(\d{7}|\d{5})$ he,ar-IL,en-IL, 294640 SY,JO,LB,EG,PS
+IM IMN 833 IM Isle of Man Douglas 572 75049 EU .im GBP Pound +44-1624 @# #@@|@## #@@|@@# #@@|@@## #@@|@#@ #@@|@@#@ #@@|GIR0AA ^((?:(?:[A-PR-UWYZ][A-HK-Y]\d[ABEHMNPRV-Y0-9]|[A-PR-UWYZ]\d[A-HJKPS-UW0-9])\s\d[ABD-HJLNP-UW-Z]{2})|GIR\s?0AA)$ en,gv 3042225
+IN IND 356 IN India New Delhi 3287590 1173108018 AS .in INR Rupee 91 ###### ^(\d{6})$ en-IN,hi,bn,te,mr,ta,ur,gu,kn,ml,or,pa,as,bh,sat,ks,ne,sd,kok,doi,mni,sit,sa,fr,lus,inc 1269750 CN,NP,MM,BT,PK,BD
+IO IOT 086 IO British Indian Ocean Territory Diego Garcia 60 4000 AS .io USD Dollar 246 en-IO 1282588
+IQ IRQ 368 IZ Iraq Baghdad 437072 29671605 AS .iq IQD Dinar 964 ##### ^(\d{5})$ ar-IQ,ku,hy 99237 SY,SA,IR,JO,TR,KW
+IR IRN 364 IR Iran Tehran 1648000 76923300 AS .ir IRR Rial 98 ########## ^(\d{10})$ fa-IR,ku 130758 TM,AF,IQ,AM,PK,AZ,TR
+IS ISL 352 IC Iceland Reykjavik 103000 308910 EU .is ISK Krona 354 ### ^(\d{3})$ is,en,de,da,sv,no 2629691
+IT ITA 380 IT Italy Rome 301230 60340328 EU .it EUR Euro 39 ##### ^(\d{5})$ it-IT,de-IT,fr-IT,sc,ca,co,sl 3175395 CH,VA,SI,SM,FR,AT
+JE JEY 832 JE Jersey Saint Helier 116 90812 EU .je GBP Pound +44-1534 @# #@@|@## #@@|@@# #@@|@@## #@@|@#@ #@@|@@#@ #@@|GIR0AA ^((?:(?:[A-PR-UWYZ][A-HK-Y]\d[ABEHMNPRV-Y0-9]|[A-PR-UWYZ]\d[A-HJKPS-UW0-9])\s\d[ABD-HJLNP-UW-Z]{2})|GIR\s?0AA)$ en,fr,nrf 3042142
+JM JAM 388 JM Jamaica Kingston 10991 2847232 NA .jm JMD Dollar +1-876 en-JM 3489940
+JO JOR 400 JO Jordan Amman 92300 6407085 AS .jo JOD Dinar 962 ##### ^(\d{5})$ ar-JO,en 248816 SY,SA,IQ,IL,PS
+JP JPN 392 JA Japan Tokyo 377835 127288000 AS .jp JPY Yen 81 ###-#### ^\d{3}-\d{4}$ ja 1861060
+KE KEN 404 KE Kenya Nairobi 582650 40046566 AF .ke KES Shilling 254 ##### ^(\d{5})$ en-KE,sw-KE 192950 ET,TZ,SS,SO,UG
+KG KGZ 417 KG Kyrgyzstan Bishkek 198500 5776500 AS .kg KGS Som 996 ###### ^(\d{6})$ ky,uz,ru 1527747 CN,TJ,UZ,KZ
+KH KHM 116 CB Cambodia Phnom Penh 181040 14453680 AS .kh KHR Riels 855 ##### ^(\d{5})$ km,fr,en 1831722 LA,TH,VN
+KI KIR 296 KR Kiribati Tarawa 811 92533 OC .ki AUD Dollar 686 en-KI,gil 4030945
+KM COM 174 CN Comoros Moroni 2170 773407 AF .km KMF Franc 269 ar,fr-KM 921929
+KN KNA 659 SC Saint Kitts and Nevis Basseterre 261 51134 NA .kn XCD Dollar +1-869 en-KN 3575174
+KP PRK 408 KN North Korea Pyongyang 120540 22912177 AS .kp KPW Won 850 ###-### ^(\d{6})$ ko-KP 1873107 CN,KR,RU
+KR KOR 410 KS South Korea Seoul 98480 48422644 AS .kr KRW Won 82 ##### ^(\d{5})$ ko-KR,en 1835841 KP
+XK XKX 0 KV Kosovo Pristina 10908 1800000 EU EUR Euro sq,sr 831053 RS,AL,MK,ME
+KW KWT 414 KU Kuwait Kuwait City 17820 2789132 AS .kw KWD Dinar 965 ##### ^(\d{5})$ ar-KW,en 285570 SA,IQ
+KY CYM 136 CJ Cayman Islands George Town 262 44270 NA .ky KYD Dollar +1-345 en-KY 3580718
+KZ KAZ 398 KZ Kazakhstan Nur-Sultan 2717300 15340000 AS .kz KZT Tenge 7 ###### ^(\d{6})$ kk,ru 1522867 TM,CN,KG,UZ,RU
+LA LAO 418 LA Laos Vientiane 236800 6368162 AS .la LAK Kip 856 ##### ^(\d{5})$ lo,fr,en 1655842 CN,MM,KH,TH,VN
+LB LBN 422 LE Lebanon Beirut 10400 4125247 AS .lb LBP Pound 961 #### ####|#### ^(\d{4}(\d{4})?)$ ar-LB,fr-LB,en,hy 272103 SY,IL
+LC LCA 662 ST Saint Lucia Castries 616 160922 NA .lc XCD Dollar +1-758 en-LC 3576468
+LI LIE 438 LS Liechtenstein Vaduz 160 35000 EU .li CHF Franc 423 #### ^(\d{4})$ de-LI 3042058 CH,AT
+LK LKA 144 CE Sri Lanka Colombo 65610 21513990 AS .lk LKR Rupee 94 ##### ^(\d{5})$ si,ta,en 1227603
+LR LBR 430 LI Liberia Monrovia 111370 3685076 AF .lr LRD Dollar 231 #### ^(\d{4})$ en-LR 2275384 SL,CI,GN
+LS LSO 426 LT Lesotho Maseru 30355 1919552 AF .ls LSL Loti 266 ### ^(\d{3})$ en-LS,st,zu,xh 932692 ZA
+LT LTU 440 LH Lithuania Vilnius 65200 2944459 EU .lt EUR Euro 370 LT-##### ^(?:LT)*(\d{5})$ lt,ru,pl 597427 PL,BY,RU,LV
+LU LUX 442 LU Luxembourg Luxembourg 2586 497538 EU .lu EUR Euro 352 L-#### ^(?:L-)?\d{4}$ lb,de-LU,fr-LU 2960313 DE,BE,FR
+LV LVA 428 LG Latvia Riga 64589 2217969 EU .lv EUR Euro 371 LV-#### ^(?:LV)*(\d{4})$ lv,ru,lt 458258 LT,EE,BY,RU
+LY LBY 434 LY Libya Tripoli 1759540 6461454 AF .ly LYD Dinar 218 ar-LY,it,en 2215636 TD,NE,DZ,SD,TN,EG
+MA MAR 504 MO Morocco Rabat 446550 33848242 AF .ma MAD Dirham 212 ##### ^(\d{5})$ ar-MA,ber,fr 2542007 DZ,EH,ES
+MC MCO 492 MN Monaco Monaco 1.95 32965 EU .mc EUR Euro 377 ##### ^(\d{5})$ fr-MC,en,it 2993457 FR
+MD MDA 498 MD Moldova Chisinau 33843 4324000 EU .md MDL Leu 373 MD-#### ^MD-\d{4}$ ro,ru,gag,tr 617790 RO,UA
+ME MNE 499 MJ Montenegro Podgorica 14026 666730 EU .me EUR Euro 382 ##### ^(\d{5})$ sr,hu,bs,sq,hr,rom 3194884 AL,HR,BA,RS,XK
+MF MAF 663 RN Saint Martin Marigot 53 35925 NA .gp EUR Euro 590 ##### ^(\d{5})$ fr 3578421 SX
+MG MDG 450 MA Madagascar Antananarivo 587040 21281844 AF .mg MGA Ariary 261 ### ^(\d{3})$ fr-MG,mg 1062947
+MH MHL 584 RM Marshall Islands Majuro 181.3 65859 OC .mh USD Dollar 692 #####-#### ^969\d{2}(-\d{4})$ mh,en-MH 2080185
+MK MKD 807 MK North Macedonia Skopje 25333 2062294 EU .mk MKD Denar 389 #### ^(\d{4})$ mk,sq,tr,rmm,sr 718075 AL,GR,BG,RS,XK
+ML MLI 466 ML Mali Bamako 1240000 13796354 AF .ml XOF Franc 223 fr-ML,bm 2453866 SN,NE,DZ,CI,GN,MR,BF
+MM MMR 104 BM Myanmar Nay Pyi Taw 678500 53414374 AS .mm MMK Kyat 95 ##### ^(\d{5})$ my 1327865 CN,LA,TH,BD,IN
+MN MNG 496 MG Mongolia Ulaanbaatar 1565000 3086918 AS .mn MNT Tugrik 976 ###### ^(\d{6})$ mn,ru 2029969 CN,RU
+MO MAC 446 MC Macao Macao 254 449198 AS .mo MOP Pataca 853 zh,zh-MO,pt 1821275
+MP MNP 580 CQ Northern Mariana Islands Saipan 477 53883 OC .mp USD Dollar +1-670 ##### ^9695\d{1}$ fil,tl,zh,ch-MP,en-MP 4041468
+MQ MTQ 474 MB Martinique Fort-de-France 1100 432900 NA .mq EUR Euro 596 ##### ^(\d{5})$ fr-MQ 3570311
+MR MRT 478 MR Mauritania Nouakchott 1030700 3205060 AF .mr MRO Ouguiya 222 ar-MR,fuc,snk,fr,mey,wo 2378080 SN,DZ,EH,ML
+MS MSR 500 MH Montserrat Plymouth 102 9341 NA .ms XCD Dollar +1-664 en-MS 3578097
+MT MLT 470 MT Malta Valletta 316 403000 EU .mt EUR Euro 356 @@@ #### ^[A-Z]{3}\s?\d{4}$ mt,en-MT 2562770
+MU MUS 480 MP Mauritius Port Louis 2040 1294104 AF .mu MUR Rupee 230 en-MU,bho,fr 934292
+MV MDV 462 MV Maldives Male 300 395650 AS .mv MVR Rufiyaa 960 ##### ^(\d{5})$ dv,en 1282028
+MW MWI 454 MI Malawi Lilongwe 118480 15447500 AF .mw MWK Kwacha 265 ny,yao,tum,swk 927384 TZ,MZ,ZM
+MX MEX 484 MX Mexico Mexico City 1972550 112468855 NA .mx MXN Peso 52 ##### ^(\d{5})$ es-MX 3996063 GT,US,BZ
+MY MYS 458 MY Malaysia Kuala Lumpur 329750 28274729 AS .my MYR Ringgit 60 ##### ^(\d{5})$ ms-MY,en,zh,ta,te,ml,pa,th 1733045 BN,TH,ID
+MZ MOZ 508 MZ Mozambique Maputo 801590 22061451 AF .mz MZN Metical 258 #### ^(\d{4})$ pt-MZ,vmw 1036973 ZW,TZ,SZ,ZA,ZM,MW
+NA NAM 516 WA Namibia Windhoek 825418 2128471 AF .na NAD Dollar 264 en-NA,af,de,hz,naq 3355338 ZA,BW,ZM,AO
+NC NCL 540 NC New Caledonia Noumea 19060 216494 OC .nc XPF Franc 687 ##### ^(\d{5})$ fr-NC 2139685
+NE NER 562 NG Niger Niamey 1267000 15878271 AF .ne XOF Franc 227 #### ^(\d{4})$ fr-NE,ha,kr,dje 2440476 TD,BJ,DZ,LY,BF,NG,ML
+NF NFK 574 NF Norfolk Island Kingston 34.6 1828 OC .nf AUD Dollar 672 #### ^(\d{4})$ en-NF 2155115
+NG NGA 566 NI Nigeria Abuja 923768 154000000 AF .ng NGN Naira 234 ###### ^(\d{6})$ en-NG,ha,yo,ig,ff 2328926 TD,NE,BJ,CM
+NI NIC 558 NU Nicaragua Managua 129494 5995928 NA .ni NIO Cordoba 505 ###-###-# ^(\d{7})$ es-NI,en 3617476 CR,HN
+NL NLD 528 NL Netherlands Amsterdam 41526 16645000 EU .nl EUR Euro 31 #### @@ ^(\d{4}[A-Z]{2})$ nl-NL,fy-NL 2750405 DE,BE
+NO NOR 578 NO Norway Oslo 324220 5009150 EU .no NOK Krone 47 #### ^(\d{4})$ no,nb,nn,se,fi 3144096 FI,RU,SE
+NP NPL 524 NP Nepal Kathmandu 140800 28951852 AS .np NPR Rupee 977 ##### ^(\d{5})$ ne,en 1282988 CN,IN
+NR NRU 520 NR Nauru Yaren 21 10065 OC .nr AUD Dollar 674 na,en-NR 2110425
+NU NIU 570 NE Niue Alofi 260 2166 OC .nu NZD Dollar 683 niu,en-NU 4036232
+NZ NZL 554 NZ New Zealand Wellington 268680 4252277 OC .nz NZD Dollar 64 #### ^(\d{4})$ en-NZ,mi 2186224
+OM OMN 512 MU Oman Muscat 212460 2967717 AS .om OMR Rial 968 ### ^(\d{3})$ ar-OM,en,bal,ur 286963 SA,YE,AE
+PA PAN 591 PM Panama Panama City 78200 3410676 NA .pa PAB Balboa 507 es-PA,en 3703430 CR,CO
+PE PER 604 PE Peru Lima 1285220 29907003 SA .pe PEN Sol 51 es-PE,qu,ay 3932488 EC,CL,BO,BR,CO
+PF PYF 258 FP French Polynesia Papeete 4167 270485 OC .pf XPF Franc 689 ##### ^((97|98)7\d{2})$ fr-PF,ty 4030656
+PG PNG 598 PP Papua New Guinea Port Moresby 462840 6064515 OC .pg PGK Kina 675 ### ^(\d{3})$ en-PG,ho,meu,tpi 2088628 ID
+PH PHL 608 RP Philippines Manila 300000 99900177 AS .ph PHP Peso 63 #### ^(\d{4})$ tl,en-PH,fil,ceb,tgl,ilo,hil,war,pam,bik,bcl,pag,mrw,tsg,mdh,cbk,krj,sgd,msb,akl,ibg,yka,mta,abx 1694008
+PK PAK 586 PK Pakistan Islamabad 803940 184404791 AS .pk PKR Rupee 92 ##### ^(\d{5})$ ur-PK,en-PK,pa,sd,ps,brh 1168579 CN,AF,IR,IN
+PL POL 616 PL Poland Warsaw 312685 38500000 EU .pl PLN Zloty 48 ##-### ^\d{2}-\d{3}$ pl 798544 DE,LT,SK,CZ,BY,UA,RU
+PM SPM 666 SB Saint Pierre and Miquelon Saint-Pierre 242 7012 NA .pm EUR Euro 508 ##### ^(97500)$ fr-PM 3424932
+PN PCN 612 PC Pitcairn Adamstown 47 46 OC .pn NZD Dollar 870 en-PN 4030699
+PR PRI 630 RQ Puerto Rico San Juan 9104 3916632 NA .pr USD Dollar +1-787 and 1-939 #####-#### ^00[679]\d{2}(?:-\d{4})?$ en-PR,es-PR 4566966
+PS PSE 275 WE Palestinian Territory East Jerusalem 5970 3800000 AS .ps ILS Shekel 970 ar-PS 6254930 JO,IL,EG
+PT PRT 620 PO Portugal Lisbon 92391 10676000 EU .pt EUR Euro 351 ####-### ^\d{4}-\d{3}\s?[a-zA-Z]{0,25}$ pt-PT,mwl 2264397 ES
+PW PLW 585 PS Palau Melekeok 458 19907 OC .pw USD Dollar 680 96940 ^(96940)$ pau,sov,en-PW,tox,ja,fil,zh 1559582
+PY PRY 600 PA Paraguay Asuncion 406750 6375830 SA .py PYG Guarani 595 #### ^(\d{4})$ es-PY,gn 3437598 BO,BR,AR
+QA QAT 634 QA Qatar Doha 11437 840926 AS .qa QAR Rial 974 ar-QA,es 289688 SA
+RE REU 638 RE Reunion Saint-Denis 2517 776948 AF .re EUR Euro 262 ##### ^((97|98)(4|7|8)\d{2})$ fr-RE 935317
+RO ROU 642 RO Romania Bucharest 237500 21959278 EU .ro RON Leu 40 ###### ^(\d{6})$ ro,hu,rom 798549 MD,HU,UA,BG,RS
+RS SRB 688 RI Serbia Belgrade 88361 7344847 EU .rs RSD Dinar 381 ###### ^(\d{6})$ sr,hu,bs,rom 6290252 AL,HU,MK,RO,HR,BA,BG,ME,XK
+RU RUS 643 RS Russia Moscow 17100000 140702000 EU .ru RUB Ruble 7 ###### ^(\d{6})$ ru,tt,xal,cau,ady,kv,ce,tyv,cv,udm,tut,mns,bua,myv,mdf,chm,ba,inh,tut,kbd,krc,av,sah,nog 2017370 GE,CN,BY,UA,KZ,LV,PL,EE,LT,FI,MN,NO,AZ,KP
+RW RWA 646 RW Rwanda Kigali 26338 11055976 AF .rw RWF Franc 250 rw,en-RW,fr-RW,sw 49518 TZ,CD,BI,UG
+SA SAU 682 SA Saudi Arabia Riyadh 1960582 25731776 AS .sa SAR Rial 966 ##### ^(\d{5})$ ar-SA 102358 QA,OM,IQ,YE,JO,AE,KW
+SB SLB 090 BP Solomon Islands Honiara 28450 559198 OC .sb SBD Dollar 677 en-SB,tpi 2103350
+SC SYC 690 SE Seychelles Victoria 455 88340 AF .sc SCR Rupee 248 en-SC,fr-SC 241170
+SD SDN 729 SU Sudan Khartoum 1861484 35000000 AF .sd SDG Pound 249 ##### ^(\d{5})$ ar-SD,en,fia 366755 SS,TD,EG,ET,ER,LY,CF
+SS SSD 728 OD South Sudan Juba 644329 8260490 AF SSP Pound 211 en 7909807 CD,CF,ET,KE,SD,UG
+SE SWE 752 SW Sweden Stockholm 449964 9828655 EU .se SEK Krona 46 ### ## ^(?:SE)?\d{3}\s\d{2}$ sv-SE,se,sma,fi-SE 2661886 NO,FI
+SG SGP 702 SN Singapore Singapore 692.7 4701069 AS .sg SGD Dollar 65 ###### ^(\d{6})$ cmn,en-SG,ms-SG,ta-SG,zh-SG 1880251
+SH SHN 654 SH Saint Helena Jamestown 410 7460 AF .sh SHP Pound 290 STHL 1ZZ ^(STHL1ZZ)$ en-SH 3370751
+SI SVN 705 SI Slovenia Ljubljana 20273 2007000 EU .si EUR Euro 386 #### ^(?:SI)*(\d{4})$ sl,sh 3190538 HU,IT,HR,AT
+SJ SJM 744 SV Svalbard and Jan Mayen Longyearbyen 62049 2550 EU .sj NOK Krone 47 #### ^(\d{4})$ no,ru 607072
+SK SVK 703 LO Slovakia Bratislava 48845 5455000 EU .sk EUR Euro 421 ### ## ^\d{3}\s?\d{2}$ sk,hu 3057568 PL,HU,CZ,UA,AT
+SL SLE 694 SL Sierra Leone Freetown 71740 5245695 AF .sl SLL Leone 232 en-SL,men,tem 2403846 LR,GN
+SM SMR 674 SM San Marino San Marino 61.2 31477 EU .sm EUR Euro 378 4789# ^(4789\d)$ it-SM 3168068 IT
+SN SEN 686 SG Senegal Dakar 196190 12323252 AF .sn XOF Franc 221 ##### ^(\d{5})$ fr-SN,wo,fuc,mnk 2245662 GN,MR,GW,GM,ML
+SO SOM 706 SO Somalia Mogadishu 637657 10112453 AF .so SOS Shilling 252 @@ ##### ^([A-Z]{2}\d{5})$ so-SO,ar-SO,it,en-SO 51537 ET,KE,DJ
+SR SUR 740 NS Suriname Paramaribo 163270 492829 SA .sr SRD Dollar 597 nl-SR,en,srn,hns,jv 3382998 GY,BR,GF
+ST STP 678 TP Sao Tome and Principe Sao Tome 1001 197700 AF .st STD Dobra 239 pt-ST 2410758
+SV SLV 222 ES El Salvador San Salvador 21040 6052064 NA .sv USD Dollar 503 CP #### ^(?:CP)*(\d{4})$ es-SV 3585968 GT,HN
+SX SXM 534 NN Sint Maarten Philipsburg 21 37429 NA .sx ANG Guilder 599 nl,en 7609695 MF
+SY SYR 760 SY Syria Damascus 185180 22198110 AS .sy SYP Pound 963 ar-SY,ku,hy,arc,fr,en 163843 IQ,JO,IL,TR,LB
+SZ SWZ 748 WZ Eswatini Mbabane 17363 1354051 AF .sz SZL Lilangeni 268 @### ^([A-Z]\d{3})$ en-SZ,ss-SZ 934841 ZA,MZ
+TC TCA 796 TK Turks and Caicos Islands Cockburn Town 430 20556 NA .tc USD Dollar +1-649 TKCA 1ZZ ^(TKCA 1ZZ)$ en-TC 3576916
+TD TCD 148 CD Chad N'Djamena 1284000 10543464 AF .td XAF Franc 235 fr-TD,ar-TD,sre 2434508 NE,LY,CF,SD,CM,NG
+TF ATF 260 FS French Southern Territories Port-aux-Francais 7829 140 AN .tf EUR Euro fr 1546748
+TG TGO 768 TO Togo Lome 56785 6587239 AF .tg XOF Franc 228 fr-TG,ee,hna,kbp,dag,ha 2363686 BJ,GH,BF
+TH THA 764 TH Thailand Bangkok 514000 67089500 AS .th THB Baht 66 ##### ^(\d{5})$ th,en 1605651 LA,MM,KH,MY
+TJ TJK 762 TI Tajikistan Dushanbe 143100 7487489 AS .tj TJS Somoni 992 ###### ^(\d{6})$ tg,ru 1220409 CN,AF,KG,UZ
+TK TKL 772 TL Tokelau 10 1466 OC .tk NZD Dollar 690 tkl,en-TK 4031074
+TL TLS 626 TT Timor Leste Dili 15007 1154625 OC .tl USD Dollar 670 tet,pt-TL,id,en 1966436 ID
+TM TKM 795 TX Turkmenistan Ashgabat 488100 4940916 AS .tm TMT Manat 993 ###### ^(\d{6})$ tk,ru,uz 1218197 AF,IR,UZ,KZ
+TN TUN 788 TS Tunisia Tunis 163610 10589025 AF .tn TND Dinar 216 #### ^(\d{4})$ ar-TN,fr 2464461 DZ,LY
+TO TON 776 TN Tonga Nuku'alofa 748 122580 OC .to TOP Pa'anga 676 to,en-TO 4032283
+TR TUR 792 TU Turkey Ankara 780580 77804122 AS .tr TRY Lira 90 ##### ^(\d{5})$ tr-TR,ku,diq,az,av 298795 SY,GE,IQ,IR,GR,AM,AZ,BG
+TT TTO 780 TD Trinidad and Tobago Port of Spain 5128 1328019 NA .tt TTD Dollar +1-868 en-TT,hns,fr,es,zh 3573591
+TV TUV 798 TV Tuvalu Funafuti 26 10472 OC .tv AUD Dollar 688 tvl,en,sm,gil 2110297
+TW TWN 158 TW Taiwan Taipei 35980 22894384 AS .tw TWD Dollar 886 ##### ^(\d{5})$ zh-TW,zh,nan,hak 1668284
+TZ TZA 834 TZ Tanzania Dodoma 945087 41892895 AF .tz TZS Shilling 255 sw-TZ,en,ar 149590 MZ,KE,CD,RW,ZM,BI,UG,MW
+UA UKR 804 UP Ukraine Kyiv 603700 45415596 EU .ua UAH Hryvnia 380 ##### ^(\d{5})$ uk,ru-UA,rom,pl,hu 690791 PL,MD,HU,SK,BY,RO,RU
+UG UGA 800 UG Uganda Kampala 236040 33398682 AF .ug UGX Shilling 256 en-UG,lg,sw,ar 226074 TZ,KE,SS,CD,RW
+UM UMI 581 United States Minor Outlying Islands 0 0 OC .um USD Dollar 1 en-UM 5854968
+US USA 840 US United States Washington 9629091 310232863 NA .us USD Dollar 1 #####-#### ^\d{5}(-\d{4})?$ en-US,es-US,haw,fr 6252001 CA,MX,CU
+UY URY 858 UY Uruguay Montevideo 176220 3477000 SA .uy UYU Peso 598 ##### ^(\d{5})$ es-UY 3439705 BR,AR
+UZ UZB 860 UZ Uzbekistan Tashkent 447400 27865738 AS .uz UZS Som 998 ###### ^(\d{6})$ uz,ru,tg 1512440 TM,AF,KG,TJ,KZ
+VA VAT 336 VT Vatican Vatican City 0.44 921 EU .va EUR Euro 379 ##### ^(\d{5})$ la,it,fr 3164670 IT
+VC VCT 670 VC Saint Vincent and the Grenadines Kingstown 389 104217 NA .vc XCD Dollar +1-784 en-VC,fr 3577815
+VE VEN 862 VE Venezuela Caracas 912050 27223228 SA .ve VES Bolivar Soberano 58 #### ^(\d{4})$ es-VE 3625428 GY,BR,CO
+VG VGB 092 VI British Virgin Islands Road Town 153 21730 NA .vg USD Dollar +1-284 en-VG 3577718
+VI VIR 850 VQ U.S. Virgin Islands Charlotte Amalie 352 108708 NA .vi USD Dollar +1-340 #####-#### ^008\d{2}(?:-\d{4})?$ en-VI 4796775
+VN VNM 704 VM Vietnam Hanoi 329560 89571130 AS .vn VND Dong 84 ###### ^(\d{6})$ vi,en,fr,zh,km 1562822 CN,LA,KH
+VU VUT 548 NH Vanuatu Port Vila 12200 221552 OC .vu VUV Vatu 678 bi,en-VU,fr-VU 2134431
+WF WLF 876 WF Wallis and Futuna Mata Utu 274 16025 OC .wf XPF Franc 681 ##### ^(986\d{2})$ wls,fud,fr-WF 4034749
+WS WSM 882 WS Samoa Apia 2944 192001 OC .ws WST Tala 685 sm,en-WS 4034894
+YE YEM 887 YM Yemen Sanaa 527970 23495361 AS .ye YER Rial 967 ar-YE 69543 SA,OM
+YT MYT 175 MF Mayotte Mamoudzou 374 159042 AF .yt EUR Euro 262 ##### ^(\d{5})$ fr-YT 1024031
+ZA ZAF 710 SF South Africa Pretoria 1219912 49000000 AF .za ZAR Rand 27 #### ^(\d{4})$ zu,xh,af,nso,en-ZA,tn,st,ts,ss,ve,nr 953987 ZW,SZ,MZ,BW,NA,LS
+ZM ZMB 894 ZA Zambia Lusaka 752614 13460305 AF .zm ZMW Kwacha 260 ##### ^(\d{5})$ en-ZM,bem,loz,lun,lue,ny,toi 895949 ZW,TZ,MZ,CD,NA,MW,AO
+ZW ZWE 716 ZI Zimbabwe Harare 390580 13061000 AF .zw ZWL Dollar 263 en-ZW,sn,nr,nd 878675 ZA,MZ,BW,ZM
+CS SCG 891 YI Serbia and Montenegro Belgrade 102350 10829175 EU .cs RSD Dinar 381 ##### ^(\d{5})$ cu,hu,sq,sr 8505033 AL,HU,MK,RO,HR,BA,BG
+AN ANT 530 NT Netherlands Antilles Willemstad 960 300000 NA .an ANG Guilder 599 nl-AN,en,es 8505032 GP
diff --git a/debian/src/geoip-asn-csv-to-dat.cpp b/debian/src/geoip-asn-csv-to-dat.cpp
new file mode 100644
index 0000000..4db11cc
--- /dev/null
+++ b/debian/src/geoip-asn-csv-to-dat.cpp
@@ -0,0 +1,897 @@
+/* geoip-asn-csv-to-dat - convert an AS number database from CSV to GeoIP binary format
+ *
+ * Copyright (c) 2009 Kalle Olavi Niemitalo.
+ * Copyright (c) 2011 Patrick Matthäi
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define _GNU_SOURCE 1
+#include <algorithm>
+#include <arpa/inet.h>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <error.h>
+#include <fstream>
+#include <getopt.h>
+#include <iostream>
+#include <queue>
+#include <set>
+#include <stack>
+#include <string>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sysexits.h>
+#include <vector>
+#include <GeoIP.h>
+#include <map>
+
+// Format of GeoIP Country database files
+// ======================================
+//
+// 1. Binary trie mapping IP addresses to countries.
+// 2. Optional unused data.
+// 3. Optional database-info block.
+// 4. Optional structure-info block.
+//
+// Binary trie
+// -----------
+//
+// The trie treats IP addresses as bit sequences and maps them to
+// numbers. In the country database, each such number is 0xFFFF00 +
+// the country ID that GeoIP_id_by_ipnum() would return. The
+// meanings of country IDs are hardcoded in libGeoIP and cannot be
+// overridden by the database.
+//
+// The root node of the trie is at the beginning of the file, and the
+// other nodes then follow it. Each node has the same size and
+// consists of two little-endian pointers that correspond to the two
+// possible values of a bit. In the country database, the pointers are
+// 24-bit, and each node is thus 6 bytes long.
+//
+// Each pointer is one of:
+// - The number that the whole lookup should return, i.e. 0xFFFF00 + id
+// in the country database.
+// - The number of the node that should be examined next, counting from
+// 0 at the beginning of the file. Pointing back to nodes with
+// smaller numbers is allowed, but loops are not allowed.
+//
+// Optional unused data
+// --------------------
+//
+// The file format seems to permit extra data between the binary trie
+// and the optional blocks.
+//
+// Optional database-info block
+// ----------------------------
+//
+// Near the end of the file, there may be a three-byte tag (0x00 0x00
+// 0x00) followed by at most DATABASE_INFO_MAX_SIZE - 1 = 99 bytes of
+// text that describes the database. GeoIP_database_info() returns
+// this text and appends a terminating '\0'.
+//
+// The GeoLite Country IPv4 database downloadable from MaxMind
+// includes this database-info block.
+//
+// Optional structure-info block
+// -----------------------------
+//
+// At the very end of the file, there may be a three-byte tag (0xFF
+// 0xFF 0xFF) followed by at most STRUCTURE_INFO_MAX_SIZE - 1 = 19
+// bytes. The first byte is the database type,
+// e.g. GEOIP_COUNTRY_EDITION = 1 or GEOIP_COUNTRY_EDITION_V6 = 12,
+// possibly with 105 added to it. Type-specific information then
+// follows. There is no type-specific information for the country
+// editions.
+//
+// The GeoLite Country IPv4 database downloadable from MaxMind does
+// not include this structure-info block.
+
+namespace {
+ /** A binary trie stored as a flat array of nodes.
+ *
+ * Each node has two edges, one per bit value. An edge value that
+ * is smaller than the number of nodes is the index of the next
+ * node; any other value is treated as a leaf. */
+ class binary_trie
+ {
+ public:
+ // Type of edge values: holds node indexes and leaf values alike.
+ typedef uint_fast32_t edge_type;
+
+ // Create a trie whose root maps every bit sequence to leaf.
+ explicit binary_trie(edge_type leaf);
+ // Map all bit sequences in [range_min, range_max] to leaf.
+ void set_range(
+ const uint8_t range_min[],
+ const uint8_t range_max[],
+ std::size_t bit_count,
+ edge_type leaf);
+ // Renumber the nodes in depth-first order.
+ void reorder_depth_first();
+ // Renumber the nodes in blocks of about bytes_per_block bytes.
+ void reorder_in_blocks(std::size_t bytes_per_block);
+ // Write every node as two 3-byte little-endian edge values.
+ void write_binary(std::ostream &dat_stream) const;
+ // Write the number of nodes as 3 little-endian bytes.
+ void write_segment(std::ostream &dat_stream) const;
+ // Replace the 0x80000000 marker bit of data-record edges by
+ // an offset of nodes.size().
+ void update_records();
+
+ private:
+ struct node
+ {
+ // edges[0] is followed for a 0 bit, edges[1] for a 1 bit.
+ edge_type edges[2];
+ };
+ // nodes[0] is the root; it is created by the constructor.
+ std::vector<node> nodes;
+
+ // This could be std::vector<bool> but that seems slower.
+ typedef std::vector<uint8_t> bits_vector;
+
+ // Recursive helpers of set_range(). A NULL min_bits or
+ // max_bits means the range is unbounded on that side.
+ void set_range_in_node(
+ const bits_vector *min_bits,
+ const bits_vector *max_bits,
+ std::size_t bit_pos,
+ edge_type edit_node,
+ edge_type leaf);
+ void set_range_in_edge(
+ const bits_vector *min_bits,
+ const bits_vector *max_bits,
+ std::size_t bit_pos,
+ edge_type edit_node,
+ bool bit,
+ edge_type leaf);
+ // Rebuild the node array according to the given renumbering;
+ // shared by both public reorder_* functions.
+ void reorder(
+ const std::vector<edge_type> &old_to_new,
+ const std::vector<edge_type> &new_to_old);
+ };
+}
+
+/** Build a trie that consists of nothing but its root node.
+ *
+ * \param leaf
+ * The value stored in both edges of the root. Callers should pass
+ * the value that stands for "nothing was found". */
+binary_trie::binary_trie(edge_type leaf)
+{
+    node root;
+    root.edges[0] = leaf;
+    root.edges[1] = leaf;
+    nodes.push_back(root);
+}
+
+/** Map every bit sequence within an inclusive range to one leaf.
+ *
+ * \param range_min
+ * First bit sequence of the range, packed eight bits per byte. The
+ * most significant bit of the sequence is the most significant bit
+ * of the first byte.
+ *
+ * \param range_max
+ * Last bit sequence of the range, packed the same way.
+ *
+ * \param bit_count
+ * Number of bits in each of the two sequences.
+ *
+ * \param leaf
+ * The leaf that all bit sequences in the range should map to. */
+void
+binary_trie::set_range(
+    const uint8_t range_min[],
+    const uint8_t range_max[],
+    std::size_t bit_count,
+    edge_type leaf)
+{
+    // Unpack both bounds into one element (0 or 1) per bit so that
+    // the recursive helpers can index them by bit position.
+    bits_vector lower(bit_count);
+    bits_vector upper(bit_count);
+    for (std::size_t pos = 0; pos < bit_count; ++pos) {
+        const std::size_t byte_index = pos >> 3;
+        const std::size_t shift = 7 - (pos & 7);
+        lower[pos] = (range_min[byte_index] >> shift) & 1;
+        upper[pos] = (range_max[byte_index] >> shift) & 1;
+    }
+
+    // Start at bit 0 in the root node (index 0).
+    set_range_in_node(&lower, &upper, 0, 0, leaf);
+}
+
+/** Apply a range assignment to one node by descending into those of
+ * its two edges that the range touches.
+ *
+ * \param min_bits
+ * First bit sequence of the range, or NULL if unbounded.
+ *
+ * \param max_bits
+ * Last bit sequence of the range, or NULL if unbounded.
+ *
+ * \param bit_pos
+ * Which bit in the sequences corresponds to \a edit_node.
+ *
+ * \param edit_node
+ * The node to be modified.
+ *
+ * \param leaf
+ * The leaf that all bit sequences in the range should map to. */
+void
+binary_trie::set_range_in_node(
+    const bits_vector *min_bits,
+    const bits_vector *max_bits,
+    std::size_t bit_pos,
+    edge_type edit_node,
+    edge_type leaf)
+{
+    // A lower bound whose bit is 1 excludes the whole 0 edge; an
+    // upper bound whose bit is 0 excludes the whole 1 edge.
+    const bool min_is_one = min_bits && (*min_bits)[bit_pos] != false;
+    const bool max_is_zero = max_bits && (*max_bits)[bit_pos] == false;
+
+    if (!min_is_one) {
+        // The upper bound stays relevant below the 0 edge only if
+        // it passes through that edge itself.
+        set_range_in_edge(min_bits,
+                          max_is_zero ? max_bits : NULL,
+                          bit_pos + 1, edit_node, false, leaf);
+    }
+    if (!max_is_zero) {
+        // Likewise, the lower bound matters below the 1 edge only
+        // if it passes through that edge.
+        set_range_in_edge(min_is_one ? min_bits : NULL,
+                          max_bits,
+                          bit_pos + 1, edit_node, true, leaf);
+    }
+}
+
+/** Apply a range assignment to one edge of a node.
+ *
+ * \param min_bits
+ * First bit sequence of the range, or NULL if unbounded.
+ *
+ * \param max_bits
+ * Last bit sequence of the range, or NULL if unbounded.
+ *
+ * \param bit_pos
+ * Position of the first bit that lies below this edge.
+ *
+ * \param edit_node
+ * The node in which the edge to be modified is located.
+ *
+ * \param bit
+ * Which edge of \a edit_node should be modified.
+ *
+ * \param leaf
+ * The leaf that all bit sequences in the range should map to. */
+void
+binary_trie::set_range_in_edge(
+    const bits_vector *min_bits,
+    const bits_vector *max_bits,
+    std::size_t bit_pos,
+    edge_type edit_node,
+    bool bit,
+    edge_type leaf)
+{
+    // The range covers the whole edge unless the remaining part of
+    // the lower bound has a 1 bit or the remaining part of the upper
+    // bound has a 0 bit.
+    const bool min_cuts_in = min_bits
+        && std::find(min_bits->begin() + bit_pos, min_bits->end(),
+                     true) != min_bits->end();
+    const bool max_cuts_in = max_bits
+        && std::find(max_bits->begin() + bit_pos, max_bits->end(),
+                     false) != max_bits->end();
+
+    if (!min_cuts_in && !max_cuts_in) {
+        nodes[edit_node].edges[bit] = leaf;
+        return;
+    }
+
+    edge_type child = nodes[edit_node].edges[bit];
+    if (child >= nodes.size()) {
+        // The edge currently ends in a leaf. Replace it with a new
+        // node whose edges both keep the old leaf, so that the part
+        // outside the range is preserved.
+        node split;
+        split.edges[0] = child;
+        split.edges[1] = child;
+        child = nodes.size();
+        nodes.push_back(split);
+        nodes[edit_node].edges[bit] = child;
+    }
+
+    set_range_in_node(min_bits, max_bits, bit_pos, child, leaf);
+}
+
+/** Renumber the nodes in depth-first order, starting at the root and
+ * visiting the 0 edge of each node before its 1 edge. */
+void
+binary_trie::reorder_depth_first()
+{
+    const std::size_t node_count = nodes.size();
+    std::vector<edge_type> old_to_new(node_count, edge_type(-1));
+    std::vector<edge_type> new_to_old;
+    new_to_old.reserve(node_count);
+
+    std::stack<edge_type> pending;
+    pending.push(0);
+    while (!pending.empty()) {
+        const edge_type current = pending.top();
+        pending.pop();
+        if (current >= node_count)
+            continue;   // a leaf value, not a node
+
+        old_to_new[current] = new_to_old.size();
+        new_to_old.push_back(current);
+
+        // Push the 1 edge first so that the 0 edge is popped first.
+        pending.push(nodes[current].edges[1]);
+        pending.push(nodes[current].edges[0]);
+    }
+
+    reorder(old_to_new, new_to_old);
+}
+
+/** Renumber the nodes to make lookups use CPU and disk caches more
+ * effectively.
+ *
+ * First group the nodes into blocks so that each block contains the
+ * root of a subtrie and as many levels of its descendants as will
+ * fit. This way, after the root is paged in, the next few lookup
+ * steps need not page in anything else. Then, sort the nodes of each
+ * block in depth-first order. That should give each lookup almost
+ * 1/2 chance to find the next node immediately adjacent.
+ *
+ * With a block size of 1024 bytes, this renumbering reduces the time
+ * required for random lookups by about 1.1%, compared to a plain
+ * depth-first order. However, it's still 2.3% slower than the
+ * database optimized by MaxMind.
+ *
+ * \param bytes_per_block
+ * The size of one block in bytes. Each node is counted as 6 bytes
+ * when a block is filled. */
+void
+binary_trie::reorder_in_blocks(
+ std::size_t bytes_per_block)
+{
+ // Marks nodes that have not been placed in the output yet.
+ const edge_type none = -1;
+ std::vector<edge_type> old_to_new, new_to_old;
+ // Signed, because the last node of a block may push it below zero.
+ ssize_t bytes_left = bytes_per_block;
+ old_to_new.resize(nodes.size(), none);
+ new_to_old.reserve(nodes.size());
+ for (edge_type subtrie = 0; subtrie < nodes.size(); ++subtrie) {
+ // If subtrie has already been added to the output,
+ // ignore it.
+ if (old_to_new[subtrie] != none)
+ continue;
+
+ // Walk breadth-first from subtrie until we have a
+ // block full of nodes or the subtrie runs out. Don't
+ // add these nodes immediately to the output, however.
+ // Instead just list them in nodes_in_block.
+ std::set<edge_type> nodes_in_block;
+ std::queue<edge_type> breadth_first;
+ breadth_first.push(subtrie);
+ // Begin a fresh block only if the previous one was used up;
+ // otherwise this subtrie continues in the space that is left.
+ if (bytes_left <= 0)
+ bytes_left += bytes_per_block;
+ while (bytes_left > 0 && !breadth_first.empty()) {
+ edge_type edge = breadth_first.front();
+ breadth_first.pop();
+ // Values that are not node indexes are leaves; skip them.
+ if (edge >= nodes.size())
+ continue;
+
+ // Let the last node of the block straddle the
+ // block boundary. That's better than making
+ // the hotter first node do so.
+ bytes_left -= 6;
+ nodes_in_block.insert(edge);
+
+ breadth_first.push(nodes[edge].edges[0]);
+ breadth_first.push(nodes[edge].edges[1]);
+ }
+
+ // Add the nodes from nodes_in_block to the output in
+ // depth-first order. This assumes they are all
+ // reachable from subtrie.
+ std::stack<edge_type> depth_first;
+ depth_first.push(subtrie);
+ while (!depth_first.empty()) {
+ edge_type edge = depth_first.top();
+ depth_first.pop();
+ // Leaves and nodes outside this block are never in the
+ // set, so the walk stops at the block's border.
+ if (nodes_in_block.find(edge)
+ == nodes_in_block.end())
+ continue;
+
+ old_to_new[edge] = new_to_old.size();
+ new_to_old.push_back(edge);
+
+ // Push the 1 edge first so that the 0 edge is popped first.
+ depth_first.push(nodes[edge].edges[1]);
+ depth_first.push(nodes[edge].edges[0]);
+ }
+ }
+ reorder(old_to_new, new_to_old);
+}
+
+/** Rebuild the node array according to a renumbering.
+ *
+ * \param old_to_new
+ * Maps the current index of each node to its new index.
+ *
+ * \param new_to_old
+ * Lists the current node indexes in their new order. Its length
+ * becomes the new number of nodes. */
+void
+binary_trie::reorder(
+    const std::vector<edge_type> &old_to_new,
+    const std::vector<edge_type> &new_to_old)
+{
+    const std::size_t old_count = nodes.size();
+    std::vector<node> renumbered;
+    renumbered.reserve(new_to_old.size());
+
+    for (std::size_t new_index = 0; new_index < new_to_old.size();
+         ++new_index) {
+        const node &source = nodes[new_to_old[new_index]];
+        node target;
+        for (int bit = 0; bit < 2; ++bit) {
+            const edge_type edge = source.edges[bit];
+            // Only edges that point to nodes need translating;
+            // leaf values are copied unchanged.
+            target.edges[bit] =
+                (edge < old_count) ? old_to_new[edge] : edge;
+        }
+        renumbered.push_back(target);
+    }
+
+    nodes.swap(renumbered);
+}
+
+/** Add the size of the trie (number of nodes) to data records.
+ *
+ * While the trie is being built, the most significant bit of a
+ * 32-bit edge value marks it as a data record rather than a pointer
+ * to another node. This function removes that bit and adds the
+ * number of nodes instead, because that is how libGeoIP tells
+ * records in the data section from nodes. */
+void
+binary_trie::update_records()
+{
+    const edge_type data_flag = 0x80000000;
+    const edge_type node_count = nodes.size();
+
+    for (std::size_t index = 0; index < nodes.size(); ++index) {
+        for (int side = 0; side < 2; ++side) {
+            edge_type &edge = nodes[index].edges[side];
+            // A bare flag (offset 0) becomes exactly node_count.
+            if (edge & data_flag)
+                edge = (edge & 0x7FFFFFFF) + node_count;
+        }
+    }
+}
+
+/** Write the number of nodes as a 3-byte little-endian value; this
+ * is the segment offset stored at the end of the file. */
+void
+binary_trie::write_segment(std::ostream &dat_stream) const
+{
+    const int node_count = nodes.size();
+    // Least significant byte first.
+    for (int shift = 0; shift <= 16; shift += 8)
+        dat_stream << (char) (0xFF & (node_count >> shift));
+}
+
+/** Write the trie to a stream in GeoIP binary format.
+ *
+ * Every node is written as 6 bytes: the low 24 bits of edge 0
+ * followed by the low 24 bits of edge 1, each in little-endian byte
+ * order. Writing stops early if the stream reports a fatal error;
+ * the caller is expected to check the stream afterwards.
+ *
+ * \param dat_stream
+ * The stream to write to. */
+void
+binary_trie::write_binary(std::ostream &dat_stream) const
+{
+    for (std::vector<node>::const_iterator it = nodes.begin();
+         it != nodes.end(); ++it) {
+        // Fill the record with explicit casts. Brace-initializing
+        // bytes from edge_type expressions is a narrowing
+        // conversion, which C++11 and later reject.
+        char record[6];
+        for (int side = 0; side < 2; ++side) {
+            const edge_type edge = it->edges[side];
+            for (int byte = 0; byte < 3; ++byte) {
+                record[side * 3 + byte] = static_cast<char>(
+                    static_cast<unsigned char>(
+                        (edge >> (8 * byte)) & 0xFF));
+            }
+        }
+        dat_stream.write(record, sizeof record);
+        if (dat_stream.bad())
+            return;
+    }
+}
+
+
+namespace {
+    /** Split one line of the IPv4 CSV into its fields.
+     *
+     * The first two comma-separated fields are appended as they
+     * are. Everything after the second comma is appended as the
+     * third field, so it may itself contain commas. If it starts
+     * with a quote character, its first and last characters are
+     * dropped.
+     *
+     * \param line
+     * The CSV line, without the line terminator.
+     *
+     * \param fields
+     * The fields are appended to this vector. If the line has fewer
+     * than two commas, fewer than three fields are appended, so the
+     * caller can detect the malformed line by counting. */
+    void
+    v4_csv_line_to_vector(
+        const std::string line,
+        std::vector<std::string> &fields)
+    {
+        const std::string delim = ",";
+        std::string buf(line);
+        for (int i = 0; i < 2; ++i) {
+            const std::size_t fs = buf.find(delim);
+            if (fs == std::string::npos) {
+                // Without this check, npos + 1 wraps around to 0,
+                // nothing is erased, and the same text is pushed
+                // again as if it were the next field.
+                return;
+            }
+            fields.push_back(buf.substr(0, fs));
+            buf.erase(0, fs + delim.length());
+        }
+
+        const bool quoted =
+            !buf.empty() && (buf[0] == '"' || buf[0] == '\'');
+        if (!quoted)
+            fields.push_back(buf);
+        else if (buf.length() >= 2)
+            fields.push_back(buf.substr(1, buf.length() - 2));
+        else
+            fields.push_back(std::string());    // a lone quote
+    }
+
+    /** Split one line of the IPv6 CSV into its fields.
+     *
+     * The line is split from the right: the last three
+     * comma-separated fields are appended in reverse order (last
+     * field first). Whatever precedes them is appended as the
+     * fourth field, so it may itself contain commas. If it starts
+     * with a quote character, its first and last characters are
+     * dropped.
+     *
+     * \param line
+     * The CSV line, without the line terminator.
+     *
+     * \param fields
+     * The fields are appended to this vector. If the line has fewer
+     * than three commas, fewer than four fields are appended, so the
+     * caller can detect the malformed line by counting. */
+    void
+    v6_csv_line_to_vector(
+        const std::string line,
+        std::vector<std::string> & fields)
+    {
+        const std::string delim = ",";
+        std::string buf(line);
+        for (int i = 0; i < 3; ++i) {
+            const std::size_t fs = buf.rfind(delim);
+            if (fs == std::string::npos) {
+                // erase(npos, ...) would throw std::out_of_range;
+                // leave the error to the caller's field count check.
+                return;
+            }
+            fields.push_back(buf.substr(fs + delim.length()));
+            buf.erase(fs);
+        }
+
+        const bool quoted =
+            !buf.empty() && (buf[0] == '"' || buf[0] == '\'');
+        if (!quoted)
+            fields.push_back(buf);
+        else if (buf.length() >= 2)
+            fields.push_back(buf.substr(1, buf.length() - 2));
+        else
+            fields.push_back(std::string());    // a lone quote
+    }
+
+    /** Load ranges of IP addresses from a CSV-formatted stream to
+     * a trie, and collect the AS descriptions they refer to.
+     *
+     * Any malformed line is reported with error_at_line() and
+     * terminates the program with EX_DATAERR.
+     *
+     * \param trie
+     * Load the ranges to this trie, overwriting original values.
+     * Each leaf is 0x80000000 plus the offset of the description
+     * in \a database_segment.
+     *
+     * \param database_segment
+     * One NUL byte is appended first, then each distinct AS
+     * description once, followed by a NUL byte.
+     *
+     * \param csv_file_name
+     * The name of the file that \a csv_stream is reading.
+     * This string is used only for error messages.
+     *
+     * \param csv_stream
+     * Load the ranges from this stream.
+     *
+     * \param address_family
+     * The type of IP addresses in the CSV data: either AF_INET
+     * for IPv4 or AF_INET6 for IPv6. */
+    void
+    csv_stream_to_trie_db(
+        binary_trie &trie,
+        std::string &database_segment,
+        const char *csv_file_name,
+        std::istream &csv_stream,
+        int address_family)
+    {
+        enum {
+            V4_CSV_FIELD_MIN_DECIMAL,
+            V4_CSV_FIELD_MAX_DECIMAL,
+            V4_CSV_FIELD_ASNUM_DESCRIPTION,
+            V4_CSV_FIELDS
+        };
+
+        // v6_csv_line_to_vector() splits from the right, hence the
+        // reversed order.
+        enum {
+            V6_CSV_FIELD_NET_BITS,
+            V6_CSV_FIELD_MAX_TEXT,
+            V6_CSV_FIELD_MIN_TEXT,
+            V6_CSV_FIELD_ASNUM_DESCRIPTION,
+            V6_CSV_FIELDS
+        };
+
+        // Maps each AS description that has already been stored to
+        // its offset in database_segment.
+        std::map<std::string,int> segment_offset;
+        std::vector<std::string> csv_fields;
+        std::string csv_line;
+        int csv_line_number = 0;
+        database_segment += '\0'; // padding so that record 0 is not at the start of the db.
+        while (getline(csv_stream, csv_line)) {
+            ++csv_line_number;
+            csv_fields.clear();
+
+            std::size_t description_field;
+            switch (address_family) {
+            case AF_INET:
+                v4_csv_line_to_vector(csv_line, csv_fields);
+                if (csv_fields.size() != V4_CSV_FIELDS) {
+                    error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                                  "Wrong number of fields");
+                }
+                description_field = V4_CSV_FIELD_ASNUM_DESCRIPTION;
+                break;
+            case AF_INET6:
+                v6_csv_line_to_vector(csv_line, csv_fields);
+                if (csv_fields.size() != V6_CSV_FIELDS) {
+                    error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                                  "Wrong number of fields");
+                }
+                description_field = V6_CSV_FIELD_ASNUM_DESCRIPTION;
+                break;
+            default:
+                abort();
+            }
+
+            // Store each distinct description only once.
+            const std::string &as = csv_fields[description_field];
+            int offset;
+            const std::map<std::string,int>::const_iterator known =
+                segment_offset.find(as);
+            if (known == segment_offset.end()) {
+                offset = database_segment.length(); // start of record
+                segment_offset[as] = offset;
+                database_segment += as;
+                database_segment += '\0';
+            } else {
+                offset = known->second;
+            }
+
+            // use the MSB to indicate that this is a data record. Later, the field
+            // is set to the segment offset + the number of nodes in the trie.
+            const binary_trie::edge_type leaf = 0x80000000 | offset;
+            union {
+                struct in_addr inet;
+                uint8_t inetbytes[4];
+                struct in6_addr inet6;
+            } minaddr, maxaddr;
+
+            switch (address_family) {
+            case AF_INET:
+                // inet_aton() returns 0 for text it cannot parse and
+                // then leaves the address unset; never feed that to
+                // the trie.
+                if (inet_aton(csv_fields[V4_CSV_FIELD_MIN_DECIMAL].c_str(), &(minaddr.inet)) == 0) {
+                    error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                                  "Cannot parse minimum address: %s",
+                                  csv_fields[V4_CSV_FIELD_MIN_DECIMAL].c_str());
+                }
+                if (inet_aton(csv_fields[V4_CSV_FIELD_MAX_DECIMAL].c_str(), &(maxaddr.inet)) == 0) {
+                    error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                                  "Cannot parse maximum address: %s",
+                                  csv_fields[V4_CSV_FIELD_MAX_DECIMAL].c_str());
+                }
+                trie.set_range(minaddr.inetbytes, maxaddr.inetbytes, 32, leaf);
+                break;
+
+            case AF_INET6:
+                if (inet_pton(address_family, csv_fields[V6_CSV_FIELD_MIN_TEXT].c_str(), &minaddr) <= 0) {
+                    error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                                  "Cannot parse minimum address: %s",
+                                  csv_fields[V6_CSV_FIELD_MIN_TEXT].c_str());
+                }
+                if (inet_pton(address_family, csv_fields[V6_CSV_FIELD_MAX_TEXT].c_str(), &maxaddr) <= 0) {
+                    error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                                  "Cannot parse maximum address: %s",
+                                  csv_fields[V6_CSV_FIELD_MAX_TEXT].c_str());
+                }
+                trie.set_range(minaddr.inet6.s6_addr, maxaddr.inet6.s6_addr,
+                               128, leaf);
+                break;
+
+            default:
+                abort();
+            }
+        }
+        if (csv_stream.bad()) {
+            error(EX_IOERR, errno, "%s", csv_file_name);
+        }
+    }
+
+    /** Load ranges of IP addresses from a CSV-formatted file or
+     * standard input to a trie.
+     *
+     * \param trie
+     * Load the ranges to this trie, overwriting original values.
+     *
+     * \param database_segment
+     * Passed on to csv_stream_to_trie_db() unchanged.
+     *
+     * \param csv_file_name
+     * The name of the CSV file that should be read, or "-" for
+     * standard input.
+     *
+     * \param address_family
+     * The type of IP addresses in the CSV data: either AF_INET
+     * for IPv4 or AF_INET6 for IPv6. */
+    void
+    csv_file_to_trie_db(
+        binary_trie &trie,
+        std::string &database_segment,
+        const char *csv_file_name,
+        int address_family)
+    {
+        const bool from_stdin = (std::strcmp(csv_file_name, "-") == 0);
+        if (from_stdin) {
+            csv_stream_to_trie_db(trie, database_segment, csv_file_name,
+                                  std::cin, address_family);
+            return;
+        }
+
+        std::ifstream csv_stream(csv_file_name, std::ios::in);
+        if (csv_stream.fail()) {
+            error(EX_NOINPUT, errno, "%s", csv_file_name);
+        }
+        csv_stream_to_trie_db(trie, database_segment, csv_file_name,
+                              csv_stream, address_family);
+    }
+
+    /** Write a GeoIP binary database to a stream.
+     *
+     * The output consists of, in this order: the trie, the data
+     * segment, the optional database-info block, the structure-info
+     * block, and the 3-byte node count.
+     *
+     * \param trie
+     * Mapping from IP addresses to records of the data segment.
+     *
+     * \param dat_file_name
+     * The name of the file that \a dat_stream is writing.
+     * This string is used only for error messages.
+     *
+     * \param dat_stream
+     * Write the database to this stream.
+     *
+     * \param database_info
+     * Copyright or other information about the database, or NULL.
+     * GeoIP_database_info() will return this string.
+     *
+     * \param database_segment
+     * The data segment that is written right after the trie; it is
+     * skipped if empty.
+     *
+     * \param address_family
+     * The type of IP addresses in the database: either AF_INET
+     * for IPv4 or AF_INET6 for IPv6. */
+    void
+    write_dat_stream(
+        const binary_trie &trie,
+        const char *dat_file_name,
+        std::ostream &dat_stream,
+        const char *database_info,
+        std::string database_segment,
+        int address_family)
+    {
+        // The trie itself.
+        trie.write_binary(dat_stream);
+        if (dat_stream.bad()) {
+            error(EX_IOERR, errno, "%s", dat_file_name);
+        }
+
+        // The data records that the leaves of the trie refer to.
+        if (!database_segment.empty()) {
+            dat_stream << database_segment;
+            if (dat_stream.bad()) {
+                error(EX_IOERR, errno, "%s", dat_file_name);
+            }
+        }
+
+        // The metadata section: a tag of three zero bytes and the text.
+        if (database_info != NULL) {
+            const char info_tag[3] = { 0, 0, 0 };
+            dat_stream.write(info_tag, sizeof info_tag);
+            dat_stream.write(database_info, std::strlen(database_info));
+            if (dat_stream.bad()) {
+                error(EX_IOERR, errno, "%s", dat_file_name);
+            }
+        }
+
+        // The structure-info block: a tag of three 0xFF bytes and the
+        // database type.
+        // NOTE(review): 9 and 21 presumably are GEOIP_ASNUM_EDITION and
+        // GEOIP_ASNUM_EDITION_V6 from GeoIP.h -- confirm.
+        unsigned char database_type;
+        if (address_family == AF_INET)
+            database_type = 9;
+        else if (address_family == AF_INET6)
+            database_type = 21;
+        else
+            abort();
+        const unsigned char structure_info[4] =
+            { 0xFF, 0xFF, 0xFF, database_type };
+        dat_stream.write((const char *)structure_info, 4);
+
+        // The number of nodes follows the database type.
+        trie.write_segment(dat_stream);
+        if (dat_stream.bad()) {
+            error(EX_IOERR, errno, "%s", dat_file_name);
+        }
+    }
+
+    /** Write a GeoIP binary database to a file or standard output.
+     *
+     * The output is flushed before returning, so that write errors
+     * which only show up when buffered data reaches the file are
+     * reported (EX_IOERR) instead of being lost.
+     *
+     * \param trie
+     * Mapping from IP addresses to records of the data segment.
+     *
+     * \param dat_file_name
+     * The name of the file that should be written, or "-" for
+     * standard output.
+     *
+     * \param database_info
+     * Copyright or other information about the database, or NULL.
+     * GeoIP_database_info() will return this string.
+     *
+     * \param database_segment
+     * The data segment, passed on to write_dat_stream().
+     *
+     * \param address_family
+     * The type of IP addresses in the database: either AF_INET
+     * for IPv4 or AF_INET6 for IPv6. */
+    void
+    write_dat_file(
+        const binary_trie &trie,
+        const char *dat_file_name,
+        const char *database_info,
+        std::string database_segment,
+        int address_family)
+    {
+        if (std::strcmp(dat_file_name, "-") == 0) {
+            write_dat_stream(trie, dat_file_name, std::cout,
+                             database_info, database_segment, address_family);
+            std::cout.flush();
+            if (!std::cout) {
+                error(EX_IOERR, errno, "%s", dat_file_name);
+            }
+        } else {
+            std::ofstream dat_stream(
+                dat_file_name,
+                std::ios::out | std::ios::binary);
+            if (!dat_stream) {
+                error(EX_CANTCREAT, errno, "%s", dat_file_name);
+            }
+            write_dat_stream(trie, dat_file_name, dat_stream,
+                             database_info, database_segment, address_family);
+            // close() sets failbit if the final flush or the close
+            // itself fails; the destructor would ignore that.
+            dat_stream.close();
+            if (!dat_stream) {
+                error(EX_IOERR, errno, "%s", dat_file_name);
+            }
+        }
+    }
+
+ /** The settings given on the command line. The constructor parses
+ * argc and argv and fills in the members. */
+ struct cmdline {
+ // CSV file to read; the constructor's default "-" means stdin.
+ const char *csv_file_name;
+ // File to write (-o); the default "-" means stdout.
+ const char *dat_file_name;
+ // AF_INET (-4, the default) or AF_INET6 (-6).
+ int address_family;
+ // Text given with -i, or NULL if there was none.
+ const char *database_info;
+ // Whether -v was given.
+ bool verbose;
+
+ cmdline(int argc, char **argv);
+ };
+}
+
+/** Parse the command line.
+ *
+ * Exits with EX_USAGE on invalid options, on a database info text
+ * longer than 99 bytes, and on more than one non-option argument.
+ * Prints the usage text and exits with EX_OK for --help.
+ *
+ * \param argc
+ * Number of elements in \a argv.
+ *
+ * \param argv
+ * The arguments as passed to main(). */
+cmdline::cmdline(int argc, char **argv):
+    csv_file_name("-"),
+    dat_file_name("-"),
+    address_family(AF_INET),
+    database_info(NULL),
+    verbose(false)
+{
+    enum {
+        // What getopt_long() returns for --help, which has no
+        // short form.
+        OPT_HELP = -2
+    };
+
+    static const struct option long_options[] = {
+        { "info",    required_argument, NULL, 'i' },
+        { "output",  required_argument, NULL, 'o' },
+        { "verbose", no_argument,       NULL, 'v' },
+        { "help",    no_argument,       NULL, OPT_HELP },
+        { NULL,      0,                 NULL, 0 }
+    };
+    // The text lists -4 and -6, which the option string below has
+    // always accepted, and shows a single CSV-FILE because a second
+    // one is rejected.
+    static const char *const usage =
+        "Usage: %s [OPTION]... [CSV-FILE]\n"
+        "Convert an AS number database from CSV to GeoIP binary format.\n"
+        "\n"
+        "  -4                  the CSV data is for IPv4 (default)\n"
+        "  -6                  the CSV data is for IPv6\n"
+        "  -i, --info=TEXT     add copyright or other info TEXT to output\n"
+        "  -o, --output=FILE   write the binary data to FILE, not stdout\n"
+        "  -v, --verbose       show what is going on\n"
+        "      --help          display this help and exit\n";
+
+    for (;;) {
+        int optret = getopt_long(argc, argv, "46i:o:v", long_options, NULL);
+
+        if (optret == -1)
+            break;
+        switch (optret) {
+        case '4':
+            address_family = AF_INET;
+            break;
+        case '6':
+            address_family = AF_INET6;
+            break;
+        case 'i':
+            database_info = optarg;
+            if (std::strlen(database_info) > 99) {
+                error(EX_USAGE, 0,
+                      "Database info must not be longer than 99 bytes");
+            }
+            break;
+        case 'o':
+            dat_file_name = optarg;
+            break;
+        case 'v':
+            verbose = true;
+            break;
+        case OPT_HELP:
+            std::printf(usage, program_invocation_name);
+            std::exit(EX_OK);
+        case '?':
+            // getopt_long() has already printed a diagnostic.
+            std::fprintf(stderr,
+                         "Try `%s --help' for more information.\n",
+                         program_invocation_name);
+            std::exit(EX_USAGE);
+        default:
+            std::abort();
+        }
+    }
+
+    if (optind < argc)
+        csv_file_name = argv[optind++];
+
+    if (optind < argc) {
+        error(EX_USAGE, 0,
+              "Only one non-option argument is allowed");
+    }
+}
+
+namespace {
+    /** Print a progress message, prefixed with the program name, if
+     * verbose output was requested.
+     *
+     * \param stream
+     * Where to print, or NULL to print nothing.
+     *
+     * \param message
+     * The text that follows the program name. */
+    void
+    report_progress(std::ostream *stream, const char *message)
+    {
+        if (stream)
+            *stream << program_invocation_name << message << std::endl;
+    }
+}
+
+/** Read the CSV data, build and optimize the trie, and write the
+ * binary database. */
+int
+main(int argc, char **argv)
+{
+    cmdline options(argc, argv);
+
+    // Progress messages must not end up in the database, so they go
+    // to stderr whenever the database is written to stdout.
+    std::ostream *verbose_stream = NULL;
+    if (options.verbose) {
+        verbose_stream = (std::strcmp(options.dat_file_name, "-") == 0)
+            ? &std::cerr : &std::cout;
+    }
+
+    report_progress(verbose_stream, ": Reading CSV and building the trie");
+    /* Initialize the trie with a value that will point to the start of the
+     * data section, e.g. an empty record. See binary_trie::update_records()
+     */
+    binary_trie trie(0x80000000);
+    std::string database_segment;
+    csv_file_to_trie_db(trie, database_segment, options.csv_file_name,
+                        options.address_family);
+
+    report_progress(verbose_stream, ": Optimizing");
+    trie.reorder_depth_first();
+    trie.reorder_in_blocks(1024);
+    trie.update_records();
+
+    report_progress(verbose_stream, ": Writing output");
+    write_dat_file(trie, options.dat_file_name, options.database_info,
+                   database_segment, options.address_family);
+
+    report_progress(verbose_stream, ": All done");
+}
diff --git a/debian/src/geoip-csv-to-dat.cpp b/debian/src/geoip-csv-to-dat.cpp
new file mode 100644
index 0000000..2e59fa0
--- /dev/null
+++ b/debian/src/geoip-csv-to-dat.cpp
@@ -0,0 +1,1507 @@
+/* geoip-csv-to-dat - convert a country database from CSV to GeoIP binary format
+ *
+ * Copyright (c) 2009 Kalle Olavi Niemitalo.
+ * Copyright (c) 2011 Patrick Matthäi
+ * Copyright (c) 2014 Andrew Moise
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define _GNU_SOURCE 1
+#include <algorithm>
+#include <arpa/inet.h>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <error.h>
+#include <fstream>
+#include <getopt.h>
+#include <iostream>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <stack>
+#include <string>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sysexits.h>
+#include <vector>
+#include <GeoIP.h>
+
+// Format of GeoIP Database files
+// ======================================
+//
+// 1. Binary trie mapping IP addresses to locations.
+// 2. Information about each location (only in city DBs).
+// 3. Optional unused data.
+// 4. Optional database-info block.
+// 5. Optional structure-info block.
+//
+// Binary trie
+// -----------
+//
+// The trie treats IP addresses as bit sequences and maps them to
+// location numbers.
+//
+// In the country database format, each such number is a country ID
+// that GeoIP_id_by_ipnum() would return. The meanings of country IDs
+// are hardcoded in libGeoIP and cannot be overridden by the database.
+//
+// In the city database format, each such number is a position to seek
+// to, within section #2 (information about each location), in order
+// to find a record giving information about the location associated
+// with that IP.
+//
+// The root node of the trie is at the beginning of the file, and the
+// other nodes then follow it. Each node has the same size and
+// consists of two little-endian pointers that correspond to the two
+// possible values of a bit. The pointers are 24-bit, and each node
+// is thus 6 bytes long.
+//
+// To traverse the trie, you go one bit at a time through the IP
+// address you're looking up -- starting at bit 0, and starting at the
+// root node of the trie. For each bit, you look at the current node,
+// and take the left branch (the first of the two pointers) if the bit
+// is 0, otherwise take the right branch. It is allowed for a node
+// pointer to refer back to earlier nodes in the file, but loops are
+// not allowed.
+//
+// If the pointer you're looking at is less than the total number of
+// nodes in the trie, it indicates the next node you should
+// examine. If the pointer you're looking at is greater than or equal
+// to the total number of nodes, it indicates a leaf -- the end of
+// your search.
+//
+// The meaning of the leaf pointers depends on the database type:
+//
+// In country databases, 0xFFFF?? indicates that the country ID
+// for that IP address is equal to the ?? part. 0xFFFF00 indicates
+// that the IP address you queried is not in the database.
+//
+// City databases contain an extra segment which contains location
+// information records. A leaf pointer equal to (number of nodes in
+// the trie + X) indicates that the location information for the IP
+// address you queried can be found in the location information
+// segment, at offset X. A leaf pointer exactly equal to the number of
+// nodes in the trie indicates that the location information for the
+// IP address you queried is not stored in this database.
+//
+// Location segment
+// --------------------
+//
+// City databases contain a location segment. Each record in this
+// segment contains information about a single location which IP
+// addresses may be mapped to. Pointers to records in this segment are
+// contained within leaf nodes of the trie; each record contains, in
+// this order:
+//
+// * A country ID, as a single byte
+// * A "region" (generally state or province), as a NUL-terminated string
+// * A city name, as a NUL-terminated string
+// * A postal code, as a NUL-terminated string
+// * Encoded latitude, as a little-endian 3-byte integer. To convert
+// back to actual latitude, divide by 10000 and subtract 180.
+// * Encoded longitude, as a little-endian 3-byte integer. To convert
+// back to actual longitude, divide by 10000 and subtract 180.
+// * Area code and metro code (ONLY if the country is US). These are
+// encoded into a single little-endian 3-byte integer. The area code
+// is the encoded value modulo 1000, and the metro code is the
+// encoded value divided by 1000. If the country is not US, this
+// field is not present.
+//
+// The string fields may be equal to empty strings, but all fields are
+// always included (except area/metro code, which is included if and
+// only if the country is US).
+//
+// All strings seem to be in ISO-8859-1 encoding.
+//
+// Optional unused data
+// --------------------
+//
+// The file format permits any amount of extra data between the binary
+// trie and the optional blocks.
+//
+// Optional database-info block
+// ----------------------------
+//
+// Near the end of the file, there may be a three-byte tag (0x00 0x00
+// 0x00) followed by at most DATABASE_INFO_MAX_SIZE - 1 = 99 bytes of
+// text that describes the database. GeoIP_database_info() returns
+// this text and appends a terminating '\0'.
+//
+// The GeoLite Country IPv4 database downloadable from MaxMind
+// includes this database-info block.
+//
+// Optional structure-info block
+// -----------------------------
+//
+// At the very end of the file, there may be a three-byte tag (0xFF
+// 0xFF 0xFF) followed by at most STRUCTURE_INFO_MAX_SIZE - 1 = 19
+// bytes. The first byte is the database type,
+// e.g. GEOIP_COUNTRY_EDITION = 1 or GEOIP_COUNTRY_EDITION_V6 = 12,
+// possibly with 105 added to it. Type-specific information then
+// follows. There is no type-specific information for the country
+// editions.
+//
+// The GeoLite Country IPv4 database downloadable from MaxMind does
+// not include this structure-info block.
+
+
+
+/*************************************************
+ * Binary trie
+ *
+ * This section implements a data structure representing the trie
+ * which, within a .dat file, maps IP address ranges to locations.
+ *************************************************/
+
+namespace {
+ class binary_trie // Binary trie over bit sequences; each node has one edge per bit value.
+ {
+ public:
+ typedef uint_fast32_t edge_type; // Either a node index or a leaf value; values >= nodes.size() are treated as leaves.
+ struct node
+ {
+ edge_type edges[2]; // edges[0] is used for a 0 bit, edges[1] for a 1 bit.
+ };
+
+ explicit binary_trie(edge_type leaf); // Creates the root node with both edges set to leaf.
+ void set_range( // Maps every bit sequence in [range_min, range_max] to leaf.
+ const uint8_t range_min[],
+ const uint8_t range_max[],
+ std::size_t bit_count,
+ edge_type leaf);
+ void reorder_depth_first(); // Renumbers the nodes in depth-first order.
+ void reorder_in_blocks(std::size_t bytes_per_block); // Renumbers the nodes so that subtries are grouped into blocks.
+
+ std::vector<node>::iterator nodes_begin() { return nodes.begin(); } // Mutable access: callers may rewrite edges in place.
+ std::vector<node>::iterator nodes_end() { return nodes.end(); }
+
+ private:
+ std::vector<node> nodes; // Node storage; index 0 is the root.
+
+ // This could be std::vector<bool> but that seems slower.
+ typedef std::vector<uint8_t> bits_vector;
+
+ void set_range_in_node( // Recursive step of set_range() for one node.
+ const bits_vector *min_bits,
+ const bits_vector *max_bits,
+ std::size_t bit_pos,
+ edge_type edit_node,
+ edge_type leaf);
+ void set_range_in_edge( // Recursive step of set_range() for one edge of a node.
+ const bits_vector *min_bits,
+ const bits_vector *max_bits,
+ std::size_t bit_pos,
+ edge_type edit_node,
+ bool bit,
+ edge_type leaf);
+ void reorder( // Applies a renumbering given as an old-to-new and a new-to-old index table.
+ const std::vector<edge_type> &old_to_new,
+ const std::vector<edge_type> &new_to_old);
+ };
+}
+
+/** Create a trie that consists of a single root node.
+ *
+ * \param leaf
+ * Value stored in both edges of the new root. Callers should pass
+ * the leaf that stands for "nothing was found". */
+binary_trie::binary_trie(edge_type leaf)
+{
+    node root;
+    root.edges[0] = leaf;
+    root.edges[1] = leaf;
+    nodes.push_back(root);
+}
+
+/** Map every bit sequence between two bounds (inclusive) to one leaf.
+ *
+ * \param range_min
+ * Lowest bit sequence of the range, packed eight bits per byte with
+ * the first bit of the sequence in the most significant bit of the
+ * first byte.
+ *
+ * \param range_max
+ * Highest bit sequence of the range, packed the same way.
+ *
+ * \param bit_count
+ * Length in bits of both sequences.
+ *
+ * \param leaf
+ * Leaf that the whole range should resolve to. */
+void
+binary_trie::set_range(
+    const uint8_t range_min[],
+    const uint8_t range_max[],
+    std::size_t bit_count,
+    edge_type leaf)
+{
+    // Unpack both bounds into one element (0 or 1) per bit so the
+    // recursive helpers can index them directly.
+    bits_vector lower;
+    bits_vector upper;
+    lower.reserve(bit_count);
+    upper.reserve(bit_count);
+    for (std::size_t pos = 0; pos < bit_count; ++pos) {
+        const std::size_t byte_index = pos >> 3;
+        const unsigned shift = static_cast<unsigned>(7 - (pos & 7));
+        lower.push_back((range_min[byte_index] >> shift) & 1);
+        upper.push_back((range_max[byte_index] >> shift) & 1);
+    }
+
+    // Start at the root (node 0) with the first bit.
+    set_range_in_node(&lower, &upper, 0, 0, leaf);
+}
+
+/** Apply a range update to one node by descending into the edges that
+ * the range touches.
+ *
+ * \param min_bits
+ * Lower bound of the range, or NULL when there is no lower bound
+ * inside this subtrie.
+ *
+ * \param max_bits
+ * Upper bound of the range, or NULL when there is no upper bound
+ * inside this subtrie.
+ *
+ * \param bit_pos
+ * Index of the bit that \a edit_node decides on.
+ *
+ * \param edit_node
+ * Node to modify.
+ *
+ * \param leaf
+ * Leaf that the whole range should resolve to. */
+void
+binary_trie::set_range_in_node(
+    const bits_vector *min_bits,
+    const bits_vector *max_bits,
+    std::size_t bit_pos,
+    edge_type edit_node,
+    edge_type leaf)
+{
+    const std::size_t next_pos = bit_pos + 1;
+
+    // The 0 edge belongs to the range unless the lower bound has a 1
+    // at this position.
+    const bool zero_edge_in_range =
+        (min_bits == NULL) || ((*min_bits)[bit_pos] == false);
+    if (zero_edge_in_range) {
+        // Below the 0 edge the upper bound only matters if it also
+        // goes through the 0 edge.
+        const bits_vector *upper = NULL;
+        if (max_bits != NULL && (*max_bits)[bit_pos] == false)
+            upper = max_bits;
+        set_range_in_edge(min_bits, upper, next_pos, edit_node, false, leaf);
+    }
+
+    // The 1 edge belongs to the range unless the upper bound has a 0
+    // at this position.
+    const bool one_edge_in_range =
+        (max_bits == NULL) || ((*max_bits)[bit_pos] == true);
+    if (one_edge_in_range) {
+        // Below the 1 edge the lower bound only matters if it also
+        // goes through the 1 edge.
+        const bits_vector *lower = NULL;
+        if (min_bits != NULL && (*min_bits)[bit_pos] == true)
+            lower = min_bits;
+        set_range_in_edge(lower, max_bits, next_pos, edit_node, true, leaf);
+    }
+}
+
+/** Apply a range update to one edge of a node.
+ *
+ * \param min_bits
+ * Lower bound of the range, or NULL when there is no lower bound
+ * below this edge.
+ *
+ * \param max_bits
+ * Upper bound of the range, or NULL when there is no upper bound
+ * below this edge.
+ *
+ * \param bit_pos
+ * Index of the first bit that is decided below this edge.
+ *
+ * \param edit_node
+ * Node that owns the edge.
+ *
+ * \param bit
+ * Which of the two edges of \a edit_node to modify.
+ *
+ * \param leaf
+ * Leaf that the whole range should resolve to. */
+void
+binary_trie::set_range_in_edge(
+    const bits_vector *min_bits,
+    const bits_vector *max_bits,
+    std::size_t bit_pos,
+    edge_type edit_node,
+    bool bit,
+    edge_type leaf)
+{
+    // The range covers everything below this edge when no remaining
+    // lower-bound bit is 1 and no remaining upper-bound bit is 0.
+    bool covers_edge = true;
+    if (min_bits != NULL) {
+        for (std::size_t i = bit_pos;
+             covers_edge && i < min_bits->size(); ++i) {
+            if ((*min_bits)[i] == true)
+                covers_edge = false;
+        }
+    }
+    if (max_bits != NULL) {
+        for (std::size_t i = bit_pos;
+             covers_edge && i < max_bits->size(); ++i) {
+            if ((*max_bits)[i] == false)
+                covers_edge = false;
+        }
+    }
+
+    if (covers_edge) {
+        nodes[edit_node].edges[bit] = leaf;
+        return;
+    }
+
+    edge_type child = nodes[edit_node].edges[bit];
+    if (child >= nodes.size()) {
+        // The edge currently ends in a leaf. Replace the leaf with a
+        // new node whose two edges both keep the old leaf value.
+        node fresh;
+        fresh.edges[0] = child;
+        fresh.edges[1] = child;
+        child = nodes.size();
+        nodes.push_back(fresh);
+        // Index again: push_back may have moved the node storage.
+        nodes[edit_node].edges[bit] = child;
+    }
+
+    set_range_in_node(min_bits, max_bits, bit_pos, child, leaf);
+}
+
+/** Give the nodes new indices in depth-first order, following the 0
+ * edge of each node before its 1 edge. */
+void
+binary_trie::reorder_depth_first()
+{
+    const std::size_t node_count = nodes.size();
+    std::vector<edge_type> old_to_new(node_count,
+                                      static_cast<edge_type>(-1));
+    std::vector<edge_type> new_to_old;
+    new_to_old.reserve(node_count);
+
+    // Explicit stack of edges still to visit, starting at the root.
+    std::vector<edge_type> pending;
+    pending.push_back(0);
+    while (!pending.empty()) {
+        const edge_type current = pending.back();
+        pending.pop_back();
+        if (current >= node_count)
+            continue;       // a leaf, not a node
+
+        old_to_new[current] = new_to_old.size();
+        new_to_old.push_back(current);
+        // Push the 1 edge first so that the 0 edge is popped first.
+        pending.push_back(nodes[current].edges[1]);
+        pending.push_back(nodes[current].edges[0]);
+    }
+
+    reorder(old_to_new, new_to_old);
+}
+
+/** Renumber the nodes so that lookups make better use of CPU and
+ * disk caches.
+ *
+ * The nodes are grouped into blocks. Each block holds the root of a
+ * subtrie plus as many levels of its descendants as fit, so that once
+ * the root has been paged in, the next few lookup steps stay inside
+ * the same block. Within a block the nodes are laid out depth-first,
+ * which should give each lookup step almost a 1/2 chance of finding
+ * the next node immediately adjacent.
+ *
+ * With a block size of 1024 bytes, this renumbering reduces the time
+ * required for random lookups by about 1.1%, compared to a plain
+ * depth-first order. However, it's still 2.3% slower than the
+ * database optimized by MaxMind.
+ *
+ * \param bytes_per_block
+ * Size of one block in bytes; every node counts as 6 bytes. */
+void
+binary_trie::reorder_in_blocks(
+    std::size_t bytes_per_block)
+{
+    const edge_type unassigned = -1;
+    const std::size_t node_count = nodes.size();
+    std::vector<edge_type> old_to_new(node_count, unassigned);
+    std::vector<edge_type> new_to_old;
+    new_to_old.reserve(node_count);
+
+    // Room left in the block being filled. The value carries over
+    // from one subtrie to the next.
+    ssize_t bytes_left = bytes_per_block;
+
+    for (edge_type root = 0; root < node_count; ++root) {
+        // Nodes that an earlier block already took are done.
+        if (old_to_new[root] != unassigned)
+            continue;
+
+        if (bytes_left <= 0)
+            bytes_left += bytes_per_block;
+
+        // Pass 1: choose the members of this block by walking
+        // breadth-first from the subtrie root until the block is full
+        // or the subtrie is exhausted.
+        std::set<edge_type> members;
+        std::queue<edge_type> frontier;
+        frontier.push(root);
+        while (bytes_left > 0 && !frontier.empty()) {
+            const edge_type candidate = frontier.front();
+            frontier.pop();
+            if (candidate < node_count) {
+                // The last node of a block may straddle the block
+                // boundary; that is better than making the hotter
+                // first node do so.
+                bytes_left -= 6;
+                members.insert(candidate);
+                frontier.push(nodes[candidate].edges[0]);
+                frontier.push(nodes[candidate].edges[1]);
+            }
+        }
+
+        // Pass 2: emit the chosen members in depth-first order. This
+        // relies on all of them being reachable from the root.
+        std::stack<edge_type> pending;
+        pending.push(root);
+        while (!pending.empty()) {
+            const edge_type current = pending.top();
+            pending.pop();
+            if (members.count(current) != 0) {
+                old_to_new[current] = new_to_old.size();
+                new_to_old.push_back(current);
+                pending.push(nodes[current].edges[1]);
+                pending.push(nodes[current].edges[0]);
+            }
+        }
+    }
+
+    reorder(old_to_new, new_to_old);
+}
+
+/** Rebuild the node array according to a renumbering.
+ *
+ * \param old_to_new
+ * New index of every old node, indexed by old index.
+ *
+ * \param new_to_old
+ * Old index of every new node, indexed by new index. The rebuilt
+ * array has one node per element of this table.
+ *
+ * Edges that refer to nodes are translated through \a old_to_new;
+ * edges that hold leaves are copied unchanged. */
+void
+binary_trie::reorder(
+    const std::vector<edge_type> &old_to_new,
+    const std::vector<edge_type> &new_to_old)
+{
+    const std::size_t old_count = nodes.size();
+    std::vector<node> renumbered;
+    renumbered.reserve(new_to_old.size());
+
+    for (std::size_t new_index = 0;
+         new_index < new_to_old.size(); ++new_index) {
+        const node &source = nodes[new_to_old[new_index]];
+        node target;
+        for (int bit = 0; bit < 2; ++bit) {
+            const edge_type edge = source.edges[bit];
+            target.edges[bit] =
+                (edge < old_count) ? old_to_new[edge] : edge;
+        }
+        renumbered.push_back(target);
+    }
+
+    nodes.swap(renumbered);
+}
+
+/*************************************************
+ * CSV file support
+ *
+ * This section implements reading from .csv files.
+ *************************************************/
+
+namespace {
+ /** Interface for classes interested in .csv data -- this should be
+ * implemented and then passed to csv_read_file(), which will then
+ * call read_csv_line(), providing the data in the .csv file. */
+ class csv_data_reader
+ {
+ public:
+ virtual ~csv_data_reader() {} // Virtual so implementations can be destroyed through this interface.
+
+ virtual void read_csv_line(const char *csv_file_name, // Called once per input line; csv_file_name is passed through for error messages.
+ int csv_line_number, // Line number of the line, counted from 1.
+ std::vector<std::string> &fields) = 0; // Parsed fields of the line; passed non-const, so implementations may modify them.
+ };
+}
+
+namespace {
+ /** Split one line of a .csv file into its fields. For internal
+  * use by the .csv reading code.
+  *
+  * Double quotes toggle quoted mode and are dropped from the
+  * output; a comma outside quotes ends the current field; spaces
+  * that directly follow such a comma are skipped.
+  *
+  * \param line
+  * The line to split, without its line terminator.
+  *
+  * \param fields
+  * Receives the fields; any previous content is discarded. */
+ void
+ csv_line_to_vector(
+     const std::string &line,
+     std::vector<std::string> &fields)
+ {
+     fields.clear();
+
+     std::string current;
+     bool inside_quotes = false;
+     bool skipping_spaces = false;
+     for (std::string::size_type i = 0; i < line.size(); ++i) {
+         const char ch = line[i];
+
+         if (ch == '"') {
+             inside_quotes = !inside_quotes;
+             skipping_spaces = false;
+             continue;
+         }
+         if (ch == ',' && !inside_quotes) {
+             fields.push_back(current);
+             current.clear();
+             skipping_spaces = true;
+             continue;
+         }
+         if (ch == ' ' && skipping_spaces)
+             continue;
+
+         current.push_back(ch);
+         skipping_spaces = false;
+     }
+
+     // The text after the last comma (possibly empty) is a field too.
+     fields.push_back(current);
+ }
+
+ /** Feed every line of a CSV-formatted stream to a reader.
+  *
+  * \param reader
+  * Receives one read_csv_line() call per line.
+  *
+  * \param csv_file_name
+  * Name of the file behind \a csv_stream; passed on to the reader
+  * and used in error messages.
+  *
+  * \param csv_stream
+  * The stream to read from. */
+ void
+ csv_read_stream(
+     csv_data_reader &reader,
+     const char *csv_file_name,
+     std::istream &csv_stream)
+ {
+     std::vector<std::string> fields;
+     std::string line;
+     // Line numbers start at 1.
+     for (int line_number = 1; std::getline(csv_stream, line);
+          ++line_number) {
+         csv_line_to_vector(line, fields);
+         reader.read_csv_line(csv_file_name, line_number, fields);
+     }
+
+     // Reaching end of file is the normal way out of the loop; only
+     // badbit is reported as an I/O error.
+     if (csv_stream.bad())
+         error(EX_IOERR, errno, "%s", csv_file_name);
+ }
+
+ /** Feed every line of a CSV file, or of standard input, to a
+  * reader.
+  *
+  * \param reader
+  * Receives one read_csv_line() call per line.
+  *
+  * \param csv_file_name
+  * Name of the CSV file to read, or "-" for standard input. */
+ void
+ csv_read_file(
+     csv_data_reader &reader,
+     const char *csv_file_name)
+ {
+     const bool use_stdin = (std::strcmp(csv_file_name, "-") == 0);
+     if (use_stdin) {
+         csv_read_stream(reader, csv_file_name, std::cin);
+         return;
+     }
+
+     std::ifstream csv_stream(csv_file_name, std::ios::in);
+     if (csv_stream.fail())
+         error(EX_NOINPUT, errno, "%s", csv_file_name);
+     csv_read_stream(reader, csv_file_name, csv_stream);
+ }
+}
+
+/*************************************************
+ * .dat file support
+ *
+ * This section implements support code for writing out .dat files in
+ * MaxMind's GeoIP binary format.
+ *************************************************/
+
+namespace {
+
+ /** .dat file writer class
+ *
+ * To write out a .dat file, construct a dat_writer, then call
+ * (in this order):
+ *
+ * write_trie()
+ * write_database_info (optional)
+ * write_structure_info()
+ *
+ * Setting dat_file_name to "-" will write to standard output;
+ * otherwise, a file will be created, and closed when the
+ * dat_writer is deleted. */
+
+ class dat_writer
+ {
+ public:
+ dat_writer(const char *dat_file_name, GeoIPDBTypes database_type); // Selects std::cout for "-", otherwise opens the named file in binary mode.
+ virtual ~dat_writer(); // Deletes the stream only if the constructor allocated it.
+
+ void write_trie(binary_trie &trie); // Writes each node as 6 bytes: the low 24 bits of each edge, least significant byte first.
+ void write_database_info(const char *database_info); // Writes three zero bytes followed by the text, without a terminating NUL.
+ virtual void write_structure_info(); // Writes three 0xFF bytes followed by the database type byte.
+
+ protected:
+ std::ostream *dat_stream; // Output stream: &std::cout or a heap-allocated std::ofstream.
+ bool need_to_delete_stream; // True when dat_stream was allocated by the constructor.
+ std::string dat_file_name; // Kept for error messages.
+ GeoIPDBTypes database_type; // Written as the type byte of the structure-info block.
+ };
+
+}
+
+/** Open the output of a .dat writer.
+ *
+ * \param dat_file_name
+ * Name of the file to create, or "-" to write to standard output.
+ *
+ * \param database_type
+ * Database type that write_structure_info() will record.
+ *
+ * If the file cannot be opened, the program exits with EX_CANTCREAT
+ * after printing the reason. */
+dat_writer::dat_writer(const char *dat_file_name, GeoIPDBTypes database_type):
+    dat_stream(NULL),
+    need_to_delete_stream(false),
+    dat_file_name(dat_file_name),
+    database_type(database_type)
+{
+    if (std::strcmp(dat_file_name, "-") == 0) {
+        // std::cout is borrowed and must not be deleted.
+        dat_stream = &std::cout;
+        return;
+    }
+
+    std::ofstream *file =
+        new std::ofstream(dat_file_name, std::ios::out | std::ios::binary);
+    // new never returns NULL, so a failed open can only be detected
+    // from the state of the stream itself.
+    if (file->fail()) {
+        const int open_errno = errno;   // delete may overwrite errno
+        delete file;
+        error(EX_CANTCREAT, open_errno, "%s", dat_file_name);
+    }
+    dat_stream = file;
+    need_to_delete_stream = true;
+}
+
+/** Release the output stream if this writer allocated it. */
+dat_writer::~dat_writer()
+{
+    // A stream that was not allocated by the constructor (std::cout)
+    // is left alone.
+    if (!need_to_delete_stream)
+        return;
+    delete dat_stream;
+}
+
+/** Write all nodes of a trie to the output.
+ *
+ * Each node becomes six bytes: the 0 edge followed by the 1 edge,
+ * each reduced to its low 24 bits and stored least significant byte
+ * first.
+ *
+ * \param trie
+ * The trie to write; it is only read. */
+void dat_writer::write_trie(binary_trie &trie)
+{
+    typedef std::vector<binary_trie::node>::iterator node_iterator;
+
+    const node_iterator last = trie.nodes_end();
+    for (node_iterator cur = trie.nodes_begin(); cur != last; ++cur) {
+        unsigned char record[6];
+        for (int bit = 0; bit < 2; ++bit) {
+            const binary_trie::edge_type edge = cur->edges[bit];
+            for (int byte = 0; byte < 3; ++byte) {
+                record[bit * 3 + byte] =
+                    static_cast<unsigned char>((edge >> (8 * byte)) & 0xFF);
+            }
+        }
+
+        dat_stream->write(reinterpret_cast<const char *>(record),
+                          sizeof record);
+        if (dat_stream->bad())
+            error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+    }
+}
+
+/** Write the database-info block: a tag of three zero bytes followed
+ * by the text, without a terminating NUL.
+ *
+ * \param database_info
+ * NUL-terminated description text. It is passed to strlen(), so it
+ * must not be NULL. */
+void dat_writer::write_database_info(const char *database_info)
+{
+    static const char marker[3] = { 0, 0, 0 };
+    const std::size_t text_length = std::strlen(database_info);
+
+    dat_stream->write(marker, sizeof marker);
+    dat_stream->write(database_info, text_length);
+    if (dat_stream->bad())
+        error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+}
+
+/** Write the structure-info block: a tag of three 0xFF bytes followed
+ * by the database type as a single byte.
+ *
+ * Exits with EX_IOERR if the block could not be written. */
+void dat_writer::write_structure_info()
+{
+    const unsigned char structure_info[4] = {
+        0xFF, 0xFF, 0xFF,
+        // The on-disk type is one byte; make the narrowing explicit.
+        static_cast<unsigned char>(database_type)
+    };
+    dat_stream->write(reinterpret_cast<const char *>(structure_info),
+                      sizeof structure_info);
+
+    // fail() rather than bad(): write() does nothing on a stream that
+    // is already in a failed state, and that must be reported as well.
+    if (dat_stream->fail())
+        error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+}
+
+/*************************************************
+ * .dat file writer class, extended for city DBs
+ *************************************************/
+
+namespace
+{
+
+ class city_dat_writer : public dat_writer // dat_writer that additionally collects and writes a location segment.
+ {
+ public:
+ // All serialized location information, in one big
+ // undifferentiated block
+ std::stringstream location_stream;
+
+ // Seek offset of each location within
+ // location_stream (relative to the beginning of
+ // location_stream). An offset of -1 means that that
+ // location is not in the table (can happen if the
+ // location info's out of order).
+ std::vector<int> location_pos; // Indexed by location ID.
+
+ // Set of location IDs that are actually going to be used;
+ // we'll silently ignore any locations not in this set.
+ std::set<int> needed_locations;
+
+ city_dat_writer(const char *dat_file_name, GeoIPDBTypes database_type); // Forwards both arguments to dat_writer.
+
+ // Notify of a location ID we need -- this MUST be
+ // called for every location ID you care about before
+ // the location CSV is read; any ID not explicitly
+ // notified will be discarded.
+ void notify_need_location(int loc_id);
+
+ void serialize_location_info(std::vector<std::string> &info, // Appends one location record to location_stream and records its offset in location_pos.
+ const char *input_file_name,
+ int input_line_number);
+
+ void finalize_location_offsets(binary_trie &trie); // Rewrites leaf edges from location IDs to on-disk offsets.
+ void write_locations(); // Copies location_stream to the output.
+ virtual void write_structure_info(binary_trie &trie); // NOTE: different parameter list from dat_writer::write_structure_info(), so this hides the base version instead of overriding it.
+ };
+
+}
+
+/** Construct a city database writer. Both arguments are handed to
+ * dat_writer; the location containers start out empty. */
+city_dat_writer::city_dat_writer(const char *dat_file_name, GeoIPDBTypes database_type):
+    dat_writer(dat_file_name, database_type)
+{
+}
+
+/** Record that a location ID is referenced, so that its record is
+ * kept when the location CSV is serialized.
+ *
+ * \param loc_id
+ * The location ID to keep. Repeated notifications are harmless. */
+void city_dat_writer::notify_need_location(int loc_id)
+{
+    if (needed_locations.count(loc_id) == 0)
+        needed_locations.insert(loc_id);
+}
+
+/** Rewrite the leaf edges of a trie into their on-disk form.
+ *
+ * Before this call a leaf edge holds 0x1000000 plus a location ID,
+ * and 0x1000000 itself means "no record". After this call a leaf
+ * edge holds the number of nodes in the trie plus the offset of the
+ * location record inside the location segment, and the bare node
+ * count means "no record". Edges below 0x1000000 are left untouched;
+ * they are taken to be links to other nodes.
+ *
+ * \param trie
+ * The trie whose edges are rewritten in place.
+ *
+ * Exits with EX_DATAERR if a referenced location has no serialized
+ * record or if a resulting pointer does not fit into 24 bits. */
+void city_dat_writer::finalize_location_offsets(binary_trie &trie)
+{
+    const binary_trie::edge_type location_base = 0x1000000;
+    const long trie_size = std::distance(trie.nodes_begin(), trie.nodes_end());
+
+    for (std::vector<binary_trie::node>::iterator it = trie.nodes_begin();
+         it != trie.nodes_end(); ++it)
+    {
+        // Both edges of a node are handled the same way.
+        for (int bit = 0; bit < 2; ++bit) {
+            binary_trie::edge_type &edge = it->edges[bit];
+
+            if (edge < location_base)
+                continue;               // link to another node
+            if (edge == location_base) {
+                edge = trie_size;       // no data
+                continue;
+            }
+
+            const int loc_id = static_cast<int>(edge - location_base);
+            // errnum is 0 in the calls below: these are data errors,
+            // so no errno text must be appended to the message.
+            if (static_cast<std::size_t>(loc_id) >= location_pos.size()
+                || location_pos[loc_id] == -1)
+                error(EX_DATAERR, 0, "Location %d exists in blocks but not in locations", loc_id);
+
+            // Add in a wide type so the range check below cannot be
+            // defeated by signed overflow.
+            const long offset = location_pos[loc_id] + trie_size;
+            if (offset > 0xFFFFFF)
+                error(EX_DATAERR, 0, "Overflow! Offset for location %d too large (0x%lx > 0xFFFFFF)",
+                      loc_id, static_cast<unsigned long>(offset));
+            edge = offset;
+        }
+    }
+}
+
+/** Copy the serialized location segment to the output.
+ *
+ * Exits with EX_IOERR if the output stream reports an I/O error. */
+void city_dat_writer::write_locations()
+{
+    // Inserting a stream buffer that yields no characters sets
+    // failbit on the destination, and every later write to it would
+    // then silently do nothing. So skip the insertion when nothing
+    // has been serialized.
+    if (static_cast<std::streamoff>(location_stream.tellp()) > 0)
+        *dat_stream << location_stream.rdbuf();
+
+    if (dat_stream->bad())
+    {
+        error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+    }
+}
+
+/** Write the structure-info block of a city database: a tag of three
+ * 0xFF bytes, the database type as one byte, and the number of trie
+ * nodes as a 24-bit little-endian integer.
+ *
+ * \param trie
+ * The trie that was written; only its node count is used.
+ *
+ * Exits with EX_IOERR if the block could not be written. */
+void city_dat_writer::write_structure_info(binary_trie &trie)
+{
+    const unsigned long trie_size =
+        std::distance(trie.nodes_begin(), trie.nodes_end());
+
+    // Every element is one byte on disk; make the narrowing explicit.
+    const unsigned char structure_info[7] = {
+        0xFF,
+        0xFF,
+        0xFF,
+        static_cast<unsigned char>(database_type),
+        static_cast<unsigned char>((trie_size      ) & 0xFF),
+        static_cast<unsigned char>((trie_size >> 8 ) & 0xFF),
+        static_cast<unsigned char>((trie_size >> 16) & 0xFF)
+    };
+    dat_stream->write(reinterpret_cast<const char *>(structure_info),
+                      sizeof structure_info);
+
+    // fail() rather than bad(): write() does nothing on a stream that
+    // is already in a failed state, and that must be reported as well.
+    if (dat_stream->fail())
+        error(EX_IOERR, errno, "%s", dat_file_name.c_str());
+}
+
+namespace {
+    /** Append the low 24 bits of \a value to \a out, least significant
+     * byte first. */
+    void
+    put_uint24_le(std::ostream &out, int value)
+    {
+        out.put((value >> 0) & 0xFF);
+        out.put((value >> 8) & 0xFF);
+        out.put((value >> 16) & 0xFF);
+    }
+}
+
+/** Convert one line of the location CSV into on-disk format and
+ * append it to location_stream.
+ *
+ * \param info
+ * The fields of the line, in this order: location ID, country code,
+ * region, city, postal code, latitude, longitude, metro code, area
+ * code. The last two are only read when the country code is "US".
+ *
+ * \param input_file_name
+ * Name of the CSV file (for error notifications).
+ *
+ * \param input_line_number
+ * Line number within the CSV file (for error notifications).
+ *
+ * Lines whose location ID was not announced through
+ * notify_need_location() are skipped. For the others the offset of
+ * the new record is stored in location_pos. A duplicate or negative
+ * ID, a line with too few fields, or an unknown country code ends
+ * the program with EX_DATAERR.
+ **/
+
+void city_dat_writer::serialize_location_info(std::vector<std::string> &info,
+                                              const char *input_file_name,
+                                              int input_line_number)
+{
+    // Column layout of the location CSV.
+    enum {
+        COL_ID, COL_COUNTRY, COL_REGION, COL_CITY, COL_POSTAL,
+        COL_LATITUDE, COL_LONGITUDE, COL_METRO, COL_AREA,
+        COLS_NON_US = COL_LONGITUDE + 1,
+        COLS_US = COL_AREA + 1
+    };
+
+    // A line without any field has no ID and so cannot be needed.
+    if (info.empty())
+        return;
+
+    const int loc_id = ::atoi(info[COL_ID].c_str());
+    if (needed_locations.find(loc_id) == needed_locations.end()) {
+        // We don't need this location, so we skip serializing it
+        // altogether.
+        return;
+    }
+
+    // A negative ID would be turned into a huge unsigned index below.
+    if (loc_id < 0) {
+        error_at_line(EX_DATAERR, 0, input_file_name, input_line_number,
+                      "Invalid location ID %d", loc_id);
+    }
+
+    // Make sure every field read below exists before anything is
+    // recorded for this location.
+    const bool is_us =
+        info.size() > COL_COUNTRY && info[COL_COUNTRY] == "US";
+    const std::size_t required_fields = is_us ? COLS_US : COLS_NON_US;
+    if (info.size() < required_fields) {
+        error_at_line(EX_DATAERR, 0, input_file_name, input_line_number,
+                      "Wrong number of fields");
+    }
+
+    // Record where this location starts within location_stream. IDs
+    // that were skipped in the data get -1 as a placeholder.
+    const std::size_t slot = static_cast<std::size_t>(loc_id);
+    const int record_offset =
+        static_cast<int>(static_cast<std::streamoff>(location_stream.tellp()));
+    if (slot >= location_pos.size()) {
+        location_pos.resize(slot + 1, -1);
+    } else if (location_pos[slot] != -1) {
+        // Two locations with the same ID.
+        error_at_line(EX_DATAERR, 0, input_file_name,
+                      input_line_number,
+                      "Duplicate location info for ID %d",
+                      loc_id);
+    }
+    location_pos[slot] = record_offset;
+
+    // Country ID ("AN" is looked up as "CW")
+    int country_id;
+    if (info[COL_COUNTRY] != "AN")
+        country_id = GeoIP_id_by_code(info[COL_COUNTRY].c_str());
+    else
+        country_id = GeoIP_id_by_code("CW");
+
+    if (country_id == 0) {
+        // Report against the input file and line. errnum is 0 because
+        // this is a data error, not a failed system call.
+        error_at_line(EX_DATAERR, 0, input_file_name, input_line_number,
+                      "Unrecognized country code: %s",
+                      info[COL_COUNTRY].c_str());
+    }
+    location_stream.put(country_id);
+
+    // Region, city and postal code, each terminated by a NUL byte
+    location_stream << info[COL_REGION];
+    location_stream.put('\0');
+    location_stream << info[COL_CITY];
+    location_stream.put('\0');
+    location_stream << info[COL_POSTAL];
+    location_stream.put('\0');
+
+    // Latitude and longitude, stored as (degrees + 180) * 10000
+    const double latitude_dbl = ::atof(info[COL_LATITUDE].c_str());
+    const int latitude_int = (latitude_dbl + 180) * 10000;
+    put_uint24_le(location_stream, latitude_int);
+
+    const double longitude_dbl = ::atof(info[COL_LONGITUDE].c_str());
+    const int longitude_int = (longitude_dbl + 180) * 10000;
+    put_uint24_le(location_stream, longitude_int);
+
+    // Area code and metro code (US only)
+    if (is_us) {
+        const int metro_code = ::atoi(info[COL_METRO].c_str());
+        const int area_code = ::atoi(info[COL_AREA].c_str());
+        const int area_metro_combined = metro_code * 1000 + area_code;
+        put_uint24_le(location_stream, area_metro_combined);
+    }
+}
+
+/*************************************************
+ * Command line and options
+ *
+ * This section implements the command line parsing and stores the
+ * options for controlling the program's behavior.
+ *************************************************/
+
+namespace {
+
+ struct cmdline { // Options and file names taken from the command line.
+ const char *ip_block_csv_file_name; // Non-option argument; "-" unless given.
+ const char *location_csv_file_name; // -l/--location-csv; NULL unless given.
+ const char *dat_file_name; // -o/--output; "-" unless given.
+ int address_family; // AF_INET unless -6 or -t GEOIP_COUNTRY_EDITION_V6 is given.
+ GeoIPDBTypes database_type; // GEOIP_COUNTRY_EDITION unless changed by -6 or -t.
+ const char *database_info; // -i/--info; NULL unless given.
+ bool verbose; // -v/--verbose.
+
+ cmdline(int argc, char **argv); // Parses the options with getopt_long().
+ };
+}
+
+cmdline::cmdline(int argc, char **argv):
+ ip_block_csv_file_name("-"),
+ location_csv_file_name(NULL),
+ dat_file_name("-"),
+ address_family(AF_INET),
+ database_type(GEOIP_COUNTRY_EDITION),
+ database_info(NULL),
+ verbose(false)
+{ // The initializers above are the defaults; the options below override them in command-line order.
+ enum {
+ OPT_HELP = -2 // Return value for the long-only --help option; not equal to any short option character.
+ };
+
+ static const struct option long_options[] = {
+ { "inet", no_argument, NULL, '4' },
+ { "inet6", no_argument, NULL, '6' },
+ { "info", required_argument, NULL, 'i' },
+ { "location-csv", required_argument, NULL, 'l' },
+ { "output", required_argument, NULL, 'o' },
+ { "type", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "help", no_argument, NULL, OPT_HELP },
+ { NULL, 0, NULL, 0 }
+ };
+ static const char *const usage = "\
+Usage: %s [OPTION] [CSV-FILE]...\n\
+Convert a GeoIP database from CSV to GeoIP binary format.\n\
+\n\
+ -4, --inet set database type to GEOIP_COUNTRY_EDITION, v4 addresses (default)\n\
+ -6, --inet6 set database type to GEOIP_COUNTRY_EDITION_V6, v6 addresses\n\
+ -t, --type=TYPE set database type explicitly (e.g. to GEOIP_CITY_EDITION_REV1)\n\
+ -i, --info=TEXT add copyright or other info TEXT to output\n\
+ -l, --location-csv=FILE set location CSV file name (required for GEOIP_CITY_EDITION_REV1)\n\
+ -o, --output=FILE write the binary data to FILE, not stdout\n\
+ -v, --verbose show what is going on\n\
+ --help display this help and exit\n";
+
+ for (;;) {
+ int optret = getopt_long(argc, argv, "46i:l:o:t:v", long_options, NULL);
+
+ if (optret == -1) // No more options.
+ break;
+ switch (optret) {
+ case '4': // Only the address family is set here; database_type keeps its current value.
+ address_family = AF_INET;
+ break;
+ case '6': // Sets both the database type and the address family.
+ database_type = GEOIP_COUNTRY_EDITION_V6;
+ address_family = AF_INET6;
+ break;
+ case 'i':
+ database_info = optarg;
+ if (std::strlen(database_info) > 99) { // 99 = DATABASE_INFO_MAX_SIZE - 1, see the format notes at the top of this file.
+ error(EX_USAGE, 0,
+ "Database info must not be longer than 99 bytes");
+ }
+ break;
+ case 'l':
+ location_csv_file_name = optarg;
+ break;
+ case 'o':
+ dat_file_name = optarg;
+ break;
+ case 't': // Only the V6 type also changes the address family.
+ if (!strcmp(optarg, "GEOIP_COUNTRY_EDITION")) {
+ database_type = GEOIP_COUNTRY_EDITION;
+ } else if (!strcmp(optarg, "GEOIP_COUNTRY_EDITION_V6")) {
+ database_type = GEOIP_COUNTRY_EDITION_V6;
+ address_family = AF_INET6;
+ } else if (!strcmp(optarg, "GEOIP_CITY_EDITION_REV1")) {
+ database_type = GEOIP_CITY_EDITION_REV1;
+ } else {
+ error(EX_USAGE, 0,
+ "Unrecognized database type (we support GEOIP_COUNTRY_EDITION, GEOIP_COUNTRY_EDITION_V6, \
+GEOIP_CITY_EDITION_REV1)");
+ }
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ case OPT_HELP:
+ std::printf(usage, program_invocation_name);
+ std::exit(EX_OK);
+ case '?': // Returned by getopt_long() for an option it could not accept.
+ std::fprintf(stderr,
+ "Try `%s --help' for more information.\n",
+ program_invocation_name);
+ std::exit(EX_USAGE);
+ default: // A return value that none of the cases above handles.
+ std::abort();
+ }
+ }
+
+ if (optind < argc) // The first non-option argument, if any, names the IP block CSV.
+ ip_block_csv_file_name = argv[optind++];
+
+ if (database_type == GEOIP_CITY_EDITION_REV1 && location_csv_file_name == NULL) {
+ error(EX_USAGE, 0,
+ "Must specify -l option when type is GEOIP_CITY_EDITION_REV1");
+ }
+
+ if (optind < argc) { // Anything left over is one argument too many.
+ error(EX_USAGE, 0,
+ "Only one non-option argument is allowed");
+ }
+}
+
+/*************************************************
+ * Country DB reading and writing
+ *
+ * This section contains the code that converts a country .csv
+ * file to a country .dat file.
+ *************************************************/
+
+namespace {
+
+ /** Implementation for converting a country DB.
+ *
+ * Derives from csv_data_reader so that an instance can be handed
+ * to csv_read_file(), which delivers CSV data to read_csv_line().
+ * convert_db() drives the whole conversion.
+ *
+ * NOTE: members are constructed in the order they are declared
+ * here (trie first, then cmdline). */
+ class country_db_impl : public csv_data_reader
+ {
+ public:
+ // Trie that read_csv_line() fills with address ranges; each
+ // range gets the leaf value 0xFFFF00 + country id.
+ binary_trie trie;
+ // Parsed command-line options, held by reference.
+ struct cmdline &cmdline;
+
+ // Column indices within one CSV row. CSV_FIELDS is the
+ // number of columns a row must have. Of these, the code in
+ // read_csv_line() reads MIN_TEXT, MAX_TEXT and COUNTRY_CODE.
+ enum {
+ CSV_FIELD_MIN_TEXT,
+ CSV_FIELD_MAX_TEXT,
+ CSV_FIELD_MIN_DECIMAL,
+ CSV_FIELD_MAX_DECIMAL,
+ CSV_FIELD_COUNTRY_CODE,
+ CSV_FIELD_COUNTRY_NAME,
+ CSV_FIELDS
+ };
+
+ // Binds the converter to the given command-line options.
+ country_db_impl(struct cmdline &cmdline);
+ // Reads the CSV, reorders the trie and writes the .dat file;
+ // progress goes to verbose_stream when it is non-NULL.
+ void convert_db(std::ostream *verbose_stream);
+ // Handles one CSV row (file name and line number are used
+ // for error reporting).
+ void read_csv_line(const char *csv_file_name,
+ int csv_line_number,
+ std::vector<std::string> &fields);
+ };
+
+}
+
+/** Construct the converter for the given command-line options.
+ *
+ * The mem-initializers are written in member declaration order (trie,
+ * then cmdline).  That is the order in which C++ initializes members
+ * no matter how the list is written, so listing them this way keeps
+ * the source honest and avoids -Wreorder warnings.  The trie is
+ * created with 0xFFFF00, the same base value read_csv_line() adds the
+ * country id to when it forms a leaf. */
+
+country_db_impl::country_db_impl(struct cmdline &in_cmdline):
+    trie(0xFFFF00),
+    cmdline(in_cmdline)
+{ }
+
+/** Callback for receiving .csv data (see csv_read_file()): handles one
+ * row of the country CSV.
+ *
+ * The row must have exactly CSV_FIELDS columns.  Its country code is
+ * translated to a GeoIP country id, the textual minimum and maximum
+ * addresses are parsed for the configured address family, and the
+ * resulting range is stored in the trie with the leaf value
+ * 0xFFFF00 + country id.  Every problem with the row is reported
+ * through error_at_line() with exit status EX_DATAERR. */
+
+void country_db_impl::read_csv_line(const char *csv_file_name,
+                                    int csv_line_number,
+                                    std::vector<std::string> &csv_fields)
+{
+    if (csv_fields.size() != CSV_FIELDS) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Wrong number of fields");
+    }
+
+    // Two codes are rewritten (in the row itself) before the lookup:
+    // AN becomes CW and XK becomes RS.
+    std::string &country_code = csv_fields[CSV_FIELD_COUNTRY_CODE];
+    if (country_code == "AN")
+        country_code = "CW";
+    else if (country_code == "XK")
+        country_code = "RS";
+
+    // An id of 0 is treated as "code not known".
+    const int country_id = GeoIP_id_by_code(country_code.c_str());
+    if (country_id == 0) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Unrecognized country code: %s",
+                      country_code.c_str());
+    }
+    const binary_trie::edge_type leaf = 0xFFFF00 + country_id;
+
+    // One buffer that can hold either address family; inetbytes
+    // gives byte-wise access to the IPv4 representation.
+    union address_buffer {
+        struct in_addr inet;
+        uint8_t inetbytes[4];
+        struct in6_addr inet6;
+    };
+    address_buffer range_start, range_end;
+
+    const std::string &start_text = csv_fields[CSV_FIELD_MIN_TEXT];
+    const std::string &end_text = csv_fields[CSV_FIELD_MAX_TEXT];
+
+    if (inet_pton(cmdline.address_family, start_text.c_str(), &range_start) <= 0) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Cannot parse minimum address: %s",
+                      start_text.c_str());
+    }
+    if (inet_pton(cmdline.address_family, end_text.c_str(), &range_end) <= 0) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Cannot parse maximum address: %s",
+                      end_text.c_str());
+    }
+
+    // Store the range using the bit width of the address family.
+    if (cmdline.address_family == AF_INET) {
+        trie.set_range(range_start.inetbytes, range_end.inetbytes,
+                       32, leaf);
+    } else if (cmdline.address_family == AF_INET6) {
+        trie.set_range(range_start.inet6.s6_addr, range_end.inet6.s6_addr,
+                       128, leaf);
+    } else {
+        abort();
+    }
+}
+
+/** Run the complete country conversion, from .csv to .dat.
+ *
+ * The input and output names, database type and optional info string
+ * all come from the cmdline object.  When verbose_stream is non-NULL
+ * a progress line prefixed with the program name is written to it
+ * before each phase. */
+
+void country_db_impl::convert_db(std::ostream *verbose_stream)
+{
+    const bool chatty = (verbose_stream != NULL);
+
+    // Phase 1: feed the CSV through read_csv_line() to fill the trie.
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Reading CSV and building the trie"
+                        << std::endl;
+    csv_read_file(*this, cmdline.ip_block_csv_file_name);
+
+    // Phase 2: reorder the trie nodes.
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Optimizing" << std::endl;
+    trie.reorder_depth_first();
+    trie.reorder_in_blocks(1024);
+
+    // Phase 3: write the trie, the optional database info string and
+    // the structure info, in that order.
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Writing output" << std::endl;
+
+    dat_writer out(cmdline.dat_file_name, cmdline.database_type);
+    out.write_trie(trie);
+    if (cmdline.database_info)
+        out.write_database_info(cmdline.database_info);
+    out.write_structure_info();
+}
+
+/*************************************************
+ * City DB conversion
+ *
+ * This section implements converting the two .csv files storing city
+ * data to a city .dat file.
+ *************************************************/
+
+namespace {
+
+ /** Implementation for converting a city DB
+ *
+ * Derives from csv_data_reader so that an instance can be handed
+ * to csv_read_file(). Two CSV files are read through the same
+ * read_csv_line() callback; which_stage selects how a row is
+ * interpreted.
+ *
+ * NOTE: members are constructed in the order they are declared
+ * here (trie, writer, which_stage, cmdline).
+ */
+
+ class city_db_impl : public csv_data_reader
+ {
+ public:
+ // Trie mapping IP ranges to location blocks
+ binary_trie trie;
+
+ // Writer for .dat file
+ city_dat_writer writer;
+
+ // Values for which_stage: the block CSV is read first, the
+ // location CSV second (see convert_db()).
+ enum {
+ STAGE_READING_BLOCKS,
+ STAGE_READING_LOCATIONS
+ };
+
+ // Which stage of CSV reading we're at (out of above
+ // enum)
+ int which_stage;
+
+ // Parsed command-line options, held by reference.
+ struct cmdline &cmdline;
+
+ // Column indices of a block CSV row; CSV_BLOCK_FIELDS is the
+ // required column count.
+ enum {
+ CSV_BLOCK_FIELD_MIN_DECIMAL,
+ CSV_BLOCK_FIELD_MAX_DECIMAL,
+ CSV_BLOCK_FIELD_LOC,
+ CSV_BLOCK_FIELDS
+ };
+
+ // Column indices of a location CSV row; CSV_LOCATION_FIELDS
+ // is the required column count.
+ enum {
+ CSV_LOCATION_FIELD_ID,
+ CSV_LOCATION_FIELD_COUNTRY,
+ CSV_LOCATION_FIELD_REGION,
+ CSV_LOCATION_FIELD_CITY,
+ CSV_LOCATION_FIELD_POSTALCODE,
+ CSV_LOCATION_FIELD_LATITUDE,
+ CSV_LOCATION_FIELD_LONGITUDE,
+ CSV_LOCATION_FIELD_METROCODE,
+ CSV_LOCATION_FIELD_AREACODE,
+ CSV_LOCATION_FIELDS
+ };
+
+ // Binds the converter to the given command-line options.
+ city_db_impl(struct cmdline &cmdline);
+ // Runs the whole conversion; progress goes to verbose_stream
+ // when it is non-NULL.
+ void convert_db(std::ostream *verbose_stream);
+ // Dispatches one CSV row to read_block_line() or
+ // read_location_line() according to which_stage.
+ void read_csv_line(const char *csv_file_name,
+ int csv_line_number,
+ std::vector<std::string> &fields);
+
+ // Handles one row of the location CSV.
+ void read_location_line(const char *csv_file_name,
+ int csv_line_number,
+ std::vector<std::string> &fields);
+ // Handles one row of the block CSV.
+ void read_block_line(const char *csv_file_name,
+ int csv_line_number,
+ std::vector<std::string> &fields);
+
+ // Check that a token within the "header" of the CSV
+ // files is what we expect it to be, and cause a data
+ // error if not.
+ void check_csv_header_token(std::vector<std::string> &tokens,
+ int token_number,
+ const char *token_expected,
+ const char *csv_file_name,
+ int csv_line_number);
+ };
+
+}
+
+/** Construct the converter for the given command-line options.
+ *
+ * The mem-initializers are written in member declaration order (trie,
+ * writer, which_stage, cmdline).  That is the order in which C++
+ * initializes members no matter how the list is written, so listing
+ * them this way avoids -Wreorder warnings.  Note that writer is built
+ * from in_cmdline directly, not from the cmdline member, which is
+ * initialized after it. */
+
+city_db_impl::city_db_impl(struct cmdline &in_cmdline):
+    trie(0x1000000), // We use 0x1000000 as the beginning of the
+                     // location information, since we don't know
+                     // the real value and we'll need to remap all
+                     // the offsets later anyway.
+    writer(in_cmdline.dat_file_name, in_cmdline.database_type),
+    which_stage(STAGE_READING_BLOCKS),
+    cmdline(in_cmdline)
+{ }
+
+/** Run the complete city conversion, from the two .csv files to .dat.
+ *
+ * File names and the optional info string come from the cmdline
+ * object.  The block CSV is read first and the location CSV second,
+ * both through read_csv_line() with which_stage set accordingly.
+ * When verbose_stream is non-NULL, progress lines prefixed with the
+ * program name are written to it. */
+
+void
+city_db_impl::convert_db(std::ostream *verbose_stream)
+{
+    const bool chatty = (verbose_stream != NULL);
+
+    // Stage 1: block CSV -> trie
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Reading block CSV and building the trie"
+                        << std::endl;
+
+    which_stage = STAGE_READING_BLOCKS;
+    csv_read_file(*this, cmdline.ip_block_csv_file_name);
+
+    if (chatty) {
+        // Report how many nodes the trie holds, then announce the
+        // next stage.
+        int node_count = std::distance(trie.nodes_begin(), trie.nodes_end());
+
+        *verbose_stream << program_invocation_name
+                        << ": Done reading blocks, trie size is "
+                        << node_count
+                        << std::endl;
+        *verbose_stream << program_invocation_name
+                        << ": Reading location CSV"
+                        << std::endl;
+    }
+
+    // Stage 2: location CSV -> writer
+    which_stage = STAGE_READING_LOCATIONS;
+    csv_read_file(*this, cmdline.location_csv_file_name);
+
+    // Reorder the trie nodes
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Optimizing" << std::endl;
+
+    trie.reorder_depth_first();
+    trie.reorder_in_blocks(1024);
+
+    // Let the writer fix up the location offsets stored in the trie
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Linking location and block data" << std::endl;
+
+    writer.finalize_location_offsets(trie);
+
+    // Emit trie, locations, optional info string and structure info,
+    // in that order
+    if (chatty)
+        *verbose_stream << program_invocation_name
+                        << ": Writing output" << std::endl;
+
+    writer.write_trie(trie);
+    writer.write_locations();
+    if (cmdline.database_info)
+        writer.write_database_info(cmdline.database_info);
+    writer.write_structure_info(trie);
+}
+
+/** Callback for receiving CSV data (see csv_read_file()).  We use
+ * this both for reading the location CSV and the block CSV; which
+ * stage we're at is indicated by the which_stage variable.
+ *
+ * An unknown which_stage value is an internal error and terminates
+ * the program with EX_SOFTWARE. */
+
+void city_db_impl::read_csv_line(const char *csv_file_name,
+                                 int csv_line_number,
+                                 std::vector<std::string> &csv_fields)
+{
+    switch (which_stage) {
+    case STAGE_READING_BLOCKS:
+        read_block_line(csv_file_name, csv_line_number, csv_fields);
+        break;
+    case STAGE_READING_LOCATIONS:
+        read_location_line(csv_file_name, csv_line_number, csv_fields);
+        break;
+    default:
+        // The second argument of error() is an errno value, not a
+        // flag: a non-zero value makes it append strerror() of that
+        // value to the message.  No errno applies here, so pass 0.
+        error(EX_SOFTWARE, 0, "Invalid which_stage value: %d", which_stage);
+    }
+}
+
+/** Callback for reading one line of the block CSV.
+ *
+ * Line 1 is skipped unread and line 2 must carry the expected column
+ * titles.  Every later row gives a start address, an end address and
+ * a location id; the range is stored in the trie with the leaf value
+ * 0x1000000 + location id, and the writer is told that the location
+ * is needed.  Only IPv4 is supported. */
+
+void city_db_impl::read_block_line(const char *csv_file_name,
+                                   int csv_line_number,
+                                   std::vector<std::string> &csv_fields)
+{
+    if (csv_line_number == 1)
+        return; // Assume this is copyright information and
+                // skip doing anything to it
+
+    if (csv_fields.size() != CSV_BLOCK_FIELDS) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Wrong number of fields");
+        return;
+    }
+
+    if (csv_line_number == 2) {
+        // Assume this is header information -- we check it to
+        // make sure we're looking at the right format of file.
+        check_csv_header_token(csv_fields, CSV_BLOCK_FIELD_MIN_DECIMAL, "startIpNum",
+                               csv_file_name, csv_line_number);
+        check_csv_header_token(csv_fields, CSV_BLOCK_FIELD_MAX_DECIMAL, "endIpNum",
+                               csv_file_name, csv_line_number);
+        check_csv_header_token(csv_fields, CSV_BLOCK_FIELD_LOC, "locId",
+                               csv_file_name, csv_line_number);
+
+        // Format checks out, we're now done with this line
+        return;
+    }
+
+    // NOTE(review): atoi() yields 0 for text that is not a number, so
+    // a malformed location id is not detected here.
+    const int loc_id = atoi(csv_fields[CSV_BLOCK_FIELD_LOC].c_str());
+    const binary_trie::edge_type leaf = 0x1000000 + loc_id;
+
+    // In the error()/error_at_line() calls below the second argument
+    // is an errno value; a non-zero value would append an unrelated
+    // strerror() text to the message, so 0 is passed.
+    if (cmdline.address_family != AF_INET) {
+        error(EX_SOFTWARE, 0, "IPv6 with city database is unimplemented.");
+    }
+
+    union {
+        struct in_addr inet;
+        uint8_t inetbytes[4];
+    } minaddr, maxaddr;
+
+    // inet_aton() returns 0 when the text is not a valid address.
+    if (inet_aton(csv_fields[CSV_BLOCK_FIELD_MIN_DECIMAL].c_str(), &(minaddr.inet)) == 0) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Invalid min IP address");
+    }
+    if (inet_aton(csv_fields[CSV_BLOCK_FIELD_MAX_DECIMAL].c_str(), &(maxaddr.inet)) == 0) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Invalid max IP address");
+    }
+
+    writer.notify_need_location(loc_id);
+    trie.set_range(minaddr.inetbytes, maxaddr.inetbytes, 32, leaf);
+}
+
+/** Callback for reading one line of the location CSV.
+ *
+ * Line 1 is skipped unread.  Line 2 must carry the expected column
+ * titles, which are checked one by one in column order.  Every later
+ * row is passed to the writer for serialization. */
+
+void city_db_impl::read_location_line(const char *csv_file_name,
+                                      int csv_line_number,
+                                      std::vector<std::string> &csv_fields)
+{
+    // Assume line 1 is copyright information and skip it entirely
+    if (csv_line_number == 1)
+        return;
+
+    if (csv_fields.size() != CSV_LOCATION_FIELDS) {
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Wrong number of fields");
+        return;
+    }
+
+    if (csv_line_number != 2) {
+        // Ordinary data row
+        writer.serialize_location_info(csv_fields, csv_file_name, csv_line_number);
+        return;
+    }
+
+    // Line 2 is assumed to be the header: verify each column title
+    // to make sure we're looking at the right format of file.
+    static const struct {
+        int column;
+        const char *title;
+    } expected_header[] = {
+        { CSV_LOCATION_FIELD_ID,         "locId" },
+        { CSV_LOCATION_FIELD_COUNTRY,    "country" },
+        { CSV_LOCATION_FIELD_REGION,     "region" },
+        { CSV_LOCATION_FIELD_CITY,       "city" },
+        { CSV_LOCATION_FIELD_POSTALCODE, "postalCode" },
+        { CSV_LOCATION_FIELD_LATITUDE,   "latitude" },
+        { CSV_LOCATION_FIELD_LONGITUDE,  "longitude" },
+        { CSV_LOCATION_FIELD_METROCODE,  "metroCode" },
+        { CSV_LOCATION_FIELD_AREACODE,   "areaCode" }
+    };
+    const unsigned int header_count =
+        sizeof(expected_header) / sizeof(expected_header[0]);
+
+    for (unsigned int i = 0; i < header_count; i++) {
+        check_csv_header_token(csv_fields,
+                               expected_header[i].column,
+                               expected_header[i].title,
+                               csv_file_name, csv_line_number);
+    }
+}
+
+/** Check one column title of a CSV header row.
+ *
+ * Compares tokens[token_number] with token_expected and, when they
+ * differ, reports a data error (EX_DATAERR) naming the file and line.
+ * The caller must already have verified that token_number is a valid
+ * index into tokens. */
+
+void city_db_impl::check_csv_header_token(std::vector<std::string> &tokens,
+                                          int token_number,
+                                          const char *token_expected,
+                                          const char *csv_file_name,
+                                          int csv_line_number)
+{
+    if (tokens[token_number] != token_expected) {
+        // The second argument of error_at_line() is an errno value;
+        // a non-zero value would append an unrelated strerror() text
+        // to the message, so 0 is passed.
+        error_at_line(EX_DATAERR, 0, csv_file_name, csv_line_number,
+                      "Incorrect format: field %d is \"%s\", but we expected \"%s\"",
+                      token_number, tokens[token_number].c_str(), token_expected);
+    }
+}
+
+/*************************************************
+ * Main program
+ *
+ * This is the entry point.
+ *************************************************/
+
+/* Parse the command line, run the converter that matches the
+ * requested database type and return 0. */
+int
+main(int argc, char **argv)
+{
+    cmdline options(argc, argv);
+
+    // Progress messages are only produced in verbose mode.  They go
+    // to stderr when the output file name is "-" and to stdout
+    // otherwise.
+    std::ostream *verbose_stream = NULL;
+    if (options.verbose) {
+        const bool dat_name_is_dash = (strcmp(options.dat_file_name, "-") == 0);
+        verbose_stream = dat_name_is_dash ? &std::cerr : &std::cout;
+    }
+
+    // Each converter lives in its own scope so that it is destroyed
+    // before the final message is printed.
+    if (options.database_type == GEOIP_COUNTRY_EDITION
+        || options.database_type == GEOIP_COUNTRY_EDITION_V6) {
+        country_db_impl country_db(options);
+        country_db.convert_db(verbose_stream);
+    } else if (options.database_type == GEOIP_CITY_EDITION_REV1) {
+        city_db_impl city_db(options);
+        city_db.convert_db(verbose_stream);
+    }
+
+    if (verbose_stream != NULL) {
+        *verbose_stream << program_invocation_name
+                        << ": All done" << std::endl;
+    }
+
+    return 0;
+}
diff --git a/debian/src/geolite2-to-legacy-csv.sh b/debian/src/geolite2-to-legacy-csv.sh
new file mode 100755
index 0000000..4531528
--- /dev/null
+++ b/debian/src/geolite2-to-legacy-csv.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/perl -w
+use strict;
+use diagnostics;
+use NetAddr::IP;
+use Getopt::Long;
+
+# Converts GeoLite2 country block rows read from STDIN into the legacy
+# GeoIP country CSV format on STDOUT.  The first non-option argument
+# names the GeoNames country info file that maps geoname ids to
+# country codes and names.
+
+# --quiet suppresses the progress counter written to STDERR.
+my $quiet = 0;
+GetOptions(
+    'quiet' => \$quiet,
+) or die("bad args");
+
+# Test @ARGV first so a missing argument does not interpolate undef.
+unless (@ARGV && -s $ARGV[0]) {
+    print STDERR "Specify Country DB to use on the command line.\n";
+    exit 1;
+}
+
+# Prime country data with additional continent codes
+# http://download.geonames.org/export/dump/readme.txt
+my $countryinfo = {
+    '6255146' => { 'code' => 'AF', 'name' => 'Africa' },
+    '6255147' => { 'code' => 'AS', 'name' => 'Asia' },
+    '6255148' => { 'code' => 'EU', 'name' => 'Europe' },
+    '6255149' => { 'code' => 'NA', 'name' => 'North America' },
+    '6255150' => { 'code' => 'SA', 'name' => 'South America' },
+    '6255151' => { 'code' => 'OC', 'name' => 'Oceania' },
+    '6255152' => { 'code' => 'AN', 'name' => 'Antarctica' },
+};
+
+# Read the countryinfo file: tab-separated, '#' starts a comment line.
+# Column 0 is the country code, column 4 the name, column 16 the id.
+open my $fh_in, "<", $ARGV[0] or die "Can't open $ARGV[0]: $!\n";
+while (my $line = <$fh_in>) {
+    chomp $line;
+    next if ($line =~ /^#/);
+    my @fields = split /\t/, $line;
+
+    # Blank or truncated rows carry no id.  Storing them would create
+    # an entry under an empty key that later matches every data row
+    # whose geoname id is empty, so skip them.
+    next unless @fields > 16 && length $fields[16];
+
+    $countryinfo->{ $fields[16] } = {
+        'code' => $fields[0],
+        'name' => $fields[4],
+    };
+}
+close $fh_in;
+
+# Convert actual GeoLite2 data from STDIN, one row at a time.
+my $counter = 0;
+while (my $line = <STDIN>) {
+    # Rows that do not start with a digit (e.g. the header) are ignored.
+    next unless ($line =~ /^\d/);
+    chomp $line;
+    $line =~ s/\r\z//;    # a trailing CR would make a "0" flag true
+    $counter++;
+
+    # Keep trailing empty columns and pad short rows so that every
+    # column variable below is defined.
+    my @fields = split /,/, $line, -1;
+    push @fields, '' while @fields < 6;
+    my ($network,
+        $geoname_id,
+        $registered_country_geoname_id,
+        $represented_country_geoname_id,
+        $is_anonymous_proxy,
+        $is_satellite_provider) = @fields;
+
+    my $ip = NetAddr::IP->new($network);
+    die "Can't parse network '$network' on input line $.\n"
+        unless defined $ip;
+
+    my $start_ip  = $ip->canon();
+    my $end_ip    = $ip->broadcast();
+    my $start_int = $ip->bigint();
+    my $end_int   = $end_ip->bigint();
+
+    my $code;
+    my $name;
+    if ($is_anonymous_proxy) {
+        $code = "A1";
+        $name = "Anonymous Proxy";
+    }
+    elsif ($is_satellite_provider) {
+        $code = "A2";
+        $name = "Satellite Provider";
+    }
+    else {
+        # Preference order: the network's own geoname id, then the
+        # represented country, then the registered country.
+        my $entry;
+        for my $id ($geoname_id,
+                    $represented_country_geoname_id,
+                    $registered_country_geoname_id) {
+            next unless exists $countryinfo->{$id};
+            $entry = $countryinfo->{$id};
+            last;
+        }
+        unless ($entry) {
+            print STDERR "Unknown Geoname ID, panicking. This is a bug.\n";
+            print STDERR "ID: $geoname_id\n";
+            print STDERR "ID Registered: $registered_country_geoname_id\n";
+            print STDERR "ID Represented $represented_country_geoname_id\n";
+            exit 1;
+        }
+        $code = $entry->{'code'};
+        $name = $entry->{'name'};
+    }
+
+    # Legacy GeoIP listing format:
+    # "1.0.0.0","1.0.0.255","16777216","16777471","AU","Australia"
+    printf "\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"\n",
+        $start_ip, $end_ip->canon(), $start_int, $end_int, $code, $name;
+    if (!$quiet && $counter % 10000 == 0) {
+        print STDERR "$counter\n";
+    }
+}
diff --git a/debian/src/v4-to-v6-layout.pl b/debian/src/v4-to-v6-layout.pl
new file mode 100755
index 0000000..030344d
--- /dev/null
+++ b/debian/src/v4-to-v6-layout.pl
@@ -0,0 +1,31 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+# Script from Boris Zentner (Maxmind)
+# This script converts the IPv4 csv database to the same
+# format as the IPv6 database.
+
+# Turn a dotted-quad IPv4 address into two colon-separated groups of
+# four hex digits, e.g. "1.2.3.4" becomes "0102:0304".
+sub _x {
+    my @octets = split(/\./, $_[0]);
+    return sprintf("%02x%02x:%02x%02x", @octets);
+}
+
+# Read legacy IPv4 CSV rows ("from","to",rest...) from STDIN and print
+# three IPv6-layout rows for each: the ::a.b.c.d form, the
+# ::ffff:a.b.c.d form and the 2002:xxxx:xxxx:: form built with _x().
+while (my $line = <STDIN>) {
+    chomp $line;
+    my ($from, $to, $tail) = split /,/, $line, 3;
+
+    # A row needs both addresses and a remainder.  Blank or truncated
+    # rows would only yield garbage output, so skip them and report
+    # the non-blank ones.
+    unless (defined $tail) {
+        warn "Skipping malformed input line $.\n" if length $line;
+        next;
+    }
+
+    # Strip the surrounding double quotes from both addresses.
+    for ($to, $from) {
+        s/^"//;
+        s/"$//;
+    }
+    my $hex_from = _x($from);
+    my $hex_to   = _x($to);
+    print
+        qq{"::$from", "::$to", $tail\n},
+        qq{"::ffff:$from", "::ffff:$to", $tail\n},
+        qq{"2002:${hex_from}::", "2002:${hex_to}:ffff:ffff:ffff:ffff:ffff", $tail\n};
+}
+
+exit(0);