#!/usr/bin/perl

# Filter for semicolon-separated weather-station records (the comments below
# refer to the input as nsd_cccc.txt).  Each input line is either dropped or
# cleaned up (whitespace, capitalization, station codes, coordinates,
# country/state fields, airport naming) and then printed.
#
# Usage: reads the files named on the command line, or STDIN if none, and
# writes the fixed records to STDOUT.
#
# The order of the substitutions matters: later rules rely on the
# normalization done by earlier ones.

use strict;
use warnings;

binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";

# fix_station_line($line)
#
# Applies every fixup to one input record.
# Returns the fixed record (including whatever line ending it came with), or
# undef (empty list in list context) when the station is to be dropped.
sub fix_station_line {
    my ($line) = @_;
    local $_ = $line;

    ### Drop certain stations
    return if / Platform *;/;    # offshore oil platforms
    return if /^LYPZ;/;          # buggy duplicate

    ### Whitespace/punctuation cleanup
    # Collapse runs of spaces.  (The pattern must require at least one
    # space: " *" also matches the empty string and, with /g, would insert
    # a space at every character boundary.)
    s/ {2,}/ /g;
    s/\\/\//g;
    s/([^ ])\//$1 \//g;
    s/\/([^ ])/\/ $1/g;
    s/,([^ ])/, $1/g;
    s/ ,/,/g;
    s/[ ,\/]*;/;/g;
    s/; /;/g;

    ### Capitalization, etc
    s/Mc /Mc/g;
    s/ Of / of /g;
    s/([a-z]) D(a |e |el |es |i |o |u |\')/$1 d$2/g;
    s/([a-z]) L(a|es?) /$1 l$2 /g;
    # lowercasify a capital letter after an apostrophe, unless the
    # preceding letter was "d" (eg, "Cote d'Ivoire")
    s/([a-ce-z]\'[A-Z])/\L$1/g;


    ### Fix incorrect or outdated codes
    if (/;Angelholm;;Sweden;/) { s/ESDB/ESTA/; }
    if (/;M\. Calamita;;Italy;/) { s/LIRJ/LIRX/; }
    if (/;Yerevan;;Armenia;/) { s/UGEE/UDYZ/; }
    if (/;Novosibirsk;;Russia;/) { s/UNNN/UNNT/; }
    if (/;Jinan;;China;/) { s/ZSTN/ZSJN/; }

    ### Fix invalid or incorrect coordinates
    if (/^K3MW;/) { s/;40-26-94N;106-44-95W;/;40-27N;106-45W;/; }
    if (/^KBKB;/) { s/;092-97W;/;093-00W;/; }
    if (/^KBJN;/) { s/;37-37-02;/;37-37-02N;/; }
    if (/^KFHU;/) { s/;46-98N;/;31-35N;/; }
    if (/^KWTR;/) { s/;104-87W;/;105-00W;/; }
    if (/^MMML;/) { s/;117-00W;/;115-14W;/; }
    if (/^MNBL;/) { s/;086-46W;/;083-46W;/; }
    if (/^PGNT;/) { s/;14-96N;/;15-00N;/; }


    ### Fix some country divisions to match FIPS codes
    if (/^EGJA;/) { s/;United Kingdom;/;Guernsey;/; }
    if (/^EGJB;/) { s/;United Kingdom;/;Guernsey;/; }
    if (/^EGJJ;/) { s/;United Kingdom;/;Jersey;/; }
    if (/^EGNS;/) { s/;United Kingdom;/;Isle of Man;/; }
    if (/^EKVG;/) { s/;Denmark;/;Faroe Islands;/; }
    if (/^ENSB;/) { s/;Norway;/;Svalbard;/; }
    if (/^FMCZ;/) { s/;Comoros;/;Mayotte;/; }
    if (/^NLWW;/) { s/;France;/;Wallis and Futuna;/; }
    if (/^PLCH;/) { s/;New Zealand;/;Kiribati;/; }
    if (/^TI..;/) { s/;;Virgin Islands;/;VI;United States;/; }
    if (/^YSNF;/) { s/;Australia;/;Norfolk Island;/; }


    ### Fix some country/state divisions to correct bugs
    if (/^EGYP;/) { s/;South Georgia and the Islands;/;Falkland Islands (Islas Malvinas);/; }
    if (/^HHAS;/) { s/;Ethiopia;/;Eritrea;/; } # 502576
    if (/^LY..;/) { s/;Serbia and Montenegro;/;;/; } # This will force update-locations to figure them out itself
    if (/^NIUE;/) { s/;Cook Islands;/;Niue;/; }
    if (/^NSTU;/) { s/;;United States Minor Outlying Islands;/;AS;United States;/; }
    if (/^NZWD;/) { s/, Antarctic;;New Zealand;/;;Antarctica;/; }
    if (/^PMDY;/) { s/;HI;/;UM;/; }
    if (/^PWAK;/) { s/;GU;/;UM;/; }
    if (/^TKPN;/) { s/;Antigua and Barbuda;/;Saint Kitts and Nevis;/; }
    if (/^YPCC;/) { s/;Christmas Island;/;Cocos (Keeling) Islands;/; }


    ### Fix some spelling mistakes/wackiness/nonstandardnesses. Mostly
    ### alphabetical by station code
    if (/^EDDF;/) { s/ \/ M-Flughafen//; }
    if (/^ETWM;/) { s/-Mil;/ Military Base;/; }
    if (/^FAJS;/) { s/;Johannesburg International Airport;/;O. R. Tambo International Airport;/; } # 533622
    if (/^HESH;/) { s/Sheikhintl/Sheikh Intl/; }
    if (/^HKJK;/) { s/ TWR \/ APP \/ NOF \/ Civil Airlines//; }
    if (/^LBGO;/) { s/Orechovista/Oryakhovitsa/; } # 313655
    if (/^LEJR;/) { s/Fronteraaeropuerto/Frontera Aeropuerto/; }
    if (/^LFPG;/) { s/Paris-Aeroport Charles de Gaulle/Paris, Charles de Gaulle International Airport/; }
    if (/^LIPO;/) { s/Montichia;/Montichiara;/; } # 350945
    if (/^LOAV;/) { s/Lugplatz/Flugplatz/; }
    if (/^MMGL;/) { s/Guadalaj;/Guadalajara;/; }
    if (/^MMMD;/) { s/ lic / /; }
    if (/^MNMG;/) { s/Managua A. C. Sandino/Managua, A. C. Sandino Airport/; }
    if (/^MTPP;/) { s/ \/ Aeroport International/ International Airport/; }
    if (/^MWCR;/) { s/Airportgrand/Airport, Grand/; }
    # 319538 - the entry for OIAG actually has the data for OIAJ
    if (/^OIAG;/) { s/Omidieh/Aghajari/; s/30-46N;049-40E/30-44-44N;049-40-35E/; }
    if (/^OIKB;/) { s/Bandarabbass/Bandar Abbas/; }
    if (/^OINN;/) { s/Noshahr/Now Shahr/; }
    if (/^OITR;/) { s/Orumieh/Orumiyeh/; }
    # to match POP_PLACES.  The a-ring (U+00E5) and n-tilde (U+00F1) are
    # written as escapes: this file has no "use utf8", so a raw UTF-8
    # literal would be taken byte-by-byte and then encoded a second time
    # by the :utf8 layer on STDOUT.
    if (/^PGUM;/) { s/Agana/Hag\x{e5}t\x{f1}a/; }
    if (/^SABE;/) { s/Aeroparque Bs\. As\./Buenos Aires, Jorge Newbery/; }
    if (/^SBFZ;/) { s/pinto/Pinto/; }
    if (/^SPHY;/) { s/Andahuayla/Andahuaylas/; }
    if (/^SPIM;/) { s/Aerop\. Internacional Jorgechavez/Jorge Chavez International Airport/; }
    if (/^SVMI;/) { s/Maiquetia Aerop\. Intl\. Simon Bolivar/Simon Bolivar International/; }
    if (/^TAPA;/) { s/Vc /V. C. /; }
    if (/^TKPN;/) { s/Newcast;/Newcastle;/; }
    if (/^UBBG;/) { s/Gyanca/Gyandzha/; }
    if (/^UKDR;/) { s/Krivyy/Kryvyy/; }


    ### "Move" some stations to keep them from matching irrelevant cities
    if (/^VTBD;/) { s/Don Muang/Bangkok/; }


    ### Untranslate/unabbreviate the word "Airport". (The names in
    ### nsd_cccc.txt don't seem to be especially close to
    ### correct/official, so this is a net win.)
    s/Aerop\. Internacional ([^,;]*)/$1 International Airport/;
    s/Aeropuerto[^ ]* ([^,;]*)/$1 Airport/;
    s/Aeroporto* d[ea] ([^,;]*)/$1 Airport/;
    s/[ -]Aero(|\.|drome|porto?|-Porto|puerto)( |;)/ Airport$2/;
    s/Air(-Port|p\.)/Airport/;
    s/Civ \/ (Mil|Afb)/Airport/;
    # The optional " /" sits inside the capture so that $1 is always
    # defined (empty when absent) and does not trigger an
    # "uninitialized value" warning.
    s/((?: \/)?) Civ(|il|ilian);/$1 Airport;/;
    s/Lufthavn/Airport/;
    s/Int\'?l\.?/International/;
    s/Int\./International/;
    s/Inter-National/International/;
    s/Internationalairport;/International Airport;/;
    s/International;/International Airport;/;
    s/Airport ([A-Z])/Airport, $1/;
    # Change "Foo / Airport" to "Foo Airport"
    s/;([^;]*)(,| \/) (International Airport|Airport);/;$1 $3;/;
    # And "Foo / Bar Airport" to "Foo, Bar Airport"
    s/;([^;\/,]*) \/ ([^;\/,]* Airport)/;$1, $2/;

    s/,? ([a-z][a-z]*-)?afb/ Air Force Base/i;
    s/ ([A-Z][a-z]*-)?Ab;/ Air Base;/;
    s/Usa . Af/US Air Force Base/;
    s/Usaf/US Air Force Base/;
    s/Air Force Operated Base In Foreign Country/Air Force Base/;
    s/ (Can-)?Mil(\.|itary);/ Military Base;/;

    s/Obs(\.|erv\.|ervatory|ervatorio)/Observatory/;

    # US National Weather Service, but appears not just in /;United States;/
    s/, NWS Office//;
    # Likewise Australian Weather Service (or Automated Weather Station?)
    s/,? Aws;/;/;

    ### Country-specific fixups, sorted alphabetically by country

    if (/;Argentina;/) {
        # Remove province name from location description
        s/, (BA|B\. A\.|CHT|SF);/;/;
    }

    if (/;Australia;/) {
        # ??
        s/ (Amo|Mo);/ Airport;/;
        s/,? M\. O\.?;/;/;

        s/ Ran / Royal Australian Navy /;
    }

    if (/;Austria;/) {
        s/-Flughafen/ Airport/;
        s/Flugplatz/Airport/;
        s/ Am / am /;
        s/ Im / im /;
    }

    if (/;Canada;/) {
        # Remove province/territory name from location description
        s/,? (Alta|B\. C|Man|N\. B|Nfld|N\. S|N\. W\. T|Ont|P\. E\. I|Prince Edward Island|Que|Sask|Y\. T)\.?;/;/;

        # Canadian Department of Agriculture, [Remote] Climate Station
        s/ Cda//i;
        s/ R?CS//;

        s/Airport, [^;]*Station;/Airport;/;

        # /CX../ stations are automated. Maybe we should drop all of them,
        # but for now we'll just drop the ones where there's also a
        # corresponding non-automated station
        if (/^CX(DE|EC|EG|MI|MM|OX|TV|WN|ZU)/) {
            return;
        }
    }

    if (/;Cuba;/) {
        s/, Oriente//;
    }

    if (/;Mexico;/) {
        # Remove state name from location description
        s/,? (Ags|B\. C\. S|Camp|Chis|Coah|Mor|N\. L|Nay|Pue|Q\. Roo|Qro|S\. L\. P|Sin|Son)\.?;/;/;
    }

    if (/;Netherlands;/) {
        s/;([^;]*) Airport, ([^;]*);/;$1, $2 Airport;/;
    }

    if (/;New Caledonia;/) {
        s/ Nlle-Caledonie//;
        s/ Ile [^;]*//;
    }

    if (/;Sweden;/) {
        s/Flygplats/Airport/;
    }

    if (/;United States;/) {
        s/Nexrad/NEXRAD Station/;
    }

    ### Final airport fixing...

    ### The location data for several countries (including all of South
    ### America) uses the convention "City / Airport Name"
    if (/^(AG|AY|DA|EL|FC|FMM|FO|FX|FZ|GA|GM|HE|LI|LJ|MK|MM|MP|OI|PL|S|US|UU|UW|VV|WA|WI|WM|WR|WS|Z)/) {
        s/;([^;]*) \/ ([^;\/]*)/;$1, $2 Airport/;
    }

    ### Some do it backwards
    if (/^(EN|LE|MS|TT)/) {
        s/;([^;]*) \/ ([^;\/]*)/;$1 Airport, $2/;
    }

    ### In some countries, you generally need to prefix the city name to
    ### the airport name
    if (/^(C|ES|GO|LF|LH|LK|LS|LT|LZ)/) {
        s/;([^;]*) \/ ([^;\/]*)/;$1, $1-$2 Airport/;
    }

    # Some of our fixes end up resulting in "Airport Airport"
    s/Airport Airport/Airport/;

    # Remove numbers in "Foo 1", "Foo 2", "Foo Iii", etc
    s/ [123];/;/;
    s/ I[iv]i*([^a-z])/$1/;

    return $_;
}

# main()
#
# Reads records from the files named in @ARGV (or STDIN when there are
# none), and prints every record that fix_station_line() does not drop.
sub main {
    while (my $line = <>) {
        my $fixed = fix_station_line($line);
        print $fixed if defined $fixed;
    }
    return;
}

# Run as a filter when executed directly; stay quiet when loaded via
# require/do so that fix_station_line() can be called on its own.
main() unless caller;