|
Packit Service |
4897fc |
#!/usr/bin/perl -w
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
use File::Temp "tempfile";
|
|
Packit Service |
4897fc |
use Text::Unidecode;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
sub myunidecode {
|
|
Packit Service |
4897fc |
my ($text) = @_;
|
|
Packit Service |
4897fc |
$text = unidecode($text);
|
|
Packit Service |
4897fc |
$text =~ s/\@/a/g;
|
|
Packit Service |
4897fc |
return $text;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
@intlglob = glob("sources/geonames_dd_dms_date_*.txt");
|
|
Packit Service |
4897fc |
$intlsrc = $intlglob[$#intlglob];
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
$uscitysrc = "sources/POP_PLACES.txt";
|
|
Packit Service |
4897fc |
$usbigsrc = "sources/US_CONCISE.txt";
|
|
Packit Service |
4897fc |
$majorsrc = "major-cities.txt";
|
|
Packit Service |
4897fc |
$weathersrc = "sources/nsd_cccc.txt";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
$locationdb = "locationdb.sqlite";
|
|
Packit Service |
4897fc |
unlink $locationdb;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
($states, $states_filename) = tempfile();
|
|
Packit Service |
4897fc |
binmode $states, ":utf8";
|
|
Packit Service |
4897fc |
($counties, $counties_filename) = tempfile();
|
|
Packit Service |
4897fc |
binmode $counties, ":utf8";
|
|
Packit Service |
4897fc |
($cities, $cities_filename) = tempfile();
|
|
Packit Service |
4897fc |
binmode $cities, ":utf8";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# The columns in $intlsrc: (interesting ones in caps)
|
|
Packit Service |
4897fc |
# RC UFI uni LAT LONG dms_lat dms_long mgrs jog fc DSG PC CC1 ADM1 adm2 POP ELEV CC2 NT LC SHORT_FORM generic SORT_NAME FULL_NAME FULL_NAME_ND modify_date
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "Reading $intlsrc\n";
|
|
Packit Service |
4897fc |
open INTLSRC, "-|:utf8", "perl ./city-fixups.pl < $intlsrc";
|
|
Packit Service |
4897fc |
while (<INTLSRC>) {
|
|
Packit Service |
4897fc |
# skip header, and historical records
|
|
Packit Service |
4897fc |
next if $. == 1 || /\(\(/ || /\(historical\)/i;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
@fields = split /\t/;
|
|
Packit Service |
4897fc |
next if $#fields != 25;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
($rc, $id, $junk, $latitude, $longitude, $junk, $junk, $junk,
|
|
Packit Service |
4897fc |
$junk, $junk, $designation, $pc, $country_code, $state_code, $junk,
|
|
Packit Service |
4897fc |
$population, $elevation, $cc2, $name_type, $language, $short_name,
|
|
Packit Service |
4897fc |
$junk, $flat_name, $long_name, $long_name_nd, $junk) = @fields;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# Don't use wacky complex transliteration rules for non-European
|
|
Packit Service |
4897fc |
# names; just use the non-diacritic version of the name instead
|
|
Packit Service |
4897fc |
if ($name_type eq 'N' && (!$rc || $rc > 2)) {
|
|
Packit Service |
4897fc |
$long_name = myunidecode($long_name);
|
|
Packit Service |
4897fc |
$short_name = myunidecode($short_name);
|
|
Packit Service |
4897fc |
} elsif ($name_type eq 'NS' && (!$rc || $rc > 2)) {
|
|
Packit Service |
4897fc |
$long_name = $long_name_nd;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# Also fix smart quotes and dumb spacing
|
|
Packit Service |
4897fc |
$short_name =~ s/\x{2019}/'/g;
|
|
Packit Service |
4897fc |
$short_name =~ s/ +/ /;
|
|
Packit Service |
4897fc |
$short_name =~ s/ $//;
|
|
Packit Service |
4897fc |
$long_name =~ s/\x{2019}/'/g;
|
|
Packit Service |
4897fc |
$long_name =~ s/ +/ /;
|
|
Packit Service |
4897fc |
$long_name =~ s/ $//;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
if ($designation =~ /^PCL/) {
|
|
Packit Service |
4897fc |
# /^PCL/ matches countries and other ISO-3166-like things:
|
|
Packit Service |
4897fc |
# PCLD - dependent political entity
|
|
Packit Service |
4897fc |
# PCLF - freely associated state
|
|
Packit Service |
4897fc |
# PCLI - independent political entity (ie, "country")
|
|
Packit Service |
4897fc |
# PCLIX - section of independent political entity
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
if ($short_name) { $countries{$short_name} = $country_code; }
|
|
Packit Service |
4897fc |
if ($long_name) { $countries{$long_name} = $country_code; }
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
} elsif ($designation eq "ADM1") {
|
|
Packit Service |
4897fc |
# ID COUNTRY_CODE STATE_CODE NAME_TYPE LANGUAGE SHORT_NAME
|
|
Packit Service |
4897fc |
# LONG_NAME FLAT_NAME
|
|
Packit Service |
4897fc |
print $states "intl-$id\t$country_code\t$country_code$state_code\t$name_type\t$language\t$short_name\t$long_name\t$flat_name\n";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
} elsif ($designation =~ /^PPL/) {
|
|
Packit Service |
4897fc |
# "PPL*" is "Populated Place"
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# Assign an "importance" to the city. Having a positive
|
|
Packit Service |
4897fc |
# $importance makes a city "bigger" in terms of figuring out how
|
|
Packit Service |
4897fc |
# close it is to a given coordinate, and having negative
|
|
Packit Service |
4897fc |
# $importance makes it "smaller". Furthermore, a negative
|
|
Packit Service |
4897fc |
# importance city can never match solely on coordinates; it needs
|
|
Packit Service |
4897fc |
# to match based on name as well.
|
|
Packit Service |
4897fc |
#
|
|
Packit Service |
4897fc |
# $importance = 2 means furthermore that if we haven't found a
|
|
Packit Service |
4897fc |
# weather station for the city after going through the whole
|
|
Packit Service |
4897fc |
# stations table, then we'll add an entry using whatever weather
|
|
Packit Service |
4897fc |
# station is closest. $importance = 3 means it's a national
|
|
Packit Service |
4897fc |
# capital.
|
|
Packit Service |
4897fc |
#
|
|
Packit Service |
4897fc |
# Our current definition of "important" is that it has designation
|
|
Packit Service |
4897fc |
# of "PPLC" (capital of a country [that is, a /PCL.*/]) or "PPLA"
|
|
Packit Service |
4897fc |
# (capital of an ADM1). We also look at the "Populated Place
|
|
Packit Service |
4897fc |
# Classification" ($pc) and population, but as of 2008-05 those
|
|
Packit Service |
4897fc |
# aren't filled in yet for most countries.
|
|
Packit Service |
4897fc |
#
|
|
Packit Service |
4897fc |
# If $pc is 4 or 5 ("unimportant"), or $designation is "PPLX"
|
|
Packit Service |
4897fc |
# (neighborhood/subdivision), we give the city $importance = -1,
|
|
Packit Service |
4897fc |
# meaning it will only be used when a weather station actually
|
|
Packit Service |
4897fc |
# matches its name; it won't get pulled in as a "nearest city"
|
|
Packit Service |
4897fc |
# match.
|
|
Packit Service |
4897fc |
if ($designation eq "PPLC") {
|
|
Packit Service |
4897fc |
$importance = 3;
|
|
Packit Service |
4897fc |
} elsif ($population && $population > 100000) {
|
|
Packit Service |
4897fc |
$importance = 2;
|
|
Packit Service |
4897fc |
} elsif ($designation eq "PPLA" || $pc eq "1" || $pc eq "2") {
|
|
Packit Service |
4897fc |
$importance = 1;
|
|
Packit Service |
4897fc |
} elsif ($designation eq "PPLX" || $pc eq "4" || $pc eq "5") {
|
|
Packit Service |
4897fc |
$importance = -1;
|
|
Packit Service |
4897fc |
} else {
|
|
Packit Service |
4897fc |
$importance = 0;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# either sqlite or pysqlite treats ".9" as a string rather than a
|
|
Packit Service |
4897fc |
# number, so make sure we say "0.9" instead
|
|
Packit Service |
4897fc |
$latitude = 0 + $latitude;
|
|
Packit Service |
4897fc |
$longitude = 0 + $longitude;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# ID LATITUDE LONGITUDE ELEVATION IMPORTANCE COUNTRY_CODE
|
|
Packit Service |
4897fc |
# STATE_CODE COUNTY_CODE NAME_TYPE LANGUAGE SHORT_NAME LONG_NAME
|
|
Packit Service |
4897fc |
# FLAT_NAME
|
|
Packit Service |
4897fc |
print $cities "intl-$id\t$latitude\t$longitude\t$elevation\t$importance\t$country_code\t$country_code$state_code\t\t$name_type\t$language\t$short_name\t$long_name\t$flat_name\n";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# The dataset gives two countries for some features, usually
|
|
Packit Service |
4897fc |
# meaning either that they cross a national border, or are
|
|
Packit Service |
4897fc |
# disputed.
|
|
Packit Service |
4897fc |
if ($cc2) {
|
|
Packit Service |
4897fc |
print $cities "intl-$id\t$latitude\t$longitude\t$elevation\t$importance\t$cc2\t\t\t$name_type\t$language\t$short_name\t$long_name\t$flat_name\n";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
close INTLSRC;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# $usbigsrc is the "Concise Features Gazetteer", containing "Large
|
|
Packit Service |
4897fc |
# features that should be labeled on maps with a scale of 1:250,000".
|
|
Packit Service |
4897fc |
# We use this to find medium-to-large cities. After reading through
|
|
Packit Service |
4897fc |
# that, we read $uscitysrc, the "Populated Places Gazetteer", and add
|
|
Packit Service |
4897fc |
# any remaining cities from there with $importance=-1. Both of these
|
|
Packit Service |
4897fc |
# files have DOS newlines. (After importing the data into sqlite,
|
|
Packit Service |
4897fc |
# we'll use $majorsrc to set $importance=2 on the largest US cities.)
|
|
Packit Service |
4897fc |
%uscities = ();
|
|
Packit Service |
4897fc |
$/ = "\r\n";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# The columns in $usbigsrc:
|
|
Packit Service |
4897fc |
# feature_id feature_name class st_alpha st_num county county_num primary_lat_dms primary_lon_dms primary_lat_dec primary_lon_dec source_lat_dms source_lon_dms source_lat_dec source_lon_dec elev map_name
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "Reading $usbigsrc\n";
|
|
Packit Service |
4897fc |
open USBIGSRC, "<:utf8", $usbigsrc;
|
|
Packit Service |
4897fc |
while (<USBIGSRC>) {
|
|
Packit Service |
4897fc |
# skip header, and historical records
|
|
Packit Service |
4897fc |
next if $. == 1 || /\(historical\)/i;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# remove CRLF and split
|
|
Packit Service |
4897fc |
chomp;
|
|
Packit Service |
4897fc |
@fields = split /\t/;
|
|
Packit Service |
4897fc |
next if $#fields != 16;
|
|
Packit Service |
4897fc |
($feature_id, $name, $class, $state, $state_num, $junk, $county_num, $junk,
|
|
Packit Service |
4897fc |
$junk, $latitude, $longitude, $junk, $junk, $junk, $junk,
|
|
Packit Service |
4897fc |
$elevation, $junk) = @fields;
|
|
Packit Service |
4897fc |
$flat = uc(myunidecode($name));
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
if ($class eq "Populated Place") {
|
|
Packit Service |
4897fc |
$importance = 0;
|
|
Packit Service |
4897fc |
} else {
|
|
Packit Service |
4897fc |
next;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# ID LATITUDE LONGITUDE ELEVATION IMPORTANCE COUNTRY_CODE STATE_CODE
|
|
Packit Service |
4897fc |
# COUNTY_CODE NAME_TYPE LANGUAGE SHORT_NAME LONG_NAME FLAT_NAME
|
|
Packit Service |
4897fc |
print $cities "us-$feature_id\t$latitude\t$longitude\t$elevation\t$importance\tUS\tUS$state_num\t$county_num\tN\teng\t$name\t\t$flat\n";
|
|
Packit Service |
4897fc |
$uscities{$feature_id} = 1;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
close USBIGSRC;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# The columns in $uscitysrc:
|
|
Packit Service |
4897fc |
# feature_id feature_name class st_alpha st_num county county_num primary_lat_dms primary_lon_dms primary_lat_dec primary_lon_dec elev map_name
|
|
Packit Service |
4897fc |
print "Reading $uscitysrc\n";
|
|
Packit Service |
4897fc |
open USCITYSRC, "<:utf8", $uscitysrc;
|
|
Packit Service |
4897fc |
while (<USCITYSRC>) {
|
|
Packit Service |
4897fc |
# skip header, and historical records
|
|
Packit Service |
4897fc |
next if $. == 1 || /\(historical\)/i;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# remove CRLF and split
|
|
Packit Service |
4897fc |
chomp;
|
|
Packit Service |
4897fc |
@fields = split /\t/;
|
|
Packit Service |
4897fc |
next if $#fields != 12;
|
|
Packit Service |
4897fc |
($feature_id, $name, $class, $state, $state_num, $county, $county_num,
|
|
Packit Service |
4897fc |
$junk, $junk, $latitude, $longitude, $elevation, $junk) = @fields;
|
|
Packit Service |
4897fc |
$flat = uc(myunidecode($name));
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
if (!$uscities{$feature_id}) {
|
|
Packit Service |
4897fc |
# ID LATITUDE LONGITUDE ELEVATION IMPORTANCE COUNTRY_CODE STATE_CODE
|
|
Packit Service |
4897fc |
# COUNTY_CODE NAME_TYPE LANGUAGE SHORT_NAME LONG_NAME FLAT_NAME
|
|
Packit Service |
4897fc |
print $cities "us-$feature_id\t$latitude\t$longitude\t$elevation\t-1\tUS\tUS$state_num\t$county_num\tN\teng\t$name\t\t$flat\n";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# There are no separate entries for counties, so we just extract
|
|
Packit Service |
4897fc |
# that data out of the cities, keeping track of which counties we've
|
|
Packit Service |
4897fc |
# already added
|
|
Packit Service |
4897fc |
$county_id = "us-$state_num-$county_num";
|
|
Packit Service |
4897fc |
if (!$counties{$county_id}) {
|
|
Packit Service |
4897fc |
$counties{$county_id} = 1;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# ID COUNTRY_CODE STATE_CODE COUNTY_CODE NAME_TYPE LANGUAGE
|
|
Packit Service |
4897fc |
# SHORT_NAME LONG_NAME FLAT_NAME
|
|
Packit Service |
4897fc |
print $counties "$county_id\tUS\tUS$state_num\t$county_num\tN\teng\t$county\t\t\n";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
close USCITYSRC;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# Now add some additional data that's not in any source file...
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# $intlsrc has no entry for the US or its dependencies
|
|
Packit Service |
4897fc |
$countries{"United States"} = "US";
|
|
Packit Service |
4897fc |
$countries{"United States of America"} = "US";
|
|
Packit Service |
4897fc |
$countries{"United States Minor Outlying Islands"} = "US";
|
|
Packit Service |
4897fc |
$countries{"Micronesia, Federated States of"} = "US";
|
|
Packit Service |
4897fc |
# These are in $intlsrc, but not listed as countries
|
|
Packit Service |
4897fc |
$countries{"Ã…land Islands"} = "FI01";
|
|
Packit Service |
4897fc |
$countries{"Antarctica"} = "AY";
|
|
Packit Service |
4897fc |
$countries{"Faroe Islands"} = "FO";
|
|
Packit Service |
4897fc |
$countries{"French Guiana"} = "FG";
|
|
Packit Service |
4897fc |
$countries{"French Polynesia"} = "FP";
|
|
Packit Service |
4897fc |
$countries{"Greenland"} = "GL";
|
|
Packit Service |
4897fc |
$countries{"Guadeloupe"} = "GP";
|
|
Packit Service |
4897fc |
$countries{"Martinique"} = "MB";
|
|
Packit Service |
4897fc |
$countries{"New Caledonia"} = "NC";
|
|
Packit Service |
4897fc |
$countries{"Reunion"} = "RE";
|
|
Packit Service |
4897fc |
$countries{"Saint Pierre and Miquelon"} = "SB";
|
|
Packit Service |
4897fc |
$countries{"Svalbard"} = "SV";
|
|
Packit Service |
4897fc |
$countries{"Wallis and Futuna"} = "WF";
|
|
Packit Service |
4897fc |
# These are variations that appear in $weathersrc
|
|
Packit Service |
4897fc |
$countries{"Congo, Republic of the"} = "CF";
|
|
Packit Service |
4897fc |
$countries{"Congo, Democratic Republic of the"} = "CG";
|
|
Packit Service |
4897fc |
$countries{"Cote d'Ivoire"} = "IV";
|
|
Packit Service |
4897fc |
$countries{"Falkland Islands, Islas Malvinas"} = "FK";
|
|
Packit Service |
4897fc |
$countries{"Gambia, The"} = "GA";
|
|
Packit Service |
4897fc |
$countries{"Korea, North"} = "KN";
|
|
Packit Service |
4897fc |
$countries{"Korea, South"} = "KS";
|
|
Packit Service |
4897fc |
$countries{"Macedonia, The Republic of"} = "MK";
|
|
Packit Service |
4897fc |
$countries{"People's Republic of China"} = "CH";
|
|
Packit Service |
4897fc |
$countries{"Viet Nam"} = "VM";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
$usstatedata = << "EOF";
|
|
Packit Service |
4897fc |
01 AL Alabama
|
|
Packit Service |
4897fc |
02 AK Alaska
|
|
Packit Service |
4897fc |
04 AZ Arizona
|
|
Packit Service |
4897fc |
05 AR Arkansas
|
|
Packit Service |
4897fc |
06 CA California
|
|
Packit Service |
4897fc |
08 CO Colorado
|
|
Packit Service |
4897fc |
09 CT Connecticut
|
|
Packit Service |
4897fc |
10 DE Delaware
|
|
Packit Service |
4897fc |
11 DC District of Columbia
|
|
Packit Service |
4897fc |
12 FL Florida
|
|
Packit Service |
4897fc |
13 GA Georgia
|
|
Packit Service |
4897fc |
15 HI Hawaii
|
|
Packit Service |
4897fc |
16 ID Idaho
|
|
Packit Service |
4897fc |
17 IL Illinois
|
|
Packit Service |
4897fc |
18 IN Indiana
|
|
Packit Service |
4897fc |
19 IA Iowa
|
|
Packit Service |
4897fc |
20 KS Kansas
|
|
Packit Service |
4897fc |
21 KY Kentucky
|
|
Packit Service |
4897fc |
22 LA Louisiana
|
|
Packit Service |
4897fc |
23 ME Maine
|
|
Packit Service |
4897fc |
24 MD Maryland
|
|
Packit Service |
4897fc |
25 MA Massachusetts
|
|
Packit Service |
4897fc |
26 MI Michigan
|
|
Packit Service |
4897fc |
27 MN Minnesota
|
|
Packit Service |
4897fc |
28 MS Mississippi
|
|
Packit Service |
4897fc |
29 MO Missouri
|
|
Packit Service |
4897fc |
30 MT Montana
|
|
Packit Service |
4897fc |
31 NE Nebraska
|
|
Packit Service |
4897fc |
32 NV Nevada
|
|
Packit Service |
4897fc |
33 NH New Hampshire
|
|
Packit Service |
4897fc |
34 NJ New Jersey
|
|
Packit Service |
4897fc |
35 NM New Mexico
|
|
Packit Service |
4897fc |
36 NY New York
|
|
Packit Service |
4897fc |
37 NC North Carolina
|
|
Packit Service |
4897fc |
38 ND North Dakota
|
|
Packit Service |
4897fc |
39 OH Ohio
|
|
Packit Service |
4897fc |
40 OK Oklahoma
|
|
Packit Service |
4897fc |
41 OR Oregon
|
|
Packit Service |
4897fc |
42 PA Pennsylvania
|
|
Packit Service |
4897fc |
44 RI Rhode Island
|
|
Packit Service |
4897fc |
45 SC South Carolina
|
|
Packit Service |
4897fc |
46 SD South Dakota
|
|
Packit Service |
4897fc |
47 TN Tennessee
|
|
Packit Service |
4897fc |
48 TX Texas
|
|
Packit Service |
4897fc |
49 UT Utah
|
|
Packit Service |
4897fc |
50 VT Vermont
|
|
Packit Service |
4897fc |
51 VA Virginia
|
|
Packit Service |
4897fc |
53 WA Washington
|
|
Packit Service |
4897fc |
54 WV West Virginia
|
|
Packit Service |
4897fc |
55 WI Wisconsin
|
|
Packit Service |
4897fc |
56 WY Wyoming
|
|
Packit Service |
4897fc |
60 AS American Samoa
|
|
Packit Service |
4897fc |
64 FM Federated States of Micronesia
|
|
Packit Service |
4897fc |
66 GU Guam
|
|
Packit Service |
4897fc |
69 MP Northern Mariana Islands
|
|
Packit Service |
4897fc |
70 PW Palau
|
|
Packit Service |
4897fc |
72 PR Puerto Rico
|
|
Packit Service |
4897fc |
74 UM United States Minor Outlying Islands
|
|
Packit Service |
4897fc |
78 VI United States Virgin Islands
|
|
Packit Service |
4897fc |
EOF
|
|
Packit Service |
4897fc |
# ID COUNTRY_CODE STATE_CODE NAME_TYPE LANGUAGE SHORT_NAME
|
|
Packit Service |
4897fc |
# LONG_NAME FLAT_NAME
|
|
Packit Service |
4897fc |
for $usstate (split(/\n/, $usstatedata)) {
|
|
Packit Service |
4897fc |
($fips, $abbrev, $name) = split(/\t/, $usstate);
|
|
Packit Service |
4897fc |
($flat = uc $name) =~ s/ //g;
|
|
Packit Service |
4897fc |
$states{$name} = "US$fips";
|
|
Packit Service |
4897fc |
$states{$abbrev} = "US$fips";
|
|
Packit Service |
4897fc |
print $states "us-$fips\tUS\tUS$fips\tN\teng\t$abbrev\t$name\t$flat\n";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
close $states;
|
|
Packit Service |
4897fc |
close $counties;
|
|
Packit Service |
4897fc |
close $cities;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "\nCreating adm1 ('states') table\n";
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE TABLE adm1 (id TEXT, country_code TEXT, state_code TEXT, name_type TEXT, language TEXT, short_name TEXT, long_name TEXT, flat_name TEXT);\n";
|
|
Packit Service |
4897fc |
print SQLITE ".separator '\t'\n";
|
|
Packit Service |
4897fc |
print SQLITE ".import $states_filename adm1\n";
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
#unlink $states_filename;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "Creating adm2 ('counties') table\n";
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE TABLE adm2 (id TEXT, country_code TEXT, state_code TEXT, county_code TEXT, name_type TEXT, language TEXT, short_name TEXT, long_name TEXT, flat_name TEXT);\n";
|
|
Packit Service |
4897fc |
print SQLITE ".separator '\t'\n";
|
|
Packit Service |
4897fc |
print SQLITE ".import $counties_filename adm2\n";
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
#unlink $counties_filename;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "Creating cities table\n";
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE TABLE cities (id TEXT, latitude REAL, longitude REAL, elevation REAL, importance INTEGER, country_code TEXT, state_code TEXT, county_code TEXT, name_type TEXT, language TEXT, short_name TEXT, long_name TEXT, flat_name TEXT);\n";
|
|
Packit Service |
4897fc |
print SQLITE ".separator '\t'\n";
|
|
Packit Service |
4897fc |
print SQLITE ".import $cities_filename cities\n";
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
#unlink $cities_filename;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "Indexing cities\n";
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE INDEX short_name ON cities (country_code, short_name);\n";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE INDEX long_name ON cities (country_code, long_name);\n";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE INDEX flat_name ON cities (country_code, flat_name);\n";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE INDEX long_lat ON cities (country_code, longitude, latitude);\n";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE INDEX id ON cities (id);\n";
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# Update cities by marking cities "important" if they have the same
|
|
Packit Service |
4897fc |
# name as their ADM1. (This seems to do a good job of picking up major
|
|
Packit Service |
4897fc |
# cities in some countries that don't have PC or POP data.)
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
print SQLITE "UPDATE cities SET importance=1 WHERE importance < 1 AND id IN (SELECT cities.id FROM cities INNER JOIN adm1 ON cities.country_code = adm1.country_code AND cities.state_code = adm1.state_code AND cities.flat_name = adm1.flat_name);\n";
|
|
Packit Service |
4897fc |
print SQLITE "UPDATE cities SET importance=1 WHERE importance < 1 AND id IN (SELECT cities.id FROM cities INNER JOIN adm1 ON cities.country_code = adm1.country_code AND cities.state_code = adm1.state_code AND cities.long_name = adm1.short_name);\n";
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# Update cities table with additional information about major cities
|
|
Packit Service |
4897fc |
$/ = "\n";
|
|
Packit Service |
4897fc |
open MAJORSRC, "<:utf8", $majorsrc;
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
while (<MAJORSRC>) {
|
|
Packit Service |
4897fc |
chomp;
|
|
Packit Service |
4897fc |
s/\s*#.*//;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
@fields = split /\t/;
|
|
Packit Service |
4897fc |
next if $#fields != 4;
|
|
Packit Service |
4897fc |
($country, $latitude, $longitude, $importance, $city) = @fields;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
$city =~ s/'/''/g; # for SQL
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
if ($latitude && $longitude) {
|
|
Packit Service |
4897fc |
$latlo = $latitude - 0.1;
|
|
Packit Service |
4897fc |
$lathi = $latitude + 0.1;
|
|
Packit Service |
4897fc |
$lonlo = $longitude - 0.1;
|
|
Packit Service |
4897fc |
$lonhi = $longitude + 0.1;
|
|
Packit Service |
4897fc |
print SQLITE "UPDATE cities SET importance=$importance WHERE ( short_name='$city' OR long_name='$city' ) AND country_code='$country' AND importance!=-1 AND latitude BETWEEN $latlo AND $lathi AND longitude BETWEEN $lonlo AND $lonhi;\n";
|
|
Packit Service |
4897fc |
} else {
|
|
Packit Service |
4897fc |
print SQLITE "UPDATE cities SET importance=$importance WHERE ( short_name='$city' OR long_name='$city' ) AND country_code='$country' AND importance!=-1;\n";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
close MAJORSRC;
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# The columns in $weathersrc are
|
|
Packit Service |
4897fc |
# CODE wmo_zone wmo_id NAME STATE COUNTRY region LATITUDE_DMS LONGITUDE_DMS ue_lat ue_long ELEVATION upper_elevation primary
|
|
Packit Service |
4897fc |
# (however, some lines are mising one or both of the last two fields!)
|
|
Packit Service |
4897fc |
#
|
|
Packit Service |
4897fc |
# The columns in the stations table will be
|
|
Packit Service |
4897fc |
# CODE NAME STATE COUNTRY LATITUDE LONGITUDE ELEVATION
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
sub dms_to_dec {
|
|
Packit Service |
4897fc |
my ($coord) = @_;
|
|
Packit Service |
4897fc |
return 0 if $coord !~ /(\d+)-(\d+)(-(\d+))?([NSEW])/;
|
|
Packit Service |
4897fc |
my $dec = $1 + $2 / 60;
|
|
Packit Service |
4897fc |
if ($4) { $dec += $4 / 3600; }
|
|
Packit Service |
4897fc |
if ($5 =~ /[SW]/) { $dec = -$dec; }
|
|
Packit Service |
4897fc |
return $dec;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print "\nCreating weather stations\n";
|
|
Packit Service |
4897fc |
($stations, $stations_filename) = tempfile();
|
|
Packit Service |
4897fc |
open WEATHERSRC, "-|:utf8", "perl ./station-fixups.pl < $weathersrc";
|
|
Packit Service |
4897fc |
while (<WEATHERSRC>) {
|
|
Packit Service |
4897fc |
# remove CRLF and split
|
|
Packit Service |
4897fc |
chomp;
|
|
Packit Service |
4897fc |
@fields = split /;/;
|
|
Packit Service |
4897fc |
next if $#fields < 11;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
($code, $junk, $id, $location, $state, $country, $junk, $latitude, $longitude, $junk, $junk, $elevation) = @fields;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
$latitude = dms_to_dec($latitude);
|
|
Packit Service |
4897fc |
$longitude = dms_to_dec($longitude);
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
$country_code = $countries{$country};
|
|
Packit Service |
4897fc |
if (!$country_code) {
|
|
Packit Service |
4897fc |
$alt_country = s/^(.*), (.*)$/$2 $1/;
|
|
Packit Service |
4897fc |
$country_code = $countries{$alt_country};
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
if (!$country_code) {
|
|
Packit Service |
4897fc |
if ($country) { print "No country for '$country'\n"; }
|
|
Packit Service |
4897fc |
$country_code = '';
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
if ($state) {
|
|
Packit Service |
4897fc |
$state_code = $states{$state};
|
|
Packit Service |
4897fc |
if (!$state_code) {
|
|
Packit Service |
4897fc |
print "No state for '$state'\n";
|
|
Packit Service |
4897fc |
next;
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
} else {
|
|
Packit Service |
4897fc |
$state_code = "";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
print $stations "$code;$location;$state_code;$country_code;$latitude;$longitude;$elevation\n";
|
|
Packit Service |
4897fc |
}
|
|
Packit Service |
4897fc |
close WEATHERSRC;
|
|
Packit Service |
4897fc |
close $stations;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE TABLE stations (code TEXT, name TEXT, state TEXT, country TEXT, latitude REAL, longitude REAL, elevation REAL);\n";
|
|
Packit Service |
4897fc |
print SQLITE ".separator ';'\n";
|
|
Packit Service |
4897fc |
print SQLITE ".import $stations_filename stations\n";
|
|
Packit Service |
4897fc |
print SQLITE "CREATE INDEX station_long_lat ON stations (longitude, latitude);\n";
|
|
Packit Service |
4897fc |
close SQLITE;
|
|
Packit Service |
4897fc |
|
|
Packit Service |
4897fc |
# unlink $stations_filename;
|