Blame data/build-locationdb.pl

Packit Service 4897fc
#!/usr/bin/perl -w
Packit Service 4897fc
Packit Service 4897fc
use File::Temp "tempfile";
Packit Service 4897fc
use Text::Unidecode;
Packit Service 4897fc
Packit Service 4897fc
sub myunidecode {
Packit Service 4897fc
  my ($text) = @_;
Packit Service 4897fc
  $text = unidecode($text);
Packit Service 4897fc
  $text =~ s/\@/a/g;
Packit Service 4897fc
  return $text;
Packit Service 4897fc
}
Packit Service 4897fc
Packit Service 4897fc
@intlglob = glob("sources/geonames_dd_dms_date_*.txt");
Packit Service 4897fc
$intlsrc = $intlglob[$#intlglob];
Packit Service 4897fc
Packit Service 4897fc
$uscitysrc = "sources/POP_PLACES.txt";
Packit Service 4897fc
$usbigsrc = "sources/US_CONCISE.txt";
Packit Service 4897fc
$majorsrc = "major-cities.txt";
Packit Service 4897fc
$weathersrc = "sources/nsd_cccc.txt";
Packit Service 4897fc
Packit Service 4897fc
$locationdb = "locationdb.sqlite";
Packit Service 4897fc
unlink $locationdb;
Packit Service 4897fc
Packit Service 4897fc
($states, $states_filename) = tempfile();
Packit Service 4897fc
binmode $states, ":utf8";
Packit Service 4897fc
($counties, $counties_filename) = tempfile();
Packit Service 4897fc
binmode $counties, ":utf8";
Packit Service 4897fc
($cities, $cities_filename) = tempfile();
Packit Service 4897fc
binmode $cities, ":utf8";
Packit Service 4897fc
Packit Service 4897fc
# The columns in $intlsrc: (interesting ones in caps)
Packit Service 4897fc
# RC UFI uni LAT LONG dms_lat dms_long mgrs jog fc DSG PC CC1 ADM1 adm2 POP ELEV CC2 NT LC SHORT_FORM generic SORT_NAME FULL_NAME FULL_NAME_ND modify_date
Packit Service 4897fc
Packit Service 4897fc
print "Reading $intlsrc\n";
Packit Service 4897fc
open INTLSRC, "-|:utf8", "perl ./city-fixups.pl < $intlsrc";
Packit Service 4897fc
while (<INTLSRC>) {
Packit Service 4897fc
  # skip header, and historical records
Packit Service 4897fc
  next if $. == 1 || /\(\(/ || /\(historical\)/i;
Packit Service 4897fc
Packit Service 4897fc
  @fields = split /\t/;
Packit Service 4897fc
  next if $#fields != 25;
Packit Service 4897fc
Packit Service 4897fc
  ($rc, $id, $junk, $latitude, $longitude, $junk, $junk, $junk,
Packit Service 4897fc
   $junk, $junk, $designation, $pc, $country_code, $state_code, $junk,
Packit Service 4897fc
   $population, $elevation, $cc2, $name_type, $language, $short_name,
Packit Service 4897fc
   $junk, $flat_name, $long_name, $long_name_nd, $junk) = @fields;
Packit Service 4897fc
Packit Service 4897fc
  # Don't use wacky complex transliteration rules for non-European
Packit Service 4897fc
  # names; just use the non-diacritic version of the name instead
Packit Service 4897fc
  if ($name_type eq 'N' && (!$rc || $rc > 2)) {
Packit Service 4897fc
    $long_name = myunidecode($long_name);
Packit Service 4897fc
    $short_name = myunidecode($short_name);
Packit Service 4897fc
  } elsif ($name_type eq 'NS' && (!$rc || $rc > 2)) {
Packit Service 4897fc
    $long_name = $long_name_nd;
Packit Service 4897fc
  }
Packit Service 4897fc
Packit Service 4897fc
  # Also fix smart quotes and dumb spacing
Packit Service 4897fc
  $short_name =~ s/\x{2019}/'/g;
Packit Service 4897fc
  $short_name =~ s/  +/ /;
Packit Service 4897fc
  $short_name =~ s/ $//;
Packit Service 4897fc
  $long_name =~ s/\x{2019}/'/g;
Packit Service 4897fc
  $long_name =~ s/  +/ /;
Packit Service 4897fc
  $long_name =~ s/ $//;
Packit Service 4897fc
Packit Service 4897fc
  if ($designation =~ /^PCL/) {
Packit Service 4897fc
    # /^PCL/ matches countries and other ISO-3166-like things:
Packit Service 4897fc
    # PCLD - dependent political entity
Packit Service 4897fc
    # PCLF - freely associated state
Packit Service 4897fc
    # PCLI - independent political entity (ie, "country")
Packit Service 4897fc
    # PCLIX - section of independent political entity
Packit Service 4897fc
Packit Service 4897fc
    if ($short_name) { $countries{$short_name} = $country_code; }
Packit Service 4897fc
    if ($long_name) { $countries{$long_name} = $country_code; }
Packit Service 4897fc
Packit Service 4897fc
  } elsif ($designation eq "ADM1") {
Packit Service 4897fc
    # ID COUNTRY_CODE STATE_CODE NAME_TYPE LANGUAGE SHORT_NAME
Packit Service 4897fc
    # LONG_NAME FLAT_NAME
Packit Service 4897fc
    print $states "intl-$id\t$country_code\t$country_code$state_code\t$name_type\t$language\t$short_name\t$long_name\t$flat_name\n";
Packit Service 4897fc
Packit Service 4897fc
  } elsif ($designation =~ /^PPL/) {
Packit Service 4897fc
    # "PPL*" is "Populated Place"
Packit Service 4897fc
Packit Service 4897fc
    # Assign an "importance" to the city. Having a positive
Packit Service 4897fc
    # $importance makes a city "bigger" in terms of figuring out how
Packit Service 4897fc
    # close it is to a given coordinate, and having negative
Packit Service 4897fc
    # $importance makes it "smaller". Furthermore, a negative
Packit Service 4897fc
    # importance city can never match solely on coordinates; it needs
Packit Service 4897fc
    # to match based on name as well.
Packit Service 4897fc
    #
Packit Service 4897fc
    # $importance = 2 means furthermore that if we haven't found a
Packit Service 4897fc
    # weather station for the city after going through the whole
Packit Service 4897fc
    # stations table, then we'll add an entry using whatever weather
Packit Service 4897fc
    # station is closest. $importance = 3 means it's a national
Packit Service 4897fc
    # capital.
Packit Service 4897fc
    #
Packit Service 4897fc
    # Our current definition of "important" is that it has designation
Packit Service 4897fc
    # of "PPLC" (capital of a country [that is, a /PCL.*/]) or "PPLA"
Packit Service 4897fc
    # (capital of an ADM1). We also look at the "Populated Place
Packit Service 4897fc
    # Classification" ($pc) and population, but as of 2008-05 those
Packit Service 4897fc
    # aren't filled in yet for most countries.
Packit Service 4897fc
    #
Packit Service 4897fc
    # If $pc is 4 or 5 ("unimportant"), or $designation is "PPLX"
Packit Service 4897fc
    # (neighborhood/subdivision), we give the city $importance = -1,
Packit Service 4897fc
    # meaning it will only be used when a weather station actually
Packit Service 4897fc
    # matches its name; it won't get pulled in as a "nearest city"
Packit Service 4897fc
    # match.
Packit Service 4897fc
    if ($designation eq "PPLC") {
Packit Service 4897fc
      $importance = 3;
Packit Service 4897fc
    } elsif ($population && $population > 100000) {
Packit Service 4897fc
      $importance = 2;
Packit Service 4897fc
    } elsif ($designation eq "PPLA" || $pc eq "1" || $pc eq "2") {
Packit Service 4897fc
      $importance = 1;
Packit Service 4897fc
    } elsif ($designation eq "PPLX" || $pc eq "4" || $pc eq "5") {
Packit Service 4897fc
      $importance = -1;
Packit Service 4897fc
    } else {
Packit Service 4897fc
      $importance = 0;
Packit Service 4897fc
    }
Packit Service 4897fc
Packit Service 4897fc
    # either sqlite or pysqlite treats ".9" as a string rather than a
Packit Service 4897fc
    # number, so make sure we say "0.9" instead
Packit Service 4897fc
    $latitude = 0 + $latitude;
Packit Service 4897fc
    $longitude = 0 + $longitude;
Packit Service 4897fc
Packit Service 4897fc
    # ID LATITUDE LONGITUDE ELEVATION IMPORTANCE COUNTRY_CODE
Packit Service 4897fc
    # STATE_CODE COUNTY_CODE NAME_TYPE LANGUAGE SHORT_NAME LONG_NAME
Packit Service 4897fc
    # FLAT_NAME
Packit Service 4897fc
    print $cities "intl-$id\t$latitude\t$longitude\t$elevation\t$importance\t$country_code\t$country_code$state_code\t\t$name_type\t$language\t$short_name\t$long_name\t$flat_name\n";
Packit Service 4897fc
Packit Service 4897fc
    # The dataset gives two countries for some features, usually
Packit Service 4897fc
    # meaning either that they cross a national border, or are
Packit Service 4897fc
    # disputed.
Packit Service 4897fc
    if ($cc2) {
Packit Service 4897fc
      print $cities "intl-$id\t$latitude\t$longitude\t$elevation\t$importance\t$cc2\t\t\t$name_type\t$language\t$short_name\t$long_name\t$flat_name\n";
Packit Service 4897fc
    }
Packit Service 4897fc
  }
Packit Service 4897fc
}
Packit Service 4897fc
close INTLSRC;
Packit Service 4897fc
Packit Service 4897fc
# $usbigsrc is the "Concise Features Gazetteer", containing "Large
Packit Service 4897fc
# features that should be labeled on maps with a scale of 1:250,000".
Packit Service 4897fc
# We use this to find medium-to-large cities. After reading through
Packit Service 4897fc
# that, we read $uscitysrc, the "Populated Places Gazetteer", and add
Packit Service 4897fc
# any remaining cities from there with $importance=-1. Both of these
Packit Service 4897fc
# files have DOS newlines. (After importing the data into sqlite,
Packit Service 4897fc
# we'll use $majorsrc to set $importance=2 on the largest US cities.)
Packit Service 4897fc
%uscities = ();
Packit Service 4897fc
$/ = "\r\n";
Packit Service 4897fc
Packit Service 4897fc
# The columns in $usbigsrc:
Packit Service 4897fc
# feature_id feature_name class st_alpha st_num county county_num primary_lat_dms primary_lon_dms primary_lat_dec primary_lon_dec source_lat_dms source_lon_dms source_lat_dec source_lon_dec elev map_name
Packit Service 4897fc
Packit Service 4897fc
print "Reading $usbigsrc\n";
Packit Service 4897fc
open USBIGSRC, "<:utf8", $usbigsrc;
Packit Service 4897fc
while (<USBIGSRC>) {
Packit Service 4897fc
  # skip header, and historical records
Packit Service 4897fc
  next if $. == 1 || /\(historical\)/i;
Packit Service 4897fc
Packit Service 4897fc
  # remove CRLF and split
Packit Service 4897fc
  chomp;
Packit Service 4897fc
  @fields = split /\t/;
Packit Service 4897fc
  next if $#fields != 16;
Packit Service 4897fc
  ($feature_id, $name, $class, $state, $state_num, $junk, $county_num, $junk,
Packit Service 4897fc
   $junk, $latitude, $longitude, $junk, $junk, $junk, $junk,
Packit Service 4897fc
   $elevation, $junk) = @fields;
Packit Service 4897fc
  $flat = uc(myunidecode($name));
Packit Service 4897fc
Packit Service 4897fc
  if ($class eq "Populated Place") {
Packit Service 4897fc
    $importance = 0;
Packit Service 4897fc
  } else {
Packit Service 4897fc
    next;
Packit Service 4897fc
  }
Packit Service 4897fc
Packit Service 4897fc
  # ID LATITUDE LONGITUDE ELEVATION IMPORTANCE COUNTRY_CODE STATE_CODE
Packit Service 4897fc
  # COUNTY_CODE NAME_TYPE LANGUAGE SHORT_NAME LONG_NAME FLAT_NAME
Packit Service 4897fc
  print $cities "us-$feature_id\t$latitude\t$longitude\t$elevation\t$importance\tUS\tUS$state_num\t$county_num\tN\teng\t$name\t\t$flat\n";
Packit Service 4897fc
  $uscities{$feature_id} = 1;
Packit Service 4897fc
}
Packit Service 4897fc
close USBIGSRC;
Packit Service 4897fc
Packit Service 4897fc
# The columns in $uscitysrc:
Packit Service 4897fc
# feature_id feature_name class	st_alpha st_num county county_num primary_lat_dms primary_lon_dms primary_lat_dec primary_lon_dec elev map_name
Packit Service 4897fc
print "Reading $uscitysrc\n";
Packit Service 4897fc
open USCITYSRC, "<:utf8", $uscitysrc;
Packit Service 4897fc
while (<USCITYSRC>) {
Packit Service 4897fc
  # skip header, and historical records
Packit Service 4897fc
  next if $. == 1 || /\(historical\)/i;
Packit Service 4897fc
Packit Service 4897fc
  # remove CRLF and split
Packit Service 4897fc
  chomp;
Packit Service 4897fc
  @fields = split /\t/;
Packit Service 4897fc
  next if $#fields != 12;
Packit Service 4897fc
  ($feature_id, $name, $class, $state, $state_num, $county, $county_num,
Packit Service 4897fc
   $junk, $junk, $latitude, $longitude, $elevation, $junk) = @fields;
Packit Service 4897fc
  $flat = uc(myunidecode($name));
Packit Service 4897fc
Packit Service 4897fc
  if (!$uscities{$feature_id}) {
Packit Service 4897fc
    # ID LATITUDE LONGITUDE ELEVATION IMPORTANCE COUNTRY_CODE STATE_CODE
Packit Service 4897fc
    # COUNTY_CODE NAME_TYPE LANGUAGE SHORT_NAME LONG_NAME FLAT_NAME
Packit Service 4897fc
    print $cities "us-$feature_id\t$latitude\t$longitude\t$elevation\t-1\tUS\tUS$state_num\t$county_num\tN\teng\t$name\t\t$flat\n";
Packit Service 4897fc
  }
Packit Service 4897fc
Packit Service 4897fc
  # There are no separate entries for counties, so we just extract
Packit Service 4897fc
  # that data out of the cities, keeping track of which counties we've
Packit Service 4897fc
  # already added
Packit Service 4897fc
  $county_id = "us-$state_num-$county_num";
Packit Service 4897fc
  if (!$counties{$county_id}) {
Packit Service 4897fc
    $counties{$county_id} = 1;
Packit Service 4897fc
Packit Service 4897fc
    # ID COUNTRY_CODE STATE_CODE COUNTY_CODE NAME_TYPE LANGUAGE
Packit Service 4897fc
    # SHORT_NAME LONG_NAME FLAT_NAME
Packit Service 4897fc
    print $counties "$county_id\tUS\tUS$state_num\t$county_num\tN\teng\t$county\t\t\n";
Packit Service 4897fc
  }
Packit Service 4897fc
}
Packit Service 4897fc
close USCITYSRC;
Packit Service 4897fc
Packit Service 4897fc
# Now add some additional data that's not in any source file...
Packit Service 4897fc
Packit Service 4897fc
# $intlsrc has no entry for the US or its dependencies
Packit Service 4897fc
$countries{"United States"} = "US";
Packit Service 4897fc
$countries{"United States of America"} = "US";
Packit Service 4897fc
$countries{"United States Minor Outlying Islands"} = "US";
Packit Service 4897fc
$countries{"Micronesia, Federated States of"} = "US";
Packit Service 4897fc
# These are in $intlsrc, but not listed as countries
Packit Service 4897fc
$countries{"Ã…land Islands"} = "FI01";
Packit Service 4897fc
$countries{"Antarctica"} = "AY";
Packit Service 4897fc
$countries{"Faroe Islands"} = "FO";
Packit Service 4897fc
$countries{"French Guiana"} = "FG";
Packit Service 4897fc
$countries{"French Polynesia"} = "FP";
Packit Service 4897fc
$countries{"Greenland"} = "GL";
Packit Service 4897fc
$countries{"Guadeloupe"} = "GP";
Packit Service 4897fc
$countries{"Martinique"} = "MB";
Packit Service 4897fc
$countries{"New Caledonia"} = "NC";
Packit Service 4897fc
$countries{"Reunion"} = "RE";
Packit Service 4897fc
$countries{"Saint Pierre and Miquelon"} = "SB";
Packit Service 4897fc
$countries{"Svalbard"} = "SV";
Packit Service 4897fc
$countries{"Wallis and Futuna"} = "WF";
Packit Service 4897fc
# These are variations that appear in $weathersrc
Packit Service 4897fc
$countries{"Congo, Republic of the"} = "CF";
Packit Service 4897fc
$countries{"Congo, Democratic Republic of the"} = "CG";
Packit Service 4897fc
$countries{"Cote d'Ivoire"} = "IV";
Packit Service 4897fc
$countries{"Falkland Islands, Islas Malvinas"} = "FK";
Packit Service 4897fc
$countries{"Gambia, The"} = "GA";
Packit Service 4897fc
$countries{"Korea, North"} = "KN";
Packit Service 4897fc
$countries{"Korea, South"} = "KS";
Packit Service 4897fc
$countries{"Macedonia, The Republic of"} = "MK";
Packit Service 4897fc
$countries{"People's Republic of China"} = "CH";
Packit Service 4897fc
$countries{"Viet Nam"} = "VM";
Packit Service 4897fc
Packit Service 4897fc
$usstatedata = << "EOF";
Packit Service 4897fc
01	AL	Alabama
Packit Service 4897fc
02	AK	Alaska
Packit Service 4897fc
04	AZ	Arizona
Packit Service 4897fc
05	AR	Arkansas
Packit Service 4897fc
06	CA	California
Packit Service 4897fc
08	CO	Colorado
Packit Service 4897fc
09	CT	Connecticut
Packit Service 4897fc
10	DE	Delaware
Packit Service 4897fc
11	DC	District of Columbia
Packit Service 4897fc
12	FL	Florida
Packit Service 4897fc
13	GA	Georgia
Packit Service 4897fc
15	HI	Hawaii
Packit Service 4897fc
16	ID	Idaho
Packit Service 4897fc
17	IL	Illinois
Packit Service 4897fc
18	IN	Indiana
Packit Service 4897fc
19	IA	Iowa
Packit Service 4897fc
20	KS	Kansas
Packit Service 4897fc
21	KY	Kentucky
Packit Service 4897fc
22	LA	Louisiana
Packit Service 4897fc
23	ME	Maine
Packit Service 4897fc
24	MD	Maryland
Packit Service 4897fc
25	MA	Massachusetts
Packit Service 4897fc
26	MI	Michigan
Packit Service 4897fc
27	MN	Minnesota
Packit Service 4897fc
28	MS	Mississippi
Packit Service 4897fc
29	MO	Missouri
Packit Service 4897fc
30	MT	Montana
Packit Service 4897fc
31	NE	Nebraska
Packit Service 4897fc
32	NV	Nevada
Packit Service 4897fc
33	NH	New Hampshire
Packit Service 4897fc
34	NJ	New Jersey
Packit Service 4897fc
35	NM	New Mexico
Packit Service 4897fc
36	NY	New York
Packit Service 4897fc
37	NC	North Carolina
Packit Service 4897fc
38	ND	North Dakota
Packit Service 4897fc
39	OH	Ohio
Packit Service 4897fc
40	OK	Oklahoma
Packit Service 4897fc
41	OR	Oregon
Packit Service 4897fc
42	PA	Pennsylvania
Packit Service 4897fc
44	RI	Rhode Island
Packit Service 4897fc
45	SC	South Carolina
Packit Service 4897fc
46	SD	South Dakota
Packit Service 4897fc
47	TN	Tennessee
Packit Service 4897fc
48	TX	Texas
Packit Service 4897fc
49	UT	Utah
Packit Service 4897fc
50	VT	Vermont
Packit Service 4897fc
51	VA	Virginia
Packit Service 4897fc
53	WA	Washington
Packit Service 4897fc
54	WV	West Virginia
Packit Service 4897fc
55	WI	Wisconsin
Packit Service 4897fc
56	WY	Wyoming
Packit Service 4897fc
60	AS	American Samoa
Packit Service 4897fc
64	FM	Federated States of Micronesia
Packit Service 4897fc
66	GU	Guam
Packit Service 4897fc
69	MP	Northern Mariana Islands
Packit Service 4897fc
70	PW	Palau
Packit Service 4897fc
72	PR	Puerto Rico
Packit Service 4897fc
74	UM	United States Minor Outlying Islands
Packit Service 4897fc
78	VI	United States Virgin Islands
Packit Service 4897fc
EOF
Packit Service 4897fc
# ID COUNTRY_CODE STATE_CODE NAME_TYPE LANGUAGE SHORT_NAME
Packit Service 4897fc
# LONG_NAME FLAT_NAME
Packit Service 4897fc
for $usstate (split(/\n/, $usstatedata)) {
Packit Service 4897fc
  ($fips, $abbrev, $name) = split(/\t/, $usstate);
Packit Service 4897fc
  ($flat = uc $name) =~ s/ //g;
Packit Service 4897fc
  $states{$name} = "US$fips";
Packit Service 4897fc
  $states{$abbrev} = "US$fips";
Packit Service 4897fc
  print $states "us-$fips\tUS\tUS$fips\tN\teng\t$abbrev\t$name\t$flat\n";
Packit Service 4897fc
}
Packit Service 4897fc
Packit Service 4897fc
close $states;
Packit Service 4897fc
close $counties;
Packit Service 4897fc
close $cities;
Packit Service 4897fc
Packit Service 4897fc
print "\nCreating adm1 ('states') table\n";
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
print SQLITE "CREATE TABLE adm1 (id TEXT, country_code TEXT, state_code TEXT, name_type TEXT, language TEXT, short_name TEXT, long_name TEXT, flat_name TEXT);\n";
Packit Service 4897fc
print SQLITE ".separator '\t'\n";
Packit Service 4897fc
print SQLITE ".import $states_filename adm1\n";
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
#unlink $states_filename;
Packit Service 4897fc
Packit Service 4897fc
print "Creating adm2 ('counties') table\n";
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
print SQLITE "CREATE TABLE adm2 (id TEXT, country_code TEXT, state_code TEXT, county_code TEXT, name_type TEXT, language TEXT, short_name TEXT, long_name TEXT, flat_name TEXT);\n";
Packit Service 4897fc
print SQLITE ".separator '\t'\n";
Packit Service 4897fc
print SQLITE ".import $counties_filename adm2\n";
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
#unlink $counties_filename;
Packit Service 4897fc
Packit Service 4897fc
print "Creating cities table\n";
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
print SQLITE "CREATE TABLE cities (id TEXT, latitude REAL, longitude REAL, elevation REAL, importance INTEGER, country_code TEXT, state_code TEXT, county_code TEXT, name_type TEXT, language TEXT, short_name TEXT, long_name TEXT, flat_name TEXT);\n";
Packit Service 4897fc
print SQLITE ".separator '\t'\n";
Packit Service 4897fc
print SQLITE ".import $cities_filename cities\n";
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
#unlink $cities_filename;
Packit Service 4897fc
Packit Service 4897fc
print "Indexing cities\n";
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
print SQLITE "CREATE INDEX short_name ON cities (country_code, short_name);\n";
Packit Service 4897fc
print SQLITE "CREATE INDEX long_name ON cities (country_code, long_name);\n";
Packit Service 4897fc
print SQLITE "CREATE INDEX flat_name ON cities (country_code, flat_name);\n";
Packit Service 4897fc
print SQLITE "CREATE INDEX long_lat ON cities (country_code, longitude, latitude);\n";
Packit Service 4897fc
print SQLITE "CREATE INDEX id ON cities (id);\n";
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
Packit Service 4897fc
# Update cities by marking cities "important" if they have the same
Packit Service 4897fc
# name as their ADM1. (This seems to do a good job of picking up major
Packit Service 4897fc
# cities in some countries that don't have PC or POP data.)
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
print SQLITE "UPDATE cities SET importance=1 WHERE importance < 1 AND id IN (SELECT cities.id FROM cities INNER JOIN adm1 ON cities.country_code = adm1.country_code AND cities.state_code = adm1.state_code AND cities.flat_name = adm1.flat_name);\n";
Packit Service 4897fc
print SQLITE "UPDATE cities SET importance=1 WHERE importance < 1 AND id IN (SELECT cities.id FROM cities INNER JOIN adm1 ON cities.country_code = adm1.country_code AND cities.state_code = adm1.state_code AND cities.long_name = adm1.short_name);\n";
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
Packit Service 4897fc
# Update cities table with additional information about major cities
Packit Service 4897fc
$/ = "\n";
Packit Service 4897fc
open MAJORSRC, "<:utf8", $majorsrc;
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
Packit Service 4897fc
while (<MAJORSRC>) {
Packit Service 4897fc
  chomp;
Packit Service 4897fc
  s/\s*#.*//;
Packit Service 4897fc
Packit Service 4897fc
  @fields = split /\t/;
Packit Service 4897fc
  next if $#fields != 4;
Packit Service 4897fc
  ($country, $latitude, $longitude, $importance, $city) = @fields;
Packit Service 4897fc
Packit Service 4897fc
  $city =~ s/'/''/g; # for SQL
Packit Service 4897fc
Packit Service 4897fc
  if ($latitude && $longitude) {
Packit Service 4897fc
    $latlo = $latitude - 0.1;
Packit Service 4897fc
    $lathi = $latitude + 0.1;
Packit Service 4897fc
    $lonlo = $longitude - 0.1;
Packit Service 4897fc
    $lonhi = $longitude + 0.1;
Packit Service 4897fc
    print SQLITE "UPDATE cities SET importance=$importance WHERE ( short_name='$city' OR long_name='$city' ) AND country_code='$country' AND importance!=-1 AND latitude BETWEEN $latlo AND $lathi AND longitude BETWEEN $lonlo AND $lonhi;\n";
Packit Service 4897fc
  } else {
Packit Service 4897fc
    print SQLITE "UPDATE cities SET importance=$importance WHERE ( short_name='$city' OR long_name='$city' ) AND country_code='$country' AND importance!=-1;\n";
Packit Service 4897fc
  }
Packit Service 4897fc
}
Packit Service 4897fc
Packit Service 4897fc
close MAJORSRC;
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
Packit Service 4897fc
# The columns in $weathersrc are
Packit Service 4897fc
# CODE wmo_zone wmo_id NAME STATE COUNTRY region LATITUDE_DMS LONGITUDE_DMS ue_lat ue_long ELEVATION upper_elevation primary
Packit Service 4897fc
# (however, some lines are mising one or both of the last two fields!)
Packit Service 4897fc
#
Packit Service 4897fc
# The columns in the stations table will be
Packit Service 4897fc
# CODE NAME STATE COUNTRY LATITUDE LONGITUDE ELEVATION
Packit Service 4897fc
Packit Service 4897fc
sub dms_to_dec {
Packit Service 4897fc
  my ($coord) = @_;
Packit Service 4897fc
  return 0 if $coord !~ /(\d+)-(\d+)(-(\d+))?([NSEW])/;
Packit Service 4897fc
  my $dec = $1 + $2 / 60;
Packit Service 4897fc
  if ($4) { $dec += $4 / 3600; }
Packit Service 4897fc
  if ($5 =~ /[SW]/) { $dec = -$dec; }
Packit Service 4897fc
  return $dec;
Packit Service 4897fc
}
Packit Service 4897fc
Packit Service 4897fc
print "\nCreating weather stations\n";
Packit Service 4897fc
($stations, $stations_filename) = tempfile();
Packit Service 4897fc
open WEATHERSRC, "-|:utf8", "perl ./station-fixups.pl < $weathersrc";
Packit Service 4897fc
while (<WEATHERSRC>) {
Packit Service 4897fc
  # remove CRLF and split
Packit Service 4897fc
  chomp;
Packit Service 4897fc
  @fields = split /;/;
Packit Service 4897fc
  next if $#fields < 11;
Packit Service 4897fc
  
Packit Service 4897fc
  ($code, $junk, $id, $location, $state, $country, $junk, $latitude, $longitude, $junk, $junk, $elevation) = @fields;
Packit Service 4897fc
Packit Service 4897fc
  $latitude = dms_to_dec($latitude);
Packit Service 4897fc
  $longitude = dms_to_dec($longitude);
Packit Service 4897fc
Packit Service 4897fc
  $country_code = $countries{$country};
Packit Service 4897fc
  if (!$country_code) {
Packit Service 4897fc
    $alt_country = s/^(.*), (.*)$/$2 $1/;
Packit Service 4897fc
    $country_code = $countries{$alt_country};
Packit Service 4897fc
  }
Packit Service 4897fc
  if (!$country_code) {
Packit Service 4897fc
    if ($country) { print "No country for '$country'\n"; }
Packit Service 4897fc
    $country_code = '';
Packit Service 4897fc
  }
Packit Service 4897fc
  if ($state) {
Packit Service 4897fc
    $state_code = $states{$state};
Packit Service 4897fc
    if (!$state_code) {
Packit Service 4897fc
      print "No state for '$state'\n";
Packit Service 4897fc
      next;
Packit Service 4897fc
    }
Packit Service 4897fc
  } else {
Packit Service 4897fc
    $state_code = "";
Packit Service 4897fc
  }
Packit Service 4897fc
Packit Service 4897fc
  print $stations "$code;$location;$state_code;$country_code;$latitude;$longitude;$elevation\n";
Packit Service 4897fc
}
Packit Service 4897fc
close WEATHERSRC;
Packit Service 4897fc
close $stations;
Packit Service 4897fc
Packit Service 4897fc
open SQLITE, "|-:utf8", "sqlite3 $locationdb";
Packit Service 4897fc
print SQLITE "CREATE TABLE stations (code TEXT, name TEXT, state TEXT, country TEXT, latitude REAL, longitude REAL, elevation REAL);\n";
Packit Service 4897fc
print SQLITE ".separator ';'\n";
Packit Service 4897fc
print SQLITE ".import $stations_filename stations\n";
Packit Service 4897fc
print SQLITE "CREATE INDEX station_long_lat ON stations (longitude, latitude);\n";
Packit Service 4897fc
close SQLITE;
Packit Service 4897fc
Packit Service 4897fc
# unlink $stations_filename;