#!/usr/bin/perl -w use strict; use Data::Dumper; use DBI(); my $INFILE = "taxdata.csv"; open(my $fh, "<", $INFILE) or die "Cannot open $INFILE for reading: $!"; my $database = 'taxbrackets'; my $hostname = '127.0.0.1'; my $username = 'root'; my $password = ''; my $dsn = "DBI:mysql:database=$database;host=$hostname;port=3306"; my $dbh = DBI->connect($dsn, $username, $password) or die "Couldn't connect to MySQL server at $hostname (user $username): $!"; # $dbh->trace(2); my %YEARS; while (defined(my $year_line = <$fh>)) { chomp($year_line); print "$year_line\n"; # if ($line =~ /^,,(Nominal),,,,(\d{4}),,,,,,,,,,,,,,(Adjusted for Inflation)?,,,,(\d{4}),,,CPI: ([\d\.]+)(?: \(Projected\))?,,,,,,,$/) { # ,,Nominal,,,,2011,,,,,,,,,,,,,,Adjusted for Inflation,,,,2011,,,CPI: 224.380 (Projected),,,,,,, # ,,Nominal,,,,2010,,,,,,,,,,,,,,Adjusted for Inflation,,,,2010,,,CPI: 218.056,,,,,,, if ($year_line =~ /,,(Nominal),,,,(\d{4})(?: \(\w\))?,,,,,,,,,,,,,,(Adjusted for Inflation)?,,,,(\d{4})(?: \(\w\))?,,,CPI: ([\d\.]+)(?: \(Projected\))?,,,,,,,/) { my $yr = $2; my $cpi = $5; print "year: $yr cpi: $cpi\n"; # Filing status line. my $filing_status_line = <$fh>; chomp($filing_status_line); my @statuses = ($filing_status_line =~ /,(Married Filing Jointly),,,,(Married Filing Separately),,,,(Single),,,,(Head of Household),,,,,,(Married Filing Jointly),,,,(Married Filing Separately),,,,(Single),,,,(Head of Household),,,/); print "$filing_status_line\n"; print "^... is a filing status line: " . join(", ", map { defined($_) ? "'$_'" : 'undef' } @statuses) . "\n"; # Skip headers... <$fh>; # skip "Marginal"|"Tax Brackets" <$fh>; # skip "Tax Rate"|"Over"|"But Not Over" # Compose nominal & adjusted lists for this year. my @notes; my %nominal; my %adjusted; while (defined(my $data_line = <$fh>)) { chomp($data_line); print "$data_line\n"; # Match & extract columns. my $percent = '[\d\.]+%'; my $dollar = '\$[\d,]+|-'; my $q = '"?'; my $colgroup = "(?:($percent)?,$q($dollar)?$q,$q($dollar)?$q|,$q.+$q,)"; my $datagroup = "$colgroup,,$colgroup,,$colgroup,,$colgroup"; my $line_regex = "$datagroup,,,,$datagroup"; # Columns: # Nom Adj # MFJ MFS Sng HoH MFJ MFS Sng HoH # R O B R O B R O B R O B R O B R O B R O B R O B if ($data_line =~ /^,{34}/) { print "^... is a blank line.\n"; last; } elsif (my @matches = ($data_line =~ /^$line_regex/)) { print "^... is a data line: " . join(", ", map { defined($_) ? "'$_'" : 'undef' } @matches) . "\n"; # print Dumper(@matches) . "\n\n"; @matches = map { if (defined($_)) { s/[,\$%]//g; /^[\d\.]+$/ ? $_ * 1.0 : $_; } } @matches; # Nominal data group. NOMINAL: for my $i (0..3) { my $first = $i * 3; # This column is blank / has a note instead of data. next NOMINAL if $matches[$first] eq ''; my %rate; my @keys = ('rate', 'from', 'to'); for my $i (0..2) { $rate{$keys[$i]} = $matches[$first + $i]; } my $status = $statuses[$i]; # status this column is under $dbh->do("INSERT INTO `brackets` (`year`, `status`, `rate`, `from`, `to`, `dollars`, `cpi`)" . "VALUES (?, ?, ?, ?, ?, ?, ?)", undef, $yr, $status, $rate{'rate'}, $rate{'from'}, $rate{'to'} eq '-' ? undef : $rate{'to'}, 'nominal', $cpi); if (!defined($nominal{$status})) { $nominal{$status} = (); } push @{ $nominal{$status} }, { %rate }; } # Adjusted data group. ADJUSTED: for my $i (0..3) { my $first = $i * 3 + 12; # This column is blank / has a note instead of data. next ADJUSTED if $matches[$first] eq ''; my %rate; my @keys = ('rate', 'from', 'to'); for my $i (0..2) { $rate{$keys[$i]} = $matches[$first + $i]; } my $status = $statuses[$i]; # status this column is under $dbh->do("INSERT INTO `brackets` (`year`, `status`, `rate`, `from`, `to`, `dollars`)" . "VALUES (?, ?, ?, ?, ?, ?)", undef, $yr, $status, $rate{'rate'}, $rate{'from'}, $rate{'to'} eq '-' ? undef : $rate{'to'}, 'adjusted'); if (!defined($adjusted{$status})) { $adjusted{$status} = (); } push @{ $adjusted{$status} }, { %rate }; } # 1, 2, 3: $nom{$statuses[0]} # 4, 5, 6: $nom{$statuses[1]} # 7, 8, 9: $nom{$statuses[2]} # 10, 11, 12: $nom{$statuses[3]} } elsif ($data_line =~ /^"?([^,].+)"?,{18}"?([^,].+)"?,{16}/) { my $note = $1; print "^... is a note line: '$note'\n"; $note =~ s/""/"/g; $note =~ s/^\(\w\) //g; push @notes, $note; } else { print "^ line did not match.\n"; } # print Dumper(%nominal) . "\n\n"; # print Dumper(%adjusted) . "\n\n"; # last; } for (@notes) { $dbh->do("INSERT INTO `notes` (`year`, `note`)" . "VALUES (?, ?)", undef, $yr, $_); } $YEARS{$yr} = { 'nominal' => { %nominal }, 'adjusted' => { %adjusted }, 'notes' => [ @notes ] }; # last if (scalar keys %YEARS) == 40; } else { print "^ line did not match.\n"; } # last; } print Dumper(%YEARS) . "\n\n"; $dbh->disconnect(); # %YEARS = { # 1913: { # %year # nominal: { # %nom # 'Married Filing Jointly': [ # { # rate: 1%, # over: 10000, # but_not_over: 100000 # }, # { # rate: 2%, # over: 20000, # but_not_over: 200000 # }, # ], # 'Married Filing Separately': { # rate: 1%, # over: 10000, # but_not_over: 100000 # } # }, # adjusted: { # %adj # # }, # notes: { # @notes # # } # }, # 1914: { # # }, # ... # } # [ # { # "year": 1913, # "cpi": ..., # "statuses": { # "Married Filing Jointly": [ # ... # ] # }, # "notes": [ # ... # ] # } # ]