#!/usr/bin/perl -w # # $Id: psn-util,v 1.8 2007/06/03 04:52:59 jmates Exp $ # # The author disclaims all copyrights and releases this script into the # public domain. # # Utility to handle Portable Shogi Notation (.psn) data. For example, to # load games into a database with the correct schema: # # psn-util psn2db japan-isf-2005.psn # # See end of file for PostgreSQL schema and sample SQL queries. # # Or, to export games to study in a different tool, such as MacShogi, # use the db2psn mode and supply a SQL query that returns a list of # game_id that should be exported: # # psn-util db2psn "select game_id from games" > allgames.psn # # TODO detect duplicate (or nearly similar?) games, based on total # number of moves and whether the last moves are the same? This would # prevent the same game from being loaded twice, or allow duplicate # games to be pruned from the database. use strict; use File::Basename qw(basename); use HTML::Template (); use DBIx::Simple (); use SQL::Abstract (); my $DATA_SOURCE = 'dbi:Pg:dbname=shogi'; # TODO support nested move variations: removed for now with this gem # from MRE 2nd edition my $NESTED_PAREN_RE; $NESTED_PAREN_RE = qr/ \( ( [^()] | (??{ $NESTED_PAREN_RE }) )* \) /x; # TODO support reading comments into the database. Strip them from the # moves list for now. my $NESTED_CURLY_RE; $NESTED_CURLY_RE = qr/ \{ ( [^{}] | (??{ $NESTED_CURLY_RE }) )* \} /x; my %modes = ( 'psn2db' => \&dbconv, db2psn => \&dbunconv ); my $mode = shift; die "Usage: $0 psn2db file1 [.. fileN]\n" if not defined $mode or not exists $modes{$mode}; $modes{$mode}->(@ARGV); sub dbunconv { # Accept SQL query that must return 1 or more game_id my $query = shift; if ( !defined $query or $query eq '' ) { $query = do { local $/ = undef; }; } my $db = DBIx::Simple->connect( $DATA_SOURCE, '', '', { AutoCommit => 0, RaiseError => 1 } ); $db->abstract = SQL::Abstract->new(); my $psn_tmpl = <<'END_TMPL'; [Sente ""] [SenteGrade ""] [Gote ""] [GoteGrade ""] [Date ""] [Event ""] [Venue ""] [Round ""] [Result ""] [Handicap ""] [Moves ""] END_TMPL my $template = HTML::Template->new( scalarref => \$psn_tmpl, die_on_bad_params => 0 ); my @game_ids = $db->query($query)->flat; my @display_fields = qw{sente sentegrade gote gotegrade date event venue round result handicap moves _moves}; for my $game_id (@game_ids) { my $game_ref = $db->select( 'games', \@display_fields, { game_id => $game_id }, ['game_id'] )->hash; $template->param(%$game_ref); my $psn = $template->output; $psn =~ s/\n{2,}/\n/g; print $psn, "\n"; } $db->rollback; } sub dbconv { my $db = DBIx::Simple->connect( $DATA_SOURCE, '', '', { AutoCommit => 1, RaiseError => 1 } ); $db->abstract = SQL::Abstract->new(); for my $filename (@ARGV) { open my $fh, '<', $filename or die "error: could not open file: name=$filename, errstr=$!\n"; # link games to file they came from $db->insert( 'files', { name => basename($filename) } ); my $file_id = $db->last_insert_id( undef, undef, 'files', undef ); parse_file( $db, $fh, $file_id ); } return; } sub parse_file { my $db = shift; my $fh = shift; my $file_id = shift; my %game; # buffer for current game being parsed my $in_moves_section = 0; while ( my $line = <$fh> ) { # Catch player name, other metadata above moves section if ( $line =~ m/^\[ (\S+) \s " ([^"]+) "\]/x ) { # If still in a moves section, assume this new entry marks a new # game and handle the old one if ( $in_moves_section and keys %game ) { handle_game( $db, $file_id, \%game ); %game = (); $in_moves_section = 0; } $game{ lc $1 } = $2; } else { $in_moves_section = 1; # Convert moves section into string parsed later $game{_moves} .= $line; } } # Cleanup if run off end of file if ( keys %game ) { handle_game( $db, $file_id, \%game ); } return; } sub handle_game { my $db = shift; my $file_id = shift; my $game_ref = shift; warn "info: loading game"; $game_ref->{_moves} =~ s/\s+/ /g; $game_ref->{_moves} =~ s/^\s+//g; $game_ref->{_moves} =~ s/\s+$//g; # TODO parse move variations and comments instead of evicting them $game_ref->{_moves} =~ s/$NESTED_PAREN_RE//g; $game_ref->{_moves} =~ s/$NESTED_CURLY_RE//g; my @moves = grep { defined and $_ ne '' } split /\s+\d+\.|\s+(?!\d\.)/, $game_ref->{_moves}; $moves[0] =~ s/^1\.//; $game_ref->{_move_list} = \@moves; # TODO convert "date" into TIMESTAMP (or make a reasonable attempt thereof) # $game_ref->{_date} = # TODO how set the database date format properly, depending on the # date template (if any) used here?? my @game_fields = qw{sente sentegrade gote gotegrade date event venue round result moves _moves handicap}; my $game_id; eval { $db->insert( 'games', { ( map { $_, $game_ref->{$_} } @game_fields ), file_id => $file_id } ); $game_id = $db->last_insert_id( undef, undef, 'games', undef ); }; if ($@) { use Data::Dumper; warn Dumper $game_ref; die $@; } my $move_number = 1; for my $move_text ( @{ $game_ref->{_move_list} } ) { $db->insert( 'moves', { move_number => $move_number, move_text => $move_text, game_id => $game_id } ); $move_number++; } return; } __END__ =head1 NAME psn-util - Portable Shogi Notation (PSN) handler =head1 SYNOPSIS Portable Shogi Notation (PSN) handler. See http://sial.org/shogi/ for more information. Upload data into a suitable database via: $ psn-util psn2db japan-isf-2005.psn Extract games from the database: $ psn-util db2psn "select game_id from games" > allgames.psn =head1 Database Support PostgreSQL database creation: createdb -E UTF8 shogi Schema for PostgreSQL: -- So can find original file the game loaded from CREATE TABLE files ( file_id SERIAL PRIMARY KEY, name VARCHAR(255), url TEXT ); CREATE TABLE games ( game_id SERIAL PRIMARY KEY, file_id INT, FOREIGN KEY (file_id) REFERENCES files ON UPDATE CASCADE ON DELETE NO ACTION, sente VARCHAR(255), sentegrade VARCHAR(32), gote VARCHAR(255), gotegrade VARCHAR(32), date VARCHAR(255), event VARCHAR(255), venue VARCHAR(255), round VARCHAR(32), result VARCHAR(32), handicap VARCHAR(255), moves INT, _moves TEXT, -- for conversion of date _date TIMESTAMP, -- for {blah blah...} text prior to the move list starting game_comment TEXT ); -- RAV (recursive alternative variations?) not supported -- (would need branch_id that otherwise defaults to 1?) CREATE TABLE moves ( move_id SERIAL PRIMARY KEY, move_number INT, move_text VARCHAR(8), game_id INT, FOREIGN KEY (game_id) REFERENCES games ON UPDATE CASCADE ON DELETE CASCADE, -- for {blah blah...} text inside moves section move_comment TEXT ); -- TODO probably need index on commonly queried moves fields... And some sample queries: -- Count "Roaming Rook" opening moves where Rook -- wanders to 4th rank. select move_number,count(*) from moves where move_number < 20 and move_text='R4b' group by move_number order by move_number; -- Count non-handicaped openings (these all begin with -- ... as the first move) select move_text,count(*) from moves where move_number=1 and not move_text='...' group by move_text order by move_text; -- Enumerate responses to P2f opening select move_text,count(*) from moves where move_number=2 and game_id in (select game_id from moves where move_number=1 and move_text='P2f') group by move_text order by count(*),move_text; -- Find K9i (a corner castle) where a lancer advance -- also featured select k.game_id from moves k, moves l where k.move_text='K9i' and l.move_text='L9h' and k.game_id = l.game_i; =head1 BUGS If the bug is in the latest version, send a report to the author. Patches that fix problems or add new features are welcome. See source for TODO and similar comments on ideas for improvements. =head1 SEE ALSO perl(1) =head1 AUTHOR Jeremy Mates, http://sial.org/contact/ =head1 COPYRIGHT The author disclaims all copyrights and releases this script into the public domain. =head1 VERSION $Id: psn-util,v 1.8 2007/06/03 04:52:59 jmates Exp $ =cut