#!/usr/bin/perl -w
#
# $Id: xpquery,v 1.8 2006/03/25 14:47:50 jmates Exp $
#
# The author disclaims all copyrights and releases this script into the
# public domain.
#
# Apply XPath queries to XML local or remote data.
#
# Flow: parse options, work out whether the document argument is a URL
# (fetched into a temporary file) or a local path (used verbatim), parse
# the file with XML::LibXML, evaluate the XPath expression and print the
# result.

use strict;

use File::Spec ();
use File::Temp ();
use Getopt::Std;
use LWP::UserAgent ();
use URI;
use XML::LibXML               ();
use XML::LibXML::XPathContext ();

# KLUGE supposed to mark filehandles, so would have to muck around with
# converting whatever the XML is in into utf8 or something... (this
# avoids a 'Wide character in ...' warning)
no warnings 'utf8';

my %opts;
getopts 'h?p:', \%opts;

help() if exists $opts{h} or exists $opts{'?'};

$opts{p} = 'XML' unless exists $opts{p};

# figure out where document lives
my $document = shift || help();
my $target   = URI->new($document);
my $file;

# XPath query to search document for
my $query = shift || help();

# get document into a file the parser can read
if ( $target->scheme ) {

    # Only the name is kept; File::Temp removes the file at exit.
    $file = (
        File::Temp::tempfile(
            "xquery.XXXXXX",
            DIR    => File::Spec->tmpdir,
            UNLINK => 1
        )
    )[1];

    my $ua = LWP::UserAgent->new(
        env_proxy  => 1,
        keep_alive => 0,
        timeout    => 30
    );
    $ua->agent( $ENV{http_agent} ) if exists $ENV{http_agent};

    my $response = $ua->get( $target->canonical, ':content_file' => $file );

    # Without this check a failed download leaves an empty temporary file
    # and surfaces later as a confusing parse error.
    unless ( $response->is_success ) {
        die "error: could not fetch $document: ", $response->status_line,
          "\n";
    }

    # LWP reports failures while saving the content via this header.
    if ( defined( my $died = $response->header('X-Died') ) ) {
        die "error: could not save $document: $died\n";
    }
}
else {

    # Use the path exactly as given: the URI object would percent-escape
    # characters such as spaces, yielding a name that does not exist.
    $file = $document;
}

my $parser = XML::LibXML->new;

# try to allow for crummy *ML documents
# TODO make these command line arguments?
$parser->recover(1);
$parser->load_ext_dtd(0);
$parser->expand_entities(0);
$parser->complete_attributes(0);

my $doc;
eval {
    my %parse_method = (
        XML  => 'parse_file',
        HTML => 'parse_html_file',
        SGML => 'parse_sgml_file',
    );

    # Unknown -p values fall back to the plain XML parser.
    my $method = $parse_method{ uc $opts{p} } || 'parse_file';
    $doc = $parser->$method($file);
};
if ($@) {
    die "error: could not parse $document: $@\n";
}

# search resulting tree with XPath
my $xc     = XML::LibXML::XPathContext->new($doc);
my $result = $xc->find($query);

# find() returns a node list for location paths, but a Number, Literal or
# Boolean object for expressions such as count(//x); only the former has
# get_nodelist.
if ( $result->isa('XML::LibXML::NodeList') ) {
    for my $node ( $result->get_nodelist ) {
        print $node->toString, "\n";
    }
}
else {
    print $result->value, "\n";
}

# Print the usage message on standard output and exit with status 100.
# Never returns.
sub help {
    print <<"HELP";
Usage: $0 [opts] document xpath-query

Apply XPath expressions to the specified XML data.

Options:
  -h/-?  Display this message

  -p xx  Parse using method xx (XML, HTML, SGML).  Default is XML.

HELP
    exit 100;
}

__DATA__

=head1 NAME

xpquery - apply XPath queries to XML data.

=head1 SYNOPSIS

Show all C<book> tags on the C<sial.org> website:

  $ xpquery 'http://sial.org/?style=xml' //book

Return the C<intro> section of a DocBook XML article:

  $ xpquery operations-manual.xml '/article/section[@id="intro"]'

For more information on XPath, see:

http://www.w3.org/TR/xpath

=head1 DESCRIPTION

=head2 Overview

The C<xpquery> script allows quick application of XPath expressions to
XML data.

=head2 Normal Usage

  $ xpquery [options] file|URL xpath-expression

See L<"OPTIONS"> for details on the command line switches supported.

Quote the XPath expression to prevent unwanted shell interpolation of
it.

=head1 OPTIONS

This script currently supports the following command line switches:

=over 4

=item B<-h>, B<-?>

Prints a brief usage note about the script.

=item B<-p> I<method>

Parse using the specified method: C<XML> (default), C<HTML>, or C<SGML>.

=back

=head1 BUGS

=head2 Reporting Bugs

Newer versions of this script may be available from:

http://sial.org/code/perl/

If the bug is in the latest version, send a report to the author.
Patches that fix problems or add new features are welcome.

=head2 Known Issues

No known issues.

=head1 SEE ALSO

perl(1)

=head1 AUTHOR

Jeremy Mates, http://sial.org/contact/

=head1 COPYRIGHT

The author disclaims all copyrights and releases this script into the
public domain.

=head1 VERSION

  $Id: xpquery,v 1.8 2006/03/25 14:47:50 jmates Exp $

=cut