| File | /usr/local/lib/perl5/site_perl/5.10.1/XML/Simple.pm |
| Statements Executed | 1796 |
| Statement Execution Time | 8.32ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 3 | 1 | 1 | 1.55ms | 46.7ms | XML::Simple::build_tree |
| 35 | 2 | 1 | 796µs | 7.18ms | XML::Simple::collapse |
| 3 | 1 | 1 | 347µs | 350µs | XML::Simple::handle_options |
| 35 | 2 | 1 | 337µs | 337µs | XML::Simple::start_element |
| 1 | 1 | 1 | 195µs | 199µs | XML::Simple::BEGIN@1687 |
| 28 | 2 | 1 | 176µs | 176µs | XML::Simple::characters |
| 3 | 1 | 1 | 147µs | 147µs | XML::Simple::new |
| 33 | 4 | 2 | 99µs | 6.32ms | XML::Simple::CORE:match (opcode) |
| 3 | 1 | 1 | 81µs | 53.9ms | XML::Simple::build_simple_tree |
| 3 | 1 | 1 | 71µs | 54.6ms | XML::Simple::XMLin |
| 6 | 2 | 1 | 70µs | 245µs | XML::Simple::_get_object |
| 35 | 2 | 1 | 67µs | 67µs | XML::Simple::end_element |
| 3 | 1 | 1 | 55µs | 54.3ms | XML::Simple::parse_string |
| 1 | 1 | 1 | 26µs | 31µs | XML::Simple::array_to_hash |
| 3 | 1 | 1 | 20µs | 20µs | XML::Simple::start_document |
| 1 | 1 | 1 | 17µs | 20µs | XML::Simple::BEGIN@42 |
| 3 | 1 | 1 | 17µs | 17µs | XML::Simple::end_document |
| 1 | 1 | 1 | 14µs | 52µs | XML::Simple::BEGIN@43 |
| 1 | 1 | 1 | 13µs | 15µs | XML::Simple::import |
| 1 | 1 | 1 | 6µs | 67µs | XML::Simple::BEGIN@51 |
| 3 | 1 | 2 | 3µs | 3µs | XML::Simple::CORE:subst (opcode) |
| 1 | 1 | 1 | 3µs | 3µs | XML::Simple::new_hashref |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::XMLout |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::build_tree_xml_parser |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::cache_read_memcopy |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::cache_read_memshare |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::cache_read_storable |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::cache_write_memcopy |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::cache_write_memshare |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::cache_write_storable |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::collapse_content |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::copy_hash |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::default_config_file |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::die_or_warn |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::escape_value |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::find_xml_file |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::get_var |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::hash_to_array |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::normalise_space |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::numeric_escape |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::parse_fh |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::parse_file |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::set_var |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::sorted_keys |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::storable_filename |
| 0 | 0 | 0 | 0s | 0s | XML::Simple::value_to_xml |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | # $Id: Simple.pm,v 1.40 2007/08/15 10:36:48 grantm Exp $ | ||||
| 2 | |||||
| 3 | package XML::Simple; | ||||
| 4 | |||||
| 5 | =head1 NAME | ||||
| 6 | |||||
| 7 | XML::Simple - Easy API to maintain XML (esp config files) | ||||
| 8 | |||||
| 9 | =head1 SYNOPSIS | ||||
| 10 | |||||
| 11 | use XML::Simple; | ||||
| 12 | |||||
| 13 | my $ref = XMLin([<xml file or string>] [, <options>]); | ||||
| 14 | |||||
| 15 | my $xml = XMLout($hashref [, <options>]); | ||||
| 16 | |||||
| 17 | Or the object oriented way: | ||||
| 18 | |||||
| 19 | require XML::Simple; | ||||
| 20 | |||||
| 21 | my $xs = XML::Simple->new(options); | ||||
| 22 | |||||
| 23 | my $ref = $xs->XMLin([<xml file or string>] [, <options>]); | ||||
| 24 | |||||
| 25 | my $xml = $xs->XMLout($hashref [, <options>]); | ||||
| 26 | |||||
| 27 | (or see L<"SAX SUPPORT"> for 'the SAX way'). | ||||
| 28 | |||||
| 29 | To catch common errors: | ||||
| 30 | |||||
| 31 | use XML::Simple qw(:strict); | ||||
| 32 | |||||
| 33 | (see L<"STRICT MODE"> for more details). | ||||
| 34 | |||||
| 35 | =cut | ||||
| 36 | |||||
| 37 | # See after __END__ for more POD documentation | ||||
| 38 | |||||
| 39 | |||||
| 40 | # Load essentials here, other modules loaded on demand later | ||||
| 41 | |||||
| 42 | 3 | 29µs | 2 | 24µs | # spent 20µs (17+3) within XML::Simple::BEGIN@42 which was called
# once (17µs+3µs) by SimpleDB::Client::BEGIN@48 at line 42 # spent 20µs making 1 call to XML::Simple::BEGIN@42
# spent 3µs making 1 call to strict::import |
| 43 | 3 | 32µs | 2 | 89µs | # spent 52µs (14+37) within XML::Simple::BEGIN@43 which was called
# once (14µs+37µs) by SimpleDB::Client::BEGIN@48 at line 43 # spent 52µs making 1 call to XML::Simple::BEGIN@43
# spent 37µs making 1 call to Exporter::import |
| 44 | 1 | 700ns | require Exporter; | ||
| 45 | |||||
| 46 | |||||
| 47 | ############################################################################## | ||||
| 48 | # Define some constants | ||||
| 49 | # | ||||
| 50 | |||||
| 51 | 3 | 4.65ms | 2 | 128µs | # spent 67µs (6+61) within XML::Simple::BEGIN@51 which was called
# once (6µs+61µs) by SimpleDB::Client::BEGIN@48 at line 51 # spent 67µs making 1 call to XML::Simple::BEGIN@51
# spent 61µs making 1 call to vars::import |
| 52 | |||||
| 53 | 1 | 9µs | @ISA = qw(Exporter); | ||
| 54 | 1 | 800ns | @EXPORT = qw(XMLin XMLout); | ||
| 55 | 1 | 500ns | @EXPORT_OK = qw(xml_in xml_out); | ||
| 56 | 1 | 300ns | $VERSION = '2.18'; | ||
| 57 | 1 | 300ns | $PREFERRED_PARSER = undef; | ||
| 58 | |||||
| 59 | 1 | 600ns | my $StrictMode = 0; | ||
| 60 | |||||
| 61 | 1 | 3µs | my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr | ||
| 62 | searchpath forcearray cache suppressempty parseropts | ||||
| 63 | grouptags nsexpand datahandler varattr variables | ||||
| 64 | normalisespace normalizespace valueattr); | ||||
| 65 | |||||
| 66 | 1 | 6µs | my @KnownOptOut = qw(keyattr keeproot contentkey noattr | ||
| 67 | rootname xmldecl outputfile noescape suppressempty | ||||
| 68 | grouptags nsexpand handler noindent attrindent nosort | ||||
| 69 | valueattr numericescape); | ||||
| 70 | |||||
| 71 | 1 | 700ns | my @DefKeyAttr = qw(name key id); | ||
| 72 | 1 | 200ns | my $DefRootName = qq(opt); | ||
| 73 | 1 | 200ns | my $DefContentKey = qq(content); | ||
| 74 | 1 | 200ns | my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>); | ||
| 75 | |||||
| 76 | 1 | 200ns | my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; | ||
| 77 | 1 | 1µs | my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround | ||
| 78 | |||||
| 79 | |||||
| 80 | ############################################################################## | ||||
| 81 | # Globals for use by caching routines | ||||
| 82 | # | ||||
| 83 | |||||
| 84 | 1 | 400ns | my %MemShareCache = (); | ||
| 85 | 1 | 100ns | my %MemCopyCache = (); | ||
| 86 | |||||
| 87 | |||||
| 88 | ############################################################################## | ||||
| 89 | # Wrapper for Exporter - handles ':strict' | ||||
| 90 | # | ||||
| 91 | |||||
| 92 | # spent 15µs (13+2) within XML::Simple::import which was called
# once (13µs+2µs) by SimpleDB::Client::BEGIN@48 at line 48 of ../lib/SimpleDB/Client.pm | ||||
| 93 | # Handle the :strict tag | ||||
| 94 | |||||
| 95 | 1 | 8µs | 1 | 1µs | $StrictMode = 1 if grep(/^:strict$/, @_); # spent 1µs making 1 call to XML::Simple::CORE:match |
| 96 | |||||
| 97 | # Pass everything else to Exporter.pm | ||||
| 98 | |||||
| 99 | 1 | 4µs | 1 | 300ns | @_ = grep(!/^:strict$/, @_); # spent 300ns making 1 call to XML::Simple::CORE:match |
| 100 | 1 | 5µs | 1 | 29µs | goto &Exporter::import; # spent 29µs making 1 call to Exporter::import |
| 101 | } | ||||
| 102 | |||||
| 103 | |||||
| 104 | ############################################################################## | ||||
| 105 | # Constructor for optional object interface. | ||||
| 106 | # | ||||
| 107 | |||||
| 108 | # spent 147µs within XML::Simple::new which was called 3 times, avg 49µs/call:
# 3 times (147µs+0s) by XML::Simple::_get_object at line 145, avg 49µs/call | ||||
| 109 | 3 | 3µs | my $class = shift; | ||
| 110 | |||||
| 111 | 3 | 6µs | if(@_ % 2) { | ||
| 112 | croak "Default options must be name=>value pairs (odd number supplied)"; | ||||
| 113 | } | ||||
| 114 | |||||
| 115 | 3 | 1µs | my %known_opt; | ||
| 116 | 3 | 89µs | @known_opt{@KnownOptIn, @KnownOptOut} = (undef) x 100; | ||
| 117 | |||||
| 118 | 3 | 4µs | my %raw_opt = @_; | ||
| 119 | 3 | 600ns | my %def_opt; | ||
| 120 | 3 | 8µs | while(my($key, $val) = each %raw_opt) { | ||
| 121 | my $lkey = lc($key); | ||||
| 122 | $lkey =~ s/_//g; | ||||
| 123 | croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); | ||||
| 124 | $def_opt{$lkey} = $val; | ||||
| 125 | } | ||||
| 126 | 3 | 8µs | my $self = { def_opt => \%def_opt }; | ||
| 127 | |||||
| 128 | 3 | 32µs | return(bless($self, $class)); | ||
| 129 | } | ||||
| 130 | |||||
| 131 | |||||
| 132 | ############################################################################## | ||||
| 133 | # Sub: _get_object() | ||||
| 134 | # | ||||
| 135 | # Helper routine called from XMLin() and XMLout() to create an object if none | ||||
| 136 | # was provided. Note, this routine does mess with the caller's @_ array. | ||||
| 137 | # | ||||
| 138 | |||||
| 139 | sub _get_object { | ||||
| 140 | 6 | 2µs | my $self; | ||
| 141 | 6 | 58µs | 6 | 29µs | if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { # spent 29µs making 6 calls to UNIVERSAL::isa, avg 5µs/call |
| 142 | $self = shift; | ||||
| 143 | } | ||||
| 144 | else { | ||||
| 145 | 3 | 18µs | 3 | 147µs | $self = XML::Simple->new(); # spent 147µs making 3 calls to XML::Simple::new, avg 49µs/call |
| 146 | } | ||||
| 147 | |||||
| 148 | 6 | 20µs | return $self; | ||
| 149 | } | ||||
| 150 | |||||
| 151 | |||||
| 152 | ############################################################################## | ||||
| 153 | # Sub/Method: XMLin() | ||||
| 154 | # | ||||
| 155 | # Exported routine for slurping XML into a hashref - see pod for info. | ||||
| 156 | # | ||||
| 157 | # May be called as object method or as a plain function. | ||||
| 158 | # | ||||
| 159 | # Expects one arg for the source XML, optionally followed by a number of | ||||
| 160 | # name => value option pairs. | ||||
| 161 | # | ||||
| 162 | |||||
| 163 | # spent 54.6ms (71µs+54.6) within XML::Simple::XMLin which was called 3 times, avg 18.2ms/call:
# 3 times (71µs+54.6ms) by SimpleDB::Client::handle_response at line 245 of ../lib/SimpleDB/Client.pm, avg 18.2ms/call | ||||
| 164 | 3 | 8µs | 3 | 225µs | my $self = &_get_object; # note, @_ is passed implicitly # spent 225µs making 3 calls to XML::Simple::_get_object, avg 75µs/call |
| 165 | |||||
| 166 | 3 | 4µs | my $target = shift; | ||
| 167 | |||||
| 168 | |||||
| 169 | # Work out whether to parse a string, a file or a filehandle | ||||
| 170 | |||||
| 171 | 3 | 65µs | 6 | 54.3ms | if(not defined $target) { # spent 54.3ms making 3 calls to XML::Simple::parse_string, avg 18.1ms/call
# spent 12µs making 3 calls to XML::Simple::CORE:match, avg 4µs/call |
| 172 | return $self->parse_file(undef, @_); | ||||
| 173 | } | ||||
| 174 | |||||
| 175 | elsif($target eq '-') { | ||||
| 176 | local($/) = undef; | ||||
| 177 | $target = <STDIN>; | ||||
| 178 | return $self->parse_string(\$target, @_); | ||||
| 179 | } | ||||
| 180 | |||||
| 181 | elsif(my $type = ref($target)) { | ||||
| 182 | if($type eq 'SCALAR') { | ||||
| 183 | return $self->parse_string($target, @_); | ||||
| 184 | } | ||||
| 185 | else { | ||||
| 186 | return $self->parse_fh($target, @_); | ||||
| 187 | } | ||||
| 188 | } | ||||
| 189 | |||||
| 190 | elsif($target =~ m{<.*?>}s) { | ||||
| 191 | return $self->parse_string(\$target, @_); | ||||
| 192 | } | ||||
| 193 | |||||
| 194 | else { | ||||
| 195 | return $self->parse_file($target, @_); | ||||
| 196 | } | ||||
| 197 | } | ||||
| 198 | |||||
| 199 | |||||
| 200 | ############################################################################## | ||||
| 201 | # Sub/Method: parse_file() | ||||
| 202 | # | ||||
| 203 | # Same as XMLin, but only parses from a named file. | ||||
| 204 | # | ||||
| 205 | |||||
| 206 | sub parse_file { | ||||
| 207 | my $self = &_get_object; # note, @_ is passed implicitly | ||||
| 208 | |||||
| 209 | my $filename = shift; | ||||
| 210 | |||||
| 211 | $self->handle_options('in', @_); | ||||
| 212 | |||||
| 213 | $filename = $self->default_config_file if not defined $filename; | ||||
| 214 | |||||
| 215 | $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}}); | ||||
| 216 | |||||
| 217 | # Check cache for previous parse | ||||
| 218 | |||||
| 219 | if($self->{opt}->{cache}) { | ||||
| 220 | foreach my $scheme (@{$self->{opt}->{cache}}) { | ||||
| 221 | my $method = 'cache_read_' . $scheme; | ||||
| 222 | my $opt = $self->$method($filename); | ||||
| 223 | return($opt) if($opt); | ||||
| 224 | } | ||||
| 225 | } | ||||
| 226 | |||||
| 227 | my $ref = $self->build_simple_tree($filename, undef); | ||||
| 228 | |||||
| 229 | if($self->{opt}->{cache}) { | ||||
| 230 | my $method = 'cache_write_' . $self->{opt}->{cache}->[0]; | ||||
| 231 | $self->$method($ref, $filename); | ||||
| 232 | } | ||||
| 233 | |||||
| 234 | return $ref; | ||||
| 235 | } | ||||
| 236 | |||||
| 237 | |||||
| 238 | ############################################################################## | ||||
| 239 | # Sub/Method: parse_fh() | ||||
| 240 | # | ||||
| 241 | # Same as XMLin, but only parses from a filehandle. | ||||
| 242 | # | ||||
| 243 | |||||
| 244 | sub parse_fh { | ||||
| 245 | my $self = &_get_object; # note, @_ is passed implicitly | ||||
| 246 | |||||
| 247 | my $fh = shift; | ||||
| 248 | croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') . | ||||
| 249 | " as a filehandle" unless ref $fh; | ||||
| 250 | |||||
| 251 | $self->handle_options('in', @_); | ||||
| 252 | |||||
| 253 | return $self->build_simple_tree(undef, $fh); | ||||
| 254 | } | ||||
| 255 | |||||
| 256 | |||||
| 257 | ############################################################################## | ||||
| 258 | # Sub/Method: parse_string() | ||||
| 259 | # | ||||
| 260 | # Same as XMLin, but only parses from a string or a reference to a string. | ||||
| 261 | # | ||||
| 262 | |||||
| 263 | # spent 54.3ms (55µs+54.3) within XML::Simple::parse_string which was called 3 times, avg 18.1ms/call:
# 3 times (55µs+54.3ms) by XML::Simple::XMLin at line 171, avg 18.1ms/call | ||||
| 264 | 3 | 4µs | 3 | 20µs | my $self = &_get_object; # note, @_ is passed implicitly # spent 20µs making 3 calls to XML::Simple::_get_object, avg 7µs/call |
| 265 | |||||
| 266 | 3 | 2µs | my $string = shift; | ||
| 267 | |||||
| 268 | 3 | 12µs | 3 | 350µs | $self->handle_options('in', @_); # spent 350µs making 3 calls to XML::Simple::handle_options, avg 117µs/call |
| 269 | |||||
| 270 | 3 | 22µs | 3 | 53.9ms | return $self->build_simple_tree(undef, ref $string ? $string : \$string); # spent 53.9ms making 3 calls to XML::Simple::build_simple_tree, avg 18.0ms/call |
| 271 | } | ||||
| 272 | |||||
| 273 | |||||
| 274 | ############################################################################## | ||||
| 275 | # Method: default_config_file() | ||||
| 276 | # | ||||
| 277 | # Returns the name of the XML file to parse if no filename (or XML string) | ||||
| 278 | # was provided. | ||||
| 279 | # | ||||
| 280 | |||||
| 281 | sub default_config_file { | ||||
| 282 | my $self = shift; | ||||
| 283 | |||||
| 284 | require File::Basename; | ||||
| 285 | |||||
| 286 | my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+'); | ||||
| 287 | |||||
| 288 | # Add script directory to searchpath | ||||
| 289 | |||||
| 290 | if($script_dir) { | ||||
| 291 | unshift(@{$self->{opt}->{searchpath}}, $script_dir); | ||||
| 292 | } | ||||
| 293 | |||||
| 294 | return $basename . '.xml'; | ||||
| 295 | } | ||||
| 296 | |||||
| 297 | |||||
| 298 | ############################################################################## | ||||
| 299 | # Method: build_simple_tree() | ||||
| 300 | # | ||||
| 301 | # Builds a 'tree' data structure as provided by XML::Parser and then | ||||
| 302 | # 'simplifies' it as specified by the various options in effect. | ||||
| 303 | # | ||||
| 304 | |||||
| 305 | # spent 53.9ms (81µs+53.8) within XML::Simple::build_simple_tree which was called 3 times, avg 18.0ms/call:
# 3 times (81µs+53.8ms) by XML::Simple::parse_string at line 270, avg 18.0ms/call | ||||
| 306 | 3 | 2µs | my $self = shift; | ||
| 307 | |||||
| 308 | 3 | 11µs | 3 | 46.7ms | my $tree = $self->build_tree(@_); # spent 46.7ms making 3 calls to XML::Simple::build_tree, avg 15.6ms/call |
| 309 | |||||
| 310 | return $self->{opt}->{keeproot} | ||||
| 311 | ? $self->collapse({}, @$tree) | ||||
| 312 | 3 | 64µs | 3 | 7.18ms | : $self->collapse(@{$tree->[1]}); # spent 7.18ms making 3 calls to XML::Simple::collapse, avg 2.39ms/call |
| 313 | } | ||||
| 314 | |||||
| 315 | |||||
| 316 | ############################################################################## | ||||
| 317 | # Method: build_tree() | ||||
| 318 | # | ||||
| 319 | # This routine will be called if there is no suitable pre-parsed tree in a | ||||
| 320 | # cache. It parses the XML and returns an XML::Parser 'Tree' style data | ||||
| 321 | # structure (summarised in the comments for the collapse() routine below). | ||||
| 322 | # | ||||
| 323 | # XML::Simple requires the services of another module that knows how to parse | ||||
| 324 | # XML. If XML::SAX is installed, the default SAX parser will be used, | ||||
| 325 | # otherwise XML::Parser will be used. | ||||
| 326 | # | ||||
| 327 | # This routine expects to be passed a filename as argument 1 or a 'string' as | ||||
| 328 | # argument 2. The 'string' might be a string of XML (passed by reference to | ||||
| 329 | # save memory) or it might be a reference to an IO::Handle. (This | ||||
| 330 | # non-intuitive mess results in part from the way XML::Parser works but that's | ||||
| 331 | # really no excuse). | ||||
| 332 | # | ||||
| 333 | |||||
| 334 | # spent 46.7ms (1.55+45.1) within XML::Simple::build_tree which was called 3 times, avg 15.6ms/call:
# 3 times (1.55ms+45.1ms) by XML::Simple::build_simple_tree at line 308, avg 15.6ms/call | ||||
| 335 | 3 | 2µs | my $self = shift; | ||
| 336 | 3 | 2µs | my $filename = shift; | ||
| 337 | 3 | 2µs | my $string = shift; | ||
| 338 | |||||
| 339 | |||||
| 340 | 3 | 2µs | my $preferred_parser = $PREFERRED_PARSER; | ||
| 341 | 3 | 7µs | unless(defined($preferred_parser)) { | ||
| 342 | $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; | ||||
| 343 | } | ||||
| 344 | 3 | 1µs | if($preferred_parser eq 'XML::Parser') { | ||
| 345 | return($self->build_tree_xml_parser($filename, $string)); | ||||
| 346 | } | ||||
| 347 | |||||
| 348 | 6 | 246µs | eval { require XML::SAX; }; # We didn't need it until now | ||
| 349 | 3 | 1µs | if($@) { # No XML::SAX - fall back to XML::Parser | ||
| 350 | if($preferred_parser) { # unless a SAX parser was expressly requested | ||||
| 351 | croak "XMLin() could not load XML::SAX"; | ||||
| 352 | } | ||||
| 353 | return($self->build_tree_xml_parser($filename, $string)); | ||||
| 354 | } | ||||
| 355 | |||||
| 356 | 3 | 1µs | $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); | ||
| 357 | |||||
| 358 | 3 | 20µs | 3 | 36.9ms | my $sp = XML::SAX::ParserFactory->parser(Handler => $self); # spent 36.9ms making 3 calls to XML::SAX::ParserFactory::parser, avg 12.3ms/call |
| 359 | |||||
| 360 | 3 | 4µs | $self->{nocollapse} = 1; | ||
| 361 | 3 | 900ns | my($tree); | ||
| 362 | 3 | 3µs | if($filename) { | ||
| 363 | $tree = $sp->parse_uri($filename); | ||||
| 364 | } | ||||
| 365 | else { | ||||
| 366 | 3 | 6µs | if(ref($string) && ref($string) ne 'SCALAR') { | ||
| 367 | $tree = $sp->parse_file($string); | ||||
| 368 | } | ||||
| 369 | else { | ||||
| 370 | 3 | 12µs | 3 | 3.95ms | $tree = $sp->parse_string($$string); # spent 3.95ms making 3 calls to XML::SAX::Base::parse_string, avg 1.32ms/call |
| 371 | } | ||||
| 372 | } | ||||
| 373 | |||||
| 374 | 3 | 59µs | return($tree); | ||
| 375 | } | ||||
| 376 | |||||
| 377 | |||||
| 378 | ############################################################################## | ||||
| 379 | # Method: build_tree_xml_parser() | ||||
| 380 | # | ||||
| 381 | # This routine will be called if XML::SAX is not installed, or if XML::Parser | ||||
| 382 | # was specifically requested. It takes the same arguments as build_tree() and | ||||
| 383 | # returns the same data structure (XML::Parser 'Tree' style). | ||||
| 384 | # | ||||
| 385 | |||||
| 386 | sub build_tree_xml_parser { | ||||
| 387 | my $self = shift; | ||||
| 388 | my $filename = shift; | ||||
| 389 | my $string = shift; | ||||
| 390 | |||||
| 391 | |||||
| 392 | eval { | ||||
| 393 | local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() | ||||
| 394 | require XML::Parser; # We didn't need it until now | ||||
| 395 | }; | ||||
| 396 | if($@) { | ||||
| 397 | croak "XMLin() requires either XML::SAX or XML::Parser"; | ||||
| 398 | } | ||||
| 399 | |||||
| 400 | if($self->{opt}->{nsexpand}) { | ||||
| 401 | carp "'nsexpand' option requires XML::SAX"; | ||||
| 402 | } | ||||
| 403 | |||||
| 404 | my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}}); | ||||
| 405 | my($tree); | ||||
| 406 | if($filename) { | ||||
| 407 | # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl | ||||
| 408 | local(*XML_FILE); | ||||
| 409 | open(XML_FILE, '<', $filename) || croak qq($filename - $!); | ||||
| 410 | $tree = $xp->parse(*XML_FILE); | ||||
| 411 | close(XML_FILE); | ||||
| 412 | } | ||||
| 413 | else { | ||||
| 414 | $tree = $xp->parse($$string); | ||||
| 415 | } | ||||
| 416 | |||||
| 417 | return($tree); | ||||
| 418 | } | ||||
| 419 | |||||
| 420 | |||||
| 421 | ############################################################################## | ||||
| 422 | # Method: cache_write_storable() | ||||
| 423 | # | ||||
| 424 | # Wrapper routine for invoking Storable::nstore() to cache a parsed data | ||||
| 425 | # structure. | ||||
| 426 | # | ||||
| 427 | |||||
| 428 | sub cache_write_storable { | ||||
| 429 | my($self, $data, $filename) = @_; | ||||
| 430 | |||||
| 431 | my $cachefile = $self->storable_filename($filename); | ||||
| 432 | |||||
| 433 | require Storable; # We didn't need it until now | ||||
| 434 | |||||
| 435 | if ('VMS' eq $^O) { | ||||
| 436 | Storable::nstore($data, $cachefile); | ||||
| 437 | } | ||||
| 438 | else { | ||||
| 439 | # If the following line fails for you, your Storable.pm is old - upgrade | ||||
| 440 | Storable::lock_nstore($data, $cachefile); | ||||
| 441 | } | ||||
| 442 | |||||
| 443 | } | ||||
| 444 | |||||
| 445 | |||||
| 446 | ############################################################################## | ||||
| 447 | # Method: cache_read_storable() | ||||
| 448 | # | ||||
| 449 | # Wrapper routine for invoking Storable::retrieve() to read a cached parsed | ||||
| 450 | # data structure. Only returns cached data if the cache file exists and is | ||||
| 451 | # newer than the source XML file. | ||||
| 452 | # | ||||
| 453 | |||||
| 454 | sub cache_read_storable { | ||||
| 455 | my($self, $filename) = @_; | ||||
| 456 | |||||
| 457 | my $cachefile = $self->storable_filename($filename); | ||||
| 458 | |||||
| 459 | return unless(-r $cachefile); | ||||
| 460 | return unless((stat($cachefile))[9] > (stat($filename))[9]); | ||||
| 461 | |||||
| 462 | require Storable; # We didn't need it until now | ||||
| 463 | |||||
| 464 | if ('VMS' eq $^O) { | ||||
| 465 | return(Storable::retrieve($cachefile)); | ||||
| 466 | } | ||||
| 467 | else { | ||||
| 468 | return(Storable::lock_retrieve($cachefile)); | ||||
| 469 | } | ||||
| 470 | |||||
| 471 | } | ||||
| 472 | |||||
| 473 | |||||
| 474 | ############################################################################## | ||||
| 475 | # Method: storable_filename() | ||||
| 476 | # | ||||
| 477 | # Translates the supplied source XML filename into a filename for the storable | ||||
| 478 | # cached data. A '.stor' suffix is added after stripping an optional '.xml' | ||||
| 479 | # suffix. | ||||
| 480 | # | ||||
| 481 | |||||
| 482 | sub storable_filename { | ||||
| 483 | my($self, $cachefile) = @_; | ||||
| 484 | |||||
| 485 | $cachefile =~ s{(\.xml)?$}{.stor}; | ||||
| 486 | return $cachefile; | ||||
| 487 | } | ||||
| 488 | |||||
| 489 | |||||
| 490 | ############################################################################## | ||||
| 491 | # Method: cache_write_memshare() | ||||
| 492 | # | ||||
| 493 | # Takes the supplied data structure reference and stores it away in a global | ||||
| 494 | # hash structure. | ||||
| 495 | # | ||||
| 496 | |||||
| 497 | sub cache_write_memshare { | ||||
| 498 | my($self, $data, $filename) = @_; | ||||
| 499 | |||||
| 500 | $MemShareCache{$filename} = [time(), $data]; | ||||
| 501 | } | ||||
| 502 | |||||
| 503 | |||||
| 504 | ############################################################################## | ||||
| 505 | # Method: cache_read_memshare() | ||||
| 506 | # | ||||
| 507 | # Takes a filename and looks in a global hash for a cached parsed version. | ||||
| 508 | # | ||||
| 509 | |||||
| 510 | sub cache_read_memshare { | ||||
| 511 | my($self, $filename) = @_; | ||||
| 512 | |||||
| 513 | return unless($MemShareCache{$filename}); | ||||
| 514 | return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); | ||||
| 515 | |||||
| 516 | return($MemShareCache{$filename}->[1]); | ||||
| 517 | |||||
| 518 | } | ||||
| 519 | |||||
| 520 | |||||
| 521 | ############################################################################## | ||||
| 522 | # Method: cache_write_memcopy() | ||||
| 523 | # | ||||
| 524 | # Takes the supplied data structure and stores a copy of it in a global hash | ||||
| 525 | # structure. | ||||
| 526 | # | ||||
| 527 | |||||
| 528 | sub cache_write_memcopy { | ||||
| 529 | my($self, $data, $filename) = @_; | ||||
| 530 | |||||
| 531 | require Storable; # We didn't need it until now | ||||
| 532 | |||||
| 533 | $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; | ||||
| 534 | } | ||||
| 535 | |||||
| 536 | |||||
| 537 | ############################################################################## | ||||
| 538 | # Method: cache_read_memcopy() | ||||
| 539 | # | ||||
| 540 | # Takes a filename and looks in a global hash for a cached parsed version. | ||||
| 541 | # Returns a reference to a copy of that data structure. | ||||
| 542 | # | ||||
| 543 | |||||
| 544 | sub cache_read_memcopy { | ||||
| 545 | my($self, $filename) = @_; | ||||
| 546 | |||||
| 547 | return unless($MemCopyCache{$filename}); | ||||
| 548 | return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); | ||||
| 549 | |||||
| 550 | return(Storable::dclone($MemCopyCache{$filename}->[1])); | ||||
| 551 | |||||
| 552 | } | ||||
| 553 | |||||
| 554 | |||||
| 555 | ############################################################################## | ||||
| 556 | # Sub/Method: XMLout() | ||||
| 557 | # | ||||
| 558 | # Exported routine for 'unslurping' a data structure out to XML. | ||||
| 559 | # | ||||
| 560 | # Expects a reference to a data structure and an optional list of option | ||||
| 561 | # name => value pairs. | ||||
| 562 | # | ||||
| 563 | |||||
| 564 | sub XMLout { | ||||
| 565 | my $self = &_get_object; # note, @_ is passed implicitly | ||||
| 566 | |||||
| 567 | croak "XMLout() requires at least one argument" unless(@_); | ||||
| 568 | my $ref = shift; | ||||
| 569 | |||||
| 570 | $self->handle_options('out', @_); | ||||
| 571 | |||||
| 572 | |||||
| 573 | # If namespace expansion is set, XML::NamespaceSupport is required | ||||
| 574 | |||||
| 575 | if($self->{opt}->{nsexpand}) { | ||||
| 576 | require XML::NamespaceSupport; | ||||
| 577 | $self->{nsup} = XML::NamespaceSupport->new(); | ||||
| 578 | $self->{ns_prefix} = 'aaa'; | ||||
| 579 | } | ||||
| 580 | |||||
| 581 | |||||
| 582 | # Wrap top level arrayref in a hash | ||||
| 583 | |||||
| 584 | if(UNIVERSAL::isa($ref, 'ARRAY')) { | ||||
| 585 | $ref = { anon => $ref }; | ||||
| 586 | } | ||||
| 587 | |||||
| 588 | |||||
| 589 | # Extract rootname from top level hash if keeproot enabled | ||||
| 590 | |||||
| 591 | if($self->{opt}->{keeproot}) { | ||||
| 592 | my(@keys) = keys(%$ref); | ||||
| 593 | if(@keys == 1) { | ||||
| 594 | $ref = $ref->{$keys[0]}; | ||||
| 595 | $self->{opt}->{rootname} = $keys[0]; | ||||
| 596 | } | ||||
| 597 | } | ||||
| 598 | |||||
| 599 | # Ensure there are no top level attributes if we're not adding root elements | ||||
| 600 | |||||
| 601 | elsif($self->{opt}->{rootname} eq '') { | ||||
| 602 | if(UNIVERSAL::isa($ref, 'HASH')) { | ||||
| 603 | my $refsave = $ref; | ||||
| 604 | $ref = {}; | ||||
| 605 | foreach (keys(%$refsave)) { | ||||
| 606 | if(ref($refsave->{$_})) { | ||||
| 607 | $ref->{$_} = $refsave->{$_}; | ||||
| 608 | } | ||||
| 609 | else { | ||||
| 610 | $ref->{$_} = [ $refsave->{$_} ]; | ||||
| 611 | } | ||||
| 612 | } | ||||
| 613 | } | ||||
| 614 | } | ||||
| 615 | |||||
| 616 | |||||
| 617 | # Encode the hashref and write to file if necessary | ||||
| 618 | |||||
| 619 | $self->{_ancestors} = []; | ||||
| 620 | my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); | ||||
| 621 | delete $self->{_ancestors}; | ||||
| 622 | |||||
| 623 | if($self->{opt}->{xmldecl}) { | ||||
| 624 | $xml = $self->{opt}->{xmldecl} . "\n" . $xml; | ||||
| 625 | } | ||||
| 626 | |||||
| 627 | if($self->{opt}->{outputfile}) { | ||||
| 628 | if(ref($self->{opt}->{outputfile})) { | ||||
| 629 | my $fh = $self->{opt}->{outputfile}; | ||||
| 630 | if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) { | ||||
| 631 | eval { require IO::Handle; }; | ||||
| 632 | croak $@ if $@; | ||||
| 633 | } | ||||
| 634 | return($fh->print($xml)); | ||||
| 635 | } | ||||
| 636 | else { | ||||
| 637 | local(*OUT); | ||||
| 638 | open(OUT, '>', "$self->{opt}->{outputfile}") || | ||||
| 639 | croak "open($self->{opt}->{outputfile}): $!"; | ||||
| 640 | binmode(OUT, ':utf8') if($] >= 5.008); | ||||
| 641 | print OUT $xml || croak "print: $!"; | ||||
| 642 | close(OUT); | ||||
| 643 | } | ||||
| 644 | } | ||||
| 645 | elsif($self->{opt}->{handler}) { | ||||
| 646 | require XML::SAX; | ||||
| 647 | my $sp = XML::SAX::ParserFactory->parser( | ||||
| 648 | Handler => $self->{opt}->{handler} | ||||
| 649 | ); | ||||
| 650 | return($sp->parse_string($xml)); | ||||
| 651 | } | ||||
| 652 | else { | ||||
| 653 | return($xml); | ||||
| 654 | } | ||||
| 655 | } | ||||
| 656 | |||||
| 657 | |||||
| 658 | ############################################################################## | ||||
| 659 | # Method: handle_options() | ||||
| 660 | # | ||||
| 661 | # Helper routine for both XMLin() and XMLout(). Both routines handle their | ||||
| 662 | # first argument and assume all other args are options handled by this routine. | ||||
| 663 | # Saves a hash of options in $self->{opt}. | ||||
| 664 | # | ||||
| 665 | # If default options were passed to the constructor, they will be retrieved | ||||
| 666 | # here and merged with options supplied to the method call. | ||||
| 667 | # | ||||
| 668 | # First argument should be the string 'in' or the string 'out'. | ||||
| 669 | # | ||||
| 670 | # Remaining arguments should be name=>value pairs. Sets up default values | ||||
| 671 | # for options not supplied. Unrecognised options are a fatal error. | ||||
| 672 | # | ||||
| 673 | |||||
| 674 | # spent 350µs (347+3) within XML::Simple::handle_options which was called 3 times, avg 117µs/call:
# 3 times (347µs+3µs) by XML::Simple::parse_string at line 268, avg 117µs/call | ||||
| 675 | 3 | 2µs | my $self = shift; | ||
| 676 | 3 | 2µs | my $dirn = shift; | ||
| 677 | |||||
| 678 | |||||
| 679 | # Determine valid options based on context | ||||
| 680 | |||||
| 681 | 3 | 1µs | my %known_opt; | ||
| 682 | 3 | 55µs | if($dirn eq 'in') { | ||
| 683 | @known_opt{@KnownOptIn} = @KnownOptIn; | ||||
| 684 | } | ||||
| 685 | else { | ||||
| 686 | @known_opt{@KnownOptOut} = @KnownOptOut; | ||||
| 687 | } | ||||
| 688 | |||||
| 689 | |||||
| 690 | # Store supplied options in hashref and weed out invalid ones | ||||
| 691 | |||||
| 692 | 3 | 3µs | if(@_ % 2) { | ||
| 693 | croak "Options must be name=>value pairs (odd number supplied)"; | ||||
| 694 | } | ||||
| 695 | 3 | 7µs | my %raw_opt = @_; | ||
| 696 | 3 | 3µs | my $opt = {}; | ||
| 697 | 3 | 4µs | $self->{opt} = $opt; | ||
| 698 | |||||
| 699 | 3 | 15µs | while(my($key, $val) = each %raw_opt) { | ||
| 700 | 3 | 2µs | my $lkey = lc($key); | ||
| 701 | 3 | 18µs | 3 | 3µs | $lkey =~ s/_//g; # spent 3µs making 3 calls to XML::Simple::CORE:subst, avg 1µs/call |
| 702 | 3 | 2µs | croak "Unrecognised option: $key" unless($known_opt{$lkey}); | ||
| 703 | 3 | 5µs | $opt->{$lkey} = $val; | ||
| 704 | } | ||||
| 705 | |||||
| 706 | |||||
| 707 | # Merge in options passed to constructor | ||||
| 708 | |||||
| 709 | 3 | 30µs | foreach (keys(%known_opt)) { | ||
| 710 | 54 | 34µs | unless(exists($opt->{$_})) { | ||
| 711 | if(exists($self->{def_opt}->{$_})) { | ||||
| 712 | $opt->{$_} = $self->{def_opt}->{$_}; | ||||
| 713 | } | ||||
| 714 | } | ||||
| 715 | } | ||||
| 716 | |||||
| 717 | |||||
| 718 | # Set sensible defaults if not supplied | ||||
| 719 | |||||
| 720 | 3 | 4µs | if(exists($opt->{rootname})) { | ||
| 721 | unless(defined($opt->{rootname})) { | ||||
| 722 | $opt->{rootname} = ''; | ||||
| 723 | } | ||||
| 724 | } | ||||
| 725 | else { | ||||
| 726 | 3 | 5µs | $opt->{rootname} = $DefRootName; | ||
| 727 | } | ||||
| 728 | |||||
| 729 | 3 | 3µs | if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { | ||
| 730 | $opt->{xmldecl} = $DefXmlDecl; | ||||
| 731 | } | ||||
| 732 | |||||
| 733 | 3 | 4µs | if(exists($opt->{contentkey})) { | ||
| 734 | if($opt->{contentkey} =~ m{^-(.*)$}) { | ||||
| 735 | $opt->{contentkey} = $1; | ||||
| 736 | $opt->{collapseagain} = 1; | ||||
| 737 | } | ||||
| 738 | } | ||||
| 739 | else { | ||||
| 740 | 3 | 4µs | $opt->{contentkey} = $DefContentKey; | ||
| 741 | } | ||||
| 742 | |||||
| 743 | 3 | 5µs | unless(exists($opt->{normalisespace})) { | ||
| 744 | $opt->{normalisespace} = $opt->{normalizespace}; | ||||
| 745 | } | ||||
| 746 | 3 | 3µs | $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); | ||
| 747 | |||||
| 748 | # Cleanups for values assumed to be arrays later | ||||
| 749 | |||||
| 750 | 3 | 2µs | if($opt->{searchpath}) { | ||
| 751 | unless(ref($opt->{searchpath})) { | ||||
| 752 | $opt->{searchpath} = [ $opt->{searchpath} ]; | ||||
| 753 | } | ||||
| 754 | } | ||||
| 755 | else { | ||||
| 756 | 3 | 10µs | $opt->{searchpath} = [ ]; | ||
| 757 | } | ||||
| 758 | |||||
| 759 | 3 | 2µs | if($opt->{cache} and !ref($opt->{cache})) { | ||
| 760 | $opt->{cache} = [ $opt->{cache} ]; | ||||
| 761 | } | ||||
| 762 | 3 | 2µs | if($opt->{cache}) { | ||
| 763 | $_ = lc($_) foreach (@{$opt->{cache}}); | ||||
| 764 | foreach my $scheme (@{$opt->{cache}}) { | ||||
| 765 | my $method = 'cache_read_' . $scheme; | ||||
| 766 | croak "Unsupported caching scheme: $scheme" | ||||
| 767 | unless($self->can($method)); | ||||
| 768 | } | ||||
| 769 | } | ||||
| 770 | |||||
| 771 | 3 | 4µs | if(exists($opt->{parseropts})) { | ||
| 772 | if($^W) { | ||||
| 773 | carp "Warning: " . | ||||
| 774 | "'ParserOpts' is deprecated, contact the author if you need it"; | ||||
| 775 | } | ||||
| 776 | } | ||||
| 777 | else { | ||||
| 778 | 3 | 2µs | $opt->{parseropts} = [ ]; | ||
| 779 | } | ||||
| 780 | |||||
| 781 | |||||
| 782 | # Special cleanup for {forcearray} which could be regex, arrayref or boolean | ||||
| 783 | # or left to default to 0 | ||||
| 784 | |||||
| 785 | 3 | 6µs | if(exists($opt->{forcearray})) { | ||
| 786 | 3 | 5µs | if(ref($opt->{forcearray}) eq 'Regexp') { | ||
| 787 | $opt->{forcearray} = [ $opt->{forcearray} ]; | ||||
| 788 | } | ||||
| 789 | |||||
| 790 | 3 | 4µs | if(ref($opt->{forcearray}) eq 'ARRAY') { | ||
| 791 | 3 | 6µs | my @force_list = @{$opt->{forcearray}}; | ||
| 792 | 3 | 2µs | if(@force_list) { | ||
| 793 | 3 | 2µs | $opt->{forcearray} = {}; | ||
| 794 | 3 | 3µs | foreach my $tag (@force_list) { | ||
| 795 | 3 | 5µs | if(ref($tag) eq 'Regexp') { | ||
| 796 | push @{$opt->{forcearray}->{_regex}}, $tag; | ||||
| 797 | } | ||||
| 798 | else { | ||||
| 799 | 3 | 6µs | $opt->{forcearray}->{$tag} = 1; | ||
| 800 | } | ||||
| 801 | } | ||||
| 802 | } | ||||
| 803 | else { | ||||
| 804 | $opt->{forcearray} = 0; | ||||
| 805 | } | ||||
| 806 | } | ||||
| 807 | else { | ||||
| 808 | $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); | ||||
| 809 | } | ||||
| 810 | } | ||||
| 811 | else { | ||||
| 812 | if($StrictMode and $dirn eq 'in') { | ||||
| 813 | croak "No value specified for 'ForceArray' option in call to XML$dirn()"; | ||||
| 814 | } | ||||
| 815 | $opt->{forcearray} = 0; | ||||
| 816 | } | ||||
| 817 | |||||
| 818 | |||||
| 819 | # Special cleanup for {keyattr} which could be arrayref or hashref or left | ||||
| 820 | # to default to arrayref | ||||
| 821 | |||||
| 822 | 3 | 3µs | if(exists($opt->{keyattr})) { | ||
| 823 | if(ref($opt->{keyattr})) { | ||||
| 824 | if(ref($opt->{keyattr}) eq 'HASH') { | ||||
| 825 | |||||
| 826 | # Make a copy so we can mess with it | ||||
| 827 | |||||
| 828 | $opt->{keyattr} = { %{$opt->{keyattr}} }; | ||||
| 829 | |||||
| 830 | |||||
| 831 | # Convert keyattr => { elem => '+attr' } | ||||
| 832 | # to keyattr => { elem => [ 'attr', '+' ] } | ||||
| 833 | |||||
| 834 | foreach my $el (keys(%{$opt->{keyattr}})) { | ||||
| 835 | if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { | ||||
| 836 | $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; | ||||
| 837 | if($StrictMode and $dirn eq 'in') { | ||||
| 838 | next if($opt->{forcearray} == 1); | ||||
| 839 | next if(ref($opt->{forcearray}) eq 'HASH' | ||||
| 840 | and $opt->{forcearray}->{$el}); | ||||
| 841 | croak "<$el> set in KeyAttr but not in ForceArray"; | ||||
| 842 | } | ||||
| 843 | } | ||||
| 844 | else { | ||||
| 845 | delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) | ||||
| 846 | } | ||||
| 847 | } | ||||
| 848 | } | ||||
| 849 | else { | ||||
| 850 | if(@{$opt->{keyattr}} == 0) { | ||||
| 851 | delete($opt->{keyattr}); | ||||
| 852 | } | ||||
| 853 | } | ||||
| 854 | } | ||||
| 855 | else { | ||||
| 856 | $opt->{keyattr} = [ $opt->{keyattr} ]; | ||||
| 857 | } | ||||
| 858 | } | ||||
| 859 | else { | ||||
| 860 | 3 | 2µs | if($StrictMode) { | ||
| 861 | croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; | ||||
| 862 | } | ||||
| 863 | 3 | 10µs | $opt->{keyattr} = [ @DefKeyAttr ]; | ||
| 864 | } | ||||
| 865 | |||||
| 866 | |||||
| 867 | # Special cleanup for {valueattr} which could be arrayref or hashref | ||||
| 868 | |||||
| 869 | 3 | 3µs | if(exists($opt->{valueattr})) { | ||
| 870 | if(ref($opt->{valueattr}) eq 'ARRAY') { | ||||
| 871 | $opt->{valueattrlist} = {}; | ||||
| 872 | $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); | ||||
| 873 | } | ||||
| 874 | } | ||||
| 875 | |||||
| 876 | # make sure there's nothing weird in {grouptags} | ||||
| 877 | |||||
| 878 | 3 | 2µs | if($opt->{grouptags}) { | ||
| 879 | croak "Illegal value for 'GroupTags' option - expected a hashref" | ||||
| 880 | unless UNIVERSAL::isa($opt->{grouptags}, 'HASH'); | ||||
| 881 | |||||
| 882 | while(my($key, $val) = each %{$opt->{grouptags}}) { | ||||
| 883 | next if $key ne $val; | ||||
| 884 | croak "Bad value in GroupTags: '$key' => '$val'"; | ||||
| 885 | } | ||||
| 886 | } | ||||
| 887 | |||||
| 888 | |||||
| 889 | # Check the {variables} option is valid and initialise variables hash | ||||
| 890 | |||||
| 891 | 3 | 2µs | if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { | ||
| 892 | croak "Illegal value for 'Variables' option - expected a hashref"; | ||||
| 893 | } | ||||
| 894 | |||||
| 895 | 3 | 34µs | if($opt->{variables}) { | ||
| 896 | $self->{_var_values} = { %{$opt->{variables}} }; | ||||
| 897 | } | ||||
| 898 | elsif($opt->{varattr}) { | ||||
| 899 | $self->{_var_values} = {}; | ||||
| 900 | } | ||||
| 901 | |||||
| 902 | } | ||||
| 903 | |||||
| 904 | |||||
| 905 | ############################################################################## | ||||
| 906 | # Method: find_xml_file() | ||||
| 907 | # | ||||
| 908 | # Helper routine for XMLin(). | ||||
| 909 | # Takes a filename, and a list of directories, attempts to locate the file in | ||||
| 910 | # the directories listed. | ||||
| 911 | # Returns a full pathname on success; croaks on failure. | ||||
| 912 | # | ||||
| 913 | |||||
| 914 | sub find_xml_file { | ||||
| 915 | my $self = shift; | ||||
| 916 | my $file = shift; | ||||
| 917 | my @search_path = @_; | ||||
| 918 | |||||
| 919 | |||||
| 920 | require File::Basename; | ||||
| 921 | require File::Spec; | ||||
| 922 | |||||
| 923 | my($filename, $filedir) = File::Basename::fileparse($file); | ||||
| 924 | |||||
| 925 | if($filename ne $file) { # Ignore searchpath if dir component | ||||
| 926 | return($file) if(-e $file); | ||||
| 927 | } | ||||
| 928 | else { | ||||
| 929 | my($path); | ||||
| 930 | foreach $path (@search_path) { | ||||
| 931 | my $fullpath = File::Spec->catfile($path, $file); | ||||
| 932 | return($fullpath) if(-e $fullpath); | ||||
| 933 | } | ||||
| 934 | } | ||||
| 935 | |||||
| 936 | # If user did not supply a search path, default to current directory | ||||
| 937 | |||||
| 938 | if(!@search_path) { | ||||
| 939 | return($file) if(-e $file); | ||||
| 940 | croak "File does not exist: $file"; | ||||
| 941 | } | ||||
| 942 | |||||
| 943 | croak "Could not find $file in ", join(':', @search_path); | ||||
| 944 | } | ||||
| 945 | |||||
| 946 | |||||
| 947 | ############################################################################## | ||||
| 948 | # Method: collapse() | ||||
| 949 | # | ||||
| 950 | # Helper routine for XMLin(). This routine really comprises the 'smarts' (or | ||||
| 951 | # value add) of this module. | ||||
| 952 | # | ||||
| 953 | # Takes the parse tree that XML::Parser produced from the supplied XML and | ||||
| 954 | # recurses through it 'collapsing' unnecessary levels of indirection (nested | ||||
| 955 | # arrays etc) to produce a data structure that is easier to work with. | ||||
| 956 | # | ||||
| 957 | # Elements in the original parser tree are represented as an element name | ||||
| 958 | # followed by an arrayref. The first element of the array is a hashref | ||||
| 959 | # containing the attributes. The rest of the array contains a list of any | ||||
| 960 | # nested elements as name+arrayref pairs: | ||||
| 961 | # | ||||
| 962 | # <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ] | ||||
| 963 | # | ||||
| 964 | # The special element name '0' (zero) flags text content. | ||||
| 965 | # | ||||
| 966 | # This routine cuts down the noise by discarding any text content consisting of | ||||
| 967 | # only whitespace and then moves the nested elements into the attribute hash | ||||
| 968 | # using the name of the nested element as the hash key and the collapsed | ||||
| 969 | # version of the nested element as the value. Multiple nested elements with | ||||
| 970 | # the same name will initially be represented as an arrayref, but this may be | ||||
| 971 | # 'folded' into a hashref depending on the value of the keyattr option. | ||||
| 972 | # | ||||
| 973 | |||||
| 974 | sub collapse { | ||||
| 975 | 35 | 8µs | my $self = shift; | ||
| 976 | |||||
| 977 | |||||
| 978 | # Start with the hash of attributes | ||||
| 979 | |||||
| 980 | 35 | 6µs | my $attr = shift; | ||
| 981 | 35 | 26µs | if($self->{opt}->{noattr}) { # Discard if 'noattr' set | ||
| 982 | $attr = {}; | ||||
| 983 | } | ||||
| 984 | elsif($self->{opt}->{normalisespace} == 2) { | ||||
| 985 | while(my($key, $value) = each %$attr) { | ||||
| 986 | $attr->{$key} = $self->normalise_space($value) | ||||
| 987 | } | ||||
| 988 | } | ||||
| 989 | |||||
| 990 | |||||
| 991 | # Do variable substitutions | ||||
| 992 | |||||
| 993 | 35 | 10µs | if(my $var = $self->{_var_values}) { | ||
| 994 | while(my($key, $val) = each(%$attr)) { | ||||
| 995 | $val =~ s{\$\{([\w.]+)\}}{ $self->get_var($1) }ge; | ||||
| 996 | $attr->{$key} = $val; | ||||
| 997 | } | ||||
| 998 | } | ||||
| 999 | |||||
| 1000 | |||||
| 1001 | # Roll up 'value' attributes (but only if no nested elements) | ||||
| 1002 | |||||
| 1003 | 35 | 10µs | if(!@_ and keys %$attr == 1) { | ||
| 1004 | my($k) = keys %$attr; | ||||
| 1005 | if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { | ||||
| 1006 | return $attr->{$k}; | ||||
| 1007 | } | ||||
| 1008 | } | ||||
| 1009 | |||||
| 1010 | |||||
| 1011 | # Add any nested elements | ||||
| 1012 | |||||
| 1013 | 35 | 9µs | my($key, $val); | ||
| 1014 | 35 | 10µs | while(@_) { | ||
| 1015 | 60 | 13µs | $key = shift; | ||
| 1016 | 60 | 14µs | $val = shift; | ||
| 1017 | |||||
| 1018 | 60 | 27µs | if(ref($val)) { | ||
| 1019 | 32 | 59µs | 32 | 0s | $val = $self->collapse(@$val); # spent 13.6ms making 32 calls to XML::Simple::collapse, avg 425µs/call, recursion: max depth 2, time 13.6ms |
| 1020 | 32 | 10µs | next if(!defined($val) and $self->{opt}->{suppressempty}); | ||
| 1021 | } | ||||
| 1022 | elsif($key eq '0') { | ||||
| 1023 | 31 | 129µs | 29 | 7.74ms | next if($val =~ m{^\s*$}s); # Skip all whitespace content # spent 6.30ms making 28 calls to XML::Simple::CORE:match, avg 225µs/call
# spent 1.44ms making 1 call to utf8::AUTOLOAD |
| 1024 | |||||
| 1025 | 28 | 14µs | $val = $self->normalise_space($val) | ||
| 1026 | if($self->{opt}->{normalisespace} == 2); | ||||
| 1027 | |||||
| 1028 | # do variable substitutions | ||||
| 1029 | |||||
| 1030 | 28 | 9µs | if(my $var = $self->{_var_values}) { | ||
| 1031 | $val =~ s{\$\{(\w+)\}}{ $self->get_var($1) }ge; | ||||
| 1032 | } | ||||
| 1033 | |||||
| 1034 | |||||
| 1035 | # look for variable definitions | ||||
| 1036 | |||||
| 1037 | 28 | 8µs | if(my $var = $self->{opt}->{varattr}) { | ||
| 1038 | if(exists $attr->{$var}) { | ||||
| 1039 | $self->set_var($attr->{$var}, $val); | ||||
| 1040 | } | ||||
| 1041 | } | ||||
| 1042 | |||||
| 1043 | |||||
| 1044 | # Collapse text content in element with no attributes to a string | ||||
| 1045 | |||||
| 1046 | 28 | 100µs | if(!%$attr and !@_) { | ||
| 1047 | return($self->{opt}->{forcecontent} ? | ||||
| 1048 | { $self->{opt}->{contentkey} => $val } : $val | ||||
| 1049 | ); | ||||
| 1050 | } | ||||
| 1051 | $key = $self->{opt}->{contentkey}; | ||||
| 1052 | } | ||||
| 1053 | |||||
| 1054 | |||||
| 1055 | # Combine duplicate attributes into arrayref if required | ||||
| 1056 | |||||
| 1057 | 32 | 151µs | 32 | 31µs | if(exists($attr->{$key})) { # spent 31µs making 32 calls to UNIVERSAL::isa, avg 962ns/call |
| 1058 | if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { | ||||
| 1059 | push(@{$attr->{$key}}, $val); | ||||
| 1060 | } | ||||
| 1061 | else { | ||||
| 1062 | 1 | 2µs | $attr->{$key} = [ $attr->{$key}, $val ]; | ||
| 1063 | } | ||||
| 1064 | } | ||||
| 1065 | elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { | ||||
| 1066 | $attr->{$key} = [ $val ]; | ||||
| 1067 | } | ||||
| 1068 | else { | ||||
| 1069 | 11 | 42µs | if( $key ne $self->{opt}->{contentkey} | ||
| 1070 | and ( | ||||
| 1071 | ($self->{opt}->{forcearray} == 1) | ||||
| 1072 | or ( | ||||
| 1073 | (ref($self->{opt}->{forcearray}) eq 'HASH') | ||||
| 1074 | and ( | ||||
| 1075 | $self->{opt}->{forcearray}->{$key} | ||||
| 1076 | or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) | ||||
| 1077 | ) | ||||
| 1078 | ) | ||||
| 1079 | ) | ||||
| 1080 | ) { | ||||
| 1081 | $attr->{$key} = [ $val ]; | ||||
| 1082 | } | ||||
| 1083 | else { | ||||
| 1084 | 11 | 16µs | $attr->{$key} = $val; | ||
| 1085 | } | ||||
| 1086 | } | ||||
| 1087 | |||||
| 1088 | } | ||||
| 1089 | |||||
| 1090 | |||||
| 1091 | # Turn arrayrefs into hashrefs if key fields present | ||||
| 1092 | |||||
| 1093 | 7 | 32µs | if($self->{opt}->{keyattr}) { | ||
| 1094 | while(($key,$val) = each %$attr) { | ||||
| 1095 | 14 | 74µs | 15 | 45µs | if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { # spent 31µs making 1 call to XML::Simple::array_to_hash
# spent 14µs making 14 calls to UNIVERSAL::isa, avg 1µs/call |
| 1096 | $attr->{$key} = $self->array_to_hash($key, $val); | ||||
| 1097 | } | ||||
| 1098 | } | ||||
| 1099 | } | ||||
| 1100 | |||||
| 1101 | |||||
| 1102 | # disintermediate grouped tags | ||||
| 1103 | |||||
| 1104 | 7 | 5µs | if($self->{opt}->{grouptags}) { | ||
| 1105 | while(my($key, $val) = each(%$attr)) { | ||||
| 1106 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | ||||
| 1107 | next unless(exists($self->{opt}->{grouptags}->{$key})); | ||||
| 1108 | |||||
| 1109 | my($child_key, $child_val) = %$val; | ||||
| 1110 | |||||
| 1111 | if($self->{opt}->{grouptags}->{$key} eq $child_key) { | ||||
| 1112 | $attr->{$key}= $child_val; | ||||
| 1113 | } | ||||
| 1114 | } | ||||
| 1115 | } | ||||
| 1116 | |||||
| 1117 | |||||
| 1118 | # Fold hashes containing a single anonymous array up into just the array | ||||
| 1119 | |||||
| 1120 | 7 | 5µs | my $count = scalar keys %$attr; | ||
| 1121 | 7 | 4µs | if($count == 1 | ||
| 1122 | and exists $attr->{anon} | ||||
| 1123 | and UNIVERSAL::isa($attr->{anon}, 'ARRAY') | ||||
| 1124 | ) { | ||||
| 1125 | return($attr->{anon}); | ||||
| 1126 | } | ||||
| 1127 | |||||
| 1128 | |||||
| 1129 | # Do the right thing if hash is empty, otherwise just return it | ||||
| 1130 | |||||
| 1131 | 7 | 14µs | if(!%$attr and exists($self->{opt}->{suppressempty})) { | ||
| 1132 | if(defined($self->{opt}->{suppressempty}) and | ||||
| 1133 | $self->{opt}->{suppressempty} eq '') { | ||||
| 1134 | return(''); | ||||
| 1135 | } | ||||
| 1136 | return(undef); | ||||
| 1137 | } | ||||
| 1138 | |||||
| 1139 | |||||
| 1140 | # Roll up named elements with named nested 'value' attributes | ||||
| 1141 | |||||
| 1142 | 7 | 5µs | if($self->{opt}->{valueattr}) { | ||
| 1143 | while(my($key, $val) = each(%$attr)) { | ||||
| 1144 | next unless($self->{opt}->{valueattr}->{$key}); | ||||
| 1145 | next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); | ||||
| 1146 | my($k) = keys %$val; | ||||
| 1147 | next unless($k eq $self->{opt}->{valueattr}->{$key}); | ||||
| 1148 | $attr->{$key} = $val->{$k}; | ||||
| 1149 | } | ||||
| 1150 | } | ||||
| 1151 | |||||
| 1152 | 7 | 21µs | return($attr) | ||
| 1153 | |||||
| 1154 | } | ||||
| 1155 | |||||
| 1156 | |||||
| 1157 | ############################################################################## | ||||
| 1158 | # Method: set_var() | ||||
| 1159 | # | ||||
| 1160 | # Called when a variable definition is encountered in the XML. (A variable | ||||
| 1161 | # definition looks like <element attrname="name">value</element> where attrname | ||||
| 1162 | # matches the varattr setting). | ||||
| 1163 | # | ||||
| 1164 | |||||
| 1165 | sub set_var { | ||||
| 1166 | my($self, $name, $value) = @_; | ||||
| 1167 | |||||
| 1168 | $self->{_var_values}->{$name} = $value; | ||||
| 1169 | } | ||||
| 1170 | |||||
| 1171 | |||||
| 1172 | ############################################################################## | ||||
| 1173 | # Method: get_var() | ||||
| 1174 | # | ||||
| 1175 | # Called during variable substitution to get the value for the named variable. | ||||
| 1176 | # | ||||
| 1177 | |||||
| 1178 | sub get_var { | ||||
| 1179 | my($self, $name) = @_; | ||||
| 1180 | |||||
| 1181 | my $value = $self->{_var_values}->{$name}; | ||||
| 1182 | return $value if(defined($value)); | ||||
| 1183 | |||||
| 1184 | return '${' . $name . '}'; | ||||
| 1185 | } | ||||
| 1186 | |||||
| 1187 | |||||
| 1188 | ############################################################################## | ||||
| 1189 | # Method: normalise_space() | ||||
| 1190 | # | ||||
| 1191 | # Strips leading and trailing whitespace and collapses sequences of whitespace | ||||
| 1192 | # characters to a single space. | ||||
| 1193 | # | ||||
| 1194 | |||||
| 1195 | sub normalise_space { | ||||
| 1196 | my($self, $text) = @_; | ||||
| 1197 | |||||
| 1198 | $text =~ s/^\s+//s; | ||||
| 1199 | $text =~ s/\s+$//s; | ||||
| 1200 | $text =~ s/\s\s+/ /sg; | ||||
| 1201 | |||||
| 1202 | return $text; | ||||
| 1203 | } | ||||
| 1204 | |||||
| 1205 | |||||
| 1206 | ############################################################################## | ||||
| 1207 | # Method: array_to_hash() | ||||
| 1208 | # | ||||
| 1209 | # Helper routine for collapse(). | ||||
| 1210 | # Attempts to 'fold' an array of hashes into an hash of hashes. Returns a | ||||
| 1211 | # reference to the hash on success or the original array if folding is | ||||
| 1212 | # not possible. Behaviour is controlled by 'keyattr' option. | ||||
| 1213 | # | ||||
| 1214 | |||||
| 1215 | # spent 31µs (26+4) within XML::Simple::array_to_hash which was called
# once (26µs+4µs) by XML::Simple::collapse at line 1095 | ||||
| 1216 | 1 | 600ns | my $self = shift; | ||
| 1217 | 1 | 700ns | my $name = shift; | ||
| 1218 | 1 | 400ns | my $arrayref = shift; | ||
| 1219 | |||||
| 1220 | 1 | 2µs | 1 | 3µs | my $hashref = $self->new_hashref; # spent 3µs making 1 call to XML::Simple::new_hashref |
| 1221 | |||||
| 1222 | 1 | 400ns | my($i, $key, $val, $flag); | ||
| 1223 | |||||
| 1224 | |||||
| 1225 | # Handle keyattr => { .... } | ||||
| 1226 | |||||
| 1227 | 1 | 2µs | if(ref($self->{opt}->{keyattr}) eq 'HASH') { | ||
| 1228 | return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); | ||||
| 1229 | ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; | ||||
| 1230 | for($i = 0; $i < @$arrayref; $i++) { | ||||
| 1231 | if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and | ||||
| 1232 | exists($arrayref->[$i]->{$key}) | ||||
| 1233 | ) { | ||||
| 1234 | $val = $arrayref->[$i]->{$key}; | ||||
| 1235 | if(ref($val)) { | ||||
| 1236 | $self->die_or_warn("<$name> element has non-scalar '$key' key attribute"); | ||||
| 1237 | return($arrayref); | ||||
| 1238 | } | ||||
| 1239 | $val = $self->normalise_space($val) | ||||
| 1240 | if($self->{opt}->{normalisespace} == 1); | ||||
| 1241 | $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") | ||||
| 1242 | if(exists($hashref->{$val})); | ||||
| 1243 | $hashref->{$val} = { %{$arrayref->[$i]} }; | ||||
| 1244 | $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); | ||||
| 1245 | delete $hashref->{$val}->{$key} unless($flag eq '+'); | ||||
| 1246 | } | ||||
| 1247 | else { | ||||
| 1248 | $self->die_or_warn("<$name> element has no '$key' key attribute"); | ||||
| 1249 | return($arrayref); | ||||
| 1250 | } | ||||
| 1251 | } | ||||
| 1252 | } | ||||
| 1253 | |||||
| 1254 | |||||
| 1255 | # Or assume keyattr => [ .... ] | ||||
| 1256 | |||||
| 1257 | else { | ||||
| 1258 | my $default_keys = | ||||
| 1259 | 1 | 5µs | join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}}); | ||
| 1260 | |||||
| 1261 | 1 | 1µs | ELEMENT: for($i = 0; $i < @$arrayref; $i++) { | ||
| 1262 | 1 | 11µs | 1 | 1µs | return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); # spent 1µs making 1 call to UNIVERSAL::isa |
| 1263 | |||||
| 1264 | foreach $key (@{$self->{opt}->{keyattr}}) { | ||||
| 1265 | if(defined($arrayref->[$i]->{$key})) { | ||||
| 1266 | $val = $arrayref->[$i]->{$key}; | ||||
| 1267 | if(ref($val)) { | ||||
| 1268 | $self->die_or_warn("<$name> element has non-scalar '$key' key attribute") | ||||
| 1269 | if not $default_keys; | ||||
| 1270 | return($arrayref); | ||||
| 1271 | } | ||||
| 1272 | $val = $self->normalise_space($val) | ||||
| 1273 | if($self->{opt}->{normalisespace} == 1); | ||||
| 1274 | $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") | ||||
| 1275 | if(exists($hashref->{$val})); | ||||
| 1276 | $hashref->{$val} = { %{$arrayref->[$i]} }; | ||||
| 1277 | delete $hashref->{$val}->{$key}; | ||||
| 1278 | next ELEMENT; | ||||
| 1279 | } | ||||
| 1280 | } | ||||
| 1281 | |||||
| 1282 | return($arrayref); # No keyfield matched | ||||
| 1283 | 1 | 500ns | } | ||
| 1284 | } | ||||
| 1285 | |||||
| 1286 | # collapse any hashes which now only have a 'content' key | ||||
| 1287 | |||||
| 1288 | if($self->{opt}->{collapseagain}) { | ||||
| 1289 | $hashref = $self->collapse_content($hashref); | ||||
| 1290 | } | ||||
| 1291 | |||||
| 1292 | return($hashref); | ||||
| 1293 | } | ||||
| 1294 | |||||
| 1295 | |||||
| 1296 | ############################################################################## | ||||
| 1297 | # Method: die_or_warn() | ||||
| 1298 | # | ||||
| 1299 | # Takes a diagnostic message and does one of three things: | ||||
| 1300 | # 1. dies if strict mode is enabled | ||||
| 1301 | # 2. warns if warnings are enabled but strict mode is not | ||||
| 1302 | # 3. ignores message and resturns silently if neither strict mode nor warnings | ||||
| 1303 | # are enabled | ||||
| 1304 | # | ||||
| 1305 | |||||
| 1306 | sub die_or_warn { | ||||
| 1307 | my $self = shift; | ||||
| 1308 | my $msg = shift; | ||||
| 1309 | |||||
| 1310 | croak $msg if($StrictMode); | ||||
| 1311 | carp "Warning: $msg" if($^W); | ||||
| 1312 | } | ||||
| 1313 | |||||
| 1314 | |||||
| 1315 | ############################################################################## | ||||
| 1316 | # Method: new_hashref() | ||||
| 1317 | # | ||||
| 1318 | # This is a hook routine for overriding in a sub-class. Some people believe | ||||
| 1319 | # that using Tie::IxHash here will solve order-loss problems. | ||||
| 1320 | # | ||||
| 1321 | |||||
| 1322 | # spent 3µs within XML::Simple::new_hashref which was called
# once (3µs+0s) by XML::Simple::array_to_hash at line 1220 | ||||
| 1323 | 1 | 800ns | my $self = shift; | ||
| 1324 | |||||
| 1325 | 1 | 6µs | return { @_ }; | ||
| 1326 | } | ||||
| 1327 | |||||
| 1328 | |||||
| 1329 | ############################################################################## | ||||
| 1330 | # Method: collapse_content() | ||||
| 1331 | # | ||||
| 1332 | # Helper routine for array_to_hash | ||||
| 1333 | # | ||||
| 1334 | # Arguments expected are: | ||||
| 1335 | # - an XML::Simple object | ||||
| 1336 | # - a hasref | ||||
| 1337 | # the hashref is a former array, turned into a hash by array_to_hash because | ||||
| 1338 | # of the presence of key attributes | ||||
| 1339 | # at this point collapse_content avoids over-complicated structures like | ||||
| 1340 | # dir => { libexecdir => { content => '$exec_prefix/libexec' }, | ||||
| 1341 | # localstatedir => { content => '$prefix' }, | ||||
| 1342 | # } | ||||
| 1343 | # into | ||||
| 1344 | # dir => { libexecdir => '$exec_prefix/libexec', | ||||
| 1345 | # localstatedir => '$prefix', | ||||
| 1346 | # } | ||||
| 1347 | |||||
| 1348 | sub collapse_content { | ||||
| 1349 | my $self = shift; | ||||
| 1350 | my $hashref = shift; | ||||
| 1351 | |||||
| 1352 | my $contentkey = $self->{opt}->{contentkey}; | ||||
| 1353 | |||||
| 1354 | # first go through the values,checking that they are fit to collapse | ||||
| 1355 | foreach my $val (values %$hashref) { | ||||
| 1356 | return $hashref unless ( (ref($val) eq 'HASH') | ||||
| 1357 | and (keys %$val == 1) | ||||
| 1358 | and (exists $val->{$contentkey}) | ||||
| 1359 | ); | ||||
| 1360 | } | ||||
| 1361 | |||||
| 1362 | # now collapse them | ||||
| 1363 | foreach my $key (keys %$hashref) { | ||||
| 1364 | $hashref->{$key}= $hashref->{$key}->{$contentkey}; | ||||
| 1365 | } | ||||
| 1366 | |||||
| 1367 | return $hashref; | ||||
| 1368 | } | ||||
| 1369 | |||||
| 1370 | |||||
| 1371 | ############################################################################## | ||||
| 1372 | # Method: value_to_xml() | ||||
| 1373 | # | ||||
| 1374 | # Helper routine for XMLout() - recurses through a data structure building up | ||||
| 1375 | # and returning an XML representation of that structure as a string. | ||||
| 1376 | # | ||||
| 1377 | # Arguments expected are: | ||||
| 1378 | # - the data structure to be encoded (usually a reference) | ||||
| 1379 | # - the XML tag name to use for this item | ||||
| 1380 | # - a string of spaces for use as the current indent level | ||||
| 1381 | # | ||||
| 1382 | |||||
| 1383 | sub value_to_xml { | ||||
| 1384 | my $self = shift;; | ||||
| 1385 | |||||
| 1386 | |||||
| 1387 | # Grab the other arguments | ||||
| 1388 | |||||
| 1389 | my($ref, $name, $indent) = @_; | ||||
| 1390 | |||||
| 1391 | my $named = (defined($name) and $name ne '' ? 1 : 0); | ||||
| 1392 | |||||
| 1393 | my $nl = "\n"; | ||||
| 1394 | |||||
| 1395 | my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! | ||||
| 1396 | if($self->{opt}->{noindent}) { | ||||
| 1397 | $indent = ''; | ||||
| 1398 | $nl = ''; | ||||
| 1399 | } | ||||
| 1400 | |||||
| 1401 | |||||
| 1402 | # Convert to XML | ||||
| 1403 | |||||
| 1404 | if(ref($ref)) { | ||||
| 1405 | croak "circular data structures not supported" | ||||
| 1406 | if(grep($_ == $ref, @{$self->{_ancestors}})); | ||||
| 1407 | push @{$self->{_ancestors}}, $ref; | ||||
| 1408 | } | ||||
| 1409 | else { | ||||
| 1410 | if($named) { | ||||
| 1411 | return(join('', | ||||
| 1412 | $indent, '<', $name, '>', | ||||
| 1413 | ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), | ||||
| 1414 | '</', $name, ">", $nl | ||||
| 1415 | )); | ||||
| 1416 | } | ||||
| 1417 | else { | ||||
| 1418 | return("$ref$nl"); | ||||
| 1419 | } | ||||
| 1420 | } | ||||
| 1421 | |||||
| 1422 | |||||
| 1423 | # Unfold hash to array if possible | ||||
| 1424 | |||||
| 1425 | if(UNIVERSAL::isa($ref, 'HASH') # It is a hash | ||||
| 1426 | and keys %$ref # and it's not empty | ||||
| 1427 | and $self->{opt}->{keyattr} # and folding is enabled | ||||
| 1428 | and !$is_root # and its not the root element | ||||
| 1429 | ) { | ||||
| 1430 | $ref = $self->hash_to_array($name, $ref); | ||||
| 1431 | } | ||||
| 1432 | |||||
| 1433 | |||||
| 1434 | my @result = (); | ||||
| 1435 | my($key, $value); | ||||
| 1436 | |||||
| 1437 | |||||
| 1438 | # Handle hashrefs | ||||
| 1439 | |||||
| 1440 | if(UNIVERSAL::isa($ref, 'HASH')) { | ||||
| 1441 | |||||
| 1442 | # Reintermediate grouped values if applicable | ||||
| 1443 | |||||
| 1444 | if($self->{opt}->{grouptags}) { | ||||
| 1445 | $ref = $self->copy_hash($ref); | ||||
| 1446 | while(my($key, $val) = each %$ref) { | ||||
| 1447 | if($self->{opt}->{grouptags}->{$key}) { | ||||
| 1448 | $ref->{$key} = { $self->{opt}->{grouptags}->{$key} => $val }; | ||||
| 1449 | } | ||||
| 1450 | } | ||||
| 1451 | } | ||||
| 1452 | |||||
| 1453 | |||||
| 1454 | # Scan for namespace declaration attributes | ||||
| 1455 | |||||
| 1456 | my $nsdecls = ''; | ||||
| 1457 | my $default_ns_uri; | ||||
| 1458 | if($self->{nsup}) { | ||||
| 1459 | $ref = $self->copy_hash($ref); | ||||
| 1460 | $self->{nsup}->push_context(); | ||||
| 1461 | |||||
| 1462 | # Look for default namespace declaration first | ||||
| 1463 | |||||
| 1464 | if(exists($ref->{xmlns})) { | ||||
| 1465 | $self->{nsup}->declare_prefix('', $ref->{xmlns}); | ||||
| 1466 | $nsdecls .= qq( xmlns="$ref->{xmlns}"); | ||||
| 1467 | delete($ref->{xmlns}); | ||||
| 1468 | } | ||||
| 1469 | $default_ns_uri = $self->{nsup}->get_uri(''); | ||||
| 1470 | |||||
| 1471 | |||||
| 1472 | # Then check all the other keys | ||||
| 1473 | |||||
| 1474 | foreach my $qname (keys(%$ref)) { | ||||
| 1475 | my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); | ||||
| 1476 | if($uri) { | ||||
| 1477 | if($uri eq $xmlns_ns) { | ||||
| 1478 | $self->{nsup}->declare_prefix($lname, $ref->{$qname}); | ||||
| 1479 | $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); | ||||
| 1480 | delete($ref->{$qname}); | ||||
| 1481 | } | ||||
| 1482 | } | ||||
| 1483 | } | ||||
| 1484 | |||||
| 1485 | # Translate any remaining Clarkian names | ||||
| 1486 | |||||
| 1487 | foreach my $qname (keys(%$ref)) { | ||||
| 1488 | my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); | ||||
| 1489 | if($uri) { | ||||
| 1490 | if($default_ns_uri and $uri eq $default_ns_uri) { | ||||
| 1491 | $ref->{$lname} = $ref->{$qname}; | ||||
| 1492 | delete($ref->{$qname}); | ||||
| 1493 | } | ||||
| 1494 | else { | ||||
| 1495 | my $prefix = $self->{nsup}->get_prefix($uri); | ||||
| 1496 | unless($prefix) { | ||||
| 1497 | # $self->{nsup}->declare_prefix(undef, $uri); | ||||
| 1498 | # $prefix = $self->{nsup}->get_prefix($uri); | ||||
| 1499 | $prefix = $self->{ns_prefix}++; | ||||
| 1500 | $self->{nsup}->declare_prefix($prefix, $uri); | ||||
| 1501 | $nsdecls .= qq( xmlns:$prefix="$uri"); | ||||
| 1502 | } | ||||
| 1503 | $ref->{"$prefix:$lname"} = $ref->{$qname}; | ||||
| 1504 | delete($ref->{$qname}); | ||||
| 1505 | } | ||||
| 1506 | } | ||||
| 1507 | } | ||||
| 1508 | } | ||||
| 1509 | |||||
| 1510 | |||||
| 1511 | my @nested = (); | ||||
| 1512 | my $text_content = undef; | ||||
| 1513 | if($named) { | ||||
| 1514 | push @result, $indent, '<', $name, $nsdecls; | ||||
| 1515 | } | ||||
| 1516 | |||||
| 1517 | if(keys %$ref) { | ||||
| 1518 | my $first_arg = 1; | ||||
| 1519 | foreach my $key ($self->sorted_keys($name, $ref)) { | ||||
| 1520 | my $value = $ref->{$key}; | ||||
| 1521 | next if(substr($key, 0, 1) eq '-'); | ||||
| 1522 | if(!defined($value)) { | ||||
| 1523 | next if $self->{opt}->{suppressempty}; | ||||
| 1524 | unless(exists($self->{opt}->{suppressempty}) | ||||
| 1525 | and !defined($self->{opt}->{suppressempty}) | ||||
| 1526 | ) { | ||||
| 1527 | carp 'Use of uninitialized value' if($^W); | ||||
| 1528 | } | ||||
| 1529 | if($key eq $self->{opt}->{contentkey}) { | ||||
| 1530 | $text_content = ''; | ||||
| 1531 | } | ||||
| 1532 | else { | ||||
| 1533 | $value = exists($self->{opt}->{suppressempty}) ? {} : ''; | ||||
| 1534 | } | ||||
| 1535 | } | ||||
| 1536 | |||||
| 1537 | if(!ref($value) | ||||
| 1538 | and $self->{opt}->{valueattr} | ||||
| 1539 | and $self->{opt}->{valueattr}->{$key} | ||||
| 1540 | ) { | ||||
| 1541 | $value = { $self->{opt}->{valueattr}->{$key} => $value }; | ||||
| 1542 | } | ||||
| 1543 | |||||
| 1544 | if(ref($value) or $self->{opt}->{noattr}) { | ||||
| 1545 | push @nested, | ||||
| 1546 | $self->value_to_xml($value, $key, "$indent "); | ||||
| 1547 | } | ||||
| 1548 | else { | ||||
| 1549 | $value = $self->escape_value($value) unless($self->{opt}->{noescape}); | ||||
| 1550 | if($key eq $self->{opt}->{contentkey}) { | ||||
| 1551 | $text_content = $value; | ||||
| 1552 | } | ||||
| 1553 | else { | ||||
| 1554 | push @result, "\n$indent " . ' ' x length($name) | ||||
| 1555 | if($self->{opt}->{attrindent} and !$first_arg); | ||||
| 1556 | push @result, ' ', $key, '="', $value , '"'; | ||||
| 1557 | $first_arg = 0; | ||||
| 1558 | } | ||||
| 1559 | } | ||||
| 1560 | } | ||||
| 1561 | } | ||||
| 1562 | else { | ||||
| 1563 | $text_content = ''; | ||||
| 1564 | } | ||||
| 1565 | |||||
| 1566 | if(@nested or defined($text_content)) { | ||||
| 1567 | if($named) { | ||||
| 1568 | push @result, ">"; | ||||
| 1569 | if(defined($text_content)) { | ||||
| 1570 | push @result, $text_content; | ||||
| 1571 | $nested[0] =~ s/^\s+// if(@nested); | ||||
| 1572 | } | ||||
| 1573 | else { | ||||
| 1574 | push @result, $nl; | ||||
| 1575 | } | ||||
| 1576 | if(@nested) { | ||||
| 1577 | push @result, @nested, $indent; | ||||
| 1578 | } | ||||
| 1579 | push @result, '</', $name, ">", $nl; | ||||
| 1580 | } | ||||
| 1581 | else { | ||||
| 1582 | push @result, @nested; # Special case if no root elements | ||||
| 1583 | } | ||||
| 1584 | } | ||||
| 1585 | else { | ||||
| 1586 | push @result, " />", $nl; | ||||
| 1587 | } | ||||
| 1588 | $self->{nsup}->pop_context() if($self->{nsup}); | ||||
| 1589 | } | ||||
| 1590 | |||||
| 1591 | |||||
| 1592 | # Handle arrayrefs | ||||
| 1593 | |||||
| 1594 | elsif(UNIVERSAL::isa($ref, 'ARRAY')) { | ||||
| 1595 | foreach $value (@$ref) { | ||||
| 1596 | next if !defined($value) and $self->{opt}->{suppressempty}; | ||||
| 1597 | if(!ref($value)) { | ||||
| 1598 | push @result, | ||||
| 1599 | $indent, '<', $name, '>', | ||||
| 1600 | ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), | ||||
| 1601 | '</', $name, ">$nl"; | ||||
| 1602 | } | ||||
| 1603 | elsif(UNIVERSAL::isa($value, 'HASH')) { | ||||
| 1604 | push @result, $self->value_to_xml($value, $name, $indent); | ||||
| 1605 | } | ||||
| 1606 | else { | ||||
| 1607 | push @result, | ||||
| 1608 | $indent, '<', $name, ">$nl", | ||||
| 1609 | $self->value_to_xml($value, 'anon', "$indent "), | ||||
| 1610 | $indent, '</', $name, ">$nl"; | ||||
| 1611 | } | ||||
| 1612 | } | ||||
| 1613 | } | ||||
| 1614 | |||||
| 1615 | else { | ||||
| 1616 | croak "Can't encode a value of type: " . ref($ref); | ||||
| 1617 | } | ||||
| 1618 | |||||
| 1619 | |||||
| 1620 | pop @{$self->{_ancestors}} if(ref($ref)); | ||||
| 1621 | |||||
| 1622 | return(join('', @result)); | ||||
| 1623 | } | ||||
| 1624 | |||||
| 1625 | |||||
| 1626 | ############################################################################## | ||||
| 1627 | # Method: sorted_keys() | ||||
| 1628 | # | ||||
| 1629 | # Returns the keys of the referenced hash sorted into alphabetical order, but | ||||
| 1630 | # with the 'key' key (as in KeyAttr) first, if there is one. | ||||
| 1631 | # | ||||
| 1632 | |||||
| 1633 | sub sorted_keys { | ||||
| 1634 | my($self, $name, $ref) = @_; | ||||
| 1635 | |||||
| 1636 | return keys %$ref if $self->{opt}->{nosort}; | ||||
| 1637 | |||||
| 1638 | my %hash = %$ref; | ||||
| 1639 | my $keyattr = $self->{opt}->{keyattr}; | ||||
| 1640 | |||||
| 1641 | my @key; | ||||
| 1642 | |||||
| 1643 | if(ref $keyattr eq 'HASH') { | ||||
| 1644 | if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { | ||||
| 1645 | push @key, $keyattr->{$name}->[0]; | ||||
| 1646 | delete $hash{$keyattr->{$name}->[0]}; | ||||
| 1647 | } | ||||
| 1648 | } | ||||
| 1649 | elsif(ref $keyattr eq 'ARRAY') { | ||||
| 1650 | foreach (@{$keyattr}) { | ||||
| 1651 | if(exists $hash{$_}) { | ||||
| 1652 | push @key, $_; | ||||
| 1653 | delete $hash{$_}; | ||||
| 1654 | last; | ||||
| 1655 | } | ||||
| 1656 | } | ||||
| 1657 | } | ||||
| 1658 | |||||
| 1659 | return(@key, sort keys %hash); | ||||
| 1660 | } | ||||
| 1661 | |||||
| 1662 | ############################################################################## | ||||
| 1663 | # Method: escape_value() | ||||
| 1664 | # | ||||
| 1665 | # Helper routine for automatically escaping values for XMLout(). | ||||
| 1666 | # Expects a scalar data value. Returns escaped version. | ||||
| 1667 | # | ||||
| 1668 | |||||
| 1669 | sub escape_value { | ||||
| 1670 | my($self, $data) = @_; | ||||
| 1671 | |||||
| 1672 | return '' unless(defined($data)); | ||||
| 1673 | |||||
| 1674 | $data =~ s/&/&/sg; | ||||
| 1675 | $data =~ s/</</sg; | ||||
| 1676 | $data =~ s/>/>/sg; | ||||
| 1677 | $data =~ s/"/"/sg; | ||||
| 1678 | |||||
| 1679 | my $level = $self->{opt}->{numericescape} or return $data; | ||||
| 1680 | |||||
| 1681 | return $self->numeric_escape($data, $level); | ||||
| 1682 | } | ||||
| 1683 | |||||
| 1684 | sub numeric_escape { | ||||
| 1685 | my($self, $data, $level) = @_; | ||||
| 1686 | |||||
| 1687 | 3 | 775µs | 2 | 203µs | # spent 199µs (195+4) within XML::Simple::BEGIN@1687 which was called
# once (195µs+4µs) by SimpleDB::Client::BEGIN@48 at line 1687 # spent 199µs making 1 call to XML::Simple::BEGIN@1687
# spent 4µs making 1 call to utf8::import |
| 1688 | |||||
| 1689 | if($self->{opt}->{numericescape} eq '2') { | ||||
| 1690 | $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; | ||||
| 1691 | } | ||||
| 1692 | else { | ||||
| 1693 | $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; | ||||
| 1694 | } | ||||
| 1695 | |||||
| 1696 | return $data; | ||||
| 1697 | } | ||||
| 1698 | |||||
| 1699 | |||||
| 1700 | ############################################################################## | ||||
| 1701 | # Method: hash_to_array() | ||||
| 1702 | # | ||||
| 1703 | # Helper routine for value_to_xml(). | ||||
| 1704 | # Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a | ||||
| 1705 | # reference to the array on success or the original hash if unfolding is | ||||
| 1706 | # not possible. | ||||
| 1707 | # | ||||
| 1708 | |||||
| 1709 | sub hash_to_array { | ||||
| 1710 | my $self = shift; | ||||
| 1711 | my $parent = shift; | ||||
| 1712 | my $hashref = shift; | ||||
| 1713 | |||||
| 1714 | my $arrayref = []; | ||||
| 1715 | |||||
| 1716 | my($key, $value); | ||||
| 1717 | |||||
| 1718 | my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; | ||||
| 1719 | foreach $key (@keys) { | ||||
| 1720 | $value = $hashref->{$key}; | ||||
| 1721 | return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); | ||||
| 1722 | |||||
| 1723 | if(ref($self->{opt}->{keyattr}) eq 'HASH') { | ||||
| 1724 | return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); | ||||
| 1725 | push @$arrayref, $self->copy_hash( | ||||
| 1726 | $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key | ||||
| 1727 | ); | ||||
| 1728 | } | ||||
| 1729 | else { | ||||
| 1730 | push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); | ||||
| 1731 | } | ||||
| 1732 | } | ||||
| 1733 | |||||
| 1734 | return($arrayref); | ||||
| 1735 | } | ||||
| 1736 | |||||
| 1737 | |||||
| 1738 | ############################################################################## | ||||
| 1739 | # Method: copy_hash() | ||||
| 1740 | # | ||||
| 1741 | # Helper routine for hash_to_array(). When unfolding a hash of hashes into | ||||
| 1742 | # an array of hashes, we need to copy the key from the outer hash into the | ||||
| 1743 | # inner hash. This routine makes a copy of the original hash so we don't | ||||
| 1744 | # destroy the original data structure. You might wish to override this | ||||
| 1745 | # method if you're using tied hashes and don't want them to get untied. | ||||
| 1746 | # | ||||
| 1747 | |||||
| 1748 | sub copy_hash { | ||||
| 1749 | my($self, $orig, @extra) = @_; | ||||
| 1750 | |||||
| 1751 | return { @extra, %$orig }; | ||||
| 1752 | } | ||||
| 1753 | |||||
| 1754 | ############################################################################## | ||||
| 1755 | # Methods required for building trees from SAX events | ||||
| 1756 | ############################################################################## | ||||
| 1757 | |||||
| 1758 | # spent 20µs within XML::Simple::start_document which was called 3 times, avg 7µs/call:
# 3 times (20µs+0s) by XML::SAX::Base::start_document at line 1259 of XML/SAX/Base.pm, avg 7µs/call | ||||
| 1759 | 3 | 2µs | my $self = shift; | ||
| 1760 | |||||
| 1761 | 3 | 2µs | $self->handle_options('in') unless($self->{opt}); | ||
| 1762 | |||||
| 1763 | 3 | 3µs | $self->{lists} = []; | ||
| 1764 | 3 | 15µs | $self->{curlist} = $self->{tree} = []; | ||
| 1765 | } | ||||
| 1766 | |||||
| 1767 | |||||
| 1768 | # spent 337µs within XML::Simple::start_element which was called 35 times, avg 10µs/call:
# 32 times (272µs+0s) by XML::SAX::Base::__ANON__[/usr/local/lib/perl5/site_perl/5.10.1/XML/SAX/Base.pm:292] at line 292 of XML/SAX/Base.pm, avg 9µs/call
# 3 times (64µs+0s) by XML::SAX::Base::start_element at line 293 of XML/SAX/Base.pm, avg 21µs/call | ||||
| 1769 | 35 | 7µs | my $self = shift; | ||
| 1770 | 35 | 7µs | my $element = shift; | ||
| 1771 | |||||
| 1772 | 35 | 15µs | my $name = $element->{Name}; | ||
| 1773 | 35 | 16µs | if($self->{opt}->{nsexpand}) { | ||
| 1774 | $name = $element->{LocalName} || ''; | ||||
| 1775 | if($element->{NamespaceURI}) { | ||||
| 1776 | $name = '{' . $element->{NamespaceURI} . '}' . $name; | ||||
| 1777 | } | ||||
| 1778 | } | ||||
| 1779 | 35 | 20µs | my $attributes = {}; | ||
| 1780 | 35 | 23µs | if($element->{Attributes}) { # Might be undef | ||
| 1781 | 35 | 68µs | foreach my $attr (values %{$element->{Attributes}}) { | ||
| 1782 | 3 | 6µs | if($self->{opt}->{nsexpand}) { | ||
| 1783 | my $name = $attr->{LocalName} || ''; | ||||
| 1784 | if($attr->{NamespaceURI}) { | ||||
| 1785 | $name = '{' . $attr->{NamespaceURI} . '}' . $name | ||||
| 1786 | } | ||||
| 1787 | $name = 'xmlns' if($name eq $bad_def_ns_jcn); | ||||
| 1788 | $attributes->{$name} = $attr->{Value}; | ||||
| 1789 | } | ||||
| 1790 | else { | ||||
| 1791 | 3 | 9µs | $attributes->{$attr->{Name}} = $attr->{Value}; | ||
| 1792 | } | ||||
| 1793 | } | ||||
| 1794 | } | ||||
| 1795 | 35 | 22µs | my $newlist = [ $attributes ]; | ||
| 1796 | 35 | 26µs | push @{ $self->{lists} }, $self->{curlist}; | ||
| 1797 | 35 | 30µs | push @{ $self->{curlist} }, $name => $newlist; | ||
| 1798 | 35 | 123µs | $self->{curlist} = $newlist; | ||
| 1799 | } | ||||
| 1800 | |||||
| 1801 | |||||
| 1802 | # spent 176µs within XML::Simple::characters which was called 28 times, avg 6µs/call:
# 25 times (147µs+0s) by XML::SAX::Base::__ANON__[/usr/local/lib/perl5/site_perl/5.10.1/XML/SAX/Base.pm:200] at line 200 of XML/SAX/Base.pm, avg 6µs/call
# 3 times (29µs+0s) by XML::SAX::Base::characters at line 201 of XML/SAX/Base.pm, avg 10µs/call | ||||
| 1803 | 28 | 6µs | my $self = shift; | ||
| 1804 | 28 | 6µs | my $chars = shift; | ||
| 1805 | |||||
| 1806 | 28 | 12µs | my $text = $chars->{Data}; | ||
| 1807 | 28 | 7µs | my $clist = $self->{curlist}; | ||
| 1808 | 28 | 39µs | my $pos = $#$clist; | ||
| 1809 | |||||
| 1810 | 28 | 108µs | if ($pos > 0 and $clist->[$pos - 1] eq '0') { | ||
| 1811 | $clist->[$pos] .= $text; | ||||
| 1812 | } | ||||
| 1813 | else { | ||||
| 1814 | 28 | 40µs | push @$clist, 0 => $text; | ||
| 1815 | } | ||||
| 1816 | } | ||||
| 1817 | |||||
| 1818 | |||||
| 1819 | # spent 67µs within XML::Simple::end_element which was called 35 times, avg 2µs/call:
# 32 times (56µs+0s) by XML::SAX::Base::__ANON__[/usr/local/lib/perl5/site_perl/5.10.1/XML/SAX/Base.pm:2201] at line 2201 of XML/SAX/Base.pm, avg 2µs/call
# 3 times (11µs+0s) by XML::SAX::Base::end_element at line 2202 of XML/SAX/Base.pm, avg 4µs/call | ||||
| 1820 | 35 | 7µs | my $self = shift; | ||
| 1821 | |||||
| 1822 | 35 | 121µs | $self->{curlist} = pop @{ $self->{lists} }; | ||
| 1823 | } | ||||
| 1824 | |||||
| 1825 | |||||
| 1826 | # spent 17µs within XML::Simple::end_document which was called 3 times, avg 6µs/call:
# 3 times (17µs+0s) by XML::SAX::Base::end_document at line 1443 of XML/SAX/Base.pm, avg 6µs/call | ||||
| 1827 | 3 | 2µs | my $self = shift; | ||
| 1828 | |||||
| 1829 | 3 | 2µs | delete($self->{curlist}); | ||
| 1830 | 3 | 2µs | delete($self->{lists}); | ||
| 1831 | |||||
| 1832 | 3 | 1µs | my $tree = $self->{tree}; | ||
| 1833 | 3 | 1µs | delete($self->{tree}); | ||
| 1834 | |||||
| 1835 | |||||
| 1836 | # Return tree as-is to XMLin() | ||||
| 1837 | |||||
| 1838 | 3 | 12µs | return($tree) if($self->{nocollapse}); | ||
| 1839 | |||||
| 1840 | |||||
| 1841 | # Or collapse it before returning it to SAX parser class | ||||
| 1842 | |||||
| 1843 | if($self->{opt}->{keeproot}) { | ||||
| 1844 | $tree = $self->collapse({}, @$tree); | ||||
| 1845 | } | ||||
| 1846 | else { | ||||
| 1847 | $tree = $self->collapse(@{$tree->[1]}); | ||||
| 1848 | } | ||||
| 1849 | |||||
| 1850 | if($self->{opt}->{datahandler}) { | ||||
| 1851 | return($self->{opt}->{datahandler}->($self, $tree)); | ||||
| 1852 | } | ||||
| 1853 | |||||
| 1854 | return($tree); | ||||
| 1855 | } | ||||
| 1856 | |||||
| 1857 | 1 | 1µs | *xml_in = \&XMLin; | ||
| 1858 | 1 | 300ns | *xml_out = \&XMLout; | ||
| 1859 | |||||
| 1860 | 1 | 28µs | 1; | ||
| 1861 | |||||
| 1862 | __END__ | ||||
| 1863 | |||||
| 1864 | =head1 QUICK START | ||||
| 1865 | |||||
| 1866 | Say you have a script called B<foo> and a file of configuration options | ||||
| 1867 | called B<foo.xml> containing this: | ||||
| 1868 | |||||
| 1869 | <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> | ||||
| 1870 | <server name="sahara" osname="solaris" osversion="2.6"> | ||||
| 1871 | <address>10.0.0.101</address> | ||||
| 1872 | <address>10.0.1.101</address> | ||||
| 1873 | </server> | ||||
| 1874 | <server name="gobi" osname="irix" osversion="6.5"> | ||||
| 1875 | <address>10.0.0.102</address> | ||||
| 1876 | </server> | ||||
| 1877 | <server name="kalahari" osname="linux" osversion="2.0.34"> | ||||
| 1878 | <address>10.0.0.103</address> | ||||
| 1879 | <address>10.0.1.103</address> | ||||
| 1880 | </server> | ||||
| 1881 | </config> | ||||
| 1882 | |||||
| 1883 | The following lines of code in B<foo>: | ||||
| 1884 | |||||
| 1885 | use XML::Simple; | ||||
| 1886 | |||||
| 1887 | my $config = XMLin(); | ||||
| 1888 | |||||
| 1889 | will 'slurp' the configuration options into the hashref $config (because no | ||||
| 1890 | arguments are passed to C<XMLin()> the name and location of the XML file will | ||||
| 1891 | be inferred from name and location of the script). You can dump out the | ||||
| 1892 | contents of the hashref using Data::Dumper: | ||||
| 1893 | |||||
| 1894 | use Data::Dumper; | ||||
| 1895 | |||||
| 1896 | print Dumper($config); | ||||
| 1897 | |||||
| 1898 | which will produce something like this (formatting has been adjusted for | ||||
| 1899 | brevity): | ||||
| 1900 | |||||
| 1901 | { | ||||
| 1902 | 'logdir' => '/var/log/foo/', | ||||
| 1903 | 'debugfile' => '/tmp/foo.debug', | ||||
| 1904 | 'server' => { | ||||
| 1905 | 'sahara' => { | ||||
| 1906 | 'osversion' => '2.6', | ||||
| 1907 | 'osname' => 'solaris', | ||||
| 1908 | 'address' => [ '10.0.0.101', '10.0.1.101' ] | ||||
| 1909 | }, | ||||
| 1910 | 'gobi' => { | ||||
| 1911 | 'osversion' => '6.5', | ||||
| 1912 | 'osname' => 'irix', | ||||
| 1913 | 'address' => '10.0.0.102' | ||||
| 1914 | }, | ||||
| 1915 | 'kalahari' => { | ||||
| 1916 | 'osversion' => '2.0.34', | ||||
| 1917 | 'osname' => 'linux', | ||||
| 1918 | 'address' => [ '10.0.0.103', '10.0.1.103' ] | ||||
| 1919 | } | ||||
| 1920 | } | ||||
| 1921 | } | ||||
| 1922 | |||||
| 1923 | Your script could then access the name of the log directory like this: | ||||
| 1924 | |||||
| 1925 | print $config->{logdir}; | ||||
| 1926 | |||||
| 1927 | similarly, the second address on the server 'kalahari' could be referenced as: | ||||
| 1928 | |||||
| 1929 | print $config->{server}->{kalahari}->{address}->[1]; | ||||
| 1930 | |||||
| 1931 | What could be simpler? (Rhetorical). | ||||
| 1932 | |||||
| 1933 | For simple requirements, that's really all there is to it. If you want to | ||||
| 1934 | store your XML in a different directory or file, or pass it in as a string or | ||||
| 1935 | even pass it in via some derivative of an IO::Handle, you'll need to check out | ||||
| 1936 | L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that | ||||
| 1937 | neat little transformation that produced $config->{server}) you'll find options | ||||
| 1938 | for that as well. | ||||
| 1939 | |||||
| 1940 | If you want to generate XML (for example to write a modified version of | ||||
| 1941 | $config back out as XML), check out C<XMLout()>. | ||||
| 1942 | |||||
| 1943 | If your needs are not so simple, this may not be the module for you. In that | ||||
| 1944 | case, you might want to read L<"WHERE TO FROM HERE?">. | ||||
| 1945 | |||||
| 1946 | =head1 DESCRIPTION | ||||
| 1947 | |||||
| 1948 | The XML::Simple module provides a simple API layer on top of an underlying XML | ||||
| 1949 | parsing module (either XML::Parser or one of the SAX2 parser modules). Two | ||||
| 1950 | functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicity | ||||
| 1951 | request the lower case versions of the function names: C<xml_in()> and | ||||
| 1952 | C<xml_out()>. | ||||
| 1953 | |||||
| 1954 | The simplest approach is to call these two functions directly, but an | ||||
| 1955 | optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) | ||||
| 1956 | allows them to be called as methods of an B<XML::Simple> object. The object | ||||
| 1957 | interface can also be used at either end of a SAX pipeline. | ||||
| 1958 | |||||
| 1959 | =head2 XMLin() | ||||
| 1960 | |||||
| 1961 | Parses XML formatted data and returns a reference to a data structure which | ||||
| 1962 | contains the same information in a more readily accessible form. (Skip | ||||
| 1963 | down to L<"EXAMPLES"> below, for more sample code). | ||||
| 1964 | |||||
| 1965 | C<XMLin()> accepts an optional XML specifier followed by zero or more 'name => | ||||
| 1966 | value' option pairs. The XML specifier can be one of the following: | ||||
| 1967 | |||||
| 1968 | =over 4 | ||||
| 1969 | |||||
| 1970 | =item A filename | ||||
| 1971 | |||||
| 1972 | If the filename contains no directory components C<XMLin()> will look for the | ||||
| 1973 | file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the | ||||
| 1974 | current directory if the SearchPath option is not defined. eg: | ||||
| 1975 | |||||
| 1976 | $ref = XMLin('/etc/params.xml'); | ||||
| 1977 | |||||
| 1978 | Note, the filename '-' can be used to parse from STDIN. | ||||
| 1979 | |||||
| 1980 | =item undef | ||||
| 1981 | |||||
| 1982 | If there is no XML specifier, C<XMLin()> will check the script directory and | ||||
| 1983 | each of the SearchPath directories for a file with the same name as the script | ||||
| 1984 | but with the extension '.xml'. Note: if you wish to specify options, you | ||||
| 1985 | must specify the value 'undef'. eg: | ||||
| 1986 | |||||
| 1987 | $ref = XMLin(undef, ForceArray => 1); | ||||
| 1988 | |||||
| 1989 | =item A string of XML | ||||
| 1990 | |||||
| 1991 | A string containing XML (recognised by the presence of '<' and '>' characters) | ||||
| 1992 | will be parsed directly. eg: | ||||
| 1993 | |||||
| 1994 | $ref = XMLin('<opt username="bob" password="flurp" />'); | ||||
| 1995 | |||||
| 1996 | =item An IO::Handle object | ||||
| 1997 | |||||
| 1998 | An IO::Handle object will be read to EOF and its contents parsed. eg: | ||||
| 1999 | |||||
| 2000 | $fh = IO::File->new('/etc/params.xml'); | ||||
| 2001 | $ref = XMLin($fh); | ||||
| 2002 | |||||
| 2003 | =back | ||||
| 2004 | |||||
| 2005 | =head2 XMLout() | ||||
| 2006 | |||||
| 2007 | Takes a data structure (generally a hashref) and returns an XML encoding of | ||||
| 2008 | that structure. If the resulting XML is parsed using C<XMLin()>, it should | ||||
| 2009 | return a data structure equivalent to the original (see caveats below). | ||||
| 2010 | |||||
| 2011 | The C<XMLout()> function can also be used to output the XML as SAX events | ||||
| 2012 | see the C<Handler> option and L<"SAX SUPPORT"> for more details). | ||||
| 2013 | |||||
| 2014 | When translating hashes to XML, hash keys which have a leading '-' will be | ||||
| 2015 | silently skipped. This is the approved method for marking elements of a | ||||
| 2016 | data structure which should be ignored by C<XMLout>. (Note: If these items | ||||
| 2017 | were not skipped the key names would be emitted as element or attribute names | ||||
| 2018 | with a leading '-' which would not be valid XML). | ||||
| 2019 | |||||
| 2020 | =head2 Caveats | ||||
| 2021 | |||||
| 2022 | Some care is required in creating data structures which will be passed to | ||||
| 2023 | C<XMLout()>. Hash keys from the data structure will be encoded as either XML | ||||
| 2024 | element names or attribute names. Therefore, you should use hash key names | ||||
| 2025 | which conform to the relatively strict XML naming rules: | ||||
| 2026 | |||||
| 2027 | Names in XML must begin with a letter. The remaining characters may be | ||||
| 2028 | letters, digits, hyphens (-), underscores (_) or full stops (.). It is also | ||||
| 2029 | allowable to include one colon (:) in an element name but this should only be | ||||
| 2030 | used when working with namespaces (B<XML::Simple> can only usefully work with | ||||
| 2031 | namespaces when teamed with a SAX Parser). | ||||
| 2032 | |||||
| 2033 | You can use other punctuation characters in hash values (just not in hash | ||||
| 2034 | keys) however B<XML::Simple> does not support dumping binary data. | ||||
| 2035 | |||||
| 2036 | If you break these rules, the current implementation of C<XMLout()> will | ||||
| 2037 | simply emit non-compliant XML which will be rejected if you try to read it | ||||
| 2038 | back in. (A later version of B<XML::Simple> might take a more proactive | ||||
| 2039 | approach). | ||||
| 2040 | |||||
| 2041 | Note also that although you can nest hashes and arrays to arbitrary levels, | ||||
| 2042 | circular data structures are not supported and will cause C<XMLout()> to die. | ||||
| 2043 | |||||
| 2044 | If you wish to 'round-trip' arbitrary data structures from Perl to XML and back | ||||
| 2045 | to Perl, then you should probably disable array folding (using the KeyAttr | ||||
| 2046 | option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the | ||||
| 2047 | expected results, you may prefer to use L<XML::Dumper> which is designed for | ||||
| 2048 | exactly that purpose. | ||||
| 2049 | |||||
| 2050 | Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs. | ||||
| 2051 | |||||
| 2052 | |||||
| 2053 | =head1 OPTIONS | ||||
| 2054 | |||||
| 2055 | B<XML::Simple> supports a number of options (in fact as each release of | ||||
| 2056 | B<XML::Simple> adds more options, the module's claim to the name 'Simple' | ||||
| 2057 | becomes increasingly tenuous). If you find yourself repeatedly having to | ||||
| 2058 | specify the same options, you might like to investigate L<"OPTIONAL OO | ||||
| 2059 | INTERFACE"> below. | ||||
| 2060 | |||||
| 2061 | If you can't be bothered reading the documentation, refer to | ||||
| 2062 | L<"STRICT MODE"> to automatically catch common mistakes. | ||||
| 2063 | |||||
| 2064 | Because there are so many options, it's hard for new users to know which ones | ||||
| 2065 | are important, so here are the two you really need to know about: | ||||
| 2066 | |||||
| 2067 | =over 4 | ||||
| 2068 | |||||
| 2069 | =item * | ||||
| 2070 | |||||
| 2071 | check out C<ForceArray> because you'll almost certainly want to turn it on | ||||
| 2072 | |||||
| 2073 | =item * | ||||
| 2074 | |||||
| 2075 | make sure you know what the C<KeyAttr> option does and what its default value is | ||||
| 2076 | because it may surprise you otherwise (note in particular that 'KeyAttr' | ||||
| 2077 | affects both C<XMLin> and C<XMLout>) | ||||
| 2078 | |||||
| 2079 | =back | ||||
| 2080 | |||||
| 2081 | The option name headings below have a trailing 'comment' - a hash followed by | ||||
| 2082 | two pieces of metadata: | ||||
| 2083 | |||||
| 2084 | =over 4 | ||||
| 2085 | |||||
| 2086 | =item * | ||||
| 2087 | |||||
| 2088 | Options are marked with 'I<in>' if they are recognised by C<XMLin()> and | ||||
| 2089 | 'I<out>' if they are recognised by C<XMLout()>. | ||||
| 2090 | |||||
| 2091 | =item * | ||||
| 2092 | |||||
| 2093 | Each option is also flagged to indicate whether it is: | ||||
| 2094 | |||||
| 2095 | 'important' - don't use the module until you understand this one | ||||
| 2096 | 'handy' - you can skip this on the first time through | ||||
| 2097 | 'advanced' - you can skip this on the second time through | ||||
| 2098 | 'SAX only' - don't worry about this unless you're using SAX (or | ||||
| 2099 | alternatively if you need this, you also need SAX) | ||||
| 2100 | 'seldom used' - you'll probably never use this unless you were the | ||||
| 2101 | person that requested the feature | ||||
| 2102 | |||||
| 2103 | =back | ||||
| 2104 | |||||
| 2105 | The options are listed alphabetically: | ||||
| 2106 | |||||
| 2107 | Note: option names are no longer case sensitive so you can use the mixed case | ||||
| 2108 | versions shown here; all lower case as required by versions 2.03 and earlier; | ||||
| 2109 | or you can add underscores between the words (eg: key_attr). | ||||
| 2110 | |||||
| 2111 | |||||
| 2112 | =head2 AttrIndent => 1 I<# out - handy> | ||||
| 2113 | |||||
| 2114 | When you are using C<XMLout()>, enable this option to have attributes printed | ||||
| 2115 | one-per-line with sensible indentation rather than all on one line. | ||||
| 2116 | |||||
| 2117 | =head2 Cache => [ cache schemes ] I<# in - advanced> | ||||
| 2118 | |||||
| 2119 | Because loading the B<XML::Parser> module and parsing an XML file can consume a | ||||
| 2120 | significant number of CPU cycles, it is often desirable to cache the output of | ||||
| 2121 | C<XMLin()> for later reuse. | ||||
| 2122 | |||||
| 2123 | When parsing from a named file, B<XML::Simple> supports a number of caching | ||||
| 2124 | schemes. The 'Cache' option may be used to specify one or more schemes (using | ||||
| 2125 | an anonymous array). Each scheme will be tried in turn in the hope of finding | ||||
| 2126 | a cached pre-parsed representation of the XML file. If no cached copy is | ||||
| 2127 | found, the file will be parsed and the first cache scheme in the list will be | ||||
| 2128 | used to save a copy of the results. The following cache schemes have been | ||||
| 2129 | implemented: | ||||
| 2130 | |||||
| 2131 | =over 4 | ||||
| 2132 | |||||
| 2133 | =item storable | ||||
| 2134 | |||||
| 2135 | Utilises B<Storable.pm> to read/write a cache file with the same name as the | ||||
| 2136 | XML file but with the extension .stor | ||||
| 2137 | |||||
| 2138 | =item memshare | ||||
| 2139 | |||||
| 2140 | When a file is first parsed, a copy of the resulting data structure is retained | ||||
| 2141 | in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse | ||||
| 2142 | the same file will return a reference to this structure. This cached version | ||||
| 2143 | will persist only for the life of the Perl interpreter (which in the case of | ||||
| 2144 | mod_perl for example, may be some significant time). | ||||
| 2145 | |||||
| 2146 | Because each caller receives a reference to the same data structure, a change | ||||
| 2147 | made by one caller will be visible to all. For this reason, the reference | ||||
| 2148 | returned should be treated as read-only. | ||||
| 2149 | |||||
| 2150 | =item memcopy | ||||
| 2151 | |||||
| 2152 | This scheme works identically to 'memshare' (above) except that each caller | ||||
| 2153 | receives a reference to a new data structure which is a copy of the cached | ||||
| 2154 | version. Copying the data structure will add a little processing overhead, | ||||
| 2155 | therefore this scheme should only be used where the caller intends to modify | ||||
| 2156 | the data structure (or wishes to protect itself from others who might). This | ||||
| 2157 | scheme uses B<Storable.pm> to perform the copy. | ||||
| 2158 | |||||
| 2159 | =back | ||||
| 2160 | |||||
| 2161 | Warning! The memory-based caching schemes compare the timestamp on the file to | ||||
| 2162 | the time when it was last parsed. If the file is stored on an NFS filesystem | ||||
| 2163 | (or other network share) and the clock on the file server is not exactly | ||||
| 2164 | synchronised with the clock where your script is run, updates to the source XML | ||||
| 2165 | file may appear to be ignored. | ||||
| 2166 | |||||
| 2167 | =head2 ContentKey => 'keyname' I<# in+out - seldom used> | ||||
| 2168 | |||||
| 2169 | When text content is parsed to a hash value, this option let's you specify a | ||||
| 2170 | name for the hash key to override the default 'content'. So for example: | ||||
| 2171 | |||||
| 2172 | XMLin('<opt one="1">Text</opt>', ContentKey => 'text') | ||||
| 2173 | |||||
| 2174 | will parse to: | ||||
| 2175 | |||||
| 2176 | { 'one' => 1, 'text' => 'Text' } | ||||
| 2177 | |||||
| 2178 | instead of: | ||||
| 2179 | |||||
| 2180 | { 'one' => 1, 'content' => 'Text' } | ||||
| 2181 | |||||
| 2182 | C<XMLout()> will also honour the value of this option when converting a hashref | ||||
| 2183 | to XML. | ||||
| 2184 | |||||
| 2185 | You can also prefix your selected key name with a '-' character to have | ||||
| 2186 | C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after | ||||
| 2187 | array folding. For example: | ||||
| 2188 | |||||
| 2189 | XMLin( | ||||
| 2190 | '<opt><item name="one">First</item><item name="two">Second</item></opt>', | ||||
| 2191 | KeyAttr => {item => 'name'}, | ||||
| 2192 | ForceArray => [ 'item' ], | ||||
| 2193 | ContentKey => '-content' | ||||
| 2194 | ) | ||||
| 2195 | |||||
| 2196 | will parse to: | ||||
| 2197 | |||||
| 2198 | { | ||||
| 2199 | 'item' => { | ||||
| 2200 | 'one' => 'First' | ||||
| 2201 | 'two' => 'Second' | ||||
| 2202 | } | ||||
| 2203 | } | ||||
| 2204 | |||||
| 2205 | rather than this (without the '-'): | ||||
| 2206 | |||||
| 2207 | { | ||||
| 2208 | 'item' => { | ||||
| 2209 | 'one' => { 'content' => 'First' } | ||||
| 2210 | 'two' => { 'content' => 'Second' } | ||||
| 2211 | } | ||||
| 2212 | } | ||||
| 2213 | |||||
| 2214 | =head2 DataHandler => code_ref I<# in - SAX only> | ||||
| 2215 | |||||
| 2216 | When you use an B<XML::Simple> object as a SAX handler, it will return a | ||||
| 2217 | 'simple tree' data structure in the same format as C<XMLin()> would return. If | ||||
| 2218 | this option is set (to a subroutine reference), then when the tree is built the | ||||
| 2219 | subroutine will be called and passed two arguments: a reference to the | ||||
| 2220 | B<XML::Simple> object and a reference to the data tree. The return value from | ||||
| 2221 | the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for | ||||
| 2222 | more details). | ||||
| 2223 | |||||
| 2224 | =head2 ForceArray => 1 I<# in - important> | ||||
| 2225 | |||||
| 2226 | This option should be set to '1' to force nested elements to be represented | ||||
| 2227 | as arrays even when there is only one. Eg, with ForceArray enabled, this | ||||
| 2228 | XML: | ||||
| 2229 | |||||
| 2230 | <opt> | ||||
| 2231 | <name>value</name> | ||||
| 2232 | </opt> | ||||
| 2233 | |||||
| 2234 | would parse to this: | ||||
| 2235 | |||||
| 2236 | { | ||||
| 2237 | 'name' => [ | ||||
| 2238 | 'value' | ||||
| 2239 | ] | ||||
| 2240 | } | ||||
| 2241 | |||||
| 2242 | instead of this (the default): | ||||
| 2243 | |||||
| 2244 | { | ||||
| 2245 | 'name' => 'value' | ||||
| 2246 | } | ||||
| 2247 | |||||
| 2248 | This option is especially useful if the data structure is likely to be written | ||||
| 2249 | back out as XML and the default behaviour of rolling single nested elements up | ||||
| 2250 | into attributes is not desirable. | ||||
| 2251 | |||||
| 2252 | If you are using the array folding feature, you should almost certainly enable | ||||
| 2253 | this option. If you do not, single nested elements will not be parsed to | ||||
| 2254 | arrays and therefore will not be candidates for folding to a hash. (Given that | ||||
| 2255 | the default value of 'KeyAttr' enables array folding, the default value of this | ||||
| 2256 | option should probably also have been enabled too - sorry). | ||||
| 2257 | |||||
| 2258 | =head2 ForceArray => [ names ] I<# in - important> | ||||
| 2259 | |||||
| 2260 | This alternative (and preferred) form of the 'ForceArray' option allows you to | ||||
| 2261 | specify a list of element names which should always be forced into an array | ||||
| 2262 | representation, rather than the 'all or nothing' approach above. | ||||
| 2263 | |||||
| 2264 | It is also possible (since version 2.05) to include compiled regular | ||||
| 2265 | expressions in the list - any element names which match the pattern will be | ||||
| 2266 | forced to arrays. If the list contains only a single regex, then it is not | ||||
| 2267 | necessary to enclose it in an arrayref. Eg: | ||||
| 2268 | |||||
| 2269 | ForceArray => qr/_list$/ | ||||
| 2270 | |||||
| 2271 | =head2 ForceContent => 1 I<# in - seldom used> | ||||
| 2272 | |||||
| 2273 | When C<XMLin()> parses elements which have text content as well as attributes, | ||||
| 2274 | the text content must be represented as a hash value rather than a simple | ||||
| 2275 | scalar. This option allows you to force text content to always parse to | ||||
| 2276 | a hash value even when there are no attributes. So for example: | ||||
| 2277 | |||||
| 2278 | XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) | ||||
| 2279 | |||||
| 2280 | will parse to: | ||||
| 2281 | |||||
| 2282 | { | ||||
| 2283 | 'x' => { 'content' => 'text1' }, | ||||
| 2284 | 'y' => { 'a' => 2, 'content' => 'text2' } | ||||
| 2285 | } | ||||
| 2286 | |||||
| 2287 | instead of: | ||||
| 2288 | |||||
| 2289 | { | ||||
| 2290 | 'x' => 'text1', | ||||
| 2291 | 'y' => { 'a' => 2, 'content' => 'text2' } | ||||
| 2292 | } | ||||
| 2293 | |||||
| 2294 | =head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> | ||||
| 2295 | |||||
| 2296 | You can use this option to eliminate extra levels of indirection in your Perl | ||||
| 2297 | data structure. For example this XML: | ||||
| 2298 | |||||
| 2299 | <opt> | ||||
| 2300 | <searchpath> | ||||
| 2301 | <dir>/usr/bin</dir> | ||||
| 2302 | <dir>/usr/local/bin</dir> | ||||
| 2303 | <dir>/usr/X11/bin</dir> | ||||
| 2304 | </searchpath> | ||||
| 2305 | </opt> | ||||
| 2306 | |||||
| 2307 | Would normally be read into a structure like this: | ||||
| 2308 | |||||
| 2309 | { | ||||
| 2310 | searchpath => { | ||||
| 2311 | dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | ||||
| 2312 | } | ||||
| 2313 | } | ||||
| 2314 | |||||
| 2315 | But when read in with the appropriate value for 'GroupTags': | ||||
| 2316 | |||||
| 2317 | my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); | ||||
| 2318 | |||||
| 2319 | It will return this simpler structure: | ||||
| 2320 | |||||
| 2321 | { | ||||
| 2322 | searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] | ||||
| 2323 | } | ||||
| 2324 | |||||
| 2325 | The grouping element (C<< <searchpath> >> in the example) must not contain any | ||||
| 2326 | attributes or elements other than the grouped element. | ||||
| 2327 | |||||
| 2328 | You can specify multiple 'grouping element' to 'grouped element' mappings in | ||||
| 2329 | the same hashref. If this option is combined with C<KeyAttr>, the array | ||||
| 2330 | folding will occur first and then the grouped element names will be eliminated. | ||||
| 2331 | |||||
| 2332 | C<XMLout> will also use the grouptag mappings to re-introduce the tags around | ||||
| 2333 | the grouped elements. Beware though that this will occur in all places that | ||||
| 2334 | the 'grouping tag' name occurs - you probably don't want to use the same name | ||||
| 2335 | for elements as well as attributes. | ||||
| 2336 | |||||
| 2337 | =head2 Handler => object_ref I<# out - SAX only> | ||||
| 2338 | |||||
| 2339 | Use the 'Handler' option to have C<XMLout()> generate SAX events rather than | ||||
| 2340 | returning a string of XML. For more details see L<"SAX SUPPORT"> below. | ||||
| 2341 | |||||
| 2342 | Note: the current implementation of this option generates a string of XML | ||||
| 2343 | and uses a SAX parser to translate it into SAX events. The normal encoding | ||||
| 2344 | rules apply here - your data must be UTF8 encoded unless you specify an | ||||
| 2345 | alternative encoding via the 'XMLDecl' option; and by the time the data reaches | ||||
| 2346 | the handler object, it will be in UTF8 form regardless of the encoding you | ||||
| 2347 | supply. A future implementation of this option may generate the events | ||||
| 2348 | directly. | ||||
| 2349 | |||||
| 2350 | =head2 KeepRoot => 1 I<# in+out - handy> | ||||
| 2351 | |||||
| 2352 | In its attempt to return a data structure free of superfluous detail and | ||||
| 2353 | unnecessary levels of indirection, C<XMLin()> normally discards the root | ||||
| 2354 | element name. Setting the 'KeepRoot' option to '1' will cause the root element | ||||
| 2355 | name to be retained. So after executing this code: | ||||
| 2356 | |||||
| 2357 | $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) | ||||
| 2358 | |||||
| 2359 | You'll be able to reference the tempdir as | ||||
| 2360 | C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default | ||||
| 2361 | C<$config-E<gt>{tempdir}>. | ||||
| 2362 | |||||
| 2363 | Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the | ||||
| 2364 | data structure already contains a root element name and it is not necessary to | ||||
| 2365 | add another. | ||||
| 2366 | |||||
| 2367 | =head2 KeyAttr => [ list ] I<# in+out - important> | ||||
| 2368 | |||||
| 2369 | This option controls the 'array folding' feature which translates nested | ||||
| 2370 | elements from an array to a hash. It also controls the 'unfolding' of hashes | ||||
| 2371 | to arrays. | ||||
| 2372 | |||||
| 2373 | For example, this XML: | ||||
| 2374 | |||||
| 2375 | <opt> | ||||
| 2376 | <user login="grep" fullname="Gary R Epstein" /> | ||||
| 2377 | <user login="stty" fullname="Simon T Tyson" /> | ||||
| 2378 | </opt> | ||||
| 2379 | |||||
| 2380 | would, by default, parse to this: | ||||
| 2381 | |||||
| 2382 | { | ||||
| 2383 | 'user' => [ | ||||
| 2384 | { | ||||
| 2385 | 'login' => 'grep', | ||||
| 2386 | 'fullname' => 'Gary R Epstein' | ||||
| 2387 | }, | ||||
| 2388 | { | ||||
| 2389 | 'login' => 'stty', | ||||
| 2390 | 'fullname' => 'Simon T Tyson' | ||||
| 2391 | } | ||||
| 2392 | ] | ||||
| 2393 | } | ||||
| 2394 | |||||
| 2395 | If the option 'KeyAttr => "login"' were used to specify that the 'login' | ||||
| 2396 | attribute is a key, the same XML would parse to: | ||||
| 2397 | |||||
| 2398 | { | ||||
| 2399 | 'user' => { | ||||
| 2400 | 'stty' => { | ||||
| 2401 | 'fullname' => 'Simon T Tyson' | ||||
| 2402 | }, | ||||
| 2403 | 'grep' => { | ||||
| 2404 | 'fullname' => 'Gary R Epstein' | ||||
| 2405 | } | ||||
| 2406 | } | ||||
| 2407 | } | ||||
| 2408 | |||||
| 2409 | The key attribute names should be supplied in an arrayref if there is more | ||||
| 2410 | than one. C<XMLin()> will attempt to match attribute names in the order | ||||
| 2411 | supplied. C<XMLout()> will use the first attribute name supplied when | ||||
| 2412 | 'unfolding' a hash into an array. | ||||
| 2413 | |||||
| 2414 | Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do | ||||
| 2415 | not want folding on input or unfolding on output you must setting this option | ||||
| 2416 | to an empty list to disable the feature. | ||||
| 2417 | |||||
| 2418 | Note 2: If you wish to use this option, you should also enable the | ||||
| 2419 | C<ForceArray> option. Without 'ForceArray', a single nested element will be | ||||
| 2420 | rolled up into a scalar rather than an array and therefore will not be folded | ||||
| 2421 | (since only arrays get folded). | ||||
| 2422 | |||||
| 2423 | =head2 KeyAttr => { list } I<# in+out - important> | ||||
| 2424 | |||||
| 2425 | This alternative (and preferred) method of specifiying the key attributes | ||||
| 2426 | allows more fine grained control over which elements are folded and on which | ||||
| 2427 | attributes. For example the option 'KeyAttr => { package => 'id' } will cause | ||||
| 2428 | any package elements to be folded on the 'id' attribute. No other elements | ||||
| 2429 | which have an 'id' attribute will be folded at all. | ||||
| 2430 | |||||
| 2431 | Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">) | ||||
| 2432 | if this syntax is used and an element which does not have the specified key | ||||
| 2433 | attribute is encountered (eg: a 'package' element without an 'id' attribute, to | ||||
| 2434 | use the example above). Warnings will only be generated if B<-w> is in force. | ||||
| 2435 | |||||
| 2436 | Two further variations are made possible by prefixing a '+' or a '-' character | ||||
| 2437 | to the attribute name: | ||||
| 2438 | |||||
| 2439 | The option 'KeyAttr => { user => "+login" }' will cause this XML: | ||||
| 2440 | |||||
| 2441 | <opt> | ||||
| 2442 | <user login="grep" fullname="Gary R Epstein" /> | ||||
| 2443 | <user login="stty" fullname="Simon T Tyson" /> | ||||
| 2444 | </opt> | ||||
| 2445 | |||||
| 2446 | to parse to this data structure: | ||||
| 2447 | |||||
| 2448 | { | ||||
| 2449 | 'user' => { | ||||
| 2450 | 'stty' => { | ||||
| 2451 | 'fullname' => 'Simon T Tyson', | ||||
| 2452 | 'login' => 'stty' | ||||
| 2453 | }, | ||||
| 2454 | 'grep' => { | ||||
| 2455 | 'fullname' => 'Gary R Epstein', | ||||
| 2456 | 'login' => 'grep' | ||||
| 2457 | } | ||||
| 2458 | } | ||||
| 2459 | } | ||||
| 2460 | |||||
| 2461 | The '+' indicates that the value of the key attribute should be copied rather | ||||
| 2462 | than moved to the folded hash key. | ||||
| 2463 | |||||
| 2464 | A '-' prefix would produce this result: | ||||
| 2465 | |||||
| 2466 | { | ||||
| 2467 | 'user' => { | ||||
| 2468 | 'stty' => { | ||||
| 2469 | 'fullname' => 'Simon T Tyson', | ||||
| 2470 | '-login' => 'stty' | ||||
| 2471 | }, | ||||
| 2472 | 'grep' => { | ||||
| 2473 | 'fullname' => 'Gary R Epstein', | ||||
| 2474 | '-login' => 'grep' | ||||
| 2475 | } | ||||
| 2476 | } | ||||
| 2477 | } | ||||
| 2478 | |||||
| 2479 | As described earlier, C<XMLout> will ignore hash keys starting with a '-'. | ||||
| 2480 | |||||
| 2481 | =head2 NoAttr => 1 I<# in+out - handy> | ||||
| 2482 | |||||
| 2483 | When used with C<XMLout()>, the generated XML will contain no attributes. | ||||
| 2484 | All hash key/values will be represented as nested elements instead. | ||||
| 2485 | |||||
| 2486 | When used with C<XMLin()>, any attributes in the XML will be ignored. | ||||
| 2487 | |||||
| 2488 | =head2 NoEscape => 1 I<# out - seldom used> | ||||
| 2489 | |||||
| 2490 | By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and | ||||
| 2491 | '"' to '<', '>', '&' and '"' respectively. Use this option to | ||||
| 2492 | suppress escaping (presumably because you've already escaped the data in some | ||||
| 2493 | more sophisticated manner). | ||||
| 2494 | |||||
| 2495 | =head2 NoIndent => 1 I<# out - seldom used> | ||||
| 2496 | |||||
| 2497 | Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode. | ||||
| 2498 | With this option enabled, the XML output will all be on one line (unless there | ||||
| 2499 | are newlines in the data) - this may be easier for downstream processing. | ||||
| 2500 | |||||
| 2501 | =head2 NoSort => 1 I<# out - seldom used> | ||||
| 2502 | |||||
| 2503 | Newer versions of XML::Simple sort elements and attributes alphabetically (*), | ||||
| 2504 | by default. Enable this option to suppress the sorting - possibly for | ||||
| 2505 | backwards compatibility. | ||||
| 2506 | |||||
| 2507 | * Actually, sorting is alphabetical but 'key' attribute or element names (as in | ||||
| 2508 | 'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements | ||||
| 2509 | are sorted alphabetically by the value of the key field. | ||||
| 2510 | |||||
| 2511 | =head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> | ||||
| 2512 | |||||
| 2513 | This option controls how whitespace in text content is handled. Recognised | ||||
| 2514 | values for the option are: | ||||
| 2515 | |||||
| 2516 | =over 4 | ||||
| 2517 | |||||
| 2518 | =item * | ||||
| 2519 | |||||
| 2520 | 0 = (default) whitespace is passed through unaltered (except of course for the | ||||
| 2521 | normalisation of whitespace in attribute values which is mandated by the XML | ||||
| 2522 | recommendation) | ||||
| 2523 | |||||
| 2524 | =item * | ||||
| 2525 | |||||
| 2526 | 1 = whitespace is normalised in any value used as a hash key (normalising means | ||||
| 2527 | removing leading and trailing whitespace and collapsing sequences of whitespace | ||||
| 2528 | characters to a single space) | ||||
| 2529 | |||||
| 2530 | =item * | ||||
| 2531 | |||||
| 2532 | 2 = whitespace is normalised in all text content | ||||
| 2533 | |||||
| 2534 | =back | ||||
| 2535 | |||||
| 2536 | Note: you can spell this option with a 'z' if that is more natural for you. | ||||
| 2537 | |||||
| 2538 | =head2 NSExpand => 1 I<# in+out handy - SAX only> | ||||
| 2539 | |||||
| 2540 | This option controls namespace expansion - the translation of element and | ||||
| 2541 | attribute names of the form 'prefix:name' to '{uri}name'. For example the | ||||
| 2542 | element name 'xsl:template' might be expanded to: | ||||
| 2543 | '{http://www.w3.org/1999/XSL/Transform}template'. | ||||
| 2544 | |||||
| 2545 | By default, C<XMLin()> will return element names and attribute names exactly as | ||||
| 2546 | they appear in the XML. Setting this option to 1 will cause all element and | ||||
| 2547 | attribute names to be expanded to include their namespace prefix. | ||||
| 2548 | |||||
| 2549 | I<Note: You must be using a SAX parser for this option to work (ie: it does not | ||||
| 2550 | work with XML::Parser)>. | ||||
| 2551 | |||||
| 2552 | This option also controls whether C<XMLout()> performs the reverse translation | ||||
| 2553 | from '{uri}name' back to 'prefix:name'. The default is no translation. If | ||||
| 2554 | your data contains expanded names, you should set this option to 1 otherwise | ||||
| 2555 | C<XMLout> will emit XML which is not well formed. | ||||
| 2556 | |||||
| 2557 | I<Note: You must have the XML::NamespaceSupport module installed if you want | ||||
| 2558 | C<XMLout()> to translate URIs back to prefixes>. | ||||
| 2559 | |||||
| 2560 | =head2 NumericEscape => 0 | 1 | 2 I<# out - handy> | ||||
| 2561 | |||||
| 2562 | Use this option to have 'high' (non-ASCII) characters in your Perl data | ||||
| 2563 | structure converted to numeric entities (eg: €) in the XML output. Three | ||||
| 2564 | levels are possible: | ||||
| 2565 | |||||
| 2566 | 0 - default: no numeric escaping (OK if you're writing out UTF8) | ||||
| 2567 | |||||
| 2568 | 1 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output | ||||
| 2569 | |||||
| 2570 | 2 - all characters above 0x7F are escaped (good for plain ASCII output) | ||||
| 2571 | |||||
| 2572 | =head2 OutputFile => <file specifier> I<# out - handy> | ||||
| 2573 | |||||
| 2574 | The default behaviour of C<XMLout()> is to return the XML as a string. If you | ||||
| 2575 | wish to write the XML to a file, simply supply the filename using the | ||||
| 2576 | 'OutputFile' option. | ||||
| 2577 | |||||
| 2578 | This option also accepts an IO handle object - especially useful in Perl 5.8.0 | ||||
| 2579 | and later for output using an encoding other than UTF-8, eg: | ||||
| 2580 | |||||
| 2581 | open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; | ||||
| 2582 | XMLout($ref, OutputFile => $fh); | ||||
| 2583 | |||||
| 2584 | Note, XML::Simple does not require that the object you pass in to the | ||||
| 2585 | OutputFile option inherits from L<IO::Handle> - it simply assumes the object | ||||
| 2586 | supports a C<print> method. | ||||
| 2587 | |||||
| 2588 | =head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> | ||||
| 2589 | |||||
| 2590 | I<Note: This option is now officially deprecated. If you find it useful, email | ||||
| 2591 | the author with an example of what you use it for. Do not use this option to | ||||
| 2592 | set the ProtocolEncoding, that's just plain wrong - fix the XML>. | ||||
| 2593 | |||||
| 2594 | This option allows you to pass parameters to the constructor of the underlying | ||||
| 2595 | XML::Parser object (which of course assumes you're not using SAX). | ||||
| 2596 | |||||
| 2597 | =head2 RootName => 'string' I<# out - handy> | ||||
| 2598 | |||||
| 2599 | By default, when C<XMLout()> generates XML, the root element will be named | ||||
| 2600 | 'opt'. This option allows you to specify an alternative name. | ||||
| 2601 | |||||
| 2602 | Specifying either undef or the empty string for the RootName option will | ||||
| 2603 | produce XML with no root elements. In most cases the resulting XML fragment | ||||
| 2604 | will not be 'well formed' and therefore could not be read back in by C<XMLin()>. | ||||
| 2605 | Nevertheless, the option has been found to be useful in certain circumstances. | ||||
| 2606 | |||||
| 2607 | =head2 SearchPath => [ list ] I<# in - handy> | ||||
| 2608 | |||||
| 2609 | If you pass C<XMLin()> a filename, but the filename include no directory | ||||
| 2610 | component, you can use this option to specify which directories should be | ||||
| 2611 | searched to locate the file. You might use this option to search first in the | ||||
| 2612 | user's home directory, then in a global directory such as /etc. | ||||
| 2613 | |||||
| 2614 | If a filename is provided to C<XMLin()> but SearchPath is not defined, the | ||||
| 2615 | file is assumed to be in the current directory. | ||||
| 2616 | |||||
| 2617 | If the first parameter to C<XMLin()> is undefined, the default SearchPath | ||||
| 2618 | will contain only the directory in which the script itself is located. | ||||
| 2619 | Otherwise the default SearchPath will be empty. | ||||
| 2620 | |||||
| 2621 | =head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> | ||||
| 2622 | |||||
| 2623 | This option controls what C<XMLin()> should do with empty elements (no | ||||
| 2624 | attributes and no content). The default behaviour is to represent them as | ||||
| 2625 | empty hashes. Setting this option to a true value (eg: 1) will cause empty | ||||
| 2626 | elements to be skipped altogether. Setting the option to 'undef' or the empty | ||||
| 2627 | string will cause empty elements to be represented as the undefined value or | ||||
| 2628 | the empty string respectively. The latter two alternatives are a little | ||||
| 2629 | easier to test for in your code than a hash with no keys. | ||||
| 2630 | |||||
| 2631 | The option also controls what C<XMLout()> does with undefined values. Setting | ||||
| 2632 | the option to undef causes undefined values to be output as empty elements | ||||
| 2633 | (rather than empty attributes), it also suppresses the generation of warnings | ||||
| 2634 | about undefined values. Setting the option to a true value (eg: 1) causes | ||||
| 2635 | undefined values to be skipped altogether on output. | ||||
| 2636 | |||||
| 2637 | =head2 ValueAttr => [ names ] I<# in - handy> | ||||
| 2638 | |||||
| 2639 | Use this option to deal elements which always have a single attribute and no | ||||
| 2640 | content. Eg: | ||||
| 2641 | |||||
| 2642 | <opt> | ||||
| 2643 | <colour value="red" /> | ||||
| 2644 | <size value="XXL" /> | ||||
| 2645 | </opt> | ||||
| 2646 | |||||
| 2647 | Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: | ||||
| 2648 | |||||
| 2649 | { | ||||
| 2650 | colour => 'red', | ||||
| 2651 | size => 'XXL' | ||||
| 2652 | } | ||||
| 2653 | |||||
| 2654 | instead of this (the default): | ||||
| 2655 | |||||
| 2656 | { | ||||
| 2657 | colour => { value => 'red' }, | ||||
| 2658 | size => { value => 'XXL' } | ||||
| 2659 | } | ||||
| 2660 | |||||
| 2661 | Note: This form of the ValueAttr option is not compatible with C<XMLout()> - | ||||
| 2662 | since the attribute name is discarded at parse time, the original XML cannot be | ||||
| 2663 | reconstructed. | ||||
| 2664 | |||||
| 2665 | =head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> | ||||
| 2666 | |||||
| 2667 | This (preferred) form of the ValueAttr option requires you to specify both | ||||
| 2668 | the element and the attribute names. This is not only safer, it also allows | ||||
| 2669 | the original XML to be reconstructed by C<XMLout()>. | ||||
| 2670 | |||||
| 2671 | Note: You probably don't want to use this option and the NoAttr option at the | ||||
| 2672 | same time. | ||||
| 2673 | |||||
| 2674 | =head2 Variables => { name => value } I<# in - handy> | ||||
| 2675 | |||||
| 2676 | This option allows variables in the XML to be expanded when the file is read. | ||||
| 2677 | (there is no facility for putting the variable names back if you regenerate | ||||
| 2678 | XML using C<XMLout>). | ||||
| 2679 | |||||
| 2680 | A 'variable' is any text of the form C<${name}> which occurs in an attribute | ||||
| 2681 | value or in the text content of an element. If 'name' matches a key in the | ||||
| 2682 | supplied hashref, C<${name}> will be replaced with the corresponding value from | ||||
| 2683 | the hashref. If no matching key is found, the variable will not be replaced. | ||||
| 2684 | Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are | ||||
| 2685 | allowed). | ||||
| 2686 | |||||
| 2687 | =head2 VarAttr => 'attr_name' I<# in - handy> | ||||
| 2688 | |||||
| 2689 | In addition to the variables defined using C<Variables>, this option allows | ||||
| 2690 | variables to be defined in the XML. A variable definition consists of an | ||||
| 2691 | element with an attribute called 'attr_name' (the value of the C<VarAttr> | ||||
| 2692 | option). The value of the attribute will be used as the variable name and the | ||||
| 2693 | text content of the element will be used as the value. A variable defined in | ||||
| 2694 | this way will override a variable defined using the C<Variables> option. For | ||||
| 2695 | example: | ||||
| 2696 | |||||
| 2697 | XMLin( '<opt> | ||||
| 2698 | <dir name="prefix">/usr/local/apache</dir> | ||||
| 2699 | <dir name="exec_prefix">${prefix}</dir> | ||||
| 2700 | <dir name="bindir">${exec_prefix}/bin</dir> | ||||
| 2701 | </opt>', | ||||
| 2702 | VarAttr => 'name', ContentKey => '-content' | ||||
| 2703 | ); | ||||
| 2704 | |||||
| 2705 | produces the following data structure: | ||||
| 2706 | |||||
| 2707 | { | ||||
| 2708 | dir => { | ||||
| 2709 | prefix => '/usr/local/apache', | ||||
| 2710 | exec_prefix => '/usr/local/apache', | ||||
| 2711 | bindir => '/usr/local/apache/bin', | ||||
| 2712 | } | ||||
| 2713 | } | ||||
| 2714 | |||||
| 2715 | =head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> | ||||
| 2716 | |||||
| 2717 | If you want the output from C<XMLout()> to start with the optional XML | ||||
| 2718 | declaration, simply set the option to '1'. The default XML declaration is: | ||||
| 2719 | |||||
| 2720 | <?xml version='1.0' standalone='yes'?> | ||||
| 2721 | |||||
| 2722 | If you want some other string (for example to declare an encoding value), set | ||||
| 2723 | the value of this option to the complete string you require. | ||||
| 2724 | |||||
| 2725 | |||||
| 2726 | =head1 OPTIONAL OO INTERFACE | ||||
| 2727 | |||||
| 2728 | The procedural interface is both simple and convenient however there are a | ||||
| 2729 | couple of reasons why you might prefer to use the object oriented (OO) | ||||
| 2730 | interface: | ||||
| 2731 | |||||
| 2732 | =over 4 | ||||
| 2733 | |||||
| 2734 | =item * | ||||
| 2735 | |||||
| 2736 | to define a set of default values which should be used on all subsequent calls | ||||
| 2737 | to C<XMLin()> or C<XMLout()> | ||||
| 2738 | |||||
| 2739 | =item * | ||||
| 2740 | |||||
| 2741 | to override methods in B<XML::Simple> to provide customised behaviour | ||||
| 2742 | |||||
| 2743 | =back | ||||
| 2744 | |||||
| 2745 | The default values for the options described above are unlikely to suit | ||||
| 2746 | everyone. The OO interface allows you to effectively override B<XML::Simple>'s | ||||
| 2747 | defaults with your preferred values. It works like this: | ||||
| 2748 | |||||
| 2749 | First create an XML::Simple parser object with your preferred defaults: | ||||
| 2750 | |||||
| 2751 | my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); | ||||
| 2752 | |||||
| 2753 | then call C<XMLin()> or C<XMLout()> as a method of that object: | ||||
| 2754 | |||||
| 2755 | my $ref = $xs->XMLin($xml); | ||||
| 2756 | my $xml = $xs->XMLout($ref); | ||||
| 2757 | |||||
| 2758 | You can also specify options when you make the method calls and these values | ||||
| 2759 | will be merged with the values specified when the object was created. Values | ||||
| 2760 | specified in a method call take precedence. | ||||
| 2761 | |||||
| 2762 | Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be | ||||
| 2763 | called as C<xml_in()> or C<xml_out()>. The method names are aliased so the | ||||
| 2764 | only difference is the aesthetics. | ||||
| 2765 | |||||
| 2766 | =head2 Parsing Methods | ||||
| 2767 | |||||
| 2768 | You can explicitly call one of the following methods rather than rely on the | ||||
| 2769 | C<xml_in()> method automatically determining whether the target to be parsed is | ||||
| 2770 | a string, a file or a filehandle: | ||||
| 2771 | |||||
| 2772 | =over 4 | ||||
| 2773 | |||||
| 2774 | =item parse_string(text) | ||||
| 2775 | |||||
| 2776 | Works exactly like the C<xml_in()> method but assumes the first argument is | ||||
| 2777 | a string of XML (or a reference to a scalar containing a string of XML). | ||||
| 2778 | |||||
| 2779 | =item parse_file(filename) | ||||
| 2780 | |||||
| 2781 | Works exactly like the C<xml_in()> method but assumes the first argument is | ||||
| 2782 | the name of a file containing XML. | ||||
| 2783 | |||||
| 2784 | =item parse_fh(file_handle) | ||||
| 2785 | |||||
| 2786 | Works exactly like the C<xml_in()> method but assumes the first argument is | ||||
| 2787 | a filehandle which can be read to get XML. | ||||
| 2788 | |||||
| 2789 | =back | ||||
| 2790 | |||||
| 2791 | =head2 Hook Methods | ||||
| 2792 | |||||
| 2793 | You can make your own class which inherits from XML::Simple and overrides | ||||
| 2794 | certain behaviours. The following methods may provide useful 'hooks' upon | ||||
| 2795 | which to hang your modified behaviour. You may find other undocumented methods | ||||
| 2796 | by examining the source, but those may be subject to change in future releases. | ||||
| 2797 | |||||
| 2798 | =over 4 | ||||
| 2799 | |||||
| 2800 | =item handle_options(direction, name => value ...) | ||||
| 2801 | |||||
| 2802 | This method will be called when one of the parsing methods or the C<XMLout()> | ||||
| 2803 | method is called. The initial argument will be a string (either 'in' or 'out') | ||||
| 2804 | and the remaining arguments will be name value pairs. | ||||
| 2805 | |||||
| 2806 | =item default_config_file() | ||||
| 2807 | |||||
| 2808 | Calculates and returns the name of the file which should be parsed if no | ||||
| 2809 | filename is passed to C<XMLin()> (default: C<$0.xml>). | ||||
| 2810 | |||||
| 2811 | =item build_simple_tree(filename, string) | ||||
| 2812 | |||||
| 2813 | Called from C<XMLin()> or any of the parsing methods. Takes either a file name | ||||
| 2814 | as the first argument or C<undef> followed by a 'string' as the second | ||||
| 2815 | argument. Returns a simple tree data structure. You could override this | ||||
| 2816 | method to apply your own transformations before the data structure is returned | ||||
| 2817 | to the caller. | ||||
| 2818 | |||||
| 2819 | =item new_hashref() | ||||
| 2820 | |||||
| 2821 | When the 'simple tree' data structure is being built, this method will be | ||||
| 2822 | called to create any required anonymous hashrefs. | ||||
| 2823 | |||||
| 2824 | =item sorted_keys(name, hashref) | ||||
| 2825 | |||||
| 2826 | Called when C<XMLout()> is translating a hashref to XML. This routine returns | ||||
| 2827 | a list of hash keys in the order that the corresponding attributes/elements | ||||
| 2828 | should appear in the output. | ||||
| 2829 | |||||
| 2830 | =item escape_value(string) | ||||
| 2831 | |||||
| 2832 | Called from C<XMLout()>, takes a string and returns a copy of the string with | ||||
| 2833 | XML character escaping rules applied. | ||||
| 2834 | |||||
| 2835 | =item numeric_escape(string) | ||||
| 2836 | |||||
| 2837 | Called from C<escape_value()>, to handle non-ASCII characters (depending on the | ||||
| 2838 | value of the NumericEscape option). | ||||
| 2839 | |||||
| 2840 | =item copy_hash(hashref, extra_key => value, ...) | ||||
| 2841 | |||||
| 2842 | Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of | ||||
| 2843 | hashes. You might wish to override this method if you're using tied hashes and | ||||
| 2844 | don't want them to get untied. | ||||
| 2845 | |||||
| 2846 | =back | ||||
| 2847 | |||||
| 2848 | =head2 Cache Methods | ||||
| 2849 | |||||
| 2850 | XML::Simple implements three caching schemes ('storable', 'memshare' and | ||||
| 2851 | 'memcopy'). You can implement a custom caching scheme by implementing | ||||
| 2852 | two methods - one for reading from the cache and one for writing to it. | ||||
| 2853 | |||||
| 2854 | For example, you might implement a new 'dbm' scheme that stores cached data | ||||
| 2855 | structures using the L<MLDBM> module. First, you would add a | ||||
| 2856 | C<cache_read_dbm()> method which accepted a filename for use as a lookup key | ||||
| 2857 | and returned a data structure on success, or undef on failure. Then, you would | ||||
| 2858 | implement a C<cache_read_dbm()> method which accepted a data structure and a | ||||
| 2859 | filename. | ||||
| 2860 | |||||
| 2861 | You would use this caching scheme by specifying the option: | ||||
| 2862 | |||||
| 2863 | Cache => [ 'dbm' ] | ||||
| 2864 | |||||
| 2865 | =head1 STRICT MODE | ||||
| 2866 | |||||
| 2867 | If you import the B<XML::Simple> routines like this: | ||||
| 2868 | |||||
| 2869 | use XML::Simple qw(:strict); | ||||
| 2870 | |||||
| 2871 | the following common mistakes will be detected and treated as fatal errors | ||||
| 2872 | |||||
| 2873 | =over 4 | ||||
| 2874 | |||||
| 2875 | =item * | ||||
| 2876 | |||||
| 2877 | Failing to explicitly set the C<KeyAttr> option - if you can't be bothered | ||||
| 2878 | reading about this option, turn it off with: KeyAttr => [ ] | ||||
| 2879 | |||||
| 2880 | =item * | ||||
| 2881 | |||||
| 2882 | Failing to explicitly set the C<ForceArray> option - if you can't be bothered | ||||
| 2883 | reading about this option, set it to the safest mode with: ForceArray => 1 | ||||
| 2884 | |||||
| 2885 | =item * | ||||
| 2886 | |||||
| 2887 | Setting ForceArray to an array, but failing to list all the elements from the | ||||
| 2888 | KeyAttr hash. | ||||
| 2889 | |||||
| 2890 | =item * | ||||
| 2891 | |||||
| 2892 | Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains | ||||
| 2893 | one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested | ||||
| 2894 | element). Note: if strict mode is not set but -w is, this condition triggers a | ||||
| 2895 | warning. | ||||
| 2896 | |||||
| 2897 | =item * | ||||
| 2898 | |||||
| 2899 | Data error - as above, but non-unique values are present in the key attribute | ||||
| 2900 | (eg: more than one E<lt>partE<gt> element with the same partnum). This will | ||||
| 2901 | also trigger a warning if strict mode is not enabled. | ||||
| 2902 | |||||
| 2903 | =item * | ||||
| 2904 | |||||
| 2905 | Data error - as above, but value of key attribute (eg: partnum) is not a | ||||
| 2906 | scalar string (due to nested elements etc). This will also trigger a warning | ||||
| 2907 | if strict mode is not enabled. | ||||
| 2908 | |||||
| 2909 | =back | ||||
| 2910 | |||||
| 2911 | =head1 SAX SUPPORT | ||||
| 2912 | |||||
| 2913 | From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API | ||||
| 2914 | for XML) - specifically SAX2. | ||||
| 2915 | |||||
| 2916 | In a typical SAX application, an XML parser (or SAX 'driver') module generates | ||||
| 2917 | SAX events (start of element, character data, end of element, etc) as it parses | ||||
| 2918 | an XML document and a 'handler' module processes the events to extract the | ||||
| 2919 | required data. This simple model allows for some interesting and powerful | ||||
| 2920 | possibilities: | ||||
| 2921 | |||||
| 2922 | =over 4 | ||||
| 2923 | |||||
| 2924 | =item * | ||||
| 2925 | |||||
| 2926 | Applications written to the SAX API can extract data from huge XML documents | ||||
| 2927 | without the memory overheads of a DOM or tree API. | ||||
| 2928 | |||||
| 2929 | =item * | ||||
| 2930 | |||||
| 2931 | The SAX API allows for plug and play interchange of parser modules without | ||||
| 2932 | having to change your code to fit a new module's API. A number of SAX parsers | ||||
| 2933 | are available with capabilities ranging from extreme portability to blazing | ||||
| 2934 | performance. | ||||
| 2935 | |||||
| 2936 | =item * | ||||
| 2937 | |||||
| 2938 | A SAX 'filter' module can implement both a handler interface for receiving | ||||
| 2939 | data and a generator interface for passing modified data on to a downstream | ||||
| 2940 | handler. Filters can be chained together in 'pipelines'. | ||||
| 2941 | |||||
| 2942 | =item * | ||||
| 2943 | |||||
| 2944 | One filter module might split a data stream to direct data to two or more | ||||
| 2945 | downstream handlers. | ||||
| 2946 | |||||
| 2947 | =item * | ||||
| 2948 | |||||
| 2949 | Generating SAX events is not the exclusive preserve of XML parsing modules. | ||||
| 2950 | For example, a module might extract data from a relational database using DBI | ||||
| 2951 | and pass it on to a SAX pipeline for filtering and formatting. | ||||
| 2952 | |||||
| 2953 | =back | ||||
| 2954 | |||||
| 2955 | B<XML::Simple> can operate at either end of a SAX pipeline. For example, | ||||
| 2956 | you can take a data structure in the form of a hashref and pass it into a | ||||
| 2957 | SAX pipeline using the 'Handler' option on C<XMLout()>: | ||||
| 2958 | |||||
| 2959 | use XML::Simple; | ||||
| 2960 | use Some::SAX::Filter; | ||||
| 2961 | use XML::SAX::Writer; | ||||
| 2962 | |||||
| 2963 | my $ref = { | ||||
| 2964 | .... # your data here | ||||
| 2965 | }; | ||||
| 2966 | |||||
| 2967 | my $writer = XML::SAX::Writer->new(); | ||||
| 2968 | my $filter = Some::SAX::Filter->new(Handler => $writer); | ||||
| 2969 | my $simple = XML::Simple->new(Handler => $filter); | ||||
| 2970 | $simple->XMLout($ref); | ||||
| 2971 | |||||
| 2972 | You can also put B<XML::Simple> at the opposite end of the pipeline to take | ||||
| 2973 | advantage of the simple 'tree' data structure once the relevant data has been | ||||
| 2974 | isolated through filtering: | ||||
| 2975 | |||||
| 2976 | use XML::SAX; | ||||
| 2977 | use Some::SAX::Filter; | ||||
| 2978 | use XML::Simple; | ||||
| 2979 | |||||
| 2980 | my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); | ||||
| 2981 | my $filter = Some::SAX::Filter->new(Handler => $simple); | ||||
| 2982 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | ||||
| 2983 | |||||
| 2984 | my $ref = $parser->parse_uri('some_huge_file.xml'); | ||||
| 2985 | |||||
| 2986 | print $ref->{part}->{'555-1234'}; | ||||
| 2987 | |||||
| 2988 | You can build a filter by using an XML::Simple object as a handler and setting | ||||
| 2989 | its DataHandler option to point to a routine which takes the resulting tree, | ||||
| 2990 | modifies it and sends it off as SAX events to a downstream handler: | ||||
| 2991 | |||||
| 2992 | my $writer = XML::SAX::Writer->new(); | ||||
| 2993 | my $filter = XML::Simple->new( | ||||
| 2994 | DataHandler => sub { | ||||
| 2995 | my $simple = shift; | ||||
| 2996 | my $data = shift; | ||||
| 2997 | |||||
| 2998 | # Modify $data here | ||||
| 2999 | |||||
| 3000 | $simple->XMLout($data, Handler => $writer); | ||||
| 3001 | } | ||||
| 3002 | ); | ||||
| 3003 | my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); | ||||
| 3004 | |||||
| 3005 | $parser->parse_uri($filename); | ||||
| 3006 | |||||
| 3007 | I<Note: In this last example, the 'Handler' option was specified in the call to | ||||
| 3008 | C<XMLout()> but it could also have been specified in the constructor>. | ||||
| 3009 | |||||
| 3010 | =head1 ENVIRONMENT | ||||
| 3011 | |||||
| 3012 | If you don't care which parser module B<XML::Simple> uses then skip this | ||||
| 3013 | section entirely (it looks more complicated than it really is). | ||||
| 3014 | |||||
| 3015 | B<XML::Simple> will default to using a B<SAX> parser if one is available or | ||||
| 3016 | B<XML::Parser> if SAX is not available. | ||||
| 3017 | |||||
| 3018 | You can dictate which parser module is used by setting either the environment | ||||
| 3019 | variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable | ||||
| 3020 | $XML::Simple::PREFERRED_PARSER to contain the module name. The following rules | ||||
| 3021 | are used: | ||||
| 3022 | |||||
| 3023 | =over 4 | ||||
| 3024 | |||||
| 3025 | =item * | ||||
| 3026 | |||||
| 3027 | The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use | ||||
| 3028 | its default rules, you can set the package variable to an empty string. | ||||
| 3029 | |||||
| 3030 | =item * | ||||
| 3031 | |||||
| 3032 | If the 'preferred parser' is set to the string 'XML::Parser', then | ||||
| 3033 | L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not | ||||
| 3034 | installed). | ||||
| 3035 | |||||
| 3036 | =item * | ||||
| 3037 | |||||
| 3038 | If the 'preferred parser' is set to some other value, then it is assumed to be | ||||
| 3039 | the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory.> | ||||
| 3040 | If L<XML::SAX> is not installed, or the requested parser module is not | ||||
| 3041 | installed, then C<XMLin()> will die. | ||||
| 3042 | |||||
| 3043 | =item * | ||||
| 3044 | |||||
| 3045 | If the 'preferred parser' is not defined at all (the normal default | ||||
| 3046 | state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is | ||||
| 3047 | installed, then a parser module will be selected according to | ||||
| 3048 | L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX | ||||
| 3049 | parser installed). | ||||
| 3050 | |||||
| 3051 | =item * | ||||
| 3052 | |||||
| 3053 | if the 'preferred parser' is not defined and B<XML::SAX> is not | ||||
| 3054 | installed, then B<XML::Parser> will be used. C<XMLin()> will die if | ||||
| 3055 | L<XML::Parser> is not installed. | ||||
| 3056 | |||||
| 3057 | =back | ||||
| 3058 | |||||
| 3059 | Note: The B<XML::SAX> distribution includes an XML parser written entirely in | ||||
| 3060 | Perl. It is very portable but it is not very fast. You should consider | ||||
| 3061 | installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your | ||||
| 3062 | platform. | ||||
| 3063 | |||||
| 3064 | =head1 ERROR HANDLING | ||||
| 3065 | |||||
| 3066 | The XML standard is very clear on the issue of non-compliant documents. An | ||||
| 3067 | error in parsing any single element (for example a missing end tag) must cause | ||||
| 3068 | the whole document to be rejected. B<XML::Simple> will die with an appropriate | ||||
| 3069 | message if it encounters a parsing error. | ||||
| 3070 | |||||
| 3071 | If dying is not appropriate for your application, you should arrange to call | ||||
| 3072 | C<XMLin()> in an eval block and look for errors in $@. eg: | ||||
| 3073 | |||||
| 3074 | my $config = eval { XMLin() }; | ||||
| 3075 | PopUpMessage($@) if($@); | ||||
| 3076 | |||||
| 3077 | Note, there is a common misconception that use of B<eval> will significantly | ||||
| 3078 | slow down a script. While that may be true when the code being eval'd is in a | ||||
| 3079 | string, it is not true of code like the sample above. | ||||
| 3080 | |||||
| 3081 | =head1 EXAMPLES | ||||
| 3082 | |||||
| 3083 | When C<XMLin()> reads the following very simple piece of XML: | ||||
| 3084 | |||||
| 3085 | <opt username="testuser" password="frodo"></opt> | ||||
| 3086 | |||||
| 3087 | it returns the following data structure: | ||||
| 3088 | |||||
| 3089 | { | ||||
| 3090 | 'username' => 'testuser', | ||||
| 3091 | 'password' => 'frodo' | ||||
| 3092 | } | ||||
| 3093 | |||||
| 3094 | The identical result could have been produced with this alternative XML: | ||||
| 3095 | |||||
| 3096 | <opt username="testuser" password="frodo" /> | ||||
| 3097 | |||||
| 3098 | Or this (although see 'ForceArray' option for variations): | ||||
| 3099 | |||||
| 3100 | <opt> | ||||
| 3101 | <username>testuser</username> | ||||
| 3102 | <password>frodo</password> | ||||
| 3103 | </opt> | ||||
| 3104 | |||||
| 3105 | Repeated nested elements are represented as anonymous arrays: | ||||
| 3106 | |||||
| 3107 | <opt> | ||||
| 3108 | <person firstname="Joe" lastname="Smith"> | ||||
| 3109 | <email>joe@smith.com</email> | ||||
| 3110 | <email>jsmith@yahoo.com</email> | ||||
| 3111 | </person> | ||||
| 3112 | <person firstname="Bob" lastname="Smith"> | ||||
| 3113 | <email>bob@smith.com</email> | ||||
| 3114 | </person> | ||||
| 3115 | </opt> | ||||
| 3116 | |||||
| 3117 | { | ||||
| 3118 | 'person' => [ | ||||
| 3119 | { | ||||
| 3120 | 'email' => [ | ||||
| 3121 | 'joe@smith.com', | ||||
| 3122 | 'jsmith@yahoo.com' | ||||
| 3123 | ], | ||||
| 3124 | 'firstname' => 'Joe', | ||||
| 3125 | 'lastname' => 'Smith' | ||||
| 3126 | }, | ||||
| 3127 | { | ||||
| 3128 | 'email' => 'bob@smith.com', | ||||
| 3129 | 'firstname' => 'Bob', | ||||
| 3130 | 'lastname' => 'Smith' | ||||
| 3131 | } | ||||
| 3132 | ] | ||||
| 3133 | } | ||||
| 3134 | |||||
| 3135 | Nested elements with a recognised key attribute are transformed (folded) from | ||||
| 3136 | an array into a hash keyed on the value of that attribute (see the C<KeyAttr> | ||||
| 3137 | option): | ||||
| 3138 | |||||
| 3139 | <opt> | ||||
| 3140 | <person key="jsmith" firstname="Joe" lastname="Smith" /> | ||||
| 3141 | <person key="tsmith" firstname="Tom" lastname="Smith" /> | ||||
| 3142 | <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> | ||||
| 3143 | </opt> | ||||
| 3144 | |||||
| 3145 | { | ||||
| 3146 | 'person' => { | ||||
| 3147 | 'jbloggs' => { | ||||
| 3148 | 'firstname' => 'Joe', | ||||
| 3149 | 'lastname' => 'Bloggs' | ||||
| 3150 | }, | ||||
| 3151 | 'tsmith' => { | ||||
| 3152 | 'firstname' => 'Tom', | ||||
| 3153 | 'lastname' => 'Smith' | ||||
| 3154 | }, | ||||
| 3155 | 'jsmith' => { | ||||
| 3156 | 'firstname' => 'Joe', | ||||
| 3157 | 'lastname' => 'Smith' | ||||
| 3158 | } | ||||
| 3159 | } | ||||
| 3160 | } | ||||
| 3161 | |||||
| 3162 | |||||
| 3163 | The <anon> tag can be used to form anonymous arrays: | ||||
| 3164 | |||||
| 3165 | <opt> | ||||
| 3166 | <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> | ||||
| 3167 | <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> | ||||
| 3168 | <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> | ||||
| 3169 | <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> | ||||
| 3170 | </opt> | ||||
| 3171 | |||||
| 3172 | { | ||||
| 3173 | 'head' => [ | ||||
| 3174 | [ 'Col 1', 'Col 2', 'Col 3' ] | ||||
| 3175 | ], | ||||
| 3176 | 'data' => [ | ||||
| 3177 | [ 'R1C1', 'R1C2', 'R1C3' ], | ||||
| 3178 | [ 'R2C1', 'R2C2', 'R2C3' ], | ||||
| 3179 | [ 'R3C1', 'R3C2', 'R3C3' ] | ||||
| 3180 | ] | ||||
| 3181 | } | ||||
| 3182 | |||||
| 3183 | Anonymous arrays can be nested to arbirtrary levels and as a special case, if | ||||
| 3184 | the surrounding tags for an XML document contain only an anonymous array the | ||||
| 3185 | arrayref will be returned directly rather than the usual hashref: | ||||
| 3186 | |||||
| 3187 | <opt> | ||||
| 3188 | <anon><anon>Col 1</anon><anon>Col 2</anon></anon> | ||||
| 3189 | <anon><anon>R1C1</anon><anon>R1C2</anon></anon> | ||||
| 3190 | <anon><anon>R2C1</anon><anon>R2C2</anon></anon> | ||||
| 3191 | </opt> | ||||
| 3192 | |||||
| 3193 | [ | ||||
| 3194 | [ 'Col 1', 'Col 2' ], | ||||
| 3195 | [ 'R1C1', 'R1C2' ], | ||||
| 3196 | [ 'R2C1', 'R2C2' ] | ||||
| 3197 | ] | ||||
| 3198 | |||||
| 3199 | Elements which only contain text content will simply be represented as a | ||||
| 3200 | scalar. Where an element has both attributes and text content, the element | ||||
| 3201 | will be represented as a hashref with the text content in the 'content' key | ||||
| 3202 | (see the C<ContentKey> option): | ||||
| 3203 | |||||
| 3204 | <opt> | ||||
| 3205 | <one>first</one> | ||||
| 3206 | <two attr="value">second</two> | ||||
| 3207 | </opt> | ||||
| 3208 | |||||
| 3209 | { | ||||
| 3210 | 'one' => 'first', | ||||
| 3211 | 'two' => { 'attr' => 'value', 'content' => 'second' } | ||||
| 3212 | } | ||||
| 3213 | |||||
| 3214 | Mixed content (elements which contain both text content and nested elements) | ||||
| 3215 | will be not be represented in a useful way - element order and significant | ||||
| 3216 | whitespace will be lost. If you need to work with mixed content, then | ||||
| 3217 | XML::Simple is not the right tool for your job - check out the next section. | ||||
| 3218 | |||||
| 3219 | =head1 WHERE TO FROM HERE? | ||||
| 3220 | |||||
| 3221 | B<XML::Simple> is able to present a simple API because it makes some | ||||
| 3222 | assumptions on your behalf. These include: | ||||
| 3223 | |||||
| 3224 | =over 4 | ||||
| 3225 | |||||
| 3226 | =item * | ||||
| 3227 | |||||
| 3228 | You're not interested in text content consisting only of whitespace | ||||
| 3229 | |||||
| 3230 | =item * | ||||
| 3231 | |||||
| 3232 | You don't mind that when things get slurped into a hash the order is lost | ||||
| 3233 | |||||
| 3234 | =item * | ||||
| 3235 | |||||
| 3236 | You don't want fine-grained control of the formatting of generated XML | ||||
| 3237 | |||||
| 3238 | =item * | ||||
| 3239 | |||||
| 3240 | You would never use a hash key that was not a legal XML element name | ||||
| 3241 | |||||
| 3242 | =item * | ||||
| 3243 | |||||
| 3244 | You don't need help converting between different encodings | ||||
| 3245 | |||||
| 3246 | =back | ||||
| 3247 | |||||
| 3248 | In a serious XML project, you'll probably outgrow these assumptions fairly | ||||
| 3249 | quickly. This section of the document used to offer some advice on chosing a | ||||
| 3250 | more powerful option. That advice has now grown into the 'Perl-XML FAQ' | ||||
| 3251 | document which you can find at: L<http://perl-xml.sourceforge.net/faq/> | ||||
| 3252 | |||||
| 3253 | The advice in the FAQ boils down to a quick explanation of tree versus | ||||
| 3254 | event based parsers and then recommends: | ||||
| 3255 | |||||
| 3256 | For event based parsing, use SAX (do not set out to write any new code for | ||||
| 3257 | XML::Parser's handler API - it is obselete). | ||||
| 3258 | |||||
| 3259 | For tree-based parsing, you could choose between the 'Perlish' approach of | ||||
| 3260 | L<XML::Twig> and more standards based DOM implementations - preferably one with | ||||
| 3261 | XPath support. | ||||
| 3262 | |||||
| 3263 | |||||
| 3264 | =head1 SEE ALSO | ||||
| 3265 | |||||
| 3266 | B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>. | ||||
| 3267 | |||||
| 3268 | To generate documents with namespaces, L<XML::NamespaceSupport> is required. | ||||
| 3269 | |||||
| 3270 | The optional caching functions require L<Storable>. | ||||
| 3271 | |||||
| 3272 | Answers to Frequently Asked Questions about XML::Simple are bundled with this | ||||
| 3273 | distribution as: L<XML::Simple::FAQ> | ||||
| 3274 | |||||
| 3275 | =head1 COPYRIGHT | ||||
| 3276 | |||||
| 3277 | Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt> | ||||
| 3278 | |||||
| 3279 | This library is free software; you can redistribute it and/or modify it | ||||
| 3280 | under the same terms as Perl itself. | ||||
| 3281 | |||||
| 3282 | =cut | ||||
| 3283 | |||||
| 3284 | |||||
# spent 6.32ms (99µs+6.22) within XML::Simple::CORE:match which was called 33 times, avg 191µs/call:
# 28 times (85µs+6.22ms) by XML::Simple::collapse at line 1023 of XML/Simple.pm, avg 225µs/call
# 3 times (12µs+0s) by XML::Simple::XMLin at line 171 of XML/Simple.pm, avg 4µs/call
# once (1µs+0s) by XML::Simple::import at line 95 of XML/Simple.pm
# once (300ns+0s) by XML::Simple::import at line 99 of XML/Simple.pm | |||||
# spent 3µs within XML::Simple::CORE:subst which was called 3 times, avg 1µs/call:
# 3 times (3µs+0s) by XML::Simple::handle_options at line 701 of XML/Simple.pm, avg 1µs/call |