package Mojo::DOM; use Mojo::Base -strict; use overload '@{}' => sub { shift->child_nodes }, '%{}' => sub { shift->attr }, bool => sub {1}, '""' => sub { shift->to_string }, fallback => 1; # "Fry: This snow is beautiful. I'm glad global warming never happened. # Leela: Actually, it did. But thank God nuclear winter canceled it out." use Carp 'croak'; use Mojo::Collection; use Mojo::DOM::CSS; use Mojo::DOM::HTML; use Scalar::Util 'weaken'; sub all_text { _text(_nodes(shift->tree), 1) } sub ancestors { _select($_[0]->_collect([$_[0]->_ancestors]), $_[1]) } sub append { shift->_add(1, @_) } sub append_content { shift->_content(1, 0, @_) } sub at { my $self = shift; return undef unless my $result = $self->_css->select_one(@_); return $self->_build($result, $self->xml); } sub attr { my $self = shift; # Hash my $tree = $self->tree; my $attrs = $tree->[0] ne 'tag' ? {} : $tree->[2]; return $attrs unless @_; # Get return $attrs->{$_[0]} unless @_ > 1 || ref $_[0]; # Set my $values = ref $_[0] ? $_[0] : {@_}; @$attrs{keys %$values} = values %$values; return $self; } sub child_nodes { $_[0]->_collect(_nodes($_[0]->tree)) } sub children { _select($_[0]->_collect(_nodes($_[0]->tree, 1)), $_[1]) } sub content { my $self = shift; my $type = $self->type; if ($type eq 'root' || $type eq 'tag') { return $self->_content(0, 1, @_) if @_; my $html = Mojo::DOM::HTML->new(xml => $self->xml); return join '', map { $html->tree($_)->render } @{_nodes($self->tree)}; } return $self->tree->[1] unless @_; $self->tree->[1] = shift; return $self; } sub descendant_nodes { $_[0]->_collect(_all(_nodes($_[0]->tree))) } sub find { $_[0]->_collect($_[0]->_css->select($_[1])) } sub following { _select($_[0]->_collect($_[0]->_siblings(1, 1)), $_[1]) } sub following_nodes { $_[0]->_collect($_[0]->_siblings(0, 1)) } sub matches { shift->_css->matches(@_) } sub namespace { my $self = shift; return undef if (my $tree = $self->tree)->[0] ne 'tag'; # Extract namespace prefix and search parents my $ns = $tree->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef; for my $node ($tree, $self->_ancestors) { # Namespace for prefix my $attrs = $node->[2]; if ($ns) { $_ eq $ns and return $attrs->{$_} for keys %$attrs } # Namespace attribute elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} } } return undef; } sub new { my $class = shift; my $self = bless \Mojo::DOM::HTML->new, ref $class || $class; return @_ ? $self->parse(@_) : $self; } sub next { $_[0]->_maybe($_[0]->_siblings(1, 1, 0)) } sub next_node { $_[0]->_maybe($_[0]->_siblings(0, 1, 0)) } sub parent { my $self = shift; return undef if (my $tree = $self->tree)->[0] eq 'root'; return $self->_build(_parent($tree), $self->xml); } sub parse { shift->_delegate(parse => @_) } sub preceding { _select($_[0]->_collect($_[0]->_siblings(1, 0)), $_[1]) } sub preceding_nodes { $_[0]->_collect($_[0]->_siblings(0)) } sub prepend { shift->_add(0, @_) } sub prepend_content { shift->_content(0, 0, @_) } sub previous { $_[0]->_maybe($_[0]->_siblings(1, 0, -1)) } sub previous_node { $_[0]->_maybe($_[0]->_siblings(0, 0, -1)) } sub remove { shift->replace('') } sub replace { my ($self, $new) = @_; return $self->parse($new) if (my $tree = $self->tree)->[0] eq 'root'; return $self->_replace(_parent($tree), $tree, _nodes($self->_parse($new))); } sub root { my $self = shift; return $self unless my $tree = $self->_ancestors(1); return $self->_build($tree, $self->xml); } sub strip { my $self = shift; return $self if (my $tree = $self->tree)->[0] ne 'tag'; return $self->_replace($tree->[3], $tree, _nodes($tree)); } sub tag { my ($self, $tag) = @_; return undef if (my $tree = $self->tree)->[0] ne 'tag'; return $tree->[1] unless $tag; $tree->[1] = $tag; return $self; } sub tap { shift->Mojo::Base::tap(@_) } sub text { _text(_nodes(shift->tree), 0) } sub to_string { shift->_delegate('render') } sub tree { shift->_delegate(tree => @_) } sub type { shift->tree->[0] } sub val { my $self = shift; # "option" return $self->{value} // $self->text if (my $tag = $self->tag) eq 'option'; # "input" ("type=checkbox" and "type=radio") my $type = $self->{type} // ''; return $self->{value} // 'on' if $tag eq 'input' && ($type eq 'radio' || $type eq 'checkbox'); # "textarea", "input" or "button" return $tag eq 'textarea' ? $self->text : $self->{value} if $tag ne 'select'; # "select" my $v = $self->find('option:checked:not([disabled])') ->grep(sub { !$_->ancestors('optgroup[disabled]')->size })->map('val'); return exists $self->{multiple} ? $v->size ? $v->to_array : undef : $v->last; } sub with_roles { shift->Mojo::Base::with_roles(@_) } sub wrap { shift->_wrap(0, @_) } sub wrap_content { shift->_wrap(1, @_) } sub xml { shift->_delegate(xml => @_) } sub _add { my ($self, $offset, $new) = @_; return $self if (my $tree = $self->tree)->[0] eq 'root'; my $parent = _parent($tree); splice @$parent, _offset($parent, $tree) + $offset, 0, @{_link($parent, _nodes($self->_parse($new)))}; return $self; } sub _all { my $nodes = shift; @$nodes = map { $_->[0] eq 'tag' ? ($_, @{_all(_nodes($_))}) : ($_) } @$nodes; return $nodes; } sub _ancestors { my ($self, $root) = @_; return () unless my $tree = _parent($self->tree); my @ancestors; do { push @ancestors, $tree } while ($tree->[0] eq 'tag') && ($tree = $tree->[3]); return $root ? $ancestors[-1] : @ancestors[0 .. $#ancestors - 1]; } sub _build { shift->new->tree(shift)->xml(shift) } sub _collect { my ($self, $nodes) = (shift, shift // []); my $xml = $self->xml; return Mojo::Collection->new(map { $self->_build($_, $xml) } @$nodes); } sub _content { my ($self, $start, $offset, $new) = @_; my $tree = $self->tree; unless ($tree->[0] eq 'root' || $tree->[0] eq 'tag') { my $old = $self->content; return $self->content($start ? $old . $new : $new . $old); } $start = $start ? ($#$tree + 1) : _start($tree); $offset = $offset ? $#$tree : 0; splice @$tree, $start, $offset, @{_link($tree, _nodes($self->_parse($new)))}; return $self; } sub _css { Mojo::DOM::CSS->new(tree => shift->tree) } sub _delegate { my ($self, $method) = (shift, shift); return $$self->$method unless @_; $$self->$method(@_); return $self; } sub _link { my ($parent, $children) = @_; # Link parent to children for my $node (@$children) { my $offset = $node->[0] eq 'tag' ? 3 : 2; $node->[$offset] = $parent; weaken $node->[$offset]; } return $children; } sub _maybe { $_[1] ? $_[0]->_build($_[1], $_[0]->xml) : undef } sub _nodes { return () unless my $tree = shift; my @nodes = @$tree[_start($tree) .. $#$tree]; return shift() ? [grep { $_->[0] eq 'tag' } @nodes] : \@nodes; } sub _offset { my ($parent, $child) = @_; my $i = _start($parent); $_ eq $child ? last : $i++ for @$parent[$i .. $#$parent]; return $i; } sub _parent { $_[0]->[$_[0][0] eq 'tag' ? 3 : 2] } sub _parse { Mojo::DOM::HTML->new(xml => shift->xml)->parse(shift)->tree } sub _replace { my ($self, $parent, $child, $nodes) = @_; splice @$parent, _offset($parent, $child), 1, @{_link($parent, $nodes)}; return $self->parent; } sub _select { $_[1] ? $_[0]->grep(matches => $_[1]) : $_[0] } sub _siblings { my ($self, $tags, $tail, $i) = @_; return defined $i ? undef : [] if (my $tree = $self->tree)->[0] eq 'root'; my $nodes = _nodes(_parent($tree)); my $match = -1; defined($match++) and $_ eq $tree and last for @$nodes; if ($tail) { splice @$nodes, 0, $match + 1 } else { splice @$nodes, $match, ($#$nodes + 1) - $match } @$nodes = grep { $_->[0] eq 'tag' } @$nodes if $tags; return defined $i ? $i == -1 && !@$nodes ? undef : $nodes->[$i] : $nodes; } sub _start { $_[0][0] eq 'root' ? 1 : 4 } sub _text { my ($nodes, $all) = @_; my $text = ''; while (my $node = shift @$nodes) { my $type = $node->[0]; # Text if ($type eq 'text' || $type eq 'cdata' || $type eq 'raw') { $text .= $node->[1]; } # Nested tag elsif ($type eq 'tag' && $all) { unshift @$nodes, @{_nodes($node)} } } return $text; } sub _wrap { my ($self, $content, $new) = @_; return $self if (my $tree = $self->tree)->[0] eq 'root' && !$content; return $self if $tree->[0] ne 'root' && $tree->[0] ne 'tag' && $content; # Find innermost tag my $current; my $first = $new = $self->_parse($new); $current = $first while $first = _nodes($first, 1)->[0]; return $self unless $current; # Wrap content if ($content) { push @$current, @{_link($current, _nodes($tree))}; splice @$tree, _start($tree), $#$tree, @{_link($tree, _nodes($new))}; return $self; } # Wrap element $self->_replace(_parent($tree), $tree, _nodes($new)); push @$current, @{_link($current, [$tree])}; return $self; } 1; =encoding utf8 =head1 NAME Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors =head1 SYNOPSIS use Mojo::DOM; # Parse my $dom = Mojo::DOM->new('
Test
123
456
'); $dom->find(':not(p)')->map('strip'); # Render say "$dom"; =head1 DESCRIPTION LHi!
'); say $dom->at('p[id]')->text; If an XML declaration is found, the parser will automatically switch into XML mode and everything becomes case-sensitive. # XML semantics my $dom = Mojo::DOM->new('Hi!
'); say $dom->at('P[ID]')->text; HTML or XML semantics can also be forced with the L"xml"> method. # Force HTML semantics my $dom = Mojo::DOM->new->xml(0)->parse('Hi!
'); say $dom->at('p[id]')->text; # Force XML semantics my $dom = Mojo::DOM->new->xml(1)->parse('Hi!
'); say $dom->at('P[ID]')->text; =head1 METHODS Lbar
baz\nI ♥ Mojolicious!
'); Append HTML/XML fragment to this node (for all node types other than CTest 123
" $dom->parse('Test
')->at('p') ->child_nodes->first->append(' 123')->root; =head2 append_content $dom = $dom->append_content('I ♥ Mojolicious!
'); Append HTML/XML fragment (for CTest123
" $dom->parse('Test
')->at('p')->append_content('123')->root; =head2 at my $result = $dom->at('div ~ p'); Find first descendant element of this element matching the CSS selector and return it as a L123
" $dom->parse('Test123
')->at('p')->child_nodes->first->remove; # "" $dom->parse('123')->child_nodes->first; # " Test " $dom->parse('123')->child_nodes->last->content; =head2 children my $collection = $dom->children; my $collection = $dom->children('div ~ p'); Find all child elements of this element matching the CSS selector and return a LI ♥ Mojolicious!
'); Return this node's content or replace it with HTML/XML fragment (for C123
" $dom->parse('Test
')->at('p')->content('123')->root; # "123
" $dom->parse('123
') ->descendant_nodes->grep(sub { $_->type eq 'comment' }) ->map('remove')->first; # "testtest
" $dom->parse('123456
') ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' }) ->map(content => 'test')->first->root; =head2 find my $collection = $dom->find('div ~ p'); Find all descendant elements of this element matching the CSS selector and return a LA
C')->at('p')->following_nodes->last->content; =head2 matches my $bool = $dom->matches('div ~ p'); Check if this element matches the CSS selector. All selectors from LA
')->at('p')->matches('.a'); $dom->parse('A
')->at('p')->matches('p[class]'); # False $dom->parse('A
')->at('p')->matches('.b'); $dom->parse('A
')->at('p')->matches('p[id]'); =head2 namespace my $namespace = $dom->namespace; Find this element's namespace, or return C123456
') ->at('b')->next_node->next_node; # " Test " $dom->parse('123456
') ->at('b')->next_node->content; =head2 parent my $parent = $dom->parent; Return LTest
')->at('i')->parent; =head2 parse $dom = $dom->parse('C
')->at('p')->preceding_nodes->first->content; =head2 prepend $dom = $dom->prepend('I ♥ Mojolicious!
'); Prepend HTML/XML fragment to this node (for all node types other than CTest 123
" $dom->parse('123
') ->at('p')->child_nodes->first->prepend('Test ')->root; =head2 prepend_content $dom = $dom->prepend_content('I ♥ Mojolicious!
'); Prepend HTML/XML fragment (for C123Test
" $dom->parse('Test
')->at('p')->prepend_content('123')->root; =head2 previous my $sibling = $dom->previous; Return L123456
') ->at('b')->previous_node->previous_node; # " Test " $dom->parse('123456
') ->at('b')->previous_node->content; =head2 remove my $parent = $dom->remove; Remove this node and return L"root"> (for C456
" $dom->parse('123456
') ->at('p')->child_nodes->first->remove->root; =head2 replace my $parent = $dom->replace('123
" $dom->parse('Test
') ->at('p')->child_nodes->[0]->replace('123')->root; =head2 root my $root = $dom->root; Return Lbar
bazbar
baz\nTest
')->type; # "tag" $dom->parse('Test
')->at('p')->type; # "text" $dom->parse('Test
')->at('p')->child_nodes->first->type; =head2 val my $value = $dom->val; Extract value from form element (such as C