package PPI::Token::Unknown; =pod =head1 NAME PPI::Token::Unknown - Token of unknown or as-yet undetermined type =head1 INHERITANCE PPI::Token::Unknown isa PPI::Token isa PPI::Element =head1 DESCRIPTION Object of the type C exist primarily inside the tokenizer, where they are temporarily brought into existing for a very short time to represent a token that could be one of a number of types. Generally, they only exist for a character or two, after which they are resolved and converted into the correct type. For an object of this type to survive the parsing process is considered a major bug. Please report any C you encounter in a L object as a bug. =cut use strict; use PPI::Token (); use PPI::Exception (); use vars qw{$VERSION @ISA $CURLY_SYMBOL}; BEGIN { $VERSION = '1.236'; @ISA = 'PPI::Token'; $CURLY_SYMBOL = qr{\G\^[[:upper:]_]\w+\}}; } ##################################################################### # Tokenizer Methods sub __TOKENIZER__on_char { my ( $self, $t ) = @_; # Self and Tokenizer my $c = $t->{token}->{content}; # Current token my $char = substr( $t->{line}, $t->{line_cursor}, 1 ); # Current character # Now, we split on the different values of the current content if ( $c eq '*' ) { # Is it a number? if ( $char =~ /\d/ ) { # bitwise operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } if ( $char =~ /[\w:]/ ) { # Symbol (unless the thing before it is a number my ( $prev ) = $t->_previous_significant_tokens(1); if ( not $prev or not $prev->isa('PPI::Token::Number') ) { $t->{class} = $t->{token}->set_class( 'Symbol' ); return 1; } } if ( $char eq '{' ) { # Get rest of line pos $t->{line} = $t->{line_cursor} + 1; if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) { # control-character symbol (e.g. *{^_Foo}) $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } } if ( $char eq '*' || $char eq '=' ) { # Power operator '**' or mult-assign '*=' $t->{class} = $t->{token}->set_class( 'Operator' ); return 1; } return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } elsif ( $c eq '$' ) { if ( $char =~ /[a-z_]/i ) { # Symbol $t->{class} = $t->{token}->set_class( 'Symbol' ); return 1; } if ( $PPI::Token::Magic::magic{ $c . $char } ) { # Magic variable $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } if ( $char eq '{' ) { # Get rest of line pos $t->{line} = $t->{line_cursor} + 1; if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) { # control-character symbol (e.g. ${^MATCH}) $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } } # Must be a cast $t->{class} = $t->{token}->set_class( 'Cast' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } elsif ( $c eq '@' ) { if ( $char =~ /[\w:]/ ) { # Symbol $t->{class} = $t->{token}->set_class( 'Symbol' ); return 1; } if ( $PPI::Token::Magic::magic{ $c . $char } ) { # Magic variable $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } if ( $char eq '{' ) { # Get rest of line pos $t->{line} = $t->{line_cursor} + 1; if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) { # control-character symbol (e.g. @{^_Foo}) $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } } # Must be a cast $t->{class} = $t->{token}->set_class( 'Cast' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } elsif ( $c eq '%' ) { # Is it a number? if ( $char =~ /\d/ ) { # bitwise operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } # Is it a magic variable? if ( $char eq '^' || $PPI::Token::Magic::magic{ $c . $char } ) { $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } if ( $char =~ /[\w:]/ ) { # Symbol (unless the thing before it is a number my ( $prev ) = $t->_previous_significant_tokens(1); if ( not $prev or not $prev->isa('PPI::Token::Number') ) { $t->{class} = $t->{token}->set_class( 'Symbol' ); return 1; } } if ( $char eq '{' ) { # Get rest of line pos $t->{line} = $t->{line_cursor} + 1; if ( $t->{line} =~ m/$CURLY_SYMBOL/gc ) { # control-character symbol (e.g. %{^_Foo}) $t->{class} = $t->{token}->set_class( 'Magic' ); return 1; } } return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); # Probably the mod operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->{class}->__TOKENIZER__on_char( $t ); } elsif ( $c eq '&' ) { # Is it a number? if ( $char =~ /\d/ ) { # bitwise operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } if ( $char =~ /[\w:]/ ) { # Symbol (unless the thing before it is a number my ( $prev ) = $t->_previous_significant_tokens(1); if ( not $prev or not $prev->isa('PPI::Token::Number') ) { $t->{class} = $t->{token}->set_class( 'Symbol' ); return 1; } } return $self->_as_cast_or_op($t) if $self->_is_cast_or_op($char); # Probably the binary and operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->{class}->__TOKENIZER__on_char( $t ); } elsif ( $c eq '-' ) { if ( $char =~ /\d/o ) { # Number $t->{class} = $t->{token}->set_class( 'Number' ); return 1; } if ( $char eq '.' ) { # Number::Float $t->{class} = $t->{token}->set_class( 'Number::Float' ); return 1; } if ( $char =~ /[a-zA-Z]/ ) { $t->{class} = $t->{token}->set_class( 'DashedWord' ); return 1; } # The numeric negative operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->{class}->__TOKENIZER__on_char( $t ); } elsif ( $c eq ':' ) { if ( $char eq ':' ) { # ::foo style bareword $t->{class} = $t->{token}->set_class( 'Word' ); return 1; } # Now, : acts very very differently in different contexts. # Mainly, we need to find out if this is a subroutine attribute. # We'll leave a hint in the token to indicate that, if it is. if ( $self->__TOKENIZER__is_an_attribute( $t ) ) { # This : is an attribute indicator $t->{class} = $t->{token}->set_class( 'Operator' ); $t->{token}->{_attribute} = 1; return $t->_finalize_token->__TOKENIZER__on_char( $t ); } # It MIGHT be a label, but it's probably the ?: trinary operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->{class}->__TOKENIZER__on_char( $t ); } # erm... PPI::Exception->throw('Unknown value in PPI::Token::Unknown token'); } sub _is_cast_or_op { my ( $self, $char ) = @_; return 1 if $char eq '$'; return 1 if $char eq '@'; return 1 if $char eq '%'; return 1 if $char eq '*'; return 1 if $char eq '{'; return; } sub _as_cast_or_op { my ( $self, $t ) = @_; my $class = _cast_or_op( $t ); $t->{class} = $t->{token}->set_class( $class ); return $t->_finalize_token->__TOKENIZER__on_char( $t ); } sub _prev_significant_w_cursor { my ( $tokens, $cursor, $extra_check ) = @_; while ( $cursor >= 0 ) { my $token = $tokens->[ $cursor-- ]; next if !$token->significant; next if $extra_check and !$extra_check->($token); return ( $token, $cursor ); } return ( undef, $cursor ); } # Operator/operand-sensitive, multiple or GLOB cast sub _cast_or_op { my ( $t ) = @_; my $tokens = $t->{tokens}; my $cursor = scalar( @$tokens ) - 1; my $token; ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor ); return 'Cast' if !$token; # token was first in the document if ( $token->isa( 'PPI::Token::Structure' ) and $token->content eq '}' ) { # Scan the token stream backwards an arbitrarily long way, # looking for the matching opening curly brace. my $structure_depth = 1; ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor, sub { my ( $token ) = @_; return if !$token->isa( 'PPI::Token::Structure' ); if ( $token eq '}' ) { $structure_depth++; return; } if ( $token eq '{' ) { $structure_depth--; return if $structure_depth; } return 1; } ); return 'Operator' if !$token; # no matching '{', probably an unbalanced '}' # Scan past any whitespace ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor ); return 'Operator' if !$token; # Document began with what must be a hash constructor. return 'Operator' if $token->isa( 'PPI::Token::Symbol' ); # subscript my %meth_or_subscript_end = map { $_ => 1 } qw@ -> } ] @; return 'Operator' if $meth_or_subscript_end{ $token->content }; # subscript my $content = $token->content; my $produces_or_wants_value = ( $token->isa( 'PPI::Token::Word' ) and ( $content eq 'do' or $content eq 'eval' ) ); return $produces_or_wants_value ? 'Operator' : 'Cast'; } my %list_start_or_term_end = map { $_ => 1 } qw@ ; ( { [ @; return 'Cast' if $token->isa( 'PPI::Token::Structure' ) and $list_start_or_term_end{ $token->content } or $token->isa( 'PPI::Token::Cast' ) or $token->isa( 'PPI::Token::Operator' ) or $token->isa( 'PPI::Token::Label' ); return 'Operator' if !$token->isa( 'PPI::Token::Word' ); ( $token, $cursor ) = _prev_significant_w_cursor( $tokens, $cursor ); return 'Cast' if !$token || $token->content ne '->'; return 'Operator'; } # Are we at a location where a ':' would indicate a subroutine attribute sub __TOKENIZER__is_an_attribute { my $t = $_[1]; # Tokenizer object my @tokens = $t->_previous_significant_tokens(3); my $p0 = $tokens[0]; return '' if not $p0; # If we just had another attribute, we are also an attribute return 1 if $p0->isa('PPI::Token::Attribute'); # If we just had a prototype, then we are an attribute return 1 if $p0->isa('PPI::Token::Prototype'); # Other than that, we would need to have had a bareword return '' unless $p0->isa('PPI::Token::Word'); # We could be an anonymous subroutine if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' ) { return 1; } # Or, we could be a named subroutine my $p1 = $tokens[1]; my $p2 = $tokens[2]; if ( $p1 and $p1->isa('PPI::Token::Word') and $p1->content eq 'sub' and ( not $p2 or $p2->isa('PPI::Token::Structure') or ( $p2->isa('PPI::Token::Whitespace') and $p2->content eq '' ) ) ) { return 1; } # We aren't an attribute ''; } 1; =pod =head1 SUPPORT See the L in the main module. =head1 AUTHOR Adam Kennedy Eadamk@cpan.orgE =head1 COPYRIGHT Copyright 2001 - 2011 Adam Kennedy. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. =cut