package Net::DNS::Domain;

#
# $Id: Domain.pm 1611 2018-01-02 09:41:24Z willem $
#
our $VERSION = (qw$LastChangedRevision: 1611 $)[1];


=head1 NAME

Net::DNS::Domain - DNS domains

=head1 SYNOPSIS

    use Net::DNS::Domain;

    $domain = new Net::DNS::Domain('example.com');
    $name   = $domain->name;

=head1 DESCRIPTION

The Net::DNS::Domain module implements a class of abstract DNS
domain objects with associated class and instance methods.

Each domain object instance represents a single DNS domain which
has a fixed identity throughout its lifetime.

Internally, the primary representation is a (possibly empty) list
of ASCII domain name labels, and optional link to an arbitrary
origin domain object topologically closer to the DNS root.

The computational expense of Unicode character-set conversion is
partially mitigated by use of caches.

=cut


use strict;
use warnings;
use integer;
use Carp;


# Platform capabilities are probed once, at compile time, so that the
# unsupported branches below are constant-folded away.
# The order matters: the ASCII probe loads Encode, which UTF8 relies on.
use constant ASCII => ref eval {
    require Encode;
    Encode::find_encoding('ascii');
};

use constant UTF8 => scalar eval {    ## not UTF-EBCDIC  [see UTR#16 3.6]
    Encode::encode_utf8( chr(182) ) eq pack( 'H*', 'C2B6' );
};

use constant LIBIDN  => defined eval 'require Net::LibIDN';
use constant LIBIDN2 => ref eval 'require Net::LibIDN2; Net::LibIDN2->can("idn2_to_ascii_8")';
use constant IDN2FLAG => eval 'Net::LibIDN2::IDN2_NFC_INPUT + Net::LibIDN2::IDN2_NONTRANSITIONAL';

# perlcc: address of encoding objects must be determined at runtime
my $ascii = ASCII ? Encode::find_encoding('ascii') : undef;    # Osborn's Law:
my $utf8  = UTF8  ? Encode::find_encoding('utf8')  : undef;    # Variables won't; constants aren't.


=head1 METHODS

=head2 new

    $object = new Net::DNS::Domain('example.com');

Creates a domain object which represents the DNS domain specified
by the character string argument. The argument consists of a
sequence of labels delimited by dots.

A character preceded by \ represents itself, without any special
interpretation.

Arbitrary 8-bit codes can be represented by \ followed by exactly
three decimal digits.
Character code points are ASCII, irrespective of the character
coding scheme employed by the underlying platform.

Argument string literals should be delimited by single quotes to
avoid escape sequences being interpreted as octal character codes
by the Perl compiler.

The character string presentation format follows the conventions
for zone files described in RFC1035.

Users should be aware that non-ASCII domain names will be transcoded
to NFC before encoding, which is an irreversible process.

=cut

my ( %escape, %unescape );    ## precalculated ASCII escape tables

our $ORIGIN;
my ( $cache1, $cache2, $limit ) = ( {}, {}, 100 );

sub new {
    my ( $class, $s ) = @_;
    croak 'domain identifier undefined' unless defined $s;

    # The key combines the argument, the class and the origin in effect,
    # so the same relative name under different origins is cached apart.
    my $k = join '', $s, $class, $ORIGIN || '';    # cache key
    my $cache = $$cache1{$k} ||= $$cache2{$k};     # two layer cache
    return $cache if defined $cache;

    # When the countdown expires the current cache becomes the second
    # layer and a fresh first layer is started.
    ( $cache1, $cache2, $limit ) = ( {}, $cache1, 500 ) unless $limit--;    # recycle cache

    my $self = bless {}, $class;

    $s =~ s/\\\\/\\092/g;    # disguise escaped escape
    $s =~ s/\\\./\\046/g;    # disguise escaped dot

    my $label = $self->{label} = ( $s eq '@' ) ? [] : [split /\056/, _encode_utf8($s)];

    foreach (@$label) {
        croak 'empty domain label' unless length;

        if ( LIBIDN2 && UTF8 && /[^\000-\177]/ ) {
            my $rc = 0;
            s/\134/\357\277\275/;    # disallow escapes
            $_ = Net::LibIDN2::idn2_to_ascii_8( $_, IDN2FLAG, $rc );
            croak Net::LibIDN2::idn2_strerror($rc) unless $_;
        }

        if ( !LIBIDN2 && LIBIDN && UTF8 && /[^\000-\177]/ ) {
            s/\134/\357\277\275/;    # disallow escapes
            $_ = Net::LibIDN::idn_to_ascii( $_, 'utf-8' );
            croak 'name contains disallowed character' unless $_;
        }

        s/\134([\060-\071]{3})/$unescape{$1}/eg;    # numeric escape
        s/\134(.)/$1/g;                             # character escape
        croak 'long domain label' if length > 63;
    }

    $$cache1{$k} = $self;    # cache object reference

    return $self if $s =~ /\.$/;    # fully qualified name
    $self->{origin} = $ORIGIN || return $self;    # dynamically scoped $ORIGIN
    return $self;
}


=head2 name

    $name = $domain->name;

Returns the domain name as a character string corresponding to the
"common interpretation" to which RFC1034, 3.1, paragraph 9 alludes.

Character escape sequences are used to represent a dot inside a
domain name label and the escape character itself.

Any non-printable code point is represented using the appropriate
numerical escape sequence.

=cut

sub name {
    my ($self) = @_;

    return $self->{name} if defined $self->{name};
    return unless defined wantarray;    # nothing to compute in void context

    # Everything except hyphen, letters and digits goes through the
    # escape table.
    my @label = map {
        s/([^\055\101-\132\141-\172\060-\071])/$escape{$1}/eg;
        $_
    } $self->_wire;

    return $self->{name} = '.' unless scalar @label;    # root domain
    return $self->{name} = _decode_ascii( join chr(46), @label );
}


=head2 fqdn

    @fqdn = $domain->fqdn;

Returns a character string containing the fully qualified domain
name, including the trailing dot.

=cut

sub fqdn {
    my $name = &name;    # reuses @_ to avoid building a new argument list
    return $name =~ /[.]$/ ? $name : $name . '.';    # append trailing dot
}


=head2 xname

    $xname = $domain->xname;

Interprets an extended name containing Unicode domain name labels
encoded as Punycode A-labels.

If decoding is not possible, the ACE encoded name is returned.

=cut

sub xname {
    my $name = &name;

    if ( LIBIDN2 && UTF8 && $name =~ /xn--/i ) {
        my $self = shift;
        return $self->{xname} if defined $self->{xname};
        my $u8 = Net::LibIDN2::idn2_to_unicode_88($name);
        return $self->{xname} = $u8 ? $utf8->decode($u8) : $name;
    }

    if ( !LIBIDN2 && LIBIDN && UTF8 && $name =~ /xn--/i ) {
        my $self = shift;
        return $self->{xname} if defined $self->{xname};

        # Fall back to the ACE name when conversion yields nothing,
        # exactly as the LibIDN2 branch above does.
        my $u8 = Net::LibIDN::idn_to_unicode( $name, 'utf-8' );
        return $self->{xname} = $u8 ? $utf8->decode($u8) : $name;
    }

    return $name;
}


=head2 label

    @label = $domain->label;

Identifies the domain by means of a list of domain labels.

=cut

sub label {
    return map {
        s/([^\055\101-\132\141-\172\060-\071])/$escape{$1}/eg;
        _decode_ascii($_)
    } shift->_wire;
}


# Returns the list of unescaped labels of this domain, followed by the
# labels of its origin (if any), obtained by recursion through the
# origin chain.
sub _wire {
    my $self = shift;

    my $label  = $self->{label};
    my $origin = $self->{origin} || return (@$label);
    return ( @$label, $origin->_wire );
}


=head2 string

    $string = $object->string;

Returns a character string containing the fully qualified domain
name as it appears in a zone file.

Characters which are recognised by RFC1035 zone file syntax are
represented by the appropriate escape sequence.

=cut

sub string {
    # /g is essential: every special character must be escaped,
    # not merely the first one encountered.
    ( my $name = &name ) =~ s/(["'\$();@])/\\$1/g;    # escape special char
    return $name =~ /[.]$/ ? $name : $name . '.';    # append trailing dot
}


=head2 origin

    $create = origin Net::DNS::Domain( $ORIGIN );
    $result = &$create( sub{ new Net::DNS::RR( 'mx MX 10 a' ); } );
    $expect = new Net::DNS::RR( "mx.$ORIGIN. MX 10 a.$ORIGIN." );

Class method which returns a reference to a subroutine wrapper
which executes a given constructor in a dynamically scoped context
where relative names become descendants of the specified $ORIGIN.

=cut

# Wrapper returned when no origin is specified: runs the constructor
# without altering $ORIGIN.
my $placebo = sub { my $constructor = shift; &$constructor; };

sub origin {
    my ( $class, $name ) = @_;
    return $placebo unless defined $name;

    my $domain = Net::DNS::Domain->new($name);

    return sub {    # closure w.r.t. $domain
        my $constructor = shift;
        local $ORIGIN = $domain;    # dynamically scoped $ORIGIN
        &$constructor;              # remaining arguments are passed through
    }
}


########################################

sub _decode_ascii {    ## ASCII to perl internal encoding
    local $_ = shift;

    # partial transliteration for non-ASCII character encodings
    tr
        [\040-\176\000-\377]
        [ !"#$%&'()*+,\-./0-9:;<=>?@A-Z\[\\\]^_`a-z{|}~?] unless ASCII;

    my $z = length($_) - length($_);    # pre-5.18 taint workaround
    return ASCII ? substr( $ascii->decode($_), $z ) : $_;
}


sub _encode_utf8 {    ## perl internal encoding to UTF8
    local $_ = shift;

    # partial transliteration for non-ASCII character encodings
    tr
        [ !"#$%&'()*+,\-./0-9:;<=>?@A-Z\[\\\]^_`a-z{|}~\000-\377]
        [\040-\176\077] unless ASCII;

    my $z = length($_) - length($_);    # pre-5.18 taint workaround
    return ASCII ? substr( ( UTF8 ? $utf8 : $ascii )->encode($_), $z ) : $_;
}


%escape = eval {    ## precalculated ASCII escape table
    my %table;

    foreach ( 33 .. 126 ) {    # ASCII printable
        $table{pack( 'C', $_ )} = pack 'C', $_;
    }

    # minimal character escapes
    foreach ( 46, 92 ) {    # \. \\
        $table{pack( 'C', $_ )} = pack 'C*', 92, $_;
    }

    foreach my $n ( 0 .. 32, 127 .. 255 ) {    # \ddd
        my $codepoint = sprintf( '%03u', $n );

        # partial transliteration for non-ASCII character encodings
        $codepoint =~ tr [0-9] [\060-\071];

        $table{pack( 'C', $n )} = pack 'C a3', 92, $codepoint;
    }

    return %table;
};


%unescape = eval {    ## precalculated numeric escape table
    my %table;

    foreach my $n ( 0 .. 255 ) {
        my $key = sprintf( '%03u', $n );

        # partial transliteration for non-ASCII character encodings
        $key =~ tr [0-9] [\060-\071];

        $table{$key} = pack 'C', $n;
        $table{$key} = pack 'C2', 92, $n if $n == 92;    # escaped escape
    }

    return %table;
};


1;
__END__


########################################

=head1 BUGS

Coding strategy is intended to avoid creating unnecessary argument
lists and stack frames. This improves efficiency at the expense of
code readability.

Platform specific character coding features are conditionally
compiled into the code.

=head1 COPYRIGHT

Copyright (c)2009-2011,2017 Dick Franks.

All rights reserved.


=head1 LICENSE

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted, provided
that the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting
documentation, and that the name of the author not be used in advertising
or publicity pertaining to distribution of the software without specific
prior written permission.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.


=head1 SEE ALSO

L<perl>, L<Net::DNS>, L<Net::LibIDN2>,
RFC1034, RFC1035, RFC5891, Unicode Technical Report #16

=cut