package SiteConvert; #============================================================================== # SiteConvert.pm -- Japanese word conversion module -- # (Documentaion is in the end of file.) #============================================================================== #============================================================================== use 5.005; # This module requires more than 5.005 version use CGI qw(:standard); use IO::Socket; use strict; #============================================================================== require Exporter; use vars qw($VERSION @ISA @EXPORT); @ISA = qw(Exporter); @EXPORT = qw(); $VERSION = '1.0'; #== Configuration =========================================================== use constant MAX_CONTENT_LENGTH => 1024 * 256; # max content-length (byte) use constant UserAgent => "Mogula/99.0"; # user agent name use constant ExcludeTag => 0; # The outside of a tag is # changed in conversion. #============================================================================== use constant CRLF => "\x0D\x0A"; use constant DEBUG => 0; #============================================================================== # CONSTRUCTOR #============================================================================== sub new{ my $class = shift; my $self = {}; bless($self,$class); $self->_init(@_); return $self; } #============================================================================== sub _init{ my $self = shift; my %hash = @_; $self->{Jcode}->{convert} = $hash{Jcode_Conv}; $self->{Jcode}->{getcode} = $hash{Jcode_Get}; $self->{Print} = $hash{Print}; # Print flag $self->{status} = 0; # status code of response $self->{CGI} = CGI->new(); # holds instance of CGI.pm $self->{script_url} = $self->{CGI}->url(); # called CGI path & file name $self->{script_name} = $self->{CGI}->script_name(); # set error process $self->{Error} = $hash{Error} || \&_disp_error; print $self->{CGI}->header(-type=>'text/html; charset=SHIFT_JIS'); $self->set_convert_code( $hash{ConvertCode} ); } #============================================================================== #============================================================================== # METHODs #============================================================================== #============================================================================== # HTTP CONNECTION #============================================================================== sub set_path{ my $self = shift; my $url = $_[0]; my ($domain,$port,$filepath); my $regexp = q{^http://[\w\d\-./:@?!~*'();&=+$,%#]+$}; if($url !~ /$regexp/o){ $self->error("$url is invalid URL."); } $self->{Original} = $url; ($self->{Domain},$self->{Filepath},$self->{Port}) = $self->_get_domain($url); } #------------------------------------------------------------------------------ sub _get_domain{ my $self = shift; my $url = $_[0]; my ($domain,$port,$filepath); if( $url =~ m{^http://(.*?)/(.*)} ){ ($domain,$filepath) = ($1,$2); $port = 80; $port = $1 if($domain =~ s/:(\d+)//); } else{ $self->error("$url is invalid URL."); } $filepath = '/' . $filepath; return ($domain,$filepath,$port); } #------------------------------------------------------------------------------ sub auto_connect{ my $self = shift; $self->error("Bad Request") if( $self->{Filepath} =~ /$self->{script_name}/ ); $self->{socket} = $self->_connect($self->{Domain},$self->{Port}) or $self->error("Can't Connect $self->{Domain}:$self->{Port}"); $self->_request( $self->{socket},$self->{Domain},$self->{Port},$self->{Filepath}); $self->_check_response_header(); $self->_set_content(); return $self->{status}; } #------------------------------------------------------------------------------ sub _connect{ my $self = shift; my $host = $_[0]; my $port = $_[1] || 80; my $socket; $socket = IO::Socket::INET->new( PeerAddr => $host, PeerPort => $port, Proto => 'tcp' , Type => SOCK_STREAM , Timeout => 30 ); return $socket; } #------------------------------------------------------------------------------ sub _request{ my $self = shift; my $socket = $_[0]; my $host = $_[1]; my $port = $_[2]; my $file = $_[3]; my $agent = UserAgent; my $addr = $self->{CGI}->remote_addr; my $request; $agent = $agent . ' (' . $addr . ')' if($addr); $request = "GET $file HTTP/1.1" . CRLF; $request .= "Host: $host:$port" . CRLF; $request .= "User-Agent: " . $agent . CRLF; $request .= "Accept-Language: ja" . CRLF; $request .= CRLF; print $socket $request; $socket->flush(); } #------------------------------------------------------------------------------ sub _check_response_header{ my $self = shift; my $socket = $self->{socket}; my $status = 500; my ($name,$value); my %feild; while(<$socket>){ $status = $1 if( m{^HTTP/\d+.\d+\s(\d{3})\s} ); if( /([\-\w]+): (.*)\x0D\x0A/ ){ ($name,$value) = ($1,$2); chomp($value); $feild{lc($name)} = $value; } last if($_ eq CRLF); } if($status >= 300 and $status < 400){ $self->{Redirection} = $feild{location}; } $self->{Header} = \%feild; return $self->{status} = $status; } #------------------------------------------------------------------------------ sub _set_content{ my $self = shift; my $socket = $self->{socket}; my $feild = $self->{Header}; if( $feild->{'content-length'} and $feild->{'content-length'} > MAX_CONTENT_LENGTH ){ $self->error("Data is so large."); } if( lc($feild->{'transfer-encoding'}) eq 'chunked'){ my $chunk = ChunkDecode->new(); # Decode chunked data while(<$socket>){ $chunk->decode($_); $self->error("Data is so large.") if($chunk->get_length > MAX_CONTENT_LENGTH); } $self->{ContentBody} = $chunk->body; } else{ my $body; while(<$socket>){ $body .= $_; } $self->{ContentBody} = \$body; } } #------------------------------------------------------------------------------ sub disconnect{ my $self = shift; $self->{socket}->close() if(defined $self->{socket}); } #============================================================================== #============================================================================== # CONVERTING #============================================================================== sub set_convert_code{ my $self = shift; my $new = $_[0]; my $default = { # Default treatment of tags 'a' => \&add_href, 'area' => \&add_href, 'body' => \¬_add_bg, 'frame' => \&add_src, 'link' => \¬_add_href, 'img' => \¬_add_src, 'script' => \¬_add_src, '/body' => \&make_footer, 'Sanitize' => \&sanitize_line, # deletion process of a dangerous line }; %{ $self->{Html} } = %{ $default }; %{ $self->{Html} } = (%{ $self->{Html} },%{ $new }) if(ref $new eq 'HASH'); } #------------------------------------------------------------------------------ sub convert_page{ my $self = shift; if(defined $self->{ContentBody}){ $self->convert_process(${ $self->{ContentBody} }); } return 1; } #------------------------------------------------------------------------------ sub convert_process{ my $self = shift; local $_ = $_[0]; return if(!defined); $self->{Html}->{Sanitize}->($self,\$_); if($self->{Jcode}){ local $^W = 0; # Unavoidable disposal for evasion of jcode's wranning $self->{code} = $self->{Jcode}->{getcode}->(\$_); $self->{Jcode}->{convert}->(\$_,'euc',$self->{code}) if($self->{code} ne 'euc'); print "CODE : " . $self->{code} if(DEBUG); $self->translate(\$_); $self->{Jcode}->{convert}->(\$_,$self->{code},'euc') if($self->{code} ne 'euc'); } else{ $self->translate(\$_); } $self->_check_tag(\$_); if($self->{Print}){ print; } else{ $self->{ConvertedBody} = $_; } } #------------------------------------------------------------------------------ sub translate{ my $self = shift; my $str = $_[0]; $self->{RegExp}->($str) if($self->{RegExp}); } #------------------------------------------------------------------------------ sub _check_tag{ my $self = shift; my $ref = $_[0]; ${$ref} =~ s{<(\/?\w+)(\s+(.*?))?>}{ if(defined $self->{Html}->{lc($1)}){ $self->{Html}->{lc($1)}->($self,$1,$3); } elsif(!defined $2){ '<' . $1 . '>'; } else{ '<' . $1 . $2 . '>'; } }seg; } #------------------------------------------------------------------------------ sub link_convert{ my $self = shift; my ($url,$path,$domain) = @_; # url......link address # path.....cgi path $url =~ s{^\./}{}; # domain...cgi domain $path =~ s{[^/]*$}{}; while($url =~ s{\.\./}{}){ $path =~ s{[^/]*/$}{}; } $path = '/' . $path if($path !~ m{^/}); return $domain . $path . $url; } #------------------------------------------------------------------------------ sub get_converted{ my $self = shift; return $self->{ConvertedBody} } #============================================================================== #============================================================================== # REGEXP #============================================================================== sub set_regexp{ my $self = shift; my $patterns = $_[0]; my $regexp; # This regular expression is extensively based my $code; # on 'http://www.din.or.jp/~ohzaki/perl.htm my $ascii = '[\x00-\x7F]'; my $twoBytes = '[\x8E\xA1-\xFE][\xA1-\xFE]'; my $threeBytes = '\x8F[\xA1-\xFE][\xA1-\xFE]'; my $eucpre = qr{(?)[^<]*?)$eucpre(?:$_->[0])$eucpost} {\$1$_->[1]}xg;\n"; } else{ $code .= "s/$eucpre(?:$_->[0])$eucpost/$_->[1]/xgo;\n"; } } $regexp = eval "sub{ $code }"; if($@){ $self->error("set_regxep: $@") } if(DEBUG){ print "------- RegExp -------\n$code\n----------------------\n"; } return $self->{RegExp} = $regexp; } #============================================================================== #============================================================================== # ETC. #============================================================================== sub get_param{ my $self = shift; my $q = $self->{CGI}; my %param = (); for( $q->param ){ $param{$_} = $q->param($_); } return \%param; } #------------------------------------------------------------------------------ sub url_encode{ my $self = shift; local $_ = $_[0]; s{([^\.\*\-_a-zA-Z0-9 ])}{ '%' . unpack('H2', $1) }eg; s/ /+/g; return $_; } #------------------------------------------------------------------------------ sub sanitize_tag{ my $self = shift; my $str = $_[0]; $str =~ s/&/&/g; $str =~ s/</g; $str =~ s/>/>/g; $str =~ s/'/'/g; $str =~ s/"/"/g; return $str ; } #------------------------------------------------------------------------------ sub error{ my $self = shift; $self->{Error}->($self,@_); } #------------------------------------------------------------------------------ sub _disp_error{ my $self = shift; my $mes = $self->sanitize_tag($_[0]); print qq|\n
\n