#!/usr/local/bin/perl $VERSION = "1.12"; # CGI code.pl # Version 1.11 # Part of "WWW Cyrillic Encoding Suite" # Get docs and newest version from # http://www.neystadt.org/cyrillic/ # # Copyright (c) 1997-98, John Neystadt # You may install this script on your web site for free # To obtain permision for redistribution or any other usage # contact john@neystadt.org. # # Drop me a line if you deploy this script on your site. # This script translates WEB pages from one Russian code to another. # Developed by Leonid Neishtadt (http://www.neystadt.org/leonid/) # e-mail: leonid@neystadt.org # # Currently the following codes are supported: # DOS (alternate) code page CP866 (dos) # Windows code page CP1251 (win). # UNIX code KOI8-r (koi8 or nocs for supressing charset Metatag), # ISO-8859-5 (iso), # Macintosh (mac), # Volapuk (transliteration) (vol) - only as output code. # AUTO - auto selection of output encoding according to platform where browser # runs (Windows, UNIX, MACINTOSH, OS/2) # # Usage: Copy this script into cgi-bin directory, # refer to it as ..../cgi-bin/code.pl/"tab"/"URL to be translated" # where "tab" is one of the above encodings or 'rus' for displaying menu # with available codes. # It is also can be coded as 'fromcode-tocode' for explicit definition of # the original file encoding. # "URL" is absolute URL from the server root (Don't forget to set $path). # or full URL like http://cnn.com. # All relative references from this page to other WEB pages will be also # translated through the same code table (isn't supported yet for full URLs). # # Source encoding is taken from Metatag like: # # The tag is changed during translation or deleted for 'vol' and 'nocs'. # If the tag is absent default encoding is taken from variable $defcode. # # It is recommended that you put on all # your pages, and choose only destination encoding in urls. Do no worry for # old buggy browsers which can't display correctly pages with this meta-tag # NOCS encoding converts page to koi8 and deletes the meta-tag # # READABLE URLS # ------------- # If you use APPACHE you can add the lines similar to those to your webserver # configuration files: # # ScriptAlias /koi8 /home/www/neystadt/cgi-bin/code.pl/koi8 # ScriptAlias /win /home/www/neystadt/cgi-bin/code.pl/win # ScriptAlias /dos /home/www/neystadt/cgi-bin/code.pl/dos # ScriptAlias /mac /home/www/neystadt/cgi-bin/code.pl/mac # ScriptAlias /iso /home/www/neystadt/cgi-bin/code.pl/iso # ScriptAlias /vol /home/www/neystadt/cgi-bin/code.pl/vol # ScriptAlias /lat /home/www/neystadt/cgi-bin/code.pl/vol # ScriptAlias /nocs /home/www/neystadt/cgi-bin/code.pl/nocs # # From now you will be able to translate urls like http://www.neystadt.org/russia/ # simply by prefixing the url with encoding: http://www.neystadt.org/koi8/russia/ # or http://www.neystadt.org/lat/russia/. # # Note that code.pl automatically finds index.html if directory names is given # (like in example above). The index file name can be changed by $IndexFileName # parameters in the script. =head1 NAME code.pl - CGI script to convert on-the-fly html pages across cyrillic charsets =cut use Convert::Cyrillic; use LWP::UserAgent; use HTTP::Headers::UserAgent; $path=".."; # <==== path from cgi-bin to the server root. $defcode="WIN"; # <==== default source encoding $maxsize=500000; # maximum file size $IndexFileName = 'index.html'; $UserAgent=$ENV{HTTP_USER_AGENT}; $scrname=$ENV{SCRIPT_NAME}; $file=$ENV{PATH_INFO}; $file=~s/^$scrname//; $file=~s/\+/ /go; $file=~s/%(..)/pack("c",hex($1))/ge; if ($file=~/[\.\/\\]([^\.\/\\]+)$/o) {$ext=lc($1);} else {$ext='html';} $file=~s%^\/([^\/]*)%%o; $lang=uc($+); if ($lang eq 'RUS') { print "Content-type: text/html\n

Select Russian encoding:

"; goto end; } if ($lang=~/(.*)-(.*)/o) { $charset=$1; $lang=$2; } if (!(',ISO,KOI8,KOI,DOS,WIN,VOL,MAC,NOCS,AUTO,' =~ /,$lang,/i)) { $err = "Unsupported code - $lang"; goto error; } $file =~ s|http:/([^/])|http://$1|oi; # Some vers of Ms-IIS merge '//' into '/' in Urls if ($file =~ s|^/(http://)|$1|oi) { $url=$ENV {'QUERY_STRING'}; if ($url) { $url= "?" . $url; } $url = $file . $url; my $ua = new LWP::UserAgent; $ua->agent("code.pl/1.2 " . $ua->agent); $ua->from ('leonid@neystadt.org'); my $req = new HTTP::Request (GET => $url); my $res = $ua->request ($req); if (!$res->is_success) { my $err = $res->error_as_HTML(); print <<"EOF"; Content-Type: text/html

Failure

Failed to retrive url: $url. Remote server returned the following reponse:
$err EOF goto end; } $type = $res->content_type; $buffer = $res->content; #neystadt::http_rtr::Http_Retrieve ($url, $buffer, $hdrs); #$hdrs=~/Content-Type: (.*)\n/io; $type = $1; } else { if ($file=~/cgi-bin/io) { $err = "Incorrect file name"; goto error; } $file = "$path$file"; if (-d $file) { $file = "$file/$IndexFileName"; $ext = 'htm'; } if (open In,"$file") { binmode In; read (In, $buffer, $maxsize); close In; } else { print "Content-type: text/html HTTP Error

Error: 404 Not Found


The requested URI $file does not exist.
"; goto end; } } if ($lang=~/auto/io){ $platform = HTTP::Headers::UserAgent::GetPlatform ($UserAgent); $lang='koi'; $lang='win' if $platform=~/WIN/io; $lang='mac' if $platform eq 'MAC'; $lang='koi' if $platform eq 'UNIX'; $lang='dos' if $platform eq 'OS2'; $lang='nocs' if $platform eq 'Linux'; } $newcharset = "koi8-r" if $lang=~/koi|nocs/io; $newcharset = "windows-1251" if $lang=~/win/io; $newcharset = "x-mac-cyrillic" if $lang=~/mac/io; $newcharset = "ibm866" if $lang=~/dos/io; $newcharset = "ISO-8859-5" if $lang=~/iso/io; if ($buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io) { $type=$1; $charset=$2 if !$charset; if ($lang=~/nocs|vol/io){ $buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io; } } else { $type="text/html" if $ext eq 'html' || $ext eq 'htm'; $type="text/plain" if $ext eq 'txt'; $type="image/gif" if $ext eq 'gif'; $type="image/jpeg" if $ext eq 'jpg' || $ext eq 'jpeg'; } $lang="koi8" if $lang=~/nocs/io; $type="text/html" if !$type; $slang=$defcode; $slang="KOI8" if $charset=~/koi/io; $slang="WIN" if $charset=~/1251/io; $slang="ISO" if $charset=~/iso/io; $slang="DOS" if $charset=~/alt/io; $slang="MAC" if $charset=~/mac/io; # translate the page $buffer = Convert::Cyrillic::cstocs ($slang,$lang,$buffer) if $type =~ /text/o; if ($hdrs) { binmode STDOUT; print $hdrs; } else { print("Content-type: $type\n\n"); binmode STDOUT; } print $buffer; goto end; error: ermsg($err); end:; sub ermsg { if (!$sw) {$sw=1; print "Content-type: text/plain\n\n";} print "@_[0]\n"; } __END__ =head1 DESCRIPTION See the comments on the top of the script. =head1 PREREQUISITES This script requires the C, C and C modules available from CPAN. =pod OSNAMES any =pod SCRIPT CATEGORIES CGI =cut