#!/usr/local/bin/perl
$VERSION = "1.12";
# CGI code.pl
# Version 1.11
# Part of "WWW Cyrillic Encoding Suite"
# Get docs and newest version from
# http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt
# You may install this script on your web site for free
# To obtain permision for redistribution or any other usage
# contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.
# This script translates WEB pages from one Russian code to another.
# Developed by Leonid Neishtadt (http://www.neystadt.org/leonid/)
# e-mail: leonid@neystadt.org
#
# Currently the following codes are supported:
# DOS (alternate) code page CP866 (dos)
# Windows code page CP1251 (win).
# UNIX code KOI8-r (koi8 or nocs for supressing charset Metatag),
# ISO-8859-5 (iso),
# Macintosh (mac),
# Volapuk (transliteration) (vol) - only as output code.
# AUTO - auto selection of output encoding according to platform where browser
# runs (Windows, UNIX, MACINTOSH, OS/2)
#
# Usage: Copy this script into cgi-bin directory,
# refer to it as ..../cgi-bin/code.pl/"tab"/"URL to be translated"
# where "tab" is one of the above encodings or 'rus' for displaying menu
# with available codes.
# It is also can be coded as 'fromcode-tocode' for explicit definition of
# the original file encoding.
# "URL" is absolute URL from the server root (Don't forget to set $path).
# or full URL like http://cnn.com.
# All relative references from this page to other WEB pages will be also
# translated through the same code table (isn't supported yet for full URLs).
#
# Source encoding is taken from Metatag like:
#
# The tag is changed during translation or deleted for 'vol' and 'nocs'.
# If the tag is absent default encoding is taken from variable $defcode.
#
# It is recommended that you put on all
# your pages, and choose only destination encoding in urls. Do no worry for
# old buggy browsers which can't display correctly pages with this meta-tag
# NOCS encoding converts page to koi8 and deletes the meta-tag
#
# READABLE URLS
# -------------
# If you use APPACHE you can add the lines similar to those to your webserver
# configuration files:
#
# ScriptAlias /koi8 /home/www/neystadt/cgi-bin/code.pl/koi8
# ScriptAlias /win /home/www/neystadt/cgi-bin/code.pl/win
# ScriptAlias /dos /home/www/neystadt/cgi-bin/code.pl/dos
# ScriptAlias /mac /home/www/neystadt/cgi-bin/code.pl/mac
# ScriptAlias /iso /home/www/neystadt/cgi-bin/code.pl/iso
# ScriptAlias /vol /home/www/neystadt/cgi-bin/code.pl/vol
# ScriptAlias /lat /home/www/neystadt/cgi-bin/code.pl/vol
# ScriptAlias /nocs /home/www/neystadt/cgi-bin/code.pl/nocs
#
# From now you will be able to translate urls like http://www.neystadt.org/russia/
# simply by prefixing the url with encoding: http://www.neystadt.org/koi8/russia/
# or http://www.neystadt.org/lat/russia/.
#
# Note that code.pl automatically finds index.html if directory names is given
# (like in example above). The index file name can be changed by $IndexFileName
# parameters in the script.
=head1 NAME
code.pl - CGI script to convert on-the-fly html pages across cyrillic charsets
=cut
use Convert::Cyrillic;
use LWP::UserAgent;
use HTTP::Headers::UserAgent;
$path=".."; # <==== path from cgi-bin to the server root.
$defcode="WIN"; # <==== default source encoding
$maxsize=500000; # maximum file size
$IndexFileName = 'index.html';
$UserAgent=$ENV{HTTP_USER_AGENT};
$scrname=$ENV{SCRIPT_NAME};
$file=$ENV{PATH_INFO};
$file=~s/^$scrname//;
$file=~s/\+/ /go;
$file=~s/%(..)/pack("c",hex($1))/ge;
if ($file=~/[\.\/\\]([^\.\/\\]+)$/o) {$ext=lc($1);} else {$ext='html';}
$file=~s%^\/([^\/]*)%%o;
$lang=uc($+);
if ($lang eq 'RUS') {
print "Content-type: text/html\n
Select Russian encoding:
";
goto end;
}
if ($lang=~/(.*)-(.*)/o) { $charset=$1; $lang=$2; }
if (!(',ISO,KOI8,KOI,DOS,WIN,VOL,MAC,NOCS,AUTO,' =~ /,$lang,/i)) {
$err = "Unsupported code - $lang";
goto error;
}
$file =~ s|http:/([^/])|http://$1|oi; # Some vers of Ms-IIS merge '//' into '/' in Urls
if ($file =~ s|^/(http://)|$1|oi) {
$url=$ENV {'QUERY_STRING'};
if ($url) { $url= "?" . $url; }
$url = $file . $url;
my $ua = new LWP::UserAgent;
$ua->agent("code.pl/1.2 " . $ua->agent);
$ua->from ('leonid@neystadt.org');
my $req = new HTTP::Request (GET => $url);
my $res = $ua->request ($req);
if (!$res->is_success) {
my $err = $res->error_as_HTML();
print <<"EOF";
Content-Type: text/html
Failure
Failed to retrive url: $url.
Remote server returned the following reponse:
$err
EOF
goto end;
}
$type = $res->content_type;
$buffer = $res->content;
#neystadt::http_rtr::Http_Retrieve ($url, $buffer, $hdrs);
#$hdrs=~/Content-Type: (.*)\n/io; $type = $1;
} else {
if ($file=~/cgi-bin/io) {
$err = "Incorrect file name";
goto error;
}
$file = "$path$file";
if (-d $file) {
$file = "$file/$IndexFileName";
$ext = 'htm';
}
if (open In,"$file") {
binmode In; read (In, $buffer, $maxsize); close In;
} else {
print "Content-type: text/html
HTTP ErrorError: 404 Not Found
The requested URI $file does not exist.
";
goto end;
}
}
if ($lang=~/auto/io){
$platform = HTTP::Headers::UserAgent::GetPlatform ($UserAgent);
$lang='koi';
$lang='win' if $platform=~/WIN/io;
$lang='mac' if $platform eq 'MAC';
$lang='koi' if $platform eq 'UNIX';
$lang='dos' if $platform eq 'OS2';
$lang='nocs' if $platform eq 'Linux';
}
$newcharset = "koi8-r" if $lang=~/koi|nocs/io;
$newcharset = "windows-1251" if $lang=~/win/io;
$newcharset = "x-mac-cyrillic" if $lang=~/mac/io;
$newcharset = "ibm866" if $lang=~/dos/io;
$newcharset = "ISO-8859-5" if $lang=~/iso/io;
if ($buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io) {
$type=$1; $charset=$2 if !$charset;
if ($lang=~/nocs|vol/io){
$buffer=~s/<\s*META\s+HTTP-EQUIV\s*=\s*"?Content-Type"?\s+CONTENT\s*=\s*"?(.*);\s+charset\s*=\s*(.*)"?\s*>//io;
}
}
else {
$type="text/html" if $ext eq 'html' || $ext eq 'htm';
$type="text/plain" if $ext eq 'txt';
$type="image/gif" if $ext eq 'gif';
$type="image/jpeg" if $ext eq 'jpg' || $ext eq 'jpeg';
}
$lang="koi8" if $lang=~/nocs/io;
$type="text/html" if !$type;
$slang=$defcode;
$slang="KOI8" if $charset=~/koi/io;
$slang="WIN" if $charset=~/1251/io;
$slang="ISO" if $charset=~/iso/io;
$slang="DOS" if $charset=~/alt/io;
$slang="MAC" if $charset=~/mac/io;
# translate the page
$buffer = Convert::Cyrillic::cstocs ($slang,$lang,$buffer)
if $type =~ /text/o;
if ($hdrs) {
binmode STDOUT;
print $hdrs;
} else {
print("Content-type: $type\n\n");
binmode STDOUT;
}
print $buffer;
goto end;
error:
ermsg($err);
end:;
sub ermsg {
if (!$sw) {$sw=1; print "Content-type: text/plain\n\n";}
print "@_[0]\n";
}
__END__
=head1 DESCRIPTION
See the comments on the top of the script.
=head1 PREREQUISITES
This script requires the C, C and C
modules available from CPAN.
=pod OSNAMES
any
=pod SCRIPT CATEGORIES
CGI
=cut