|
#Pack (July 2005)
|
|
# Based on "Pack.js" by Dean Edwards <http://dean.edwards.name/>
|
|
# Ported to Perl by Rob Seiler, ELR Software Pty Ltd <http://www.elr.com.au>
|
|
# Copyright 2005. License <http://creativecommons.org/licenses/LGPL/2.1/>
|
|
|
|
package Pack;
|
|
use strict;
|
|
use Data::Dumper;
|
|
|
|
use ParseMaster;
|
|
|
|
# Package wide variable declarations
|
|
use vars qw/$VERSION $PM_VERSION
|
|
$_X_encodePrivate $_JSunpack $_JSdecode %baseLookup
|
|
$_X_encode10 $_X_encode36 $_X_encode62 $_X_encode95
|
|
$_JSencode10 $_JSencode36 $_JSencode62 $_JSencode95
|
|
@_X_parsers
|
|
$_X_script $_X_encoding $_X_fastDecode $_X_specialChars
|
|
/;
|
|
$VERSION = '024';
|
|
$PM_VERSION = $ParseMaster::VERSION;
|
|
|
|
# Package wide constants
|
|
my $X_IGNORE = q{$1};
|
|
my $X_ENCODE = q/\x24encode\(\x24count\)/; # NB: requires g modifier
|
|
my $PERL = 'perl'; # Flag to indicate whether we need to use one of our "internal" Perl encoding functions
|
|
my $JSCRIPT = 'jscript'; # or embed a pre-build JScript encoding function
|
|
########################################
|
|
|
|
##################
|
|
# pack($script, $encoding, $fastDecode, $specialChars)
# Public entry point. Copies its four arguments into package globals that
# the helper subs below read, normalises two of them, and clears the list
# of parsing passes. The passes themselves are registered and run at the
# end of this sub, after the helper definitions.
sub pack($$$$) { # require 4 arguments
|
|
##################
|
|
#print Dumper(@_);
|
|
($_X_script, $_X_encoding, $_X_fastDecode, $_X_specialChars) = @_;
|
|
# validate parameters (sort of!)
|
|
# The script always gets a trailing newline appended.
$_X_script .= "\n";
|
|
# Encoding levels above 95 are clamped to 95.
$_X_encoding = ($_X_encoding > 95) ? 95 : $_X_encoding;
|
|
|
|
@_X_parsers = (); # Reset parsers
|
|
|
|
####################
|
|
sub _X_pack($) { # require 1 argument
    # Apply every registered parsing routine in turn, handing each one the
    # output of the previous one, and return the final text.
    my ($X_text) = @_;

    # Walk the list by index and re-read its length on every iteration:
    # _X_bootStrap calls pack() again, and pack() empties and refills
    # @_X_parsers while this loop is still running.
    my $X_index = 0;
    while ($X_index < scalar(@_X_parsers)) {
        my $X_routine = $_X_parsers[$X_index];
        $X_text = $X_routine->($X_text);
        $X_index++;
    }

    return $X_text;
};
|
|
|
|
######################
|
|
sub _X_addParser { #
    # Append one parsing routine (a code reference) to the list of passes.
    # The passes are executed later, all at once, in the order they were added.
    my ($X_routine) = @_;
    push @_X_parsers, $X_routine;
}
|
|
|
|
#############################
|
|
sub _X_basicCompression { #
    # Zero encoding - just removal of white space and comments.
    # Takes the script text and returns whatever the ParseMaster run produces.
    my ($X_source) = @_;

    my $X_cruncher = ParseMaster->new();
    # make safe
    $X_cruncher->escapeChar("\\");

    # The rules are registered in exactly this order. A one-element entry
    # calls add() with the pattern only; a two-element entry also passes
    # the replacement text.
    my @X_rules = (
        # protect strings
        [q/'[^'\n\r]*'/, $X_IGNORE],
        [q/"[^"\n\r]*"/, $X_IGNORE],
        # remove comments
        [q/\/\/[^\n\r]*[\n\r]/],
        [q/\/\*[^*]*\*+([^\/][^*]*\*+)*\//],
        # protect regular expressions
        [q/\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)/, q{$2}], # IGNORE
        [q/[^\w\x24\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?/, $X_IGNORE],
        # remove: ;;; doSomething();  (only when special characters are enabled)
        ($_X_specialChars ? [q/;;[^\n\r]+[\n\r]/] : ()),
        # remove redundant semi-colons
        [q/;+\s*([};])/, q{$2}],
        # remove white-space
        [q/(\b|\x24)\s+(\b|\x24)/, q{$2 $3}],
        [q/([+\-])\s+([+\-])/, q{$2 $3}],
        [q/\s+/, ''],
    );
    foreach my $X_rule (@X_rules) {
        $X_cruncher->add(@{$X_rule});
    }

    # done
    return $X_cruncher->exec($X_source);
}
|
|
|
|
###############################
|
|
sub _X_encodeSpecialChars { #
    # Shorten "$"-prefixed names and replace "_"-prefixed names by short codes.
    my ($X_source) = @_;
    my $X_shrinker = ParseMaster->new();

    # $-prefixed names: the callback receives the match groups followed by
    # the group offset as its last argument. For "$name" it returns "n",
    # for "$$name" it returns "na"; trailing digits are kept.
    $X_shrinker->add(q/((\x24+)([a-zA-Z\x24_]+))(\d*)/,
        sub {
            my $X_base = pop @_;
            my ($X_whole, $X_dollars, $X_name, $X_digits) = @_[$X_base + 1 .. $X_base + 4];
            my $X_dollarCount = length($X_dollars);
            my $X_nameLength = length($X_name);
            # start at the smaller of the two lengths
            my $X_from = ($X_dollarCount > $X_nameLength) ? $X_nameLength : $X_dollarCount;
            return substr($X_whole, $X_from, $X_dollarCount) . $X_digits;
        });

    # replace: _name -> _0, double-underscore (__name) is ignored
    my $X_privatePattern = q/\b_[A-Za-z\d]\w*/;
    # build the word list
    my %X_words = _X_analyze($X_source, $X_privatePattern, $_X_encodePrivate);
    # quick ref, eg _private1 => '_0', _private2 => '_1'
    my $X_codeFor = $X_words{X_encoded};
    $X_shrinker->add($X_privatePattern,
        sub {
            my $X_base = pop @_;
            return $X_codeFor->{$_[$X_base]};
        });

    return $X_shrinker->exec($X_source);
};
|
|
|
|
###########################
|
|
sub _X_encodeKeywords { #
    # Replace every word of the script by its code and wrap the result in
    # the decoding boot function.
    my ($X_source) = @_;
    my $X_highAscii = ($_X_encoding > 62);

    # escape high-ascii values already in the script (i.e. in strings)
    $X_source = _X_escape95($X_source) if $X_highAscii;

    # create the parser
    my $X_coder = ParseMaster->new();
    my $X_toCode = _X_getEncoder($_X_encoding, $PERL);

    # for high-ascii, don't encode single character low-ascii
    my $X_wordPattern = $X_highAscii ? q/\w\w+/ : q/\w+/;

    # build the word list
    my %X_words = _X_analyze($X_source, $X_wordPattern, $X_toCode);
    # quick ref, eg alert => 2, function => 10 etc
    my $X_codeFor = $X_words{X_encoded};

    # encode
    $X_coder->add($X_wordPattern,
        sub {
            my $X_base = pop @_;
            return $X_codeFor->{$_[$X_base]};
        });

    # a false script is handed back untouched; anything else is encoded
    # and wrapped in the decoding function
    return $X_source unless $X_source;
    return _X_bootStrap(\$X_coder->exec($X_source), \%X_words);
}
|
|
|
|
####################
|
|
sub _X_analyze { #
    # Build the word tables for a script.
    #   $X_script - text to scan
    #   $X_regexp - pattern (as a string) that selects the words
    #   $X_encode - code reference turning a word index into its code
    # Returns a flat list of three key => hash-reference pairs:
    #   X_sorted    index => word
    #   X_encoded   word  => code
    #   X_protected index => 1 for words that are themselves a code in use
    my ($X_script, $X_regexp, $X_encode) = @_;

    my %XX_sorted    = (); # list of words sorted by frequency
    my %XX_encoded   = (); # dictionary of word->encoding
    my %XX_protected = (); # instances of "protected" words

    # retrieve all words in the script (list context keeps every match)
    my @X_words = ($X_script =~ m/$X_regexp/g);

    if (@X_words) {
        my @X_unique = (); # distinct words in order of discovery, each prefixed with '$'
        my %X_slotOf = (); # '$' . code => index that owns this code
        my %X_codeAt = (); # index => code
        my %X_tally  = (); # '$' . word => [usage count, position in @X_words]

        # count the occurrences, scanning from the last match to the first;
        # the stored position is the secondary sort key used below
        foreach my $X_position (reverse 0 .. $#X_words) {
            my $X_key = '$' . $X_words[$X_position];
            if (!exists $X_tally{$X_key}) {
                my $X_slot = scalar(@X_unique);
                $X_tally{$X_key} = [0, $X_position];
                push @X_unique, $X_key;
                # remember the code of this index so that words which look
                # like a code can be recognised afterwards
                $X_codeAt{$X_slot} = $X_encode->($X_slot);
                $X_slotOf{'$' . $X_codeAt{$X_slot}} = $X_slot;
            }
            # increment the word counter
            $X_tally{$X_key}[0]++;
        }

        # Before sorting, protect the words that are also used as codes:
        # such a word is stored at the index whose code equals the word
        # itself (e.g. if "do" falls within the encoding range it is kept
        # as "do"), which avoids problems when decoding. Its count drops to
        # zero so that it sorts to the end.
        foreach my $X_key (reverse @X_unique) {
            next unless exists $X_slotOf{$X_key};
            my $X_slot = $X_slotOf{$X_key};
            $XX_sorted{$X_slot}    = substr($X_key, 1);
            $XX_protected{$X_slot} = 1; # true
            $X_tally{$X_key}[0]    = 0;
        }

        # sort the words by frequency, with the stored position as
        # secondary key
        my @X_ranked = sort {
            ($X_tally{$b}[0] - $X_tally{$a}[0]) or ($X_tally{$b}[1] <=> $X_tally{$a}[1])
        } @X_unique;

        # because there are "protected" words in the list
        # we must add the sorted words around them
        my $X_next = 0;
        foreach my $X_slot (0 .. $#X_ranked) {
            if (!exists $XX_sorted{$X_slot}) {
                $XX_sorted{$X_slot} = substr($X_ranked[$X_next++], 1);
            }
            $XX_encoded{$XX_sorted{$X_slot}} = $X_codeAt{$X_slot};
        }
    }

    return (X_sorted => \%XX_sorted, X_encoded => \%XX_encoded, X_protected => \%XX_protected);
}
|
|
|
|
######################
|
|
sub _X_bootStrap { #
    # Build the boot function used for loading and decoding.
    #   first argument  - reference to the packed script text
    #   second argument - reference to the hash returned by _X_analyze
    # Returns the text "eval(<packed boot function>(<arguments>))\n".
    my ($X_packedRef, $X_wordsRef) = @_; # Reference arguments!
    my $X_highAscii = ($_X_encoding > 62);

    # packed: the packed script - dereferenced, escaped and single-quoted
    my $X_quoted = "'" . &_X_escape(${$X_packedRef}) . "'";

    # turn the index => word hash into a plain list
    my %X_wordAt      = %{ $X_wordsRef->{X_sorted} };
    my %X_isProtected = %{ $X_wordsRef->{X_protected} };
    my @X_list = ();
    foreach my $X_slot (keys %X_wordAt) {
        $X_list[$X_slot] = $X_wordAt{$X_slot};
    }

    # count: number of (unique) words contained in the script
    my $X_count = scalar(@X_list);

    # ascii: base for encoding - the smaller of the word count and the
    # requested encoding, but never 0
    my $X_ascii = ($X_count > $_X_encoding) ? $_X_encoding : $X_count;
    $X_ascii ||= 1;

    # keywords: list of words contained in the script, protected words
    # blanked out, written as a JScript string->array expression
    foreach my $X_slot (keys %X_isProtected) {
        $X_list[$X_slot] = '';
    }
    my $X_wordList = "'" . join('|', @X_list) . "'.split('|')";

    # encode: encoding function (used for decoding the script); this is a
    # JScript function held as a string
    my $X_encode;
    if ($X_highAscii) {
        $X_encode = $_JSencode95;
    }
    else {
        $X_encode = &_X_getEncoder($X_ascii, $JSCRIPT);
    }
    $X_encode =~ s/_encoding/\x24ascii/g;
    $X_encode =~ s/arguments\.callee/\x24encode/g;

    my $X_inline = '$count';
    $X_inline .= '.toString($ascii)' if $X_ascii > 10;

    # boot function
    my $X_unpack = $_JSunpack;

    # decode: code snippet to speed up decoding
    if ($_X_fastDecode) {
        # create the decoder, ie the body of the Javascript literal function
        my $X_decode = &_X_getFunctionBody($_JSdecode);
        if ($X_highAscii) {
            $X_decode =~ s/\\\\w/[\\xa1-\\xff]/g;
        }
        elsif ($X_ascii < 36) {
            # perform the encoding inline for lower ascii values
            $X_decode =~ s/$X_ENCODE/$X_inline/g;
        }
        # special case: when $X_count is 0 there are no keywords; the basic
        # shape of the unpacking function is kept by patching the counter
        if (!$X_count) {
            $X_decode =~ s/(\x24count)\s*=\s*1/$1=0/;
        }
        # insert the decoder
        $X_unpack =~ s/\{/\{$X_decode;/;
    }

    $X_unpack =~ s/"/'/g;

    if ($X_highAscii) {
        # get rid of the word-boundaries for regexp matches
        $X_unpack =~ s/'\\\\b'\s*\+|\+\s*'\\\\b'//g; # Not checked! {RS}
    }

    my $X_needsEncoder = ($X_ascii > 36 || $X_highAscii || $_X_fastDecode);
    if ($X_needsEncoder) {
        # insert the encode function
        $X_unpack =~ s/\{/\{\$encode=$X_encode;/;
    }
    else {
        # perform the encoding inline
        $X_unpack =~ s/$X_ENCODE/$X_inline/;
    }

    # arguments {RS}: built before calling &pack, because &pack overwrites
    # the package-wide pack parameters (eg the fast-decode flag)
    my $X_params = join(',', $X_quoted, $X_ascii, $X_count, $X_wordList);
    if ($_X_fastDecode) {
        # insert placeholders for the decoder
        $X_params .= ',0,{}';
    }

    # pack the boot function too; the & form is required so that this
    # module's pack is called rather than the Perl built-in of that name
    $X_unpack = &pack($X_unpack, 0, 0, 1);

    # the whole thing
    return "eval(" . $X_unpack . "(" . $X_params . "))\n";
};
|
|
|
|
#######################
|
|
sub _X_getEncoder { #
    # Pick the encoder for a base: base95 above 62, base62 above 36,
    # base36 above 10, base10 otherwise. The second argument selects the
    # JScript text when it equals $JSCRIPT, the Perl encoder in every
    # other case.
    my ($X_ascii, $language) = @_;
    my $X_wantJScript = ($language eq $JSCRIPT);

    if ($X_ascii > 62) {
        return $X_wantJScript ? $_JSencode95 : $_X_encode95;
    }
    if ($X_ascii > 36) {
        return $X_wantJScript ? $_JSencode62 : $_X_encode62;
    }
    if ($X_ascii > 10) {
        return $X_wantJScript ? $_JSencode36 : $_X_encode36;
    }
    return $X_wantJScript ? $_JSencode10 : $_X_encode10;
};
|
|
|
|
#############################
|
|
# Perl versions of encoders #
|
|
#############################
|
|
# base10 zero encoding - characters: 0123456789
$_X_encode10 = sub { my ($X_number) = @_; return &_encodeBase($X_number, 10) };
# base36 - characters: 0123456789abcdefghijklmnopqrstuvwxyz
$_X_encode36 = sub { my ($X_number) = @_; return &_encodeBase($X_number, 36) };
# base62 - characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
$_X_encode62 = sub { my ($X_number) = @_; return &_encodeBase($X_number, 62) };
# high-ascii values - characters: the 95 characters chr(161) .. chr(255)
$_X_encode95 = sub { my ($X_number) = @_; return &_encodeBase($X_number, 95) };

# Lookup character sets for baseN encoding
my @X_decimalDigits = (0 .. 9);
my @X_lowerLetters  = ('a' .. 'z');
my @X_upperLetters  = ('A' .. 'Z');
$baseLookup{10} = [@X_decimalDigits];                                   # base 10
$baseLookup{36} = [@X_decimalDigits, @X_lowerLetters];                  # base 36
$baseLookup{62} = [@X_decimalDigits, @X_lowerLetters, @X_upperLetters]; # base 62
$baseLookup{95} = [map { chr($_ + 161) } 0 .. 94];                      # base95 (high ascii)
|
|
#print Dumper(%baseLookup);
|
|
#####################
|
|
sub _encodeBase { #
    # Generic base conversion using the %baseLookup digit tables (perl
    # version only). Takes the number and the base; returns the number
    # written with the digits of that base.
    my ($X_number, $X_radix) = @_;

    # unknown base: hand the number back unchanged, as a string
    return "$X_number" unless exists $baseLookup{$X_radix};

    my $X_digits = $baseLookup{$X_radix};
    return $X_digits->[0] if $X_number == 0;

    # peel off the least significant digit until nothing is left
    my $X_text = '';
    while ($X_number > 0) {
        $X_text   = $X_digits->[$X_number % $X_radix] . $X_text;
        $X_number = int($X_number / $X_radix);
    }
    return $X_text;
};
|
|
|
|
#############################
|
|
$_X_encodePrivate = sub { #
    # special _chars: the code for index N is "_N"
    my ($X_index) = @_;
    return "_$X_index";
};
|
|
|
|
############################
|
|
sub _X_escape { #
    # Protect characters used by the parser: put a backslash in front of
    # every backslash and every single quote, so that the text can be
    # placed inside a single-quoted JScript string literal.
    # Takes the text, returns the escaped copy.
    #
    # Declared without a prototype: the former "($script)" was not a valid
    # prototype string and only worked because callers used the & form.
    my ($X_script) = @_;
    $X_script =~ s/([\\'])/\\$1/g;
    return $X_script;
};
|
|
|
|
#####################
|
|
sub _X_escape95 { #
    # Protect high-ascii characters already in the script: every character
    # in the range \xa1-\xff is replaced by a backslash, "x" and its hex code.
    my ($X_text) = @_;
    $X_text =~ s{([\xa1-\xff])}{ sprintf('\\x%1x', ord($1)) }eg;
    return $X_text;
};
|
|
|
|
############################
|
|
sub _X_getFunctionBody { #
    # Extract the body of a function, ie the text between the first "{"
    # and the last "}" - consistent with Dean Edwards approach.
    # Takes the function source as text and returns the body without the
    # enclosing braces. Returns an empty string when the text has no such
    # pair of braces.
    my ($X_function) = @_;

    my $X_open  = index($X_function, '{');
    my $X_close = rindex($X_function, '}');

    # no opening brace, or no closing brace after it: there is no body
    return '' if $X_open < 0 || $X_close < $X_open;

    return substr($X_function, $X_open + 1, $X_close - $X_open - 1);
};
|
|
|
|
######################
|
|
sub _X_globalize { #
    # In the JavaScript original: set the global flag on a RegExp (you have
    # to create a new one). Unused in the perl version, so this is an empty
    # placeholder that ignores its argument.
    return;
};
|
|
|
|
# build the parsing routine
|
|
# Basic compression always runs; each of the other two passes is added
# only when the matching pack() argument is true.
&_X_addParser(\&_X_basicCompression);
|
|
&_X_addParser(\&_X_encodeSpecialChars) if ($_X_specialChars);
|
|
&_X_addParser(\&_X_encodeKeywords) if ($_X_encoding);
|
|
|
|
# go!
|
|
# Run the registered passes over the script and return the result.
return &_X_pack($_X_script);
|
|
}
|
|
|
|
########################
|
|
# Javascript Literals #
|
|
########################
|
|
|
|
# JScript function "_unpack" - from DeanEdwards pack.js (NB: No ";" after final "}")
# Single-quoted heredoc, so Perl interpolates nothing inside it.
# _X_bootStrap works on a copy of this text: it splices in the decoder
# and/or encoder and then runs the result through pack().
|
|
($_JSunpack) = <<'END_JSCRIPT_UNPACK';
|
|
/* unpacking function - this is the boot strap function */
|
|
/* data extracted from this packing routine is passed to */
|
|
/* this function when decoded in the target */
|
|
function($packed, $ascii, $count, $keywords, $encode, $decode) {
|
|
while ($count--)
|
|
if ($keywords[$count])
|
|
$packed = $packed.replace(new RegExp('\\b' + $encode($count) + '\\b', 'g'), $keywords[$count]);
|
|
/* RS_Debug = $packed; */ /* {RS} !!!!!!!!! */
|
|
return $packed;
|
|
}
|
|
END_JSCRIPT_UNPACK
|
|
|
|
# JScript function "_decode" - from DeanEdwards pack.js
# Single-quoted heredoc, so Perl interpolates nothing inside it.
# When fast decoding is requested, _X_bootStrap takes the body of this
# function (via _X_getFunctionBody) and inserts it into the unpacker.
|
|
($_JSdecode) = <<'END_JSCRIPT_DECODE';
|
|
/* code-snippet inserted into the unpacker to speed up decoding */
|
|
function() {
|
|
/* does the browser support String.replace where the */
|
|
/* replacement value is a function? */
|
|
if (!''.replace(/^/, String)) {
|
|
/* decode all the values we need */
|
|
while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
|
|
/* global replacement function */
|
|
$keywords = [function($encoded){return $decode[$encoded]}];
|
|
/* generic match */
|
|
$encode = function(){return'\\w+'};
|
|
/* reset the loop counter - we are now doing a global replace */
|
|
$count = 1;
|
|
}
|
|
};
|
|
END_JSCRIPT_DECODE
|
|
|
|
# JScript versions of encoders
# Each one is a single-quoted heredoc (no Perl interpolation) holding the
# source text of a JScript function. _X_getEncoder returns this one as the
# JScript encoder for bases up to 10.
|
|
($_JSencode10) = <<'END_JSCRIPT_ENCODE10';
|
|
/* zero encoding */
|
|
/* characters: 0123456789 */
|
|
function($charCode) {
|
|
return $charCode;
|
|
};
|
|
END_JSCRIPT_ENCODE10
|
|
|
|
# JScript encoder text that _X_getEncoder returns for bases 11 to 36.
# Single-quoted heredoc, so Perl interpolates nothing inside it.
($_JSencode36) = <<'END_JSCRIPT_ENCODE36';
|
|
/* inherent base36 support */
|
|
/* characters: 0123456789abcdefghijklmnopqrstuvwxyz */
|
|
function($charCode) {
|
|
return $charCode.toString(36);
|
|
};
|
|
END_JSCRIPT_ENCODE36
|
|
|
|
# JScript encoder text that _X_getEncoder returns for bases 37 to 62.
# Single-quoted heredoc, so Perl interpolates nothing inside it.
# _X_bootStrap rewrites "_encoding" to "$ascii" and "arguments.callee" to
# "$encode" in its copy of the text before inserting it into the unpacker.
($_JSencode62) = <<'END_JSCRIPT_ENCODE62';
|
|
/* hitch a ride on base36 and add the upper case alpha characters */
|
|
/* characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ */
|
|
function($charCode) {
|
|
return ($charCode < _encoding ? '' : arguments.callee(parseInt($charCode / _encoding))) +
|
|
(($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
|
|
};
|
|
END_JSCRIPT_ENCODE62
|
|
|
|
# JScript encoder text used for bases above 62: _X_getEncoder returns it,
# and _X_bootStrap selects it directly when the encoding is above 62.
# Single-quoted heredoc, so Perl interpolates nothing inside it.
# _X_bootStrap rewrites "_encoding" to "$ascii" and "arguments.callee" to
# "$encode" in its copy of the text before inserting it into the unpacker.
($_JSencode95) = <<'END_JSCRIPT_ENCODE95';
|
|
/* use high-ascii values */
|
|
/* characters: 、�ウЖ��⑭�渦慨偽係杭纂従神疎団兎波品北洋椀冫嘖孛忤掣桀毳烙痰邃繙艾蜉謖邇關髓齡��巐鄕� */
|
|
function($charCode) {
|
|
return ($charCode < _encoding ? '' : arguments.callee($charCode / _encoding)) +
|
|
String.fromCharCode($charCode % _encoding + 161);
|
|
};
|
|
END_JSCRIPT_ENCODE95
|
|
|
|
###########
|
|
# END #
|
|
###########
|
|
1; # Pack #
|
|
###########
|