From: Nathaniel Wesley Filardo Date: Fri, 25 Jul 2008 11:19:26 +0000 (-0400) Subject: Prepare to split out the coding stuff to its own module. X-Git-Url: https://hydra-www.ietfng.org/gitweb/?a=commitdiff_plain;h=f4fcc9174f162795c2c7b24408a9dfb867589e0a;p=instirc Prepare to split out the coding stuff to its own module. darcs-hash:20080725111926-2ee1a-e31c38cf7272ef89a23496dfed8b6cd28681c93f.gz --- diff --git a/code.pm b/code.pm new file mode 100644 index 0000000..4807645 --- /dev/null +++ b/code.pm @@ -0,0 +1,194 @@ +use warnings; + +package Irssi::Scripts::Instance::Coder; + +# XXX Do we want to make any guarantees about the appearance of this delimiter +# in our encoded message? +my $MAGIC_DELIM = ""; +my $END_CODE = ""; + +################################################################# + +my @code_chars = ("", "", "", "", "", ""); + +my $i = 0; +foreach (@code_chars) { + $code_chars_rev{$_} = $i++; +} + +################################################################# + +### This is a 6-ary tree, ideally of the Huffman variety. +### Note that for convenience, plies of single characters may be +### represented as strings. Plies of larger varieties need to +### be represented as array references. + +#my $decode_table = [ +# "abcdef", +# "ghijkl", +# "mnopqr", +# "stuvwx", +# [ "y", +# "z", +# "-", +# "_", +# ".+=&\@!", +# "*^/\$#?" ], +# [ "ABCDEF", +# "GHIJKL", +# "MNOPQR", +# "STUVWX", +# "YZ1234", +# "567890"] ]; + +### The following version is determined by Smaug (and nwf) by +### Ensuring one count +### /usr/share/dict/** filtered +### /usr/share/doc/* filtered +### +### In particular, by... +### +### LIST='abcdefghijklmnopqrstuvwxyz'\ +### 'ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890'\ +### '_.+=&@!*^\$#?-' +### (echo $LIST; find /usr/share/dict /usr/share/doc -type f -exec cat {} \;\ +### | LC_ALL="C" sed -e s/[^$LIST]//g ) | perl ./huffgen.pl +### +### It was subsequently slightly re-arranged to take advantage of the +### shorter encodings available in gwillen's storage here. + +my $decode_table = [ + "staelr", + "0yfb.g", + "ESAvT-", + "=umhdp", + [ + "c", + "o", + "i", + "n", + "M#854H", + "P&Bx3N", + ], + [ + "UjG796", + "FO2kLI", + "C_1wDR", + "^Q+@?Z", + "JK!\\\$V", + "XYzW*q", + ], +]; + +sub build_encoding_table($$$); +sub build_encoding_table($$$) { + my ($dec_tbl, $enc_tbl, $prefix) = @_; + + if (ref $dec_tbl eq "") { + if (length $dec_tbl == 1) { + $enc_tbl->{$dec_tbl} = $prefix; + } else { + my $i = 0; + foreach my $char (split(//, $dec_tbl)) { + build_encoding_table($char, $enc_tbl, $prefix.$code_chars[$i++]); + } + } + } else { + my $i = 0; + foreach my $tbl (@$dec_tbl) { + build_encoding_table($tbl, $enc_tbl, $prefix.$code_chars[$i++]); + } + } +} + +my $encode_table = {}; +build_encoding_table($decode_table, $encode_table, ""); + +sub dump_encode_table($); +sub dump_encode_table($) { + my ($tbl) = @_; + + foreach my $key (sort keys %$tbl) { + printf " %s -> ", $key; + foreach my $char (split(//, $tbl->{$key})) { + printf "%2x ", ord($char); + } + print "\n"; + } +} + +sub dump_decode_table($); +sub dump_decode_table($) { + my ($tbl) = @_; + + if (ref $tbl eq "") { + if (length $tbl == 1) { + print $tbl." "; + } else { + print "\"$tbl\" "; + } + } else { + print "["; + for my $i (0 .. 5) { + dump_decode_table($tbl->[$i]); + } + print "] "; + } +} + +################################################################# + +sub encode_instance($) { + my ($inst) = @_; + + my $result = ""; + foreach my $char (split(//, $inst)) { + my $code = $encode_table->{$char}; + return undef if not defined $code; + $result .= $code; + } + return $result.$END_CODE; +} + +sub decode_char($$) { + my ($char, $tbl) = @_; + + if (ref $tbl eq "") { + return substr($tbl, $code_chars_rev{$char}, 1); + } else { + return $tbl->[$code_chars_rev{$char}]; + } +} + +sub is_char($) { + my ($char) = @_; + return (ref $char eq "") && (length $char == 1); +} + +sub decode_instance($) { + my ($inst) = @_; + + my $lec = length($END_CODE); + my $lin = length($inst); + + return "" if $lin <= $lec; + + my $lastchars = substr($inst, $lin - $lec); + return "" if $lastchars ne $END_CODE; + + $inst = substr($inst, 0, $lin - $lec); + + my $result = ""; + while ($inst ne "") { + my $tbl = $decode_table; + while (!is_char($tbl)) { + my $code = substr($inst, 0, 1); + $inst = substr($inst, 1); + $tbl = decode_char($code, $tbl); + } + $result .= $tbl; + } + return $result; +} + +1; diff --git a/code_test.pl b/code_test.pl new file mode 100644 index 0000000..e84aaac --- /dev/null +++ b/code_test.pl @@ -0,0 +1,26 @@ +use warnings; + +# use Irssi::Scripts::Instance::Coder; ### XXX +use code; + +while (my $inst = <>) { + chomp $inst; + + my $enc = Irssi::Scripts::Instance::Coder::encode_instance($inst); + + unless (defined $enc) { + print "Unable to encode input.\n"; + next; + } + + my $dec = Irssi::Scripts::Instance::Coder::decode_instance($enc); + + print $inst.">>".$dec."(".(length $enc)."):"; + + foreach my $char (split(//, $enc)) { + printf " %2X", ord($char); + } + print "\n"; + + die unless ($inst eq $dec); +}