From 431695c6f1f71afb05f8178d000ef6d6e0bd8bba Mon Sep 17 00:00:00 2001 From: nwf Date: Wed, 27 Aug 2008 01:21:57 -0400 Subject: [PATCH] Refactor master coder and definitions darcs-hash:20080827052157-4d648-413280b09986ebc55ac90c4cebdb67c2cf4c6d1e.gz --- Definitions.pm | 101 ++++++++++++++++++++ MasterCoder.pm | 111 ++++++++++++++++++++++ mastercode_test.pl => MasterCoder_test.pl | 14 ++- mastercode.pm | 98 ------------------- 4 files changed, 221 insertions(+), 103 deletions(-) create mode 100644 Definitions.pm create mode 100644 MasterCoder.pm rename mastercode_test.pl => MasterCoder_test.pl (64%) delete mode 100644 mastercode.pm diff --git a/Definitions.pm b/Definitions.pm new file mode 100644 index 0000000..e2a1874 --- /dev/null +++ b/Definitions.pm @@ -0,0 +1,101 @@ +use warnings; +use strict; + +#package Irssi::Scripts::Instance::Definitions; +package Definitions; + +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); +require Exporter; +@ISA = qw(Exporter); + +@EXPORT_OK = qw( + %known_types + $MESSAGE_START $MESSAGE_END + @default_code_chars + @debug_code_chars + $instance_huffman_table1 +); + +################################################################# + +our $MESSAGE_START = ""; +our $MESSAGE_END = ""; + +################################################################# + +our @default_code_chars = ("", "", "", "", "", ""); +our @debug_code_chars = ("B", "C", "G", "O", "V", "_"); + +################################################################# + +our %known_types = ( + 'InstanceLabelHuffman1' => 0x6, + 'InstanceContinuationMessage' => 0x7, + ); + +################################################################# + +### This is a 6-ary tree, ideally of the Huffman variety. +### Note that for convenience, plies of single characters may be +### represented as strings. Plies of larger varieties need to +### be represented as array references. + +#my $decode_table = [ +# "abcdef", +# "ghijkl", +# "mnopqr", +# "stuvwx", +# [ "y", +# "z", +# "-", +# "_", +# ".+=&\@!", +# "*^/\$#?" ], +# [ "ABCDEF", +# "GHIJKL", +# "MNOPQR", +# "STUVWX", +# "YZ1234", +# "567890"] ]; + +### The following version is determined by Smaug (and nwf) by +### Ensuring one count +### /usr/share/dict/** filtered +### /usr/share/doc/* filtered +### +### In particular, by... +### +### LIST='abcdefghijklmnopqrstuvwxyz'\ +### 'ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890'\ +### '_.+=&@!*^\$#?-' +### (echo $LIST; find /usr/share/dict /usr/share/doc -type f -exec cat {} \;\ +### | LC_ALL="C" sed -e s/[^$LIST]//g ) | perl ./huffgen.pl +### +### It was subsequently slightly re-arranged to take advantage of the +### shorter encodings available in gwillen's storage here. + +our $instance_huffman_table1 = [ + "staelr", + "0yfb.g", + "ESAvT-", + "=umhdp", + [ + "c", + "o", + "i", + "n", + "M#854H", + "P&Bx3N", + ], + [ + "UjG796", + "FO2kLI", + "C_1wDR", + "^Q+@?Z", + "JK!\\\$V", + "XYzW*q", + ], +]; + +################################################################# +1; diff --git a/MasterCoder.pm b/MasterCoder.pm new file mode 100644 index 0000000..51f987b --- /dev/null +++ b/MasterCoder.pm @@ -0,0 +1,111 @@ +use warnings; +use strict; + +use Math::BaseCalc; +use POSIX qw( floor ); + +#package Irssi::Scripts::Instance::Mastercoder; +package MasterCoder; + +################################################################# + +sub new ($$) { + my $class = shift @_; + my $code_chars = shift @_; + + return undef if not defined $code_chars; + + my $i = 0; + my $code_chars_rev = {}; + foreach (@$code_chars) { + $$code_chars_rev{$_} = $i++; + } + + my $tencoder = new Math::BaseCalc(digits => $code_chars); + + bless { + # Treat as read only exports, please + 'code_chars' => $code_chars, + 'code_chars_rev' => $code_chars_rev, + 'code_chars_count' => scalar @$code_chars, + + # Treat as private; you shouldn't use this; use + # ->tencode and ->tdecode instead. + '_tencoder' => $tencoder, + }, $class; +} + +################################################################# + +sub tencode ($$) { (shift @_)->{'_tencoder'}->to_base(@_); } +sub tdecode ($$) { (shift @_)->{'_tencoder'}->from_base(@_); } + +sub tencode_padded ($$$) { + my $self = shift @_; + my $in = shift @_; + my $minpad = shift @_; + + my $tenc = $self->tencode($in); + + return @{$$self{'code_chars'}}[0] + x ($minpad > length $tenc ? $minpad - length $tenc : 0) + . $tenc; +} + +################################################################# + +sub _lenc_body_len($$) { + my $self = shift @_; + my $in = shift @_; + + my $ccc = $$self{'code_chars_count'}; + + return 0 if $in == 0; + return POSIX::floor( log(($ccc-1)*$in + 1) / log($ccc) ); +} + +sub _lenc_correction($) { + my $self = shift @_; + my $enclen = shift @_; + my $ccc = $$self{'code_chars_count'}; + return ( ($ccc**($enclen) - 1) / ($ccc-1) ); +} + +sub lencode ($$) { + my $self = shift @_; + my $in = shift @_; + my $enclen = $self->_lenc_body_len($in); + + my $ccc = $$self{'code_chars_count'}; + + die "Can't encode numbers that big!" if $enclen >= $ccc - 1; + + return $self->tencode($enclen) if $in == 0; + + $in -= $self->_lenc_correction($enclen); + + return $self->tencode($enclen) . $self->tencode_padded($in, $enclen); +} + +sub ldecode($$) { + my $self = shift @_; + my $in = shift @_; + + my $enclen = substr($in, 0, 1); + my $reallen = $self->tdecode($enclen); + + my $ccc = $$self{'code_chars_count'}; + + die "Can't decode numbers that big!" if $reallen >= $ccc - 1; + + my $encval = substr($in, 1, $reallen); + + my $realval = $self->tdecode($encval); + $realval += $self->_lenc_correction($reallen); + + return ( $realval, $reallen+1 ) +} + +################################################################# + +1; diff --git a/mastercode_test.pl b/MasterCoder_test.pl similarity index 64% rename from mastercode_test.pl rename to MasterCoder_test.pl index d925ec2..5c7a0a3 100644 --- a/mastercode_test.pl +++ b/MasterCoder_test.pl @@ -1,13 +1,17 @@ use warnings; +use strict; # use Irssi::Scripts::Instance::Mastercode; ### XXX -use mastercode qw(lencode ldecode tencode_padded tdecode); +require MasterCoder; +use Definitions qw( @debug_code_chars ); + +my $coder = MasterCoder->new(\@debug_code_chars); print "Checking T encoding machinery... \n"; foreach my $i (0 .. 35) { - my $enc = tencode_padded($i,2); - my $dec = tdecode($enc); + my $enc = $coder->tencode_padded($i,2); + my $dec = $coder->tdecode($enc); print $i, " ", $enc, " ==> ", $dec, "\n" if $i % 10 == 0; @@ -18,8 +22,8 @@ foreach my $i (0 .. 35) { print "Checking L encoding machinery... \n"; foreach my $i (0 .. 1554) { - my $enc = lencode($i); - my ($deci, $decsize) = ldecode($enc); + my $enc = $coder->lencode($i); + my ($deci, $decsize) = $coder->ldecode($enc); print $i, " ", $enc, " (", length $enc, ") ==> ", $deci, " (", $decsize, ")\n" diff --git a/mastercode.pm b/mastercode.pm deleted file mode 100644 index 2b76620..0000000 --- a/mastercode.pm +++ /dev/null @@ -1,98 +0,0 @@ -use warnings; -use strict; - -use Math::BaseCalc; -use POSIX qw( floor ); - -#package Irssi::Scripts::Instance::Mastercode; - -use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); -require Exporter; -@ISA = qw(Exporter); - -@EXPORT = qw( - tencode tencode_padded tdecode lencode ldecode -); - -$VERSION = '0.0'; - -################################################################# - -my $MESSAGE_START = ""; -my $MESSAGE_END = ""; - -################################################################# - -#my @code_chars = ("", "", "", "", "", ""); - -# This version is lovely for visual debugging. -my @code_chars = ("B", "C", "G", "O", "V", "_"); -my %code_chars_rev; - -my $i = 0; -foreach (@code_chars) { - $code_chars_rev{$_} = $i++; -} - -my $code_chars_count = scalar @code_chars; - -################################################################# - -my $tencoder = new Math::BaseCalc(digits => \@code_chars); - -sub tencode ($) { $tencoder->to_base(@_); } -sub tdecode ($) { $tencoder->from_base(@_); } - -sub tencode_padded ($$) { - my $in = shift @_; - my $minpad = shift @_; - - my $tenc = tencode($in); - return $code_chars[0] - x ($minpad > length $tenc ? $minpad - length $tenc : 0) - . $tenc; -} - -sub lenc_body_len($) { - my $in = shift @_; - return 0 if $in == 0; - return floor( log(($code_chars_count-1)*$in + 1) / log($code_chars_count) ); -} - -sub lenc_correction($) { - my $enclen = shift @_; - return ( ($code_chars_count**($enclen) - 1) / ($code_chars_count-1) ); -} - -sub lencode ($) { - my $in = shift @_; - my $enclen = lenc_body_len($in); - - die "Can't encode numbers that big!" if $enclen >= $code_chars_count - 1; - - return tencode($enclen) if $in == 0; - - $in -= lenc_correction($enclen); - - return tencode($enclen) . tencode_padded($in, $enclen); -} - -sub ldecode($) { - my $in = shift @_; - - my $enclen = substr($in, 0, 1); - my $reallen = tdecode($enclen); - - die "Can't decode numbers that big!" if $reallen >= $code_chars_count - 1; - - my $encval = substr($in, 1, $reallen); - - my $realval = tdecode($encval); - $realval += lenc_correction($reallen); - - return ( $realval, $reallen+1 ) -} - -################################################################# - -1; -- 2.50.1