From: nwf Date: Wed, 27 Aug 2008 05:22:46 +0000 (-0400) Subject: Add Huffman coder (and test) X-Git-Url: https://hydra-www.ietfng.org/gitweb/?a=commitdiff_plain;h=7867eca701680d9dc25fbf4e5ff0f9e5bf1c318d;p=instirc Add Huffman coder (and test) darcs-hash:20080827052246-4d648-a30541dad510dcedb255f94c8b27c5e8b5b809c6.gz --- diff --git a/HuffmanCoder.pm b/HuffmanCoder.pm new file mode 100644 index 0000000..1599f5c --- /dev/null +++ b/HuffmanCoder.pm @@ -0,0 +1,142 @@ +use warnings; + +#package Irssi::Scripts::Instance::HuffmanCoder; +package HuffmanCoder; + +################################################################# + +sub _build_encoding_table($$$$); +sub _build_encoding_table($$$$) { + my ($code_chars, $dec_tbl, $enc_tbl, $prefix) = @_; + + if (ref $dec_tbl eq "") { + if (length $dec_tbl == 1) { + $enc_tbl->{$dec_tbl} = $prefix; + } else { + my $i = 0; + foreach my $char (split(//, $dec_tbl)) { + _build_encoding_table($code_chars, $char, + $enc_tbl, $prefix.$$code_chars[$i++]); + } + } + } else { + my $i = 0; + foreach my $tbl (@$dec_tbl) { + _build_encoding_table($code_chars, $tbl, + $enc_tbl, $prefix.$$code_chars[$i++]); + } + } + + return $enc_tbl; +} + +################################################################# + +# The constructor takes a MasterCoder object and peers inside to +# borrow the coding characters. This is to make it easy on upstream +# modules, not for any technical necessity. + +sub new ($$$) { + my $class = shift @_; + my $mastercoder = shift @_; + my $hufftree = shift @_; + + return undef if not defined $mastercoder; + return undef if not defined $$mastercoder{'code_chars'}; + + my $enctable = _build_encoding_table($$mastercoder{'code_chars'}, + $hufftree, {}, ""); + + bless { + 'ccr' => $$mastercoder{'code_chars_rev'}, + 'dt' => $hufftree, + 'et' => $enctable, + }, $class; +} + +################################################################# + +sub dump_encode_table($); +sub dump_encode_table($) { + my $self = shift @_; + + foreach my $key (sort keys %{$$self{'et'}}) { + printf " %s -> ", $key; + foreach my $char (split(//, $$self{'et'}{$key})) { + printf "%2x ", ord($char); + } + print "\n"; + } +} + +sub _dump_decode_table_helper($); +sub _dump_decode_table_helper($) { + my $tbl = shift @_; + + if (ref $tbl eq "") { + if (length $tbl == 1) { + print "'".$tbl."' "; + } else { + print "\"$tbl\" "; + } + } else { + print "["; + for my $i (0 .. 5) { + _dump_decode_table_helper($tbl->[$i]); + } + print "] "; + } +} + +sub dump_decode_table($) { + my ($self) = @_; + _dump_decode_table_helper( $$self{'dt'} ); + print "\n"; +} + +################################################################# + +sub encode($$) { + my ($self, $in) = @_; + + my $result = ""; + foreach my $char (split(//, $in)) { + my $code = $$self{'et'}{$char}; + return undef if not defined $code; + $result .= $code; + } + return $result; +} + +sub _decode_char($$$) { + my ($ccr, $char, $tbl) = @_; + + if (ref $tbl eq "") { + return substr($tbl, $$ccr{$char}, 1); + } else { + return $tbl->[$$ccr{$char}]; + } +} + +sub _is_char($) { + my ($char) = @_; + return (ref $char eq "") && (length $char == 1); +} + +sub decode($$) { + my ($self, $inst) = @_; + + my $result = ""; + while ($inst ne "") { + my $tbl = $$self{'dt'}; + while (!_is_char($tbl)) { + my $code = substr($inst, 0, 1); + $inst = substr($inst, 1); + $tbl = _decode_char($$self{'ccr'}, $code, $tbl); + } + $result .= $tbl; + } + return $result; +} + +1; diff --git a/HuffmanCoder_test.pl b/HuffmanCoder_test.pl new file mode 100644 index 0000000..0c21f82 --- /dev/null +++ b/HuffmanCoder_test.pl @@ -0,0 +1,30 @@ +use warnings; +use strict; +#use Data::Dumper; + +# use Irssi::Scripts::Instance::Mastercode; ### XXX +require MasterCoder; +require HuffmanCoder; +use Definitions qw( @debug_code_chars $instance_huffman_table1 ); + +my $mastercoder = MasterCoder->new(\@debug_code_chars); +my $huffmancoder = HuffmanCoder->new($mastercoder, $instance_huffman_table1); + +#print "Dumping encoding table:\n"; +#$huffmancoder->dump_encode_table(); + +#print "Dumping decoding table:\n"; +#$huffmancoder->dump_decode_table(); + +#print Dumper($huffmancoder); + +my @test_strings = ( "hi", "there", "coin", "test", "!@#\$&*.-=" ); + +foreach my $ts (@test_strings) { + my $enc = $huffmancoder->encode($ts); + my $dec = $huffmancoder->decode($enc); + + print $ts, " ==> ", $enc, " ==> ", $dec, "\n"; + + die if $dec ne $ts; +} diff --git a/instance_1.pm b/instance_1.pm deleted file mode 100644 index 670fce9..0000000 --- a/instance_1.pm +++ /dev/null @@ -1,180 +0,0 @@ -use warnings; - -#package Irssi::Scripts::Instance::Tagger1; - -################################################################# - -### This is a 6-ary tree, ideally of the Huffman variety. -### Note that for convenience, plies of single characters may be -### represented as strings. Plies of larger varieties need to -### be represented as array references. - -#my $decode_table = [ -# "abcdef", -# "ghijkl", -# "mnopqr", -# "stuvwx", -# [ "y", -# "z", -# "-", -# "_", -# ".+=&\@!", -# "*^/\$#?" ], -# [ "ABCDEF", -# "GHIJKL", -# "MNOPQR", -# "STUVWX", -# "YZ1234", -# "567890"] ]; - -### The following version is determined by Smaug (and nwf) by -### Ensuring one count -### /usr/share/dict/** filtered -### /usr/share/doc/* filtered -### -### In particular, by... -### -### LIST='abcdefghijklmnopqrstuvwxyz'\ -### 'ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890'\ -### '_.+=&@!*^\$#?-' -### (echo $LIST; find /usr/share/dict /usr/share/doc -type f -exec cat {} \;\ -### | LC_ALL="C" sed -e s/[^$LIST]//g ) | perl ./huffgen.pl -### -### It was subsequently slightly re-arranged to take advantage of the -### shorter encodings available in gwillen's storage here. - -my $decode_table = [ - "staelr", - "0yfb.g", - "ESAvT-", - "=umhdp", - [ - "c", - "o", - "i", - "n", - "M#854H", - "P&Bx3N", - ], - [ - "UjG796", - "FO2kLI", - "C_1wDR", - "^Q+@?Z", - "JK!\\\$V", - "XYzW*q", - ], -]; - -sub build_encoding_table($$$); -sub build_encoding_table($$$) { - my ($dec_tbl, $enc_tbl, $prefix) = @_; - - if (ref $dec_tbl eq "") { - if (length $dec_tbl == 1) { - $enc_tbl->{$dec_tbl} = $prefix; - } else { - my $i = 0; - foreach my $char (split(//, $dec_tbl)) { - build_encoding_table($char, $enc_tbl, $prefix.$code_chars[$i++]); - } - } - } else { - my $i = 0; - foreach my $tbl (@$dec_tbl) { - build_encoding_table($tbl, $enc_tbl, $prefix.$code_chars[$i++]); - } - } -} - -my $encode_table = {}; -build_encoding_table($decode_table, $encode_table, ""); - -sub dump_encode_table($); -sub dump_encode_table($) { - my ($tbl) = @_; - - foreach my $key (sort keys %$tbl) { - printf " %s -> ", $key; - foreach my $char (split(//, $tbl->{$key})) { - printf "%2x ", ord($char); - } - print "\n"; - } -} - -sub dump_decode_table($); -sub dump_decode_table($) { - my ($tbl) = @_; - - if (ref $tbl eq "") { - if (length $tbl == 1) { - print $tbl." "; - } else { - print "\"$tbl\" "; - } - } else { - print "["; - for my $i (0 .. 5) { - dump_decode_table($tbl->[$i]); - } - print "] "; - } -} - -################################################################# - -sub encode_instance($) { - my ($inst) = @_; - - my $result = ""; - foreach my $char (split(//, $inst)) { - my $code = $encode_table->{$char}; - return undef if not defined $code; - $result .= $code; - } - return $result.$END_CODE; -} - -sub decode_char($$) { - my ($char, $tbl) = @_; - - if (ref $tbl eq "") { - return substr($tbl, $code_chars_rev{$char}, 1); - } else { - return $tbl->[$code_chars_rev{$char}]; - } -} - -sub is_char($) { - my ($char) = @_; - return (ref $char eq "") && (length $char == 1); -} - -sub decode_instance($) { - my ($inst) = @_; - - my $lec = length($END_CODE); - my $lin = length($inst); - - return "" if $lin <= $lec; - - my $lastchars = substr($inst, $lin - $lec); - return "" if $lastchars ne $END_CODE; - - $inst = substr($inst, 0, $lin - $lec); - - my $result = ""; - while ($inst ne "") { - my $tbl = $decode_table; - while (!is_char($tbl)) { - my $code = substr($inst, 0, 1); - $inst = substr($inst, 1); - $tbl = decode_char($code, $tbl); - } - $result .= $tbl; - } - return $result; -} - -1;