From c1b46883d10f5a21d81f382c7562fa264e02ccae Mon Sep 17 00:00:00 2001
From: Jason Eisner <jason@cs.jhu.edu>
Date: Wed, 10 Jul 2013 08:39:20 -0400
Subject: [PATCH] added forward-backward.dyna, and its companion file
 icecream.dyna. It exposes errors in the current implementation: see #47.

---
 examples/forward-backward.dyna | 130 +++++++++++++++++++++++++++++++++
 examples/icecream.dyna         |  51 +++++++++++++
 2 files changed, 181 insertions(+)
 create mode 100644 examples/forward-backward.dyna
 create mode 100644 examples/icecream.dyna

diff --git a/examples/forward-backward.dyna b/examples/forward-backward.dyna
new file mode 100644
index 0000000..b3a2c62
--- /dev/null
+++ b/examples/forward-backward.dyna
@@ -0,0 +1,130 @@
+% ---------------------------------------------------
+% INPUT
+% ---------------------------------------------------
+
+% Someone needs to define an input word sequence like tis.
+% word(1) = "Now".
+% word(2) = "is".
+% ...
+
+% Add a special eos (end of sequence) marker after the last word.
+
+last_time max= Time for &eos != word(Time).
+word(last_time+1) := &eos.
+
+% ---------------------------------------------------
+% ITERATION NUMBERS.
+% ---------------------------------------------------
+
+% How many times to re-estimate the model parameters.
+
+num_iterations := 10.
+
+is_iter(0).
+is_iter(I+1) |= true for is_iter(I) & I < num_iterations.
+
+% ---------------------------------------------------
+% FORWARD-BACKWARD ALGORITHM (THE "E STEP" OF THE EM ALGORITHM)
+% ---------------------------------------------------
+
+% At each iteration I = 0, 1, ... num_iterations, 
+% define a trellis graph on states of the form &state(Time,Tag),
+% which expresses all ways of explaining the word sequence.
+% The edge weights are defined from iteration I's probability model.
+
+edge(I,&state(Time-1,PrevTag),&state(Time,Tag)) 
+   = p_transition(I,PrevTag,Tag) * p_emission(I,Tag,word(Time)).
+start_state = &state(0,&start).        % the initial tag at time 0 is called &start
+end_state = &state(last_time+1,&stop). % the final tag is called &stop
+
+% Compute alphas and betas on that trellis graph, using 
+% dynamic programming.
+
+alpha(I,start_state) += 1 for is_iter(I).
+alpha(I,State) += alpha(I,PrevState) * edge(I,PrevState,State).
+
+beta(I,end_state) += 1 for is_iter(I).
+beta(I,State) += edge(I,State,NextState) * beta(I,NextState).
+
+% Total probability of all paths in the trellis.
+
+z(I) = alpha(I,end_state).
+
+% The "a posteriori" probabilities that the true path
+% goes through particular states and edges, given that the
+% true path is one of the paths in the trellis.
+
+prob(I,State) = alpha(I,State) * beta(I,State) / z(I).
+prob(I,PrevState,State) 
+    = alpha(I,PrevState) * edge(I,PrevState,State) * beta(I,State) / z(I).
+
+% ---------------------------------------------------
+% COUNTING TRANSITIONS AND EMISSIONS
+% ---------------------------------------------------
+
+% From the above, we can get the expected counts of specific 
+% transitions and emissions.
+
+count_transition(I,PrevTag,Tag) += prob(I,&state(Time-1,PrevTag),&state(Time,Tag)).
+count_emission(I,Tag,word(Time)) += prob(I,&state(Time,Tag)).
+
+% Smooth by adding lambda to all counts corresponding to parameters, 
+% even counts that are still 0 or null.
+
+lambda := 0.0.    % can be changed
+count_transition(I,PrevTag,Tag) += lambda for (Param is p_transition(I,PrevTag,Tag)).
+count_emission(I,Tag,Word) += lambda for (Param is p_emission(I,Tag,Word)).
+
+% Get the total smoothed count in each context.  These will be the 
+% denominators of our smoothed probabiities.
+
+count_transition_denom(I,PrevTag) += count_transition(I,PrevTag,Tag).
+count_emission_denom(I,Tag) += count_emission(I,Tag,Word).
+
+% ---------------------------------------------------
+% RE-ESTIMATE THE PARAMETERS (THE "M STEP" OF THE EM ALGORITHM)
+% ---------------------------------------------------
+
+% Someone needs to define the initial guessed model at iteration 0:
+% p_t(PrevTag,Tag) := ...
+% p_e(Tag,Word): = ...
+
+% At time 0, use the initial guessed model.
+
+p_transition(0,PrevTag,Tag) = p_t(PrevTag,Tag).
+p_emission(0,Tag,Word) := p_e(Tag,Word).
+
+% Given the smoothed counts at time I, we define the new model at time I+1:
+
+p_transition(I+1,PrevTag,Tag) 
+     = count_transition(I,PrevTag,Tag) / count_transition_denom(I,PrevTag) 
+           for I < num_iterations.
+p_emission(I+1,Tag,Word) 
+     := count_emission(I,Tag,Word) / count_emission_denom(I,Tag)
+           for I < num_iterations.
+
+% The special stop tag always emits the eos word.  Make sure that this
+% has probability 1 regardless of smoothing.  This overrides the previous
+% rules.
+
+p_emission(0,&stop,&eos) := 1.
+p_emission(I+1,&stop,&eos) := p_emission(I,&stop,&eos) for I < num_iterations.
+
+% ---------------------------------------------------
+% DRAW CONCLUSIONS FROM THE ESTIMATED MODEL
+% ---------------------------------------------------
+
+% After the model is estimated, look at the computations for the final 
+% iteration to find the most likely tag at each time step, and its probability.
+
+besttagprob(Time) max= prob(num_iterations, &state(Time,Tag)) with_key Tag.
+besttag(Time) = $key(besttagprob(Time)).
+
+% Let's also find the single best path, using the Viterbi algorithm.
+
+bestpathprob(end_state) max= 1 with_key [&stop].
+bestpathprob(State) max= edge(num_iterations,State,NextState) * bestpathprob(NextState)
+                            with_key [Tag | $key(bestpathprob(NextState))] for &state(_,Tag) = State.
+bestpathprob = bestpathprob(start_state).
+bestpath = $key(bestpathprob(start_state)).
+
diff --git a/examples/icecream.dyna b/examples/icecream.dyna
new file mode 100644
index 0000000..e708dcb
--- /dev/null
+++ b/examples/icecream.dyna
@@ -0,0 +1,51 @@
+% Ice cream example matching the spreadsheet.
+% Should be used with hmm.dyna.
+
+% initial emission probabilities
+p_e("C",1) := 0.7.   p_e("H",1) := 0.1.
+p_e("C",2) := 0.2.   p_e("H",2) := 0.2.
+p_e("C",3) := 0.1.   p_e("H",3) := 0.7.
+
+% initial transition probabilities
+p_t("C","C") := 0.8.   p_t("H","C") := 0.1.   p_t(&start,"C") := 0.5.
+p_t("C","H") := 0.1.   p_t("H","H") := 0.8.   p_t(&start,"H") := 0.5.
+p_t("C",&stop) := 0.1. p_t("H",&stop) := 0.1. p_t(&start,&stop) := 0.
+
+% word sequence: number of ice creams on each day.
+
+word(1) := 2.  
+word(2) := 3.
+word(3) := 3.
+word(4) := 2.
+word(5) := 3.
+word(6) := 2.
+word(7) := 3.
+word(8) := 2.
+word(9) := 2.
+word(10) := 3.
+word(11) := 1.
+word(12) := 3.
+word(13) := 3.
+word(14) := 1.
+word(15) := 1.
+word(16) := 1.
+word(17) := 2.
+word(18) := 1.
+word(19) := 1.
+word(20) := 1.
+word(21) := 3.
+word(22) := 1.
+word(23) := 2.
+word(24) := 1.
+word(25) := 1.
+word(26) := 1.
+word(27) := 2.
+word(28) := 3.
+word(29) := 3.
+word(30) := 2.
+word(31) := 3.
+word(32) := 2.
+word(33) := 2.
+
+
+
-- 
2.50.1