From c1b46883d10f5a21d81f382c7562fa264e02ccae Mon Sep 17 00:00:00 2001 From: Jason Eisner Date: Wed, 10 Jul 2013 08:39:20 -0400 Subject: [PATCH] added forward-backward.dyna, and its companion file icecream.dyna. It exposes errors in the current implementation: see #47. --- examples/forward-backward.dyna | 130 +++++++++++++++++++++++++++++++++ examples/icecream.dyna | 51 +++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 examples/forward-backward.dyna create mode 100644 examples/icecream.dyna diff --git a/examples/forward-backward.dyna b/examples/forward-backward.dyna new file mode 100644 index 0000000..b3a2c62 --- /dev/null +++ b/examples/forward-backward.dyna @@ -0,0 +1,130 @@ +% --------------------------------------------------- +% INPUT +% --------------------------------------------------- + +% Someone needs to define an input word sequence like tis. +% word(1) = "Now". +% word(2) = "is". +% ... + +% Add a special eos (end of sequence) marker after the last word. + +last_time max= Time for &eos != word(Time). +word(last_time+1) := &eos. + +% --------------------------------------------------- +% ITERATION NUMBERS. +% --------------------------------------------------- + +% How many times to re-estimate the model parameters. + +num_iterations := 10. + +is_iter(0). +is_iter(I+1) |= true for is_iter(I) & I < num_iterations. + +% --------------------------------------------------- +% FORWARD-BACKWARD ALGORITHM (THE "E STEP" OF THE EM ALGORITHM) +% --------------------------------------------------- + +% At each iteration I = 0, 1, ... num_iterations, +% define a trellis graph on states of the form &state(Time,Tag), +% which expresses all ways of explaining the word sequence. +% The edge weights are defined from iteration I's probability model. + +edge(I,&state(Time-1,PrevTag),&state(Time,Tag)) + = p_transition(I,PrevTag,Tag) * p_emission(I,Tag,word(Time)). +start_state = &state(0,&start). % the initial tag at time 0 is called &start +end_state = &state(last_time+1,&stop). % the final tag is called &stop + +% Compute alphas and betas on that trellis graph, using +% dynamic programming. + +alpha(I,start_state) += 1 for is_iter(I). +alpha(I,State) += alpha(I,PrevState) * edge(I,PrevState,State). + +beta(I,end_state) += 1 for is_iter(I). +beta(I,State) += edge(I,State,NextState) * beta(I,NextState). + +% Total probability of all paths in the trellis. + +z(I) = alpha(I,end_state). + +% The "a posteriori" probabilities that the true path +% goes through particular states and edges, given that the +% true path is one of the paths in the trellis. + +prob(I,State) = alpha(I,State) * beta(I,State) / z(I). +prob(I,PrevState,State) + = alpha(I,PrevState) * edge(I,PrevState,State) * beta(I,State) / z(I). + +% --------------------------------------------------- +% COUNTING TRANSITIONS AND EMISSIONS +% --------------------------------------------------- + +% From the above, we can get the expected counts of specific +% transitions and emissions. + +count_transition(I,PrevTag,Tag) += prob(I,&state(Time-1,PrevTag),&state(Time,Tag)). +count_emission(I,Tag,word(Time)) += prob(I,&state(Time,Tag)). + +% Smooth by adding lambda to all counts corresponding to parameters, +% even counts that are still 0 or null. + +lambda := 0.0. % can be changed +count_transition(I,PrevTag,Tag) += lambda for (Param is p_transition(I,PrevTag,Tag)). +count_emission(I,Tag,Word) += lambda for (Param is p_emission(I,Tag,Word)). + +% Get the total smoothed count in each context. These will be the +% denominators of our smoothed probabiities. + +count_transition_denom(I,PrevTag) += count_transition(I,PrevTag,Tag). +count_emission_denom(I,Tag) += count_emission(I,Tag,Word). + +% --------------------------------------------------- +% RE-ESTIMATE THE PARAMETERS (THE "M STEP" OF THE EM ALGORITHM) +% --------------------------------------------------- + +% Someone needs to define the initial guessed model at iteration 0: +% p_t(PrevTag,Tag) := ... +% p_e(Tag,Word): = ... + +% At time 0, use the initial guessed model. + +p_transition(0,PrevTag,Tag) = p_t(PrevTag,Tag). +p_emission(0,Tag,Word) := p_e(Tag,Word). + +% Given the smoothed counts at time I, we define the new model at time I+1: + +p_transition(I+1,PrevTag,Tag) + = count_transition(I,PrevTag,Tag) / count_transition_denom(I,PrevTag) + for I < num_iterations. +p_emission(I+1,Tag,Word) + := count_emission(I,Tag,Word) / count_emission_denom(I,Tag) + for I < num_iterations. + +% The special stop tag always emits the eos word. Make sure that this +% has probability 1 regardless of smoothing. This overrides the previous +% rules. + +p_emission(0,&stop,&eos) := 1. +p_emission(I+1,&stop,&eos) := p_emission(I,&stop,&eos) for I < num_iterations. + +% --------------------------------------------------- +% DRAW CONCLUSIONS FROM THE ESTIMATED MODEL +% --------------------------------------------------- + +% After the model is estimated, look at the computations for the final +% iteration to find the most likely tag at each time step, and its probability. + +besttagprob(Time) max= prob(num_iterations, &state(Time,Tag)) with_key Tag. +besttag(Time) = $key(besttagprob(Time)). + +% Let's also find the single best path, using the Viterbi algorithm. + +bestpathprob(end_state) max= 1 with_key [&stop]. +bestpathprob(State) max= edge(num_iterations,State,NextState) * bestpathprob(NextState) + with_key [Tag | $key(bestpathprob(NextState))] for &state(_,Tag) = State. +bestpathprob = bestpathprob(start_state). +bestpath = $key(bestpathprob(start_state)). + diff --git a/examples/icecream.dyna b/examples/icecream.dyna new file mode 100644 index 0000000..e708dcb --- /dev/null +++ b/examples/icecream.dyna @@ -0,0 +1,51 @@ +% Ice cream example matching the spreadsheet. +% Should be used with hmm.dyna. + +% initial emission probabilities +p_e("C",1) := 0.7. p_e("H",1) := 0.1. +p_e("C",2) := 0.2. p_e("H",2) := 0.2. +p_e("C",3) := 0.1. p_e("H",3) := 0.7. + +% initial transition probabilities +p_t("C","C") := 0.8. p_t("H","C") := 0.1. p_t(&start,"C") := 0.5. +p_t("C","H") := 0.1. p_t("H","H") := 0.8. p_t(&start,"H") := 0.5. +p_t("C",&stop) := 0.1. p_t("H",&stop) := 0.1. p_t(&start,&stop) := 0. + +% word sequence: number of ice creams on each day. + +word(1) := 2. +word(2) := 3. +word(3) := 3. +word(4) := 2. +word(5) := 3. +word(6) := 2. +word(7) := 3. +word(8) := 2. +word(9) := 2. +word(10) := 3. +word(11) := 1. +word(12) := 3. +word(13) := 3. +word(14) := 1. +word(15) := 1. +word(16) := 1. +word(17) := 2. +word(18) := 1. +word(19) := 1. +word(20) := 1. +word(21) := 3. +word(22) := 1. +word(23) := 2. +word(24) := 1. +word(25) := 1. +word(26) := 1. +word(27) := 2. +word(28) := 3. +word(29) := 3. +word(30) := 2. +word(31) := 3. +word(32) := 2. +word(33) := 2. + + + -- 2.50.1