--- /dev/null
+% ---------------------------------------------------
+% INPUT
+% ---------------------------------------------------
+
+% Someone needs to define an input word sequence like tis.
+% word(1) = "Now".
+% word(2) = "is".
+% ...
+
+% Add a special eos (end of sequence) marker after the last word.
+
+last_time max= Time for &eos != word(Time).
+word(last_time+1) := &eos.
+
+% ---------------------------------------------------
+% ITERATION NUMBERS.
+% ---------------------------------------------------
+
+% How many times to re-estimate the model parameters.
+
+num_iterations := 10.
+
+is_iter(0).
+is_iter(I+1) |= true for is_iter(I) & I < num_iterations.
+
+% ---------------------------------------------------
+% FORWARD-BACKWARD ALGORITHM (THE "E STEP" OF THE EM ALGORITHM)
+% ---------------------------------------------------
+
+% At each iteration I = 0, 1, ... num_iterations,
+% define a trellis graph on states of the form &state(Time,Tag),
+% which expresses all ways of explaining the word sequence.
+% The edge weights are defined from iteration I's probability model.
+
+edge(I,&state(Time-1,PrevTag),&state(Time,Tag))
+ = p_transition(I,PrevTag,Tag) * p_emission(I,Tag,word(Time)).
+start_state = &state(0,&start). % the initial tag at time 0 is called &start
+end_state = &state(last_time+1,&stop). % the final tag is called &stop
+
+% Compute alphas and betas on that trellis graph, using
+% dynamic programming.
+
+alpha(I,start_state) += 1 for is_iter(I).
+alpha(I,State) += alpha(I,PrevState) * edge(I,PrevState,State).
+
+beta(I,end_state) += 1 for is_iter(I).
+beta(I,State) += edge(I,State,NextState) * beta(I,NextState).
+
+% Total probability of all paths in the trellis.
+
+z(I) = alpha(I,end_state).
+
+% The "a posteriori" probabilities that the true path
+% goes through particular states and edges, given that the
+% true path is one of the paths in the trellis.
+
+prob(I,State) = alpha(I,State) * beta(I,State) / z(I).
+prob(I,PrevState,State)
+ = alpha(I,PrevState) * edge(I,PrevState,State) * beta(I,State) / z(I).
+
+% ---------------------------------------------------
+% COUNTING TRANSITIONS AND EMISSIONS
+% ---------------------------------------------------
+
+% From the above, we can get the expected counts of specific
+% transitions and emissions.
+
+count_transition(I,PrevTag,Tag) += prob(I,&state(Time-1,PrevTag),&state(Time,Tag)).
+count_emission(I,Tag,word(Time)) += prob(I,&state(Time,Tag)).
+
+% Smooth by adding lambda to all counts corresponding to parameters,
+% even counts that are still 0 or null.
+
+lambda := 0.0. % can be changed
+count_transition(I,PrevTag,Tag) += lambda for (Param is p_transition(I,PrevTag,Tag)).
+count_emission(I,Tag,Word) += lambda for (Param is p_emission(I,Tag,Word)).
+
+% Get the total smoothed count in each context. These will be the
+% denominators of our smoothed probabiities.
+
+count_transition_denom(I,PrevTag) += count_transition(I,PrevTag,Tag).
+count_emission_denom(I,Tag) += count_emission(I,Tag,Word).
+
+% ---------------------------------------------------
+% RE-ESTIMATE THE PARAMETERS (THE "M STEP" OF THE EM ALGORITHM)
+% ---------------------------------------------------
+
+% Someone needs to define the initial guessed model at iteration 0:
+% p_t(PrevTag,Tag) := ...
+% p_e(Tag,Word): = ...
+
+% At time 0, use the initial guessed model.
+
+p_transition(0,PrevTag,Tag) = p_t(PrevTag,Tag).
+p_emission(0,Tag,Word) := p_e(Tag,Word).
+
+% Given the smoothed counts at time I, we define the new model at time I+1:
+
+p_transition(I+1,PrevTag,Tag)
+ = count_transition(I,PrevTag,Tag) / count_transition_denom(I,PrevTag)
+ for I < num_iterations.
+p_emission(I+1,Tag,Word)
+ := count_emission(I,Tag,Word) / count_emission_denom(I,Tag)
+ for I < num_iterations.
+
+% The special stop tag always emits the eos word. Make sure that this
+% has probability 1 regardless of smoothing. This overrides the previous
+% rules.
+
+p_emission(0,&stop,&eos) := 1.
+p_emission(I+1,&stop,&eos) := p_emission(I,&stop,&eos) for I < num_iterations.
+
+% ---------------------------------------------------
+% DRAW CONCLUSIONS FROM THE ESTIMATED MODEL
+% ---------------------------------------------------
+
+% After the model is estimated, look at the computations for the final
+% iteration to find the most likely tag at each time step, and its probability.
+
+besttagprob(Time) max= prob(num_iterations, &state(Time,Tag)) with_key Tag.
+besttag(Time) = $key(besttagprob(Time)).
+
+% Let's also find the single best path, using the Viterbi algorithm.
+
+bestpathprob(end_state) max= 1 with_key [&stop].
+bestpathprob(State) max= edge(num_iterations,State,NextState) * bestpathprob(NextState)
+ with_key [Tag | $key(bestpathprob(NextState))] for &state(_,Tag) = State.
+bestpathprob = bestpathprob(start_state).
+bestpath = $key(bestpathprob(start_state)).
+