Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import ssreflect ssrfun ssrbool eqtype ssrnat seq choice fintype path fingraph finfun finset.
Require Import misc regexp.

Set Implicit Arguments.

Section FA.
Variable char : finType.
Definition word := regexp.word char.

Finite Automata

Deterministic Finite Automata


Record dfa : Type := {
    dfa_state :> finType;
    dfa_s : dfa_state;
    dfa_fin : pred dfa_state;
    dfa_trans : dfa_state -> char -> dfa_state
    }.

For DFAs, we use the direct recursive defintion of acceptance as well as a definition in terms of runs. The latter is used in the translation of DFAs to regular expressions.

Fixpoint dfa_accept (A : dfa) (x : A) w :=
  if w is a :: w' then dfa_accept A (dfa_trans A x a) w' else x \in dfa_fin A.


Section DFA_Acceptance.
Variable A : dfa.

Lemma dfa_accept_cons (x : A) a w :
  a :: w \in dfa_accept x = (w \in dfa_accept (dfa_trans x a)).

We define a run of w on the automaton A to be the list of states x_1 .. x|w| traversed when following the edges labeled w_1 .. w|w| starting in x. So the run on x does not include x

Fixpoint dfa_run (x : A) (w : word) : seq A :=
  if w is a :: w' then dfa_trans x a :: dfa_run (dfa_trans x a) w' else [::].

Lemma dfa_run_take x w n : take n (dfa_run x w) = dfa_run x (take n w).

Lemma dfa_run_cat x w1 w2 :
  dfa_run x (w1 ++ w2) = dfa_run x w1 ++ dfa_run (last x (dfa_run x w1)) w2.

Definition delta x w := last x (dfa_run x w).

Lemma delta_cons x a w : delta x (a :: w) = delta (dfa_trans x a) w.

Lemma delta_cat x w1 w2 : delta x (w1 ++ w2) = delta (delta x w1) w2.

Definition delta_s w := delta (dfa_s A) w.

Lemma delta_accept (x : A) w : (w \in dfa_accept x) = (delta x w \in dfa_fin A).

Lemma dfa_trans1 x w a :
  dfa_trans (delta x w) a = delta x (w ++ [::a]).

End DFA_Acceptance.

Definition dfa_lang A := [pred w | dfa_accept (dfa_s A) w].


Nondeterministic Finite Automata.

Record nfa : Type := {
    nfa_state :> finType;
    nfa_s : nfa_state;
    nfa_fin : pred nfa_state;
    nfa_trans : nfa_state -> char -> nfa_state -> bool
    }.

Non-deterministic acceptance.
Fixpoint nfa_accept (A : nfa) (x : A) w :=
  if w is a :: w' then [exists y, nfa_trans A x a y && nfa_accept A y w']
                  else x \in nfa_fin A.

Definition nfa_lang (A : nfa) := [pred w | nfa_accept A (nfa_s A) w].

Definition nfa_char a :=
  {| nfa_s := false ;
     nfa_fin := id ;
     nfa_trans x b y := [&& (b == a), ~~ x & y] |}.

Lemma nfa_char_correct a : nfa_lang (nfa_char a) =1 pred1 [:: a].

Equivalence of DFAs and NFAs

We use the powerset construction to obtain a deterministic automaton from a non-deterministic one.
Embedding deterministic automata in non-deterministic automata.
Section Embed.

Variable A : dfa.

Definition dfa_to_nfa : nfa := {|
  nfa_s := dfa_s A;
  nfa_fin := dfa_fin A;
  nfa_trans x a y := y == dfa_trans x a |}.

Lemma dfa_to_nfa_correct : dfa_lang A =i nfa_lang dfa_to_nfa.

End Embed.

Regular Operations on Automata

Primitive automata
Operations on deterministic automata.
Section DFAOps.

Variable op : bool -> bool -> bool.
Variable A1 A2 : dfa.

Complement automaton
Definition dfa_compl :=
 {|
    dfa_s := dfa_s A1;
    dfa_fin := [predC (dfa_fin A1)];
    dfa_trans := (@dfa_trans A1)
  |}.

Lemma dfa_compl_correct w :
  w \in dfa_lang dfa_compl = (w \notin dfa_lang A1).

BinOp Automaton

Definition dfa_op :=
  {| dfa_s := (dfa_s A1, dfa_s A2);
     dfa_fin q := let (x1,x2) := q in op (dfa_fin A1 x1) (dfa_fin A2 x2);
     dfa_trans x a := (dfa_trans x.1 a, dfa_trans x.2 a) |}.

Lemma dfa_op_correct w :
  w \in dfa_lang dfa_op = op (w \in dfa_lang A1) (w \in dfa_lang A2).

End DFAOps.

Operations on non-deterministic automata.
Section NFAOps.

Implicit Type A : nfa.

Concatenation of two non-deterministic automata. This construction looks more complicated than the ususal construction with epsilon edges, but the construction is essentially the same

Definition nfa_conc A1 A2 : nfa :=
 {| nfa_s := inl _ (nfa_s A1);
    nfa_fin := [pred x |
        match x with
          | inl x => (x \in nfa_fin A1) && (nfa_s A2 \in nfa_fin A2)
          | inr x => x \in nfa_fin A2
        end];
     nfa_trans := fun x a y =>
        match x,y with
          | inl x, inl y => nfa_trans A1 x a y
          | inl x, inr y => (x \in nfa_fin A1) && nfa_trans A2 (nfa_s A2) a y
          | inr x, inr y => nfa_trans A2 x a y
          | inr x, inl y => false
        end
 |}.

Lemma conc_inrP A1 A2 x w :
  nfa_accept (nfa_conc A1 A2) (inr x) w = nfa_accept A2 x w.

Lemma conc_inlP A1 A2 x w : reflect
  (exists w1 w2, [/\ w = w1 ++ w2 , nfa_accept A1 x w1 & w2 \in nfa_lang A2])
  (nfa_accept (nfa_conc A1 A2) (inl x) w).

Lemma nfa_conc_correct A1 A2 :
  nfa_lang (nfa_conc A1 A2) =i conc (nfa_lang A1) (nfa_lang A2).

For nfa_star we again have to duplicate some edges since we have no epsilon transitions.

Definition nfa_star A : nfa :=
 {| nfa_s := None;
    nfa_fin x := x == None;
    nfa_trans x a y :=
      match x,y with
        | Some x, Some y => nfa_trans A x a y
        | None, Some y => nfa_trans A (nfa_s A) a y
        | Some x, None => [exists z, (nfa_trans A x a z) && (z \in nfa_fin A)]
        | None, None => [exists z, (nfa_trans A (nfa_s A) a z) && (z \in nfa_fin A)]
      end |}.

Since we need at least one step to get back to None, acceptance in A can only be lifted to nfa_star A for nonempty words.

Lemma nfa_star_cont A x w :
  ~~ nilp w -> nfa_accept A x w -> nfa_accept (nfa_star A) (Some x) w.

Lemma nfa_star_clone A w :
  w \in nfa_lang A -> w \in nfa_lang (nfa_star A).

Lemma nfa_star_cat A x w1 w2 :
  nfa_accept (nfa_star A) x w1 -> w2 \in nfa_lang (nfa_star A) ->
  nfa_accept (nfa_star A) x (w1 ++ w2).

Lemma nfa_starE_aux {A x w} :
  nfa_accept (nfa_star A) (Some x) w ->
  (exists w1 w2, [/\ w = w1 ++ w2, nfa_accept A x w1 & nfa_accept (nfa_star A) None w2]).

Lemma nfa_starE A a w: (a :: w) \in nfa_lang (nfa_star A) ->
  exists w1 w2, [/\ w = w1 ++ w2, a::w1 \in nfa_lang A & w2 \in nfa_lang (nfa_star A)].

Lemma nfa_star_correct A : nfa_lang (nfa_star A) =i star (nfa_lang A).

End NFAOps.

Translation of Regular Expressions to Finite Automata


Fixpoint re_to_dfa (r : regexp char): dfa :=
  match r with
  | Void => dfa_void
  | Eps => dfa_eps
  | Atom a => nfa_to_dfa (nfa_char a)
  | Star s => nfa_to_dfa (nfa_star (dfa_to_nfa (re_to_dfa s)))
  | Plus s t => dfa_op orb (re_to_dfa s) (re_to_dfa t)
  | Conc s t => nfa_to_dfa (nfa_conc (dfa_to_nfa (re_to_dfa s)) (dfa_to_nfa (re_to_dfa t)))
  end.

Lemma re_to_dfa_correct r : dfa_lang (re_to_dfa r) =i r.

Decidability of Language Equivalence


Section Reachability.
  Variable A : dfa.

  Definition dfa_trans_some (x y : A) := [exists a, dfa_trans x a == y].

  Lemma step_someP x y :
    reflect (exists w, delta x w == y) (connect dfa_trans_some x y).

  Definition reachable := connect dfa_trans_some (dfa_s A).
End Reachability.

Definition dfa_lang_empty A := [forall (x | reachable A x), x \notin dfa_fin A].

Lemma dfa_lang_emptyP A :
  reflect (dfa_lang A =i pred0) (dfa_lang_empty A).

Section Equivalence.

  Definition dfa_sym_diff A1 A2 := dfa_op addb A1 A2.

  Definition dfa_equiv A1 A2 := dfa_lang_empty (dfa_sym_diff A1 A2).

  Lemma dfa_equiv_correct A1 A2 :
    reflect (dfa_lang A1 =i dfa_lang A2) (dfa_equiv A1 A2).
End Equivalence.

Decidability of regular expression equivalence

Definition re_equiv r s := dfa_equiv (re_to_dfa r) (re_to_dfa s).

Lemma re_equiv_correct r s : reflect (r =i s) (re_equiv r s).


Construction of the Connected Sub-Automaton


Section Connected.
  Variable A : dfa.

  Definition reachable_type := { x:A | reachable x }.

  Lemma reachable_trans_proof (x : reachable_type) a : reachable (dfa_trans (val x) a).

  Definition reachable_trans (x : reachable_type) a : reachable_type :=
    Sub (dfa_trans (val x) a) (reachable_trans_proof x a).

  Definition reachable_s : reachable_type := Sub (dfa_s A) (connect0 _ _).

  Definition dfa_prune := {|
      dfa_s := reachable_s;
      dfa_fin x := val x \in dfa_fin A;
      dfa_trans := reachable_trans
  |}.

  Lemma dfa_prune_correct : dfa_lang dfa_prune =i dfa_lang A.

  Definition connected (A : dfa) := surjective (delta_s A).
  Definition connectedb (A : dfa) := [forall x: A, reachable x].

  Lemma connectedP (B : dfa) : reflect (connected B) (connectedb B).

  Lemma dfa_prune_connectedb_aux x y :
    connect (dfa_trans_some A) (val x) (val y) ->
    connect (dfa_trans_some dfa_prune) x y.

  Lemma dfa_prune_connected : connected dfa_prune.

  Hint Resolve dfa_prune_connected.

For connected DFAs, we can use xchoose to obtain for every state a word leading to that state. We use this in the Myhill-Nerode theorem to obtain Myhill partitions from DFAs
A DFA is connected, if it has the same size as ist connected sub automaton

DFA Minimization


Section Minimize.
  Variable A : dfa.

  Definition coll x y := forall w, (delta x w \in dfa_fin A) = (delta y w \in dfa_fin A).

Computation of the collapsing Relation


  Definition dist0 : {set A*A} := [set x | (x.1 \in dfa_fin A) != (x.2 \in dfa_fin A)].

  Definition distS (dist : {set A*A}) :=
    [set x | [exists a, (dfa_trans x.1 a, dfa_trans x.2 a) \in dist]].

  Lemma distSP (dist : {set A*A}) (x y : A) :
    reflect (exists a, (dfa_trans x a, dfa_trans y a) \in dist)
            ((x,y) \in distS dist).

  Lemma distS_mono: mono distS.

  Definition one_step_dist dist := dist0 :|: distS dist.

  Lemma one_step_dist_mono : mono one_step_dist.

  Definition dist := lfp one_step_dist.

  Lemma distP x y :
    reflect (exists w, (delta x w \in dfa_fin A) != (delta y w \in dfa_fin A))
            ((x,y) \in dist).

  Definition collb x y := (x,y) \notin dist.

  Lemma collP x y : reflect (coll x y) (collb x y).

  Lemma collb_refl x : collb x x.

  Lemma collb_sym : symmetric collb.

  Lemma collb_trans : transitive collb.

  Definition min_state := quot collb.
  Definition class (x:A) : min_state :=
    Rep (collb_refl) (collb_sym) (collb_trans) x.

  Lemma collb_step a x y : collb x y -> collb (dfa_trans x a) (dfa_trans y a).

  Lemma class_eq x y : reflect (class x = class y) (collb x y).

  Definition minimize : dfa := {|
    dfa_s := class (dfa_s A);
    dfa_trans x a := class (dfa_trans (val x) a);
    dfa_fin := [pred x | val x \in dfa_fin A ] |}.

  Canonical fin_app_pred := ApplicativePred (dfa_fin minimize).

  Lemma minimize_delta (x : A) w :
    @delta minimize (class x) w = class (delta x w).

  Lemma minstate_exp (x : minimize) : x = class (val x).

  Lemma minimize_val (x : minimize) w : class (delta (val x) w) = (delta x w).

  Lemma minimal_fin (x : A) :
    (class x \in dfa_fin minimize) = (x \in dfa_fin A).

End Minimize.


Correctness of Minimization


Lemma minimize_correct A : dfa_lang (minimize A) =i dfa_lang A.

Lemma minimize_size A : #|minimize A| <= #|A|.

Definition minimal (A : dfa) := forall B, dfa_lang A =i dfa_lang B -> #|A| <= #|B|.

Definition collapsed (A : dfa) := forall x y: A, coll x y <-> x = y.

Lemma minimize_collapsed (A : dfa) : collapsed (minimize A).

Uniqueness of Minimal DFAs


Definition dfa_iso (A1 A2 : dfa) :=
  exists i: A1 -> A2,
    [/\ bijective i,
        forall x a, i (dfa_trans x a) = dfa_trans (i x) a,
        forall x, i (x) \in dfa_fin A2 = (x \in dfa_fin A1) &
        i (dfa_s A1) = dfa_s A2 ].

Section Isomopism.
  Variables (A B : dfa).
  Hypothesis L_AB : dfa_lang A =i dfa_lang B.

  Hypothesis (A_coll: collapsed A) (B_coll: collapsed B).
  Hypothesis (A_conn : connected A) (B_conn : connected B).

  Definition iso (x : A) : B := delta (dfa_s B) (cr A_conn x).
  Definition iso_inv (x : B) : A := delta (dfa_s A) (cr B_conn x).

  Lemma delta_iso w x : delta (iso x) w \in dfa_fin B = (delta x w \in dfa_fin A).

  Lemma delta_iso_inv w x : delta (iso_inv x) w \in dfa_fin A = (delta x w \in dfa_fin B).

  Lemma can_iso : cancel iso_inv iso.

  Lemma can_iso_inv : cancel iso iso_inv.

  Lemma coll_iso : dfa_iso A B.

  Lemma dfa_iso_size : dfa_iso A B -> #|A| = #|B|.

End Isomopism.

Lemma minimize_connected A : connected A -> connected (minimize A).

Lemma minimal_connected A : minimal A -> connected A.

Lemma minimal_size_eq A B :
  minimal A -> minimal B -> dfa_lang A =i dfa_lang B -> #|A| = #|B|.

Lemma minimize_minimal A (CA : connected A) : minimal (minimize A).


Lemma minimize_idem_size A : collapsed A -> #|A| = #|minimize A|.

Lemma minimal_idem A : minimal A -> collapsed A.

Lemma minimalP A : minimal A <-> (connected A /\ collapsed A).

Lemma minimal_iso A B :
  dfa_lang A =i dfa_lang B -> minimal A -> minimal B -> dfa_iso A B.

End FA.