Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import ssreflect ssrfun ssrbool eqtype ssrnat seq choice fintype path fingraph finfun finset.
Require Import misc regexp.
Set Implicit Arguments.
Section FA.
Variable char : finType.
Definition word := regexp.word char.
Require Import misc regexp.
Set Implicit Arguments.
Section FA.
Variable char : finType.
Definition word := regexp.word char.
Record dfa : Type := {
dfa_state :> finType;
dfa_s : dfa_state;
dfa_fin : pred dfa_state;
dfa_trans : dfa_state -> char -> dfa_state
}.
For DFAs, we use the direct recursive defintion of acceptance
as well as a definition in terms of runs. The latter is used
in the translation of DFAs to regular expressions.
Fixpoint dfa_accept (A : dfa) (x : A) w :=
if w is a :: w' then dfa_accept A (dfa_trans A x a) w' else x \in dfa_fin A.
Section DFA_Acceptance.
Variable A : dfa.
Lemma dfa_accept_cons (x : A) a w :
a :: w \in dfa_accept x = (w \in dfa_accept (dfa_trans x a)).
We define a run of w on the automaton A
to be the list of states x_1 .. x|w|
traversed when following the edges labeled
w_1 .. w|w| starting in x.
So the run on x does not include x
Fixpoint dfa_run (x : A) (w : word) : seq A :=
if w is a :: w' then dfa_trans x a :: dfa_run (dfa_trans x a) w' else [::].
Lemma dfa_run_take x w n : take n (dfa_run x w) = dfa_run x (take n w).
Lemma dfa_run_cat x w1 w2 :
dfa_run x (w1 ++ w2) = dfa_run x w1 ++ dfa_run (last x (dfa_run x w1)) w2.
Definition delta x w := last x (dfa_run x w).
Lemma delta_cons x a w : delta x (a :: w) = delta (dfa_trans x a) w.
Lemma delta_cat x w1 w2 : delta x (w1 ++ w2) = delta (delta x w1) w2.
Definition delta_s w := delta (dfa_s A) w.
Lemma delta_accept (x : A) w : (w \in dfa_accept x) = (delta x w \in dfa_fin A).
Lemma dfa_trans1 x w a :
dfa_trans (delta x w) a = delta x (w ++ [::a]).
End DFA_Acceptance.
Definition dfa_lang A := [pred w | dfa_accept (dfa_s A) w].
Record nfa : Type := {
nfa_state :> finType;
nfa_s : nfa_state;
nfa_fin : pred nfa_state;
nfa_trans : nfa_state -> char -> nfa_state -> bool
}.
nfa_state :> finType;
nfa_s : nfa_state;
nfa_fin : pred nfa_state;
nfa_trans : nfa_state -> char -> nfa_state -> bool
}.
Non-deterministic acceptance.
Fixpoint nfa_accept (A : nfa) (x : A) w :=
if w is a :: w' then [exists y, nfa_trans A x a y && nfa_accept A y w']
else x \in nfa_fin A.
Definition nfa_lang (A : nfa) := [pred w | nfa_accept A (nfa_s A) w].
Definition nfa_char a :=
{| nfa_s := false ;
nfa_fin := id ;
nfa_trans x b y := [&& (b == a), ~~ x & y] |}.
Lemma nfa_char_correct a : nfa_lang (nfa_char a) =1 pred1 [:: a].
if w is a :: w' then [exists y, nfa_trans A x a y && nfa_accept A y w']
else x \in nfa_fin A.
Definition nfa_lang (A : nfa) := [pred w | nfa_accept A (nfa_s A) w].
Definition nfa_char a :=
{| nfa_s := false ;
nfa_fin := id ;
nfa_trans x b y := [&& (b == a), ~~ x & y] |}.
Lemma nfa_char_correct a : nfa_lang (nfa_char a) =1 pred1 [:: a].
Equivalence of DFAs and NFAs
We use the powerset construction to obtain a deterministic automaton from a non-deterministic one.
Section PowersetConstruction.
Variable A : nfa.
Definition nfa_to_dfa := {|
dfa_s := [set nfa_s A];
dfa_fin X := [exists x: A, (x \in X) && nfa_fin A x];
dfa_trans X a := \bigcup_(x | x \in X) [set y | nfa_trans A x a y] |}.
Lemma nfa_to_dfa_aux2 (x : A) w (X : nfa_to_dfa) :
x \in X -> nfa_accept A x w -> dfa_accept X w.
Lemma nfa_to_dfa_aux1 (X : nfa_to_dfa) w :
dfa_accept X w -> exists2 x, (x \in X) & nfa_accept A x w.
Lemma nfa_to_dfa_correct : nfa_lang A =i dfa_lang nfa_to_dfa.
End PowersetConstruction.
Variable A : nfa.
Definition nfa_to_dfa := {|
dfa_s := [set nfa_s A];
dfa_fin X := [exists x: A, (x \in X) && nfa_fin A x];
dfa_trans X a := \bigcup_(x | x \in X) [set y | nfa_trans A x a y] |}.
Lemma nfa_to_dfa_aux2 (x : A) w (X : nfa_to_dfa) :
x \in X -> nfa_accept A x w -> dfa_accept X w.
Lemma nfa_to_dfa_aux1 (X : nfa_to_dfa) w :
dfa_accept X w -> exists2 x, (x \in X) & nfa_accept A x w.
Lemma nfa_to_dfa_correct : nfa_lang A =i dfa_lang nfa_to_dfa.
End PowersetConstruction.
Embedding deterministic automata in non-deterministic automata.
Section Embed.
Variable A : dfa.
Definition dfa_to_nfa : nfa := {|
nfa_s := dfa_s A;
nfa_fin := dfa_fin A;
nfa_trans x a y := y == dfa_trans x a |}.
Lemma dfa_to_nfa_correct : dfa_lang A =i nfa_lang dfa_to_nfa.
End Embed.
Variable A : dfa.
Definition dfa_to_nfa : nfa := {|
nfa_s := dfa_s A;
nfa_fin := dfa_fin A;
nfa_trans x a y := y == dfa_trans x a |}.
Lemma dfa_to_nfa_correct : dfa_lang A =i nfa_lang dfa_to_nfa.
End Embed.
Definition dfa_void :=
{| dfa_s := tt; dfa_fin x := false; dfa_trans x a := tt |}.
Lemma dfa_void_correct (x: dfa_void) w: ~~ dfa_accept x w.
Definition dfa_eps :=
{| dfa_s := true; dfa_fin := id ; dfa_trans x a := false |}.
Lemma dfa_eps_correct : dfa_lang dfa_eps =i pred1 [::].
{| dfa_s := tt; dfa_fin x := false; dfa_trans x a := tt |}.
Lemma dfa_void_correct (x: dfa_void) w: ~~ dfa_accept x w.
Definition dfa_eps :=
{| dfa_s := true; dfa_fin := id ; dfa_trans x a := false |}.
Lemma dfa_eps_correct : dfa_lang dfa_eps =i pred1 [::].
Operations on deterministic automata.
Complement automaton
Definition dfa_compl :=
{|
dfa_s := dfa_s A1;
dfa_fin := [predC (dfa_fin A1)];
dfa_trans := (@dfa_trans A1)
|}.
Lemma dfa_compl_correct w :
w \in dfa_lang dfa_compl = (w \notin dfa_lang A1).
{|
dfa_s := dfa_s A1;
dfa_fin := [predC (dfa_fin A1)];
dfa_trans := (@dfa_trans A1)
|}.
Lemma dfa_compl_correct w :
w \in dfa_lang dfa_compl = (w \notin dfa_lang A1).
BinOp Automaton
Definition dfa_op :=
{| dfa_s := (dfa_s A1, dfa_s A2);
dfa_fin q := let (x1,x2) := q in op (dfa_fin A1 x1) (dfa_fin A2 x2);
dfa_trans x a := (dfa_trans x.1 a, dfa_trans x.2 a) |}.
Lemma dfa_op_correct w :
w \in dfa_lang dfa_op = op (w \in dfa_lang A1) (w \in dfa_lang A2).
End DFAOps.
Operations on non-deterministic automata.
Concatenation of two non-deterministic automata.
This construction looks more complicated than the ususal construction with epsilon edges,
but the construction is essentially the same
Definition nfa_conc A1 A2 : nfa :=
{| nfa_s := inl _ (nfa_s A1);
nfa_fin := [pred x |
match x with
| inl x => (x \in nfa_fin A1) && (nfa_s A2 \in nfa_fin A2)
| inr x => x \in nfa_fin A2
end];
nfa_trans := fun x a y =>
match x,y with
| inl x, inl y => nfa_trans A1 x a y
| inl x, inr y => (x \in nfa_fin A1) && nfa_trans A2 (nfa_s A2) a y
| inr x, inr y => nfa_trans A2 x a y
| inr x, inl y => false
end
|}.
Lemma conc_inrP A1 A2 x w :
nfa_accept (nfa_conc A1 A2) (inr x) w = nfa_accept A2 x w.
Lemma conc_inlP A1 A2 x w : reflect
(exists w1 w2, [/\ w = w1 ++ w2 , nfa_accept A1 x w1 & w2 \in nfa_lang A2])
(nfa_accept (nfa_conc A1 A2) (inl x) w).
Lemma nfa_conc_correct A1 A2 :
nfa_lang (nfa_conc A1 A2) =i conc (nfa_lang A1) (nfa_lang A2).
For nfa_star we again have to duplicate some edges since we have no epsilon transitions.
Definition nfa_star A : nfa :=
{| nfa_s := None;
nfa_fin x := x == None;
nfa_trans x a y :=
match x,y with
| Some x, Some y => nfa_trans A x a y
| None, Some y => nfa_trans A (nfa_s A) a y
| Some x, None => [exists z, (nfa_trans A x a z) && (z \in nfa_fin A)]
| None, None => [exists z, (nfa_trans A (nfa_s A) a z) && (z \in nfa_fin A)]
end |}.
Since we need at least one step to get back to None,
acceptance in A can only be lifted to nfa_star A for nonempty words.
Lemma nfa_star_cont A x w :
~~ nilp w -> nfa_accept A x w -> nfa_accept (nfa_star A) (Some x) w.
Lemma nfa_star_clone A w :
w \in nfa_lang A -> w \in nfa_lang (nfa_star A).
Lemma nfa_star_cat A x w1 w2 :
nfa_accept (nfa_star A) x w1 -> w2 \in nfa_lang (nfa_star A) ->
nfa_accept (nfa_star A) x (w1 ++ w2).
Lemma nfa_starE_aux {A x w} :
nfa_accept (nfa_star A) (Some x) w ->
(exists w1 w2, [/\ w = w1 ++ w2, nfa_accept A x w1 & nfa_accept (nfa_star A) None w2]).
Lemma nfa_starE A a w: (a :: w) \in nfa_lang (nfa_star A) ->
exists w1 w2, [/\ w = w1 ++ w2, a::w1 \in nfa_lang A & w2 \in nfa_lang (nfa_star A)].
Lemma nfa_star_correct A : nfa_lang (nfa_star A) =i star (nfa_lang A).
End NFAOps.
Fixpoint re_to_dfa (r : regexp char): dfa :=
match r with
| Void => dfa_void
| Eps => dfa_eps
| Atom a => nfa_to_dfa (nfa_char a)
| Star s => nfa_to_dfa (nfa_star (dfa_to_nfa (re_to_dfa s)))
| Plus s t => dfa_op orb (re_to_dfa s) (re_to_dfa t)
| Conc s t => nfa_to_dfa (nfa_conc (dfa_to_nfa (re_to_dfa s)) (dfa_to_nfa (re_to_dfa t)))
end.
Lemma re_to_dfa_correct r : dfa_lang (re_to_dfa r) =i r.
Section Reachability.
Variable A : dfa.
Definition dfa_trans_some (x y : A) := [exists a, dfa_trans x a == y].
Lemma step_someP x y :
reflect (exists w, delta x w == y) (connect dfa_trans_some x y).
Definition reachable := connect dfa_trans_some (dfa_s A).
End Reachability.
Definition dfa_lang_empty A := [forall (x | reachable A x), x \notin dfa_fin A].
Lemma dfa_lang_emptyP A :
reflect (dfa_lang A =i pred0) (dfa_lang_empty A).
Section Equivalence.
Definition dfa_sym_diff A1 A2 := dfa_op addb A1 A2.
Definition dfa_equiv A1 A2 := dfa_lang_empty (dfa_sym_diff A1 A2).
Lemma dfa_equiv_correct A1 A2 :
reflect (dfa_lang A1 =i dfa_lang A2) (dfa_equiv A1 A2).
End Equivalence.
Decidability of regular expression equivalence
Definition re_equiv r s := dfa_equiv (re_to_dfa r) (re_to_dfa s).
Lemma re_equiv_correct r s : reflect (r =i s) (re_equiv r s).
Section Connected.
Variable A : dfa.
Definition reachable_type := { x:A | reachable x }.
Lemma reachable_trans_proof (x : reachable_type) a : reachable (dfa_trans (val x) a).
Definition reachable_trans (x : reachable_type) a : reachable_type :=
Sub (dfa_trans (val x) a) (reachable_trans_proof x a).
Definition reachable_s : reachable_type := Sub (dfa_s A) (connect0 _ _).
Definition dfa_prune := {|
dfa_s := reachable_s;
dfa_fin x := val x \in dfa_fin A;
dfa_trans := reachable_trans
|}.
Lemma dfa_prune_correct : dfa_lang dfa_prune =i dfa_lang A.
Definition connected (A : dfa) := surjective (delta_s A).
Definition connectedb (A : dfa) := [forall x: A, reachable x].
Lemma connectedP (B : dfa) : reflect (connected B) (connectedb B).
Lemma dfa_prune_connectedb_aux x y :
connect (dfa_trans_some A) (val x) (val y) ->
connect (dfa_trans_some dfa_prune) x y.
Lemma dfa_prune_connected : connected dfa_prune.
Hint Resolve dfa_prune_connected.
For connected DFAs, we can use xchoose to obtain for every state
a word leading to that state. We use this in the Myhill-Nerode theorem
to obtain Myhill partitions from DFAs
A DFA is connected, if it has the same size as ist connected sub automaton
Section Minimize.
Variable A : dfa.
Definition coll x y := forall w, (delta x w \in dfa_fin A) = (delta y w \in dfa_fin A).
Definition dist0 : {set A*A} := [set x | (x.1 \in dfa_fin A) != (x.2 \in dfa_fin A)].
Definition distS (dist : {set A*A}) :=
[set x | [exists a, (dfa_trans x.1 a, dfa_trans x.2 a) \in dist]].
Lemma distSP (dist : {set A*A}) (x y : A) :
reflect (exists a, (dfa_trans x a, dfa_trans y a) \in dist)
((x,y) \in distS dist).
Lemma distS_mono: mono distS.
Definition one_step_dist dist := dist0 :|: distS dist.
Lemma one_step_dist_mono : mono one_step_dist.
Definition dist := lfp one_step_dist.
Lemma distP x y :
reflect (exists w, (delta x w \in dfa_fin A) != (delta y w \in dfa_fin A))
((x,y) \in dist).
Definition collb x y := (x,y) \notin dist.
Lemma collP x y : reflect (coll x y) (collb x y).
Lemma collb_refl x : collb x x.
Lemma collb_sym : symmetric collb.
Lemma collb_trans : transitive collb.
Definition min_state := quot collb.
Definition class (x:A) : min_state :=
Repr (collb_refl) (collb_sym) (collb_trans) x.
Lemma collb_step a x y : collb x y -> collb (dfa_trans x a) (dfa_trans y a).
Lemma class_eq x y : reflect (class x = class y) (collb x y).
Definition minimize : dfa := {|
dfa_s := class (dfa_s A);
dfa_trans x a := class (dfa_trans (val x) a);
dfa_fin := [pred x | val x \in dfa_fin A ] |}.
Canonical fin_app_pred := ApplicativePred (dfa_fin minimize).
Lemma minimize_delta (x : A) w :
@delta minimize (class x) w = class (delta x w).
Lemma minstate_exp (x : minimize) : x = class (val x).
Lemma minimize_val (x : minimize) w : class (delta (val x) w) = (delta x w).
Lemma minimal_fin (x : A) :
(class x \in dfa_fin minimize) = (x \in dfa_fin A).
End Minimize.
Lemma minimize_correct A : dfa_lang (minimize A) =i dfa_lang A.
Lemma minimize_size A : #|minimize A| <= #|A|.
Definition minimal (A : dfa) := forall B, dfa_lang A =i dfa_lang B -> #|A| <= #|B|.
Definition collapsed (A : dfa) := forall x y: A, coll x y <-> x = y.
Lemma minimize_collapsed (A : dfa) : collapsed (minimize A).
Definition dfa_iso (A1 A2 : dfa) :=
exists i: A1 -> A2,
[/\ bijective i,
forall x a, i (dfa_trans x a) = dfa_trans (i x) a,
forall x, i (x) \in dfa_fin A2 = (x \in dfa_fin A1) &
i (dfa_s A1) = dfa_s A2 ].
Section Isomopism.
Variables (A B : dfa).
Hypothesis L_AB : dfa_lang A =i dfa_lang B.
Hypothesis (A_coll: collapsed A) (B_coll: collapsed B).
Hypothesis (A_conn : connected A) (B_conn : connected B).
Definition iso (x : A) : B := delta (dfa_s B) (cr A_conn x).
Definition iso_inv (x : B) : A := delta (dfa_s A) (cr B_conn x).
Lemma delta_iso w x : delta (iso x) w \in dfa_fin B = (delta x w \in dfa_fin A).
Lemma delta_iso_inv w x : delta (iso_inv x) w \in dfa_fin A = (delta x w \in dfa_fin B).
Lemma can_iso : cancel iso_inv iso.
Lemma can_iso_inv : cancel iso iso_inv.
Lemma coll_iso : dfa_iso A B.
Lemma dfa_iso_size : dfa_iso A B -> #|A| = #|B|.
End Isomopism.
Lemma minimize_connected A : connected A -> connected (minimize A).
Lemma minimal_connected A : minimal A -> connected A.
Lemma minimal_size_eq A B :
minimal A -> minimal B -> dfa_lang A =i dfa_lang B -> #|A| = #|B|.
Lemma minimize_minimal A (CA : connected A) : minimal (minimize A).
Lemma minimize_idem_size A : collapsed A -> #|A| = #|minimize A|.
Lemma minimal_idem A : minimal A -> collapsed A.
Lemma minimalP A : minimal A <-> (connected A /\ collapsed A).
Lemma minimal_iso A B :
dfa_lang A =i dfa_lang B -> minimal A -> minimal B -> dfa_iso A B.
End FA.