Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import Recdef.
Require Import ssreflect ssrbool ssrfun ssrnat eqtype seq fintype finset bigop.
Require Import automata misc regexp.
Set Implicit Arguments.
Unset Strict Implicit.
Import Prenex Implicits.
Require Import ssreflect ssrbool ssrfun ssrnat eqtype seq fintype finset bigop.
Require Import automata misc regexp.
Set Implicit Arguments.
Unset Strict Implicit.
Import Prenex Implicits.
We first define the transition languages between states. The
trasition languages are defined such that w \in L^X q p iff the
unique run from q on w ends in p and only traverses states in
X in between.
Inductive TLang (X: {set A}): A -> A -> word char -> Prop :=
| TLang0 q: TLang X q q [::]
| TLang1 q a: TLang X q (dfa_trans A q a) [::a]
| TLangS q a w p:
dfa_trans A q a \in X ->
TLang X (dfa_trans A q a) p w ->
TLang X q p (a::w).
Fixpoint L (X: {set A}) (q p: A) w :=
match w with
| [::] => p == q
| [:: a] => p == (dfa_trans A q a)
| a :: w' => let: q' := dfa_trans A q a in (q' \in X) && L X q' p w'
end.
Lemma LP X q p w:
reflect (TLang X q p w) (L X q p w).
Proof.
apply: (iffP idP).
- elim: w q p => [|a [|b w] IHw] q p; try by move/eqP => ->; constructor.
move => /= /andP [H1 /IHw]. by constructor.
- move => H. induction H => //=. rewrite IHTLang H /=. by case: w H0 IHTLang.
Qed.
Lemma TLang_split X q p w z:
TLang (z |: X) q p w ->
TLang X q p w \/
exists i, [/\ 0 < i, TLang X q z (take i w) & TLang (z |: X) z p (drop i w)].
Proof.
move => H. induction H; try by left; constructor.
- move: H. rewrite !inE => /orP [/eqP H|].
+ subst. right. exists 1; split; rewrite /= ?take0 ?drop0 //=. by constructor.
+ case: IHTLang => [|[i [H1 H2 H3]]].
* left. by constructor.
* right. exists i.+1; split => //=. by constructor.
Qed.
Lemma TLang_set0 q p w: TLang set0 q p w ->
p = q /\ w = [::] \/ exists2 a, w = [::a] & p = dfa_trans A q a.
Proof. move => H. inversion H => //; try by subst; eauto. by rewrite inE in H0. Qed.
We instantiate Ssreflects Canonical Big Operators
Notation "\sigma_( i <- r ) F" := (\big[Plus/Void]_(i <- r) F) (at level 50).
Notation "\sigma_( i | P ) F" := (\big[Plus/Void]_(i | P) F) (at level 50).
Lemma big_plus_seqP (T : eqType) (r : seq T) w (F : T -> regexp char) :
reflect (exists2 x, x \in r & w \in F x) (w \in \sigma_(i <- r) F i).
Proof.
elim: r w => [|r rs IHrs] w. rewrite big_nil; by constructor => [[x]].
rewrite big_cons; apply: (iffP plusP) => [[H|H]|[x]].
- exists r => //; by rewrite mem_head.
- case/IHrs : H => x Hx ?. exists x => //. by rewrite in_cons Hx orbT.
- rewrite in_cons; case/orP => [/eqP -> |]; auto => ? ?.
right. apply/IHrs. by exists x.
Qed.
Lemma big_plusP (T : finType) (P:pred T) w (F : T -> regexp char) :
reflect (exists2 i, P i & w \in F i) (w \in \sigma_(i | P i) F i).
Proof.
rewrite -big_filter filter_index_enum.
apply: (iffP (big_plus_seqP _ _ _)) => [|] [x] H1 H2; exists x => //;
move: H1; by rewrite mem_enum.
Qed.
Definition edges x y := \big[Plus/Void]_(a | dfa_trans A x a == y) Atom a.
Definition edgesP x y w :
reflect (exists2 a, w = [::a] & dfa_trans A x a = y) (w \in edges x y).
Proof. apply: (iffP (big_plusP _ _ _)) => [|] [a] /eqP ? /eqP ?; by exists a. Qed.
Definition R0 x y := Plus (if x == y then Eps else Void) (edges x y).
Lemma mem_R0 w x y :
reflect (w = [::] /\ x=y \/ exists2 a, w = [::a] & dfa_trans A x a = y)
(w \in R0 x y).
Proof. apply: (iffP plusP).
- case => [| /edgesP]; auto. case e : (x == y) => // /eqP.
by rewrite (eqP e); auto.
- case => [[-> ->]|/edgesP];auto. by rewrite eqxx; auto.
Qed.
Fixpoint R (X : seq A) (x y : A) :=
if X isn't z :: X' then R0 x y else
Plus (Conc (R X' x z) (Conc (Star (R X' z z)) (R X' z y))) (R X' x y).
Notation "'R^' X" := (R X) (at level 8).
Notation "'L^' X" := (L X) (at level 8).
Notation "\sigma_( i | P ) F" := (\big[Plus/Void]_(i | P) F) (at level 50).
Lemma big_plus_seqP (T : eqType) (r : seq T) w (F : T -> regexp char) :
reflect (exists2 x, x \in r & w \in F x) (w \in \sigma_(i <- r) F i).
Proof.
elim: r w => [|r rs IHrs] w. rewrite big_nil; by constructor => [[x]].
rewrite big_cons; apply: (iffP plusP) => [[H|H]|[x]].
- exists r => //; by rewrite mem_head.
- case/IHrs : H => x Hx ?. exists x => //. by rewrite in_cons Hx orbT.
- rewrite in_cons; case/orP => [/eqP -> |]; auto => ? ?.
right. apply/IHrs. by exists x.
Qed.
Lemma big_plusP (T : finType) (P:pred T) w (F : T -> regexp char) :
reflect (exists2 i, P i & w \in F i) (w \in \sigma_(i | P i) F i).
Proof.
rewrite -big_filter filter_index_enum.
apply: (iffP (big_plus_seqP _ _ _)) => [|] [x] H1 H2; exists x => //;
move: H1; by rewrite mem_enum.
Qed.
Definition edges x y := \big[Plus/Void]_(a | dfa_trans A x a == y) Atom a.
Definition edgesP x y w :
reflect (exists2 a, w = [::a] & dfa_trans A x a = y) (w \in edges x y).
Proof. apply: (iffP (big_plusP _ _ _)) => [|] [a] /eqP ? /eqP ?; by exists a. Qed.
Definition R0 x y := Plus (if x == y then Eps else Void) (edges x y).
Lemma mem_R0 w x y :
reflect (w = [::] /\ x=y \/ exists2 a, w = [::a] & dfa_trans A x a = y)
(w \in R0 x y).
Proof. apply: (iffP plusP).
- case => [| /edgesP]; auto. case e : (x == y) => // /eqP.
by rewrite (eqP e); auto.
- case => [[-> ->]|/edgesP];auto. by rewrite eqxx; auto.
Qed.
Fixpoint R (X : seq A) (x y : A) :=
if X isn't z :: X' then R0 x y else
Plus (Conc (R X' x z) (Conc (Star (R X' z z)) (R X' z y))) (R X' x y).
Notation "'R^' X" := (R X) (at level 8).
Notation "'L^' X" := (L X) (at level 8).
Lemma L_monotone (X : {set A}) (x y z : A): {subset L^X x y <= L^(z |: X) x y}.
Proof.
move => w /LP H. apply/LP. induction H => //; try by constructor.
constructor => //. by rewrite in_setU H orbT.
Qed.
Lemma L_nil X x y : reflect (x = y) ([::] \in L^X x y).
Proof. rewrite -topredE /= eq_sym. exact: eqP. Qed.
Lemma L_split X x y z w : w \in L^(z |: X) x y ->
w \in L^X x y \/ exists w1 w2, w = w1 ++ w2 /\ size w2 < size w
/\ w1 \in L^X x z /\ w2 \in L^(z |: X) z y.
Proof.
case: w => [|a w].
- move => /LP H. inversion H. subst. left. apply/LP. constructor.
- move/LP/TLang_split => [|[i [Hi /LP H1 /LP H2]]].
+ by left; apply/LP.
+ right. exists (take i (a::w)). exists (drop i (a::w)).
rewrite cat_take_drop size_drop. firstorder.
by rewrite -{2}[size (a::w)]subn0 ltn_sub2l.
Qed.
Lemma L_cat (X : {set A}) x y z w1 w2 :
z \in X -> w1 \in L^X x z -> w2 \in L^X z y -> w1++w2 \in L^X x y.
Proof.
move => Hz /LP H1 /LP H2. apply/LP.
elim: H1 Hz H2 => [q|q a Hz| q a w p ? ? IH] //=; first by constructor.
constructor => //. exact: IH.
Qed.
Lemma L_catL X x y z w1 w2 :
w1 \in L^X x z -> w2 \in L^(z |: X) z y -> w1++w2 \in L^(z |: X) x y.
Proof. move/(L_monotone z). apply: L_cat. exact: setU11. Qed.
Lemma L_catR X x y z w1 w2 :
w1 \in L^(z |: X) x z -> w2 \in L^X z y -> w1++w2 \in L^(z |: X) x y.
Proof. move => H /(L_monotone z). apply: L_cat H. exact: setU11. Qed.
Lemma L_star (X : {set A}) z w : w \in star (L^X z z) -> w \in L^(z |: X) z z.
Proof.
move/starP => [vv Hvv ->]. elim: vv Hvv => [_|r vv IHvv]; first exact/L_nil.
move => /= /andP [/andP [_ H1] H2]. exact: L_catL H1 (IHvv H2).
Qed.
Lemma L_rec (X : {set A}) x y z :
L^(z |: X) x y =i plus (conc (L^X x z) (conc (star (L^X z z)) (L^X z y)))
(L^X x y).
Proof.
move => w. apply/idP/idP.
- apply/(size_induction (f:=size)): w x y => w IHw x y.
move/L_split => [|[w1 [w2 [Hw' [H1 [Hw1 Hw2]]]]]].
+ rewrite inE => ->. by rewrite orbT.
+ move: (IHw w2 H1 z y Hw2) Hw' => H4 -> {IHw H1}.
rewrite inE (conc_cat Hw1 _) //.
case/plusP : H4 => H; last by rewrite -[w2]cat0s conc_cat //.
move/concP : H => [w21] [w22] [W1 [W2]] /concP [w221] [w222] [W3 [W4 W5]]; subst.
by rewrite catA conc_cat // star_cat.
- case/plusP ; last exact: L_monotone.
move/concP => [w1] [w2] [-> [Hw1]] /concP [w3] [w4] [-> [Hw3 Hw4]].
by rewrite (L_catL Hw1) // (L_catR _ Hw4) // L_star.
Qed.
Lemma L_R (X : seq A) x y : L^[set z in X] x y =i R^X x y.
Proof.
elim: X x y => [|z X' IH] x y w.
- rewrite (_ : [set z in [::]] = set0) //=.
apply/LP/mem_R0.
+ move/TLang_set0 => [[-> ->]|[a -> ->]]; by eauto.
+ move => [[-> ->]|[a -> <-]]; by constructor.
- rewrite set_cons /=.
rewrite (L_rec _ _) -2!topredE /= /plus //=.
rewrite !IH => //; f_equal.
apply: conc_eq; first exact: IH.
apply: conc_eq; last exact: IH.
apply: star_eq. exact: IH.
Qed.
Lemma dfa_L x y w : w \in L^setT x y = (delta x w == y).
Proof.
apply/LP/eqP => [H|<-]; first by induction H.
elim: w x y => [|a w IHw] x y; first by constructor.
rewrite delta_cons. constructor. auto. exact: IHw.
Qed.
Definition dfa_to_re : regexp char := \sigma_(x | dfa_fin A x) R^(enum A) (dfa_s A) x.
Lemma dfa_to_re_correct : dfa_lang A =i dfa_to_re.
Proof.
move => w. rewrite delta_accept. apply/idP/big_plusP => [H|[x Hx]].
- exists (delta_s A w) => //. by rewrite -L_R set_enum dfa_L.
- by rewrite -L_R set_enum dfa_L => /eqP ->.
Qed.
End KleeneAlgorithm.
Definition Inter (char : finType) (r s : regexp char) :=
dfa_to_re (dfa_op andb (re_to_dfa r) (re_to_dfa s)).
Lemma Inter_correct (char : finType) (r s : regexp char) w :
w \in Inter r s = (w \in r) && (w \in s).
Proof. by rewrite /Inter -dfa_to_re_correct dfa_op_correct !re_to_dfa_correct. Qed.
Definition Neg (char : finType) (r : regexp char) :=
dfa_to_re (dfa_compl (re_to_dfa r)).
Lemma Neg_correct (char : finType) (r : regexp char) w :
w \in Neg r = (w \notin r).
Proof. by rewrite /Neg -dfa_to_re_correct dfa_compl_correct !re_to_dfa_correct. Qed.