Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import Recdef.
Require Import ssreflect ssrbool ssrfun ssrnat eqtype seq fintype finset bigop.
Require Import automata misc regexp.

Set Implicit Arguments.
Unset Strict Implicit.
Import Prenex Implicits.

Kleene's Algorithm

The All-but-last predicatate

One the definition of the "transition languages" of an automaton employs the all-but-last predicate abl. This requires a number of lemmas.

Section AllButLastDef.
  Variable X : eqType.
  Variable p : pred X.

  Definition belast (xs : seq X) := rev (behead (rev xs)).

  Lemma belast_rcons (x : X) xs :
    belast (rcons xs x) = xs.
  Proof. by rewrite /belast rev_rcons /= revK. Qed.

  Lemma belast_s1s xs y ys : belast (xs ++ y :: ys) = xs ++ belast (y :: ys).
  Proof.
    elim/last_ind : ys; first by rewrite cats1 /= !belast_rcons cats0.
    move => ys x _. by rewrite -!rcons_cons -rcons_cat !belast_rcons.
  Qed.

  Lemma mem_belast x xs : x \in belast xs -> x \in xs.
  Proof. rewrite /belast mem_rev. move/mem_behead. by rewrite mem_rev. Qed.

  Definition abl xs := all p (belast xs).
End AllButLastDef.

Section AllButLast.
  Variable X : eqType.
  Variable p q : pred X.

  Lemma abl_impl xs :
    subpred p q -> abl p xs -> abl q xs.
  Proof. move => ?. exact: sub_all. Qed.

  Lemma abl0 : (abl p nil) = true.
  Proof. done. Qed.

  Lemma abl_rcons x xs : abl p (rcons xs x) = all p xs.
  Proof. by rewrite /abl belast_rcons. Qed.

  Lemma all_abl xs : all p xs -> abl p xs.
  Proof. move/allP => H. apply/allP => x /mem_belast. exact: H. Qed.

  Lemma all_abl_cat xs ys : all p xs -> abl p ys -> abl p (xs++ys).
  Proof.
    case: ys => [H|y ys]; first by rewrite cats0 [_ _ xs]all_abl.
    by rewrite /abl belast_s1s all_cat => -> ->.
  Qed.

  Lemma abl_cat xs ys : abl p (xs++ys) -> abl p xs /\ abl p ys.
  Proof.
    case: ys => [|y ys]; first by rewrite cats0 //.
    rewrite {1}/abl belast_s1s all_cat.
    by case/andP; firstorder using all_abl.
  Qed.

  Lemma abl_last x xs : abl p xs -> p (last x xs) -> all p xs.
  Proof.
    elim/last_ind : xs => // ys y _.
    by rewrite /= abl_rcons last_rcons all_rcons => -> ->.
  Qed.

  Lemma abl2s x y ys : abl p [:: x, y & ys] = p x && abl p (y :: ys).
  Proof. by rewrite /abl -cat1s belast_s1s. Qed.

  Lemma abl_pred0 xs : p =1 pred0 -> abl p xs -> size xs <= 1.
  Proof. move: xs => [|x [|y ys]] H0 //=. by rewrite abl2s H0. Qed.
End AllButLast.

Defining R and L


Section TransitiveClosure.
  Variable char : finType.
  Variable A : dfa char.

We instantiate Ssreflects Canonical Big Operators
  Notation "\sigma_( i <- r ) F" := (\big[Plus/Void]_(i <- r) F) (at level 50).
  Notation "\sigma_( i | P ) F" := (\big[Plus/Void]_(i | P) F) (at level 50).

  Lemma big_plus_seqP (T : eqType) (r : seq T) w (F : T -> regexp char) :
    reflect (exists2 x, x \in r & w \in F x) (w \in \sigma_(i <- r) F i).
  Proof.
    elim: r w => [|r rs IHrs] w. rewrite big_nil; by constructor => [[x]].
    rewrite big_cons; apply: (iffP plusP) => [[H|H]|[x]].
    - exists r => //; by rewrite mem_head.
    - case/IHrs : H => x Hx ?. exists x => //. by rewrite in_cons Hx orbT.
    - rewrite in_cons; case/orP => [/eqP -> |]; auto => ? ?.
      right. apply/IHrs. by exists x.
  Qed.

  Lemma big_plusP (T : finType) (P:pred T) w (F : T -> regexp char) :
    reflect (exists2 i, P i & w \in F i) (w \in \sigma_(i | P i) F i).
  Proof.
    rewrite -big_filter filter_index_enum.
    apply: (iffP (big_plus_seqP _ _ _)) => [|] [x] H1 H2; exists x => //;
      move: H1; by rewrite mem_enum.
  Qed.

  Definition edges x y := \big[Plus/Void]_(a | dfa_trans A x a == y) Atom a.

  Definition edgesP x y w :
    reflect (exists2 a, w = [::a] & dfa_trans A x a = y) (w \in edges x y).
  Proof. apply: (iffP (big_plusP _ _ _)) => [|] [a] /eqP ? /eqP ?; by exists a. Qed.

  Definition R0 x y := Plus (if x == y then Eps else Void) (edges x y).

  Lemma mem_R0 w x y :
    reflect (w = [::] /\ x=y \/ exists2 a, w = [::a] & dfa_trans A x a = y)
            (w \in R0 x y).
  Proof. apply: (iffP plusP).
    - case => [| /edgesP]; auto. case e : (x == y) => // /eqP.
      by rewrite (eqP e); auto.
    - case => [[-> ->]|/edgesP];auto. by rewrite eqxx; auto.
  Qed.

  Function R (X : {set A}) (x y : A) {measure (fun X => #|X|) X} :=
    match [pick z in X] with
    | None => R0 x y
    | Some z => let X' := X :\ z in
        Plus (Conc (R X' x z) (Conc (Star (R X' z z)) (R X' z y))) (R X' x y)
    end.
  Proof.
    move => X x y z /= H; apply/ltP; exact: set_pick_size.
    move => X x y z /= H; apply/ltP; exact: set_pick_size.
    move => X x y z /= H; apply/ltP; exact: set_pick_size.
    move => X x y z /= H; apply/ltP; exact: set_pick_size.
  Defined.

  Notation "'R^' X" := (R X) (at level 8).

  Definition L (X : {set A}) (x y : A) :=
      [pred w | (delta x w == y) && abl (mem X) (dfa_run x w)].

  Notation "'L^' X" := (L X) (at level 8).

  Lemma LP (X : {set A}) (x y : A) w :
    reflect (delta x w = y /\ abl (mem X) (dfa_run x w))
            (w \in L^X x y).
  Proof. rewrite inE. apply: (iffP andP); firstorder; exact/eqP. Qed.

Correspondence between L and R


  Lemma L_monotone (X : {set A}) (x y z : A): {subset L^X x y <= L^(z |: X) x y}.
  Proof.
    move => w. rewrite !inE => /andP [-> /=].
    apply: abl_impl => x' /=. exact: setU1r.
  Qed.

  Lemma L_nil X x y : reflect (x = y) ([::] \in L^X x y).
  Proof. rewrite inE /= abl0 andbT. exact: eqP. Qed.

  Lemma run_split (x : A) z w : z \in dfa_run x w ->
    exists w1, exists w2,
      w = w1++w2 /\
      size w2 < size w /\
      z \notin belast (dfa_run x w1) /\
      delta x w1 = z.
  Proof.
    case: w => [|a w] //.
    elim: w a x z => [|b w IHw] a x z.
      rewrite !inE => /eqP ->.
      exists [::a]. by exists [::].
    rewrite /= in_cons.
    case: (eqP) => /=.
      exists [::a]. by exists (b::w).
    move/eqP => H0 H1.
    move/IHw: (H1) => [w1 [w2 [H2 H3]]].
    exists (a::w1). exists w2.
    rewrite cat_cons H2. firstorder.
    rewrite /=.
    elim: w1 H2 H H3 H4 => [|c w1 IHw1] H2 H H3 H4 //=.
    rewrite -cat1s belast_s1s in_cons. apply/norP => //.
  Qed.

  Lemma L_split X x y z w : w \in L^(z |: X) x y ->
    w \in L^X x y \/ exists w1 w2, w = w1 ++ w2 /\ size w2 < size w
                               /\ w1 \in L^X x z /\ w2 \in L^(z |: X) z y.
  Proof.
    case/boolP : (z \in X) => HX; first by rewrite -(set1mem HX); auto.
    case/boolP : (z \in dfa_run x w) => [Hz | /negP Hz].
    - move/run_split: Hz => [w1 [w2 [H2 [H3 [H4 H5]]]]].
      right;exists w1; exists w2; firstorder; subst w.
      + rewrite inE H5 /= eqxx /=.
        case/LP : H => _. rewrite dfa_run_cat => /abl_cat [/allP H _].
        apply/allP => p Hp.
        have/negbTE pz: p != z by apply/eqP => ?; subst p; rewrite Hp in H4.
        move/H : (Hp). by rewrite !inE pz.
      + case/LP : H => e. rewrite dfa_run_cat => /abl_cat [_ H].
        rewrite !inE -{3}H5 H andbT. by rewrite -e delta_cat -H5.
    - case/LP => e /allP Hw. left. rewrite inE e eqxx /=.
      apply/allP => p Hp.
      have/negbTE pz: p != z. apply/eqP => ?; subst p. apply: Hz. exact: mem_belast Hp.
      move/Hw : (Hp). by rewrite !inE pz.
   Qed.

  Lemma L_cat (X : {set A}) x y z w1 w2 :
    z \in X -> w1 \in L^X x z -> w2 \in L^X z y -> w1++w2 \in L^X x y.
  Proof.
    move => H /LP [H0 H1] /LP [H2 H3].
    rewrite !inE delta_cat !H0 H2 eqxx /= dfa_run_cat -/(delta x w1).
    apply: all_abl_cat; last by rewrite H0.
    rewrite -H0 in H. exact: abl_last H1 H.
  Qed.

  Lemma L_catL X x y z w1 w2 :
    w1 \in L^X x z ->
    w2 \in L^(z |: X) z y ->
    w1++w2 \in L^(z |: X) x y.
  Proof. move/(L_monotone z). apply: L_cat. exact: setU11. Qed.

  Lemma L_catR X x y z w1 w2 :
    w1 \in L^(z |: X) x z ->
    w2 \in L^X z y ->
    w1++w2 \in L^(z |: X) x y.
  Proof. move => H /(L_monotone z). apply: L_cat H. exact: setU11. Qed.

  Lemma L_star (X : {set A}) z w : w \in star (L^X z z) -> w \in L^(z |: X) z z.
  Proof.
    move/starP => [vv Hvv ->]. elim: vv Hvv => [_|r vv IHvv]; first exact/L_nil.
    move => /= /andP [/andP [_ H1] H2]. exact: L_catL H1 (IHvv H2).
  Qed.

  Lemma L_rec (X : {set A}) x y z :
    L^(z |: X) x y =i plus (conc (L^X x z) (conc (star (L^X z z)) (L^X z y)))
                      (L^X x y).
  Proof.
    move => w. apply/idP/idP.
      apply/(size_induction (f:=size)): w x y => w IHw x y.
      move/L_split => [/LP [H1 H2]|[w1 [w2 [Hw' [H1 [Hw1 Hw2]]]]]].
        by rewrite !inE H1 H2 eqxx /= orbT.       move: (IHw w2 H1 z y Hw2) Hw' => H4 -> {IHw H1}.
      rewrite inE (conc_cat Hw1 _) //.
      case/plusP : H4 => H; last by rewrite -[w2]/([::] ++ w2) conc_cat //.
      move/concP : H => [w21] [w22] [W1 [W2]] /concP [w221] [w222] [W3 [W4 W5]]; subst.
      by rewrite catA conc_cat // star_cat.
    case/plusP ; last exact: L_monotone.
    move/concP => [w1] [w2] [-> [Hw1]] /concP [w3] [w4] [-> [Hw3 Hw4]].
    by rewrite (L_catL Hw1) // (L_catR _ Hw4) // L_star.
  Qed.

  Lemma L_R n (X : {set A}) x y : #|X| = n -> L^X x y =i R^X x y.
  Proof.
    elim: n X x y => [|n IHn] X x y.
      move/cards0_eq => ->.
      rewrite R_equation.
      case: pickP => [z|Hset0 w]; first by rewrite in_set0.
      apply/LP/mem_R0.
        move => [H /(abl_pred0 Hset0)].
        by case: w H => [|a [|b w]] //; eauto.
      move => [[-> ->]|[a [-> <-]]] /=; last by done.
      by rewrite abl0.
    move => HX w. rewrite R_equation.
    case: pickP => [z Hz|Hset0]; last by rewrite (eq_card0 Hset0) in HX.
    have HsizeX: (#|X :\ z| = n).
      move: HX. rewrite (cardsD1 z) Hz add1n. by move => [].
    rewrite -(setD1K Hz).
    rewrite (L_rec _ _) -2!topredE /= /plus /= setD1K => //.
    rewrite !IHn => //; f_equal.
    apply: conc_eq; first exact: IHn.
    apply: conc_eq; last exact: IHn.
    apply: star_eq. exact: IHn.
  Qed.

  Lemma dfa_L x y : L^setT x y =i [pred w | last x (dfa_run x w) == y].
  Proof.
    move => w. rewrite !inE /abl. case: (last _ _ == _) => //=.
    suff {x y} H: mem [set: A] =1 predT by rewrite (eq_all H) all_predT.
    by move => x; rewrite !inE.
  Qed.

A regular expression for the language of a DFA


  Definition dfa_to_re : regexp char := \sigma_(x | dfa_fin A x) R^setT (dfa_s A) x.

  Lemma dfa_to_re_correct : dfa_lang A =i dfa_to_re.
  Proof.
    move => w. rewrite delta_accept. apply/idP/big_plusP => [H|[x Hx]].
    - exists (delta_s A w) => //. by rewrite -(@L_R #|A|) ?cardsE // dfa_L inE.
    - by rewrite -(@L_R #|A|) ?cardsE // dfa_L inE /delta => /eqP ->.
  Qed.

End TransitiveClosure.

Closure of Regular Languages under intersection and complement


Definition Inter (char : finType) (r s : regexp char) :=
  dfa_to_re (dfa_op andb (re_to_dfa r) (re_to_dfa s)).

Lemma Inter_correct (char : finType) (r s : regexp char) w :
  w \in Inter r s = (w \in r) && (w \in s).
Proof. by rewrite /Inter -dfa_to_re_correct dfa_op_correct !re_to_dfa_correct. Qed.

Definition Neg (char : finType) (r : regexp char) :=
  dfa_to_re (dfa_compl (re_to_dfa r)).

Lemma Neg_correct (char : finType) (r : regexp char) w :
  w \in Neg r = (w \notin r).
Proof. by rewrite /Neg -dfa_to_re_correct dfa_compl_correct !re_to_dfa_correct. Qed.