Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import Recdef.
Require Import ssreflect ssrbool ssrfun ssrnat eqtype seq fintype finset bigop.
Require Import automata misc regexp.

Set Implicit Arguments.
Unset Strict Implicit.
Import Prenex Implicits.

Kleene's Algorithm

Defining R and L


Section KleeneAlgorithm.
  Variable char : finType.
  Variable A : dfa char.

We first define the transition languages between states. The trasition languages are defined such that w \in L^X q p iff the unique run from q on w ends in p and only traverses states in X in between.

  Inductive TLang (X: {set A}): A -> A -> word char -> Prop :=
  | TLang0 q: TLang X q q [::]
  | TLang1 q a: TLang X q (dfa_trans A q a) [::a]
  | TLangS q a w p:
      dfa_trans A q a \in X ->
      TLang X (dfa_trans A q a) p w ->
      TLang X q p (a::w).

  Fixpoint L (X: {set A}) (q p: A) w :=
    match w with
      | [::] => p == q
      | [:: a] => p == (dfa_trans A q a)
      | a :: w' => let: q' := dfa_trans A q a in (q' \in X) && L X q' p w'
    end.

  Lemma LP X q p w:
    reflect (TLang X q p w) (L X q p w).
  Proof.
    apply: (iffP idP).
    - elim: w q p => [|a [|b w] IHw] q p; try by move/eqP => ->; constructor.
      move => /= /andP [H1 /IHw]. by constructor.
    - move => H. induction H => //=. rewrite IHTLang H /=. by case: w H0 IHTLang.
  Qed.

  Lemma TLang_split X q p w z:
    TLang (z |: X) q p w ->
    TLang X q p w \/
    exists i, [/\ 0 < i, TLang X q z (take i w) & TLang (z |: X) z p (drop i w)].
  Proof.
    move => H. induction H; try by left; constructor.
    - move: H. rewrite !inE => /orP [/eqP H|].
      + subst. right. exists 1; split; rewrite /= ?take0 ?drop0 //=. by constructor.
      + case: IHTLang => [|[i [H1 H2 H3]]].
        * left. by constructor.
        * right. exists i.+1; split => //=. by constructor.
  Qed.

  Lemma TLang_set0 q p w: TLang set0 q p w ->
                       p = q /\ w = [::] \/ exists2 a, w = [::a] & p = dfa_trans A q a.
  Proof. move => H. inversion H => //; try by subst; eauto. by rewrite inE in H0. Qed.

We instantiate Ssreflects Canonical Big Operators
  Notation "\sigma_( i <- r ) F" := (\big[Plus/Void]_(i <- r) F) (at level 50).
  Notation "\sigma_( i | P ) F" := (\big[Plus/Void]_(i | P) F) (at level 50).

  Lemma big_plus_seqP (T : eqType) (r : seq T) w (F : T -> regexp char) :
    reflect (exists2 x, x \in r & w \in F x) (w \in \sigma_(i <- r) F i).
  Proof.
    elim: r w => [|r rs IHrs] w. rewrite big_nil; by constructor => [[x]].
    rewrite big_cons; apply: (iffP plusP) => [[H|H]|[x]].
    - exists r => //; by rewrite mem_head.
    - case/IHrs : H => x Hx ?. exists x => //. by rewrite in_cons Hx orbT.
    - rewrite in_cons; case/orP => [/eqP -> |]; auto => ? ?.
      right. apply/IHrs. by exists x.
  Qed.

  Lemma big_plusP (T : finType) (P:pred T) w (F : T -> regexp char) :
    reflect (exists2 i, P i & w \in F i) (w \in \sigma_(i | P i) F i).
  Proof.
    rewrite -big_filter filter_index_enum.
    apply: (iffP (big_plus_seqP _ _ _)) => [|] [x] H1 H2; exists x => //;
      move: H1; by rewrite mem_enum.
  Qed.

  Definition edges x y := \big[Plus/Void]_(a | dfa_trans A x a == y) Atom a.

  Definition edgesP x y w :
    reflect (exists2 a, w = [::a] & dfa_trans A x a = y) (w \in edges x y).
  Proof. apply: (iffP (big_plusP _ _ _)) => [|] [a] /eqP ? /eqP ?; by exists a. Qed.

  Definition R0 x y := Plus (if x == y then Eps else Void) (edges x y).

  Lemma mem_R0 w x y :
    reflect (w = [::] /\ x=y \/ exists2 a, w = [::a] & dfa_trans A x a = y)
            (w \in R0 x y).
  Proof. apply: (iffP plusP).
    - case => [| /edgesP]; auto. case e : (x == y) => // /eqP.
      by rewrite (eqP e); auto.
    - case => [[-> ->]|/edgesP];auto. by rewrite eqxx; auto.
  Qed.

  Fixpoint R (X : seq A) (x y : A) :=
    if X isn't z :: X' then R0 x y else
      Plus (Conc (R X' x z) (Conc (Star (R X' z z)) (R X' z y))) (R X' x y).

  Notation "'R^' X" := (R X) (at level 8).
  Notation "'L^' X" := (L X) (at level 8).

Correspondence between L and R


  Lemma L_monotone (X : {set A}) (x y z : A): {subset L^X x y <= L^(z |: X) x y}.
  Proof.
    move => w /LP H. apply/LP. induction H => //; try by constructor.
    constructor => //. by rewrite in_setU H orbT.
  Qed.

  Lemma L_nil X x y : reflect (x = y) ([::] \in L^X x y).
  Proof. rewrite -topredE /= eq_sym. exact: eqP. Qed.

  Lemma L_split X x y z w : w \in L^(z |: X) x y ->
    w \in L^X x y \/ exists w1 w2, w = w1 ++ w2 /\ size w2 < size w
                               /\ w1 \in L^X x z /\ w2 \in L^(z |: X) z y.
  Proof.
    case: w => [|a w].
    - move => /LP H. inversion H. subst. left. apply/LP. constructor.
    - move/LP/TLang_split => [|[i [Hi /LP H1 /LP H2]]].
      + by left; apply/LP.
      + right. exists (take i (a::w)). exists (drop i (a::w)).
        rewrite cat_take_drop size_drop. firstorder.
        by rewrite -{2}[size (a::w)]subn0 ltn_sub2l.
  Qed.

  Lemma L_cat (X : {set A}) x y z w1 w2 :
    z \in X -> w1 \in L^X x z -> w2 \in L^X z y -> w1++w2 \in L^X x y.
  Proof.
    move => Hz /LP H1 /LP H2. apply/LP.
    elim: H1 Hz H2 => [q|q a Hz| q a w p ? ? IH] //=; first by constructor.
    constructor => //. exact: IH.
  Qed.

  Lemma L_catL X x y z w1 w2 :
    w1 \in L^X x z -> w2 \in L^(z |: X) z y -> w1++w2 \in L^(z |: X) x y.
  Proof. move/(L_monotone z). apply: L_cat. exact: setU11. Qed.

  Lemma L_catR X x y z w1 w2 :
    w1 \in L^(z |: X) x z -> w2 \in L^X z y -> w1++w2 \in L^(z |: X) x y.
  Proof. move => H /(L_monotone z). apply: L_cat H. exact: setU11. Qed.

  Lemma L_star (X : {set A}) z w : w \in star (L^X z z) -> w \in L^(z |: X) z z.
  Proof.
    move/starP => [vv Hvv ->]. elim: vv Hvv => [_|r vv IHvv]; first exact/L_nil.
    move => /= /andP [/andP [_ H1] H2]. exact: L_catL H1 (IHvv H2).
  Qed.

  Lemma L_rec (X : {set A}) x y z :
    L^(z |: X) x y =i plus (conc (L^X x z) (conc (star (L^X z z)) (L^X z y)))
                      (L^X x y).
  Proof.
    move => w. apply/idP/idP.
    - apply/(size_induction (f:=size)): w x y => w IHw x y.
      move/L_split => [|[w1 [w2 [Hw' [H1 [Hw1 Hw2]]]]]].
      + rewrite inE => ->. by rewrite orbT.
      + move: (IHw w2 H1 z y Hw2) Hw' => H4 -> {IHw H1}.
        rewrite inE (conc_cat Hw1 _) //.
        case/plusP : H4 => H; last by rewrite -[w2]cat0s conc_cat //.
        move/concP : H => [w21] [w22] [W1 [W2]] /concP [w221] [w222] [W3 [W4 W5]]; subst.
        by rewrite catA conc_cat // star_cat.
    - case/plusP ; last exact: L_monotone.
      move/concP => [w1] [w2] [-> [Hw1]] /concP [w3] [w4] [-> [Hw3 Hw4]].
      by rewrite (L_catL Hw1) // (L_catR _ Hw4) // L_star.
  Qed.

  Lemma L_R (X : seq A) x y : L^[set z in X] x y =i R^X x y.
  Proof.
    elim: X x y => [|z X' IH] x y w.
    - rewrite (_ : [set z in [::]] = set0) //=.
      apply/LP/mem_R0.
      + move/TLang_set0 => [[-> ->]|[a -> ->]]; by eauto.
      + move => [[-> ->]|[a -> <-]]; by constructor.
    - rewrite set_cons /=.
      rewrite (L_rec _ _) -2!topredE /= /plus //=.
      rewrite !IH => //; f_equal.
      apply: conc_eq; first exact: IH.
      apply: conc_eq; last exact: IH.
      apply: star_eq. exact: IH.
  Qed.

  Lemma dfa_L x y w : w \in L^setT x y = (delta x w == y).
  Proof.
    apply/LP/eqP => [H|<-]; first by induction H.
    elim: w x y => [|a w IHw] x y; first by constructor.
    rewrite delta_cons. constructor. auto. exact: IHw.
  Qed.

A regular expression for the language of a DFA


  Definition dfa_to_re : regexp char := \sigma_(x | dfa_fin A x) R^(enum A) (dfa_s A) x.

  Lemma dfa_to_re_correct : dfa_lang A =i dfa_to_re.
  Proof.
    move => w. rewrite delta_accept. apply/idP/big_plusP => [H|[x Hx]].
    - exists (delta_s A w) => //. by rewrite -L_R set_enum dfa_L.
    - by rewrite -L_R set_enum dfa_L => /eqP ->.
  Qed.

End KleeneAlgorithm.

Closure of Regular Languages under intersection and complement


Definition Inter (char : finType) (r s : regexp char) :=
  dfa_to_re (dfa_op andb (re_to_dfa r) (re_to_dfa s)).

Lemma Inter_correct (char : finType) (r s : regexp char) w :
  w \in Inter r s = (w \in r) && (w \in s).
Proof. by rewrite /Inter -dfa_to_re_correct dfa_op_correct !re_to_dfa_correct. Qed.

Definition Neg (char : finType) (r : regexp char) :=
  dfa_to_re (dfa_compl (re_to_dfa r)).

Lemma Neg_correct (char : finType) (r : regexp char) w :
  w \in Neg r = (w \notin r).
Proof. by rewrite /Neg -dfa_to_re_correct dfa_compl_correct !re_to_dfa_correct. Qed.