Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import Recdef.
Require Import ssreflect ssrbool ssrfun ssrnat eqtype seq fintype finset bigop.
Require Import automata misc regexp.

Set Implicit Arguments.
Import Prenex Implicits.

Kleene's Algorithm

Defining R and L


Section KleeneAlgorithm.
  Variable char : finType.
  Variable A : dfa char.

We first define the transition languages between states. The trasition languages are defined such that w \in L^X q p iff the unique run from q on w ends in p and only traverses states in X in between.

  Inductive TLang (X: {set A}): A -> A -> word char -> Prop :=
  | TLang0 q: TLang X q q [::]
  | TLang1 q a: TLang X q (dfa_trans A q a) [::a]
  | TLangS q a w p:
      dfa_trans A q a \in X ->
      TLang X (dfa_trans A q a) p w ->
      TLang X q p (a::w).

  Fixpoint L (X: {set A}) (q p: A) w :=
    match w with
      | [::] => p == q
      | [:: a] => p == (dfa_trans A q a)
      | a :: w' => let: q' := dfa_trans A q a in (q' \in X) && L X q' p w'
    end.

  Lemma LP X q p w:
    reflect (TLang X q p w) (L X q p w).

  Lemma TLang_split X q p w z:
    TLang (z |: X) q p w ->
    TLang X q p w \/
    exists i, [/\ 0 < i, TLang X q z (take i w) & TLang (z |: X) z p (drop i w)].

  Lemma TLang_set0 q p w: TLang set0 q p w ->
                       p = q /\ w = [::] \/ exists2 a, w = [::a] & p = dfa_trans A q a.

We instantiate Ssreflects Canonical Big Operators
  Notation "\sigma_( i <- r ) F" := (\big[Plus/Void]_(i <- r) F) (at level 50).
  Notation "\sigma_( i | P ) F" := (\big[Plus/Void]_(i | P) F) (at level 50).

  Lemma big_plus_seqP (T : eqType) (r : seq T) w (F : T -> regexp char) :
    reflect (exists2 x, x \in r & w \in F x) (w \in \sigma_(i <- r) F i).

  Lemma big_plusP (T : finType) (P:pred T) w (F : T -> regexp char) :
    reflect (exists2 i, P i & w \in F i) (w \in \sigma_(i | P i) F i).

  Definition edges x y := \big[Plus/Void]_(a | dfa_trans A x a == y) Atom a.

  Definition edgesP x y w :
    reflect (exists2 a, w = [::a] & dfa_trans A x a = y) (w \in edges x y).

  Definition R0 x y := Plus (if x == y then Eps else Void) (edges x y).

  Lemma mem_R0 w x y :
    reflect (w = [::] /\ x=y \/ exists2 a, w = [::a] & dfa_trans A x a = y)
            (w \in R0 x y).

  Fixpoint R (X : seq A) (x y : A) :=
    if X isn't z :: X' then R0 x y else
      Plus (Conc (R X' x z) (Conc (Star (R X' z z)) (R X' z y))) (R X' x y).

  Notation "'R^' X" := (R X) (at level 8).
  Notation "'L^' X" := (L X) (at level 8).

Correspondence between L and R


  Lemma L_monotone (X : {set A}) (x y z : A): {subset L^X x y <= L^(z |: X) x y}.

  Lemma L_nil X x y : reflect (x = y) ([::] \in L^X x y).

  Lemma L_split X x y z w : w \in L^(z |: X) x y ->
    w \in L^X x y \/ exists w1 w2, w = w1 ++ w2 /\ size w2 < size w
                               /\ w1 \in L^X x z /\ w2 \in L^(z |: X) z y.

  Lemma L_cat (X : {set A}) x y z w1 w2 :
    z \in X -> w1 \in L^X x z -> w2 \in L^X z y -> w1++w2 \in L^X x y.

  Lemma L_catL X x y z w1 w2 :
    w1 \in L^X x z -> w2 \in L^(z |: X) z y -> w1++w2 \in L^(z |: X) x y.

  Lemma L_catR X x y z w1 w2 :
    w1 \in L^(z |: X) x z -> w2 \in L^X z y -> w1++w2 \in L^(z |: X) x y.

  Lemma L_star (X : {set A}) z w : w \in star (L^X z z) -> w \in L^(z |: X) z z.

  Lemma L_rec (X : {set A}) x y z :
    L^(z |: X) x y =i plus (conc (L^X x z) (conc (star (L^X z z)) (L^X z y)))
                      (L^X x y).

  Lemma L_R (X : seq A) x y : L^[set z in X] x y =i R^X x y.

  Lemma dfa_L x y w : w \in L^setT x y = (delta x w == y).

A regular expression for the language of a DFA

Closure of Regular Languages under intersection and complement


Definition Inter (char : finType) (r s : regexp char) :=
  dfa_to_re (dfa_op andb (re_to_dfa r) (re_to_dfa s)).

Lemma Inter_correct (char : finType) (r s : regexp char) w :
  w \in Inter r s = (w \in r) && (w \in s).

Definition Neg (char : finType) (r : regexp char) :=
  dfa_to_re (dfa_compl (re_to_dfa r)).

Lemma Neg_correct (char : finType) (r : regexp char) w :
  w \in Neg r = (w \notin r).