Authors: Christian Doczkal and Jan-Oliver Kaiser
Require Import Recdef.
Require Import ssreflect ssrbool ssrfun ssrnat eqtype seq fintype finset bigop.
Require Import automata misc regexp.

Set Implicit Arguments.
Import Prenex Implicits.

Kleene's Algorithm

The All-but-last predicatate

One the definition of the "transition languages" of an automaton employs the all-but-last predicate abl. This requires a number of lemmas.

Section AllButLastDef.
  Variable X : eqType.
  Variable p : pred X.

  Definition belast (xs : seq X) := rev (behead (rev xs)).

  Lemma belast_rcons (x : X) xs :
    belast (rcons xs x) = xs.

  Lemma belast_s1s xs y ys : belast (xs ++ y :: ys) = xs ++ belast (y :: ys).

  Lemma mem_belast x xs : x \in belast xs -> x \in xs.

  Definition abl xs := all p (belast xs).
End AllButLastDef.

Section AllButLast.
  Variable X : eqType.
  Variable p q : pred X.

  Lemma abl_impl xs :
    subpred p q -> abl p xs -> abl q xs.

  Lemma abl0 : (abl p nil) = true.

  Lemma abl_rcons x xs : abl p (rcons xs x) = all p xs.

  Lemma all_abl xs : all p xs -> abl p xs.

  Lemma all_abl_cat xs ys : all p xs -> abl p ys -> abl p (xs++ys).

  Lemma abl_cat xs ys : abl p (xs++ys) -> abl p xs /\ abl p ys.

  Lemma abl_last x xs : abl p xs -> p (last x xs) -> all p xs.

  Lemma abl2s x y ys : abl p [:: x, y & ys] = p x && abl p (y :: ys).

  Lemma abl_pred0 xs : p =1 pred0 -> abl p xs -> size xs <= 1.
End AllButLast.

Defining R and L


Section TransitiveClosure.
  Variable char : finType.
  Variable A : dfa char.

We instantiate Ssreflects Canonical Big Operators
  Notation "\sigma_( i <- r ) F" := (\big[Plus/Void]_(i <- r) F) (at level 50).
  Notation "\sigma_( i | P ) F" := (\big[Plus/Void]_(i | P) F) (at level 50).

  Lemma big_plus_seqP (T : eqType) (r : seq T) w (F : T -> regexp char) :
    reflect (exists2 x, x \in r & w \in F x) (w \in \sigma_(i <- r) F i).

  Lemma big_plusP (T : finType) (P:pred T) w (F : T -> regexp char) :
    reflect (exists2 i, P i & w \in F i) (w \in \sigma_(i | P i) F i).

  Definition edges x y := \big[Plus/Void]_(a | dfa_trans A x a == y) Atom a.

  Definition edgesP x y w :
    reflect (exists2 a, w = [::a] & dfa_trans A x a = y) (w \in edges x y).

  Definition R0 x y := Plus (if x == y then Eps else Void) (edges x y).

  Lemma mem_R0 w x y :
    reflect (w = [::] /\ x=y \/ exists2 a, w = [::a] & dfa_trans A x a = y)
            (w \in R0 x y).

  Function R (X : {set A}) (x y : A) {measure (fun X => #|X|) X} :=
    match [pick z in X] with
    | None => R0 x y
    | Some z => let X' := X :\ z in
        Plus (Conc (R X' x z) (Conc (Star (R X' z z)) (R X' z y))) (R X' x y)
    end.

  Notation "'R^' X" := (R X) (at level 8).

  Definition L (X : {set A}) (x y : A) :=
      [pred w | (delta x w == y) && abl (mem X) (dfa_run x w)].

  Notation "'L^' X" := (L X) (at level 8).

  Lemma LP (X : {set A}) (x y : A) w :
    reflect (delta x w = y /\ abl (mem X) (dfa_run x w))
            (w \in L^X x y).

Correspondence between L and R


  Lemma L_monotone (X : {set A}) (x y z : A): {subset L^X x y <= L^(z |: X) x y}.

  Lemma L_nil X x y : reflect (x = y) ([::] \in L^X x y).

  Lemma run_split (x : A) z w : z \in dfa_run x w ->
    exists w1, exists w2,
      w = w1++w2 /\
      size w2 < size w /\
      z \notin belast (dfa_run x w1) /\
      delta x w1 = z.

  Lemma L_split X x y z w : w \in L^(z |: X) x y ->
    w \in L^X x y \/ exists w1 w2, w = w1 ++ w2 /\ size w2 < size w
                               /\ w1 \in L^X x z /\ w2 \in L^(z |: X) z y.

  Lemma L_cat (X : {set A}) x y z w1 w2 :
    z \in X -> w1 \in L^X x z -> w2 \in L^X z y -> w1++w2 \in L^X x y.

  Lemma L_catL X x y z w1 w2 :
    w1 \in L^X x z ->
    w2 \in L^(z |: X) z y ->
    w1++w2 \in L^(z |: X) x y.

  Lemma L_catR X x y z w1 w2 :
    w1 \in L^(z |: X) x z ->
    w2 \in L^X z y ->
    w1++w2 \in L^(z |: X) x y.

  Lemma L_star (X : {set A}) z w : w \in star (L^X z z) -> w \in L^(z |: X) z z.

  Lemma L_rec (X : {set A}) x y z :
    L^(z |: X) x y =i plus (conc (L^X x z) (conc (star (L^X z z)) (L^X z y)))
                      (L^X x y).

  Lemma L_R n (X : {set A}) x y : #|X| = n -> L^X x y =i R^X x y.

  Lemma dfa_L x y : L^setT x y =i [pred w | last x (dfa_run x w) == y].

A regular expression for the language of a DFA

Closure of Regular Languages under intersection and complement


Definition Inter (char : finType) (r s : regexp char) :=
  dfa_to_re (dfa_op andb (re_to_dfa r) (re_to_dfa s)).

Lemma Inter_correct (char : finType) (r s : regexp char) w :
  w \in Inter r s = (w \in r) && (w \in s).

Definition Neg (char : finType) (r : regexp char) :=
  dfa_to_re (dfa_compl (re_to_dfa r)).

Lemma Neg_correct (char : finType) (r : regexp char) w :
  w \in Neg r = (w \notin r).