Authors: Christian Doczkal and Jan-Oliver Kaiser
Derived from regexp.v in Thierry Coquand, Vincent Siles, A Decision Procedure for Regular Expression Equivalence in Type Theory DOI: 10.1007/978-3-642-25379-9_11

Section Basics.
Variable char : eqType.
Definition word := seq char.
End Basics.

Definition dlang char := pred (word char).

Section RegExp.

Decidable Languages

Variable char : eqType.

Regular operations on decidable Languages


Definition void : dlang char := pred0.

Definition eps : dlang char := pred1 [::].

Definition atom x : dlang char := pred1 [:: x].

Definition compl L : dlang char := predC L.

Definition prod L1 L2 : dlang char := [pred w in L1 | w \in L2].

Definition plus L1 L2 : dlang char := [pred w | (w \in L1) || (w \in L2)].

Lemma plusP r s w :
  reflect (w \in r \/ w \in s) (w \in plus r s).

For the concatenation of two decidable languages, we use finite types. Note that we need to use L1 and L2 applicatively in order for the termination check for star to succeed.

Definition conc (L1 L2: dlang char) : dlang char :=
  fun v => [exists i : 'I_(size v).+1, L1 (take i v) && L2 (drop i v)].

Lemma concP {L1 L2 : dlang char} {w : word char} :
  reflect (exists w1 w2, w = w1 ++ w2 /\ w1 \in L1 /\ w2 \in L2) (w \in conc L1 L2).

Lemma conc_cat w1 w2 L1 L2 : w1 \in L1 -> w2 \in L2 -> w1 ++ w2 \in conc L1 L2.

Lemma conc_eq (l1: dlang char) l2 l3 l4: l1 =i l2 -> l3 =i l4 -> conc l1 l3 =i conc l2 l4.

The Kleene star operator is defined using resisdual languages.

Definition residual x L : dlang char := [pred w | x :: w \in L].

Definition star L : dlang char :=
  fix star v := if v is x :: v' then conc (residual x L) star v' else true.

Lemma starP : forall {L v},
  reflect (exists2 vv, all [predD L & eps] vv & v = flatten vv) (v \in star L).

Lemma star_cat w1 w2 L : w1 \in L -> w2 \in (star L) -> w1 ++ w2 \in star L.

Lemma starI (L : dlang char) vv :
  (forall v, v \in vv -> v \in L) -> flatten vv \in star L.

Lemma star_eq (l1 : dlang char) l2: l1 =i l2 -> star l1 =i star l2.

Regular Expressions


Inductive regexp :=
 | Void
 | Eps
 | Atom of char
 | Star of regexp
 | Plus of regexp & regexp
 | Conc of regexp & regexp.


Lemma eq_regexp_dec e1 e2 : {e1 = e2} + {e1 <> e2}.

Definition regexp_eqMixin := EqMixin (compareP eq_regexp_dec).
Canonical Structure form_eqType := EqType _ regexp_eqMixin.

End RegExp.

Implicit Arguments void [].
Implicit Arguments eps [].

Section ReLang.
Variable char: eqType.
We assign a decidable language to every regular expression
Fixpoint re_lang (e : regexp char) : dlang char :=
  match e with
  | Void => void char
  | Eps => eps char
  | Atom x => atom x
  | Star e1 => star (re_lang e1)
  | Plus e1 e2 => plus (re_lang e1) (re_lang e2)
  | Conc e1 e2 => conc (re_lang e1) (re_lang e2)
  end.

Canonical Structure req_exp_predType := Eval hnf in mkPredType re_lang.
End ReLang.

Implicit Arguments plusP [char r s w].

Notation "'Void'" := (@Void _).
Notation "'Eps'" := (@Eps _).

Regular Languages


Definition regular char (L : word char -> Prop ) :=
  exists e : regexp char, forall w, L w <-> w \in e.