[CakeML] translator
Konrad Slind
konrad.slind at gmail.com
Sat Aug 15 22:29:21 UTC 2015
Oh sure. All I had to do is do a minor edit to the declaration of the
datatype of regexps.
See attached. The generated code is in regexpML_ml.txt. I am also
attaching the original hand-written regexp.cake file. At the end of that
file are some examples, which should be easy to adapt to and run on
the generated code. I will do this once I get back from having a really
great Saturday night, but you can have a go at getting that to run if
you can't wait.
(Note to cakeML folk: is there a way to have the declared type be "regexp"
instead of
"regexp_regexp" --- which I assume is the concatenation of theory and
type name?)
Konrad.
On Sat, Aug 15, 2015 at 4:57 PM, David Hardin <
david.hardin at rockwellcollins.com> wrote:
> So, Konrad, does that mean you can cobble together a source file that I
> can play with, or do I have to wait? Sounds like you are close, but
> Michael's comments indicate that the final fixes to create a "proper"
> source file aren't going to appear overnight. I'm guessing that some
> hand-tweaking may be needed in the short term. Is that correct?
>
> Thanks, Konrad, and thanks to everyone on the CakeML team for your help so
> far. I'm excited about the possibilities for CakeML, and can't wait to do
> more with it. BTW, I'm a big believer in pushing through third party
> examples like this -- nothing better for finding problems with one's tools.
>
>
> DSH
>
>
> On Sat, Aug 15, 2015 at 7:15 AM, Michael Norrish <
> Michael.Norrish at nicta.com.au> wrote:
>
>> That's not fixed, and won't be until the parser gets access to arity
>> information for constructors or if we adopt your idea of treating all
>> constructor applications as function calls that resolve to primitives
>> somehow. I don't know if the second approach also works for patterns. I'm
>> happy to thread arity information through the parser but the toplevel state
>> will have to pick it up and allow for the parser to change it.
>>
>> Michael
>>
>> > On 15 Aug 2015, at 18:59, Scott Owens <S.A.Owens at kent.ac.uk> wrote:
>> >
>> > I think that was fixed on the master branch, but we can’t generate a
>> new executable as the master branch has some problems right now (possibly
>> related to HOL updates).
>> >
>> > -Scott
>> >
>> >> On 2015/08/15, at 01:19, Konrad Slind <konrad.slind at gmail.com> wrote:
>> >>
>> >> Example of Problem 2:
>> >>
>> >> SOME(1,2) and
>> >> SOME((1,2))
>> >
>> > _______________________________________________
>> > Users mailing list
>> > Users at cakeml.org
>> > https://lists.cakeml.org/listinfo/users
>>
>> ________________________________
>>
>> The information in this e-mail may be confidential and subject to legal
>> professional privilege and/or copyright. National ICT Australia Limited
>> accepts no liability for any damage caused by this email or its attachments.
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.cakeml.org/pipermail/users/attachments/20150815/761bb349/attachment-0001.html>
-------------- next part --------------
fun fst v3 = case v3 of (v2,v1) => v2;
fun snd v3 = case v3 of (v2,v1) => v1;
fun pair_case v2 = (fn v1 => v1 (fst v2) (snd v2));
fun inv_image v3 = (fn v4 => (fn v2 => (fn v1 => v3 (v4 v2) (v4 v1))));
fun the x1 =
case x1
of NONE => (raise Bind)
| SOME(v1) => v1;
fun hd x1 =
case x1
of [] => (raise Bind)
| v2::v1 => v2;
fun tl x1 =
case x1
of [] => (raise Bind)
| v2::v1 => v1;
fun el v2 v1 =
if (v2 <= 0)
then (hd v1)
else (el (let val k = v2 - 1
in
if (k < 0)
then 0
else k
end) (tl v1));
fun append v3 v4 =
case v3
of [] => v4
| v2::v1 => (v2::(append v1 v4));
fun length v3 =
case v3
of [] => 0
| v2::v1 => ((length v1) + 1);
fun map v3 v4 =
case v4
of [] => []
| v2::v1 => (v3 v2::(map v3 v1));
fun map2 v5 v6 v7 =
case v6
of [] => []
| v4::v3 => (case v7
of [] => []
| (v2::v1) => (v5 v4 v2::(map2 v5 v3 v1)));
fun zip v5 v6 =
case v5
of [] => []
| v4::v3 => (case v6
of [] => []
| (v2::v1) => ((v4,v2)::(zip v3 v1)));
fun member v4 v3 =
case v3
of [] => (0 < 0)
| v2::v1 => ((v4 = v2) orelse (member v4 v1));
fun lupdate v8 v9 v7 =
case (v9,v7)
of (v6,v5) => (if (v6 <= 0)
then (case v5
of [] => []
| (v2::v1) => (v8::v1))
else (case v5
of [] => []
| (v4::v3) => (v4::(lupdate v8 (let val k =
v6 - 1
in
if (k < 0)
then 0
else k
end) v3))));
fun part v3 v6 v4 v5 =
case v6
of [] => (v4,v5)
| v2::v1 => (if (v3 v2)
then (part v3 v1 (v2::v4) v5)
else (part v3 v1 v4 (v2::v5)));
fun partition v1 = (fn v2 => part v1 v2 [] []);
fun qsort v7 v8 =
case v8
of [] => []
| v6::v5 => (let val v3 = partition (fn v4 => (v7 v4 v6)) v5
in
case v3
of (v2,v1) => (append (append (qsort v7 v2) [v6]) (qsort v7 v1))
end);
fun tolistaux v1 v2 v3 =
if (v2 < v3)
then (Vector.sub v1 v2::(tolistaux v1 (v2 + 1) v3))
else [];
fun tolist v1 = tolistaux v1 0 (Vector.length v1);
datatype ('k , 'v) balanced_map_balanced_map = Bin of int * 'k * 'v * ('k , 'v) balanced_map_balanced_map * ('k , 'v) balanced_map_balanced_map
| Tip;
val empty = Tip;
datatype toto_cpn = Greater
| Equal
| Less;
fun lookup v6 v7 v8 =
case v8
of Tip => NONE
| Bin(v5,v4,v3,v2,v1) => (case (v6 v7 v4)
of Less => (lookup v6 v7 v2)
| Equal => (SOME(v3))
| Greater => (lookup v6 v7 v1));
fun singleton v1 = (fn v2 => Bin(1,v1,v2,Tip,Tip));
val ratio = 2;
val delta = 3;
fun size v6 =
case v6
of Tip => 0
| Bin(v5,v4,v3,v2,v1) => v5;
fun balancel v41 =
(fn v42 =>
(fn v43 =>
(fn v44 =>
case v43
of Tip => (case v44
of Tip => (Bin(1,v41,v42,Tip,Tip))
| (Bin(v5,v4,v3,v2,v1)) => (Bin(1 + v5,v41,v42,Tip,Bin(v5,v4,v3,v2,v1))))
| Bin(v40,v39,v38,v37,v36) => (case v44
of Tip => (case v37
of Tip => (case v36
of Tip => (Bin(2,v41,v42,Bin(v40,v39,v38,Tip,Tip),Tip))
| (Bin(v10,v9,v8,v7,v6)) => (Bin(3,v9,v8,Bin(1,v39,v38,Tip,Tip),Bin(1,v41,v42,Tip,Tip))))
| (Bin(v20,v19,v18,v17,v16)) => (case v36
of Tip => (Bin(3,v39,v38,Bin(v20,v19,v18,v17,v16),Bin(1,v41,v42,Tip,Tip)))
| (Bin(v15,v14,v13,v12,v11)) => (if (v15 < (ratio * v20))
then (Bin(1 + v40,v39,v38,Bin(v20,v19,v18,v17,v16),Bin(1 + v15,v41,v42,Bin(v15,v14,v13,v12,v11),Tip)))
else (Bin(1 + v40,v14,v13,Bin((1 + v20) + (size v12),v39,v38,Bin(v20,v19,v18,v17,v16),v12),Bin(1 + (size v11),v41,v42,v11,Tip))))))
| (Bin(v35,v34,v33,v32,v31)) => (if (v40 > (delta * v35))
then (case v37
of Tip => Tip
| (Bin(v30,v29,v28,v27,v26)) => (case v36
of Tip => Tip
| (Bin(v25,v24,v23,v22,v21)) => (if (v25 < (ratio * v30))
then (Bin((1 + v40) + v35,v39,v38,v37,Bin((1 + v35) + v25,v41,v42,v36,Bin(v35,v34,v33,v32,v31))))
else (Bin((1 + v40) + v35,v24,v23,Bin((1 + v30) + (size v22),v39,v38,v37,v22),Bin((1 + v35) + (size v21),v41,v42,v21,Bin(v35,v34,v33,v32,v31)))))))
else (Bin((1 + v40) + v35,v41,v42,Bin(v40,v39,v38,v37,v36),Bin(v35,v34,v33,v32,v31))))))));
fun balancer v41 =
(fn v42 =>
(fn v43 =>
(fn v44 =>
case v43
of Tip => (case v44
of Tip => (Bin(1,v41,v42,Tip,Tip))
| (Bin(v20,v19,v18,v17,v16)) => (case v17
of Tip => (case v16
of Tip => (Bin(2,v41,v42,Tip,Bin(v20,v19,v18,Tip,Tip)))
| (Bin(v5,v4,v3,v2,v1)) => (Bin(3,v19,v18,Bin(1,v41,v42,Tip,Tip),Bin(v5,v4,v3,v2,v1))))
| (Bin(v15,v14,v13,v12,v11)) => (case v16
of Tip => (Bin(3,v14,v13,Bin(1,v41,v42,Tip,Tip),Bin(1,v19,v18,Tip,Tip)))
| (Bin(v10,v9,v8,v7,v6)) => (if (v15 < (ratio * v10))
then (Bin(1 + v20,v19,v18,Bin(1 + v15,v41,v42,Tip,Bin(v15,v14,v13,v12,v11)),Bin(v10,v9,v8,v7,v6)))
else (Bin(1 + v20,v14,v13,Bin(1 + (size v12),v41,v42,Tip,v12),Bin((1 + v10) + (size v11),v19,v18,v11,Bin(v10,v9,v8,v7,v6))))))))
| Bin(v40,v39,v38,v37,v36) => (case v44
of Tip => (Bin(1 + v40,v41,v42,Bin(v40,v39,v38,v37,v36),Tip))
| (Bin(v35,v34,v33,v32,v31)) => (if (v35 > (delta * v40))
then (case v32
of Tip => Tip
| (Bin(v30,v29,v28,v27,v26)) => (case v31
of Tip => Tip
| (Bin(v25,v24,v23,v22,v21)) => (if (v30 < (ratio * v25))
then (Bin((1 + v40) + v35,v34,v33,Bin((1 + v40) + v30,v41,v42,Bin(v40,v39,v38,v37,v36),v32),v31))
else (Bin((1 + v40) + v35,v29,v28,Bin((1 + v40) + (size v27),v41,v42,Bin(v40,v39,v38,v37,v36),v27),Bin((1 + v25) + (size v26),v34,v33,v26,v31))))))
else (Bin((1 + v40) + v35,v41,v42,Bin(v40,v39,v38,v37,v36),Bin(v35,v34,v33,v32,v31))))))));
fun insert v6 v7 v9 v8 =
case v8
of Tip => (singleton v7 v9)
| Bin(v5,v4,v3,v2,v1) => (case (v6 v7 v4)
of Less => (balancel v4 v3 (insert v6 v7 v9 v2) v1)
| Equal => (Bin(v5,v7,v9,v2,v1))
| Greater => (balancer v4 v3 v2 (insert v6 v7 v9 v1)));
fun member_1 v6 v7 v8 =
case v8
of Tip => (0 < 0)
| Bin(v5,v4,v3,v2,v1) => (case (v6 v7 v4)
of Less => (member_1 v6 v7 v2)
| Equal => (0 <= 0)
| Greater => (member_1 v6 v7 v1));
fun foldrwithkey v6 v8 v7 =
case v7
of Tip => v8
| Bin(v5,v4,v3,v2,v1) => (foldrwithkey v6 (v6 v4 v3 (foldrwithkey v6 v8 v1)) v2);
val alphabet_size = 256;
val alphabet =
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255];
datatype regexp_regexp = Neg of regexp_regexp
| Or of regexp_regexp list
| Star of regexp_regexp
| Cat of regexp_regexp * regexp_regexp
| Chset of bool Vector.vector;
fun regexp_list_size v10 v9 =
case v10
of [] => v9
| v8::v7 => (case v8
of (Chset(v1)) => (regexp_list_size v7 (v9 + 1))
| (Cat(v3,v2)) => (regexp_list_size (v3::v2::v7) (v9 + 1))
| (Star(v4)) => (regexp_list_size (v4::v7) (v9 + 1))
| (Or(v5)) => (regexp_list_size (append v5 v7) ((v9 + (length v5)) + 1))
| (Neg(v6)) => (regexp_list_size (v6::v7) (v9 + 1)));
fun and_1 v1 = (fn v2 => Neg(Or([Neg(v1),Neg(v2)])));
val all_unset =
[0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0,0 < 0];
val all_set =
[0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0,0 <= 0];
val charset_empty = Vector.fromList all_unset;
val charset_full = Vector.fromList all_set;
fun charset_mem v2 = (fn v1 => Vector.sub v1 v2);
fun charset_sing v1 = Vector.fromList (lupdate (0 <= 0) v1 all_unset);
fun charset_union v3 =
(fn v4 =>
Vector.fromList (map2 (fn v2 =>
(fn v1 =>
(v2 orelse v1))) (tolist v3) (tolist v4)));
fun merge_charsets v17 =
case v17
of [] => []
| v16::v15 => (case v16
of (Chset(v9)) => (case v15
of [] => [Chset(v9)]
| (v8::v7) => (case v8
of (Chset(v1)) => (merge_charsets (Chset(charset_union v9 v1)::v7))
| (Cat(v3,v2)) => (Chset(v9)::Cat(v3,v2)::v7)
| (Star(v4)) => (Chset(v9)::Star(v4)::v7)
| (Or(v5)) => (Chset(v9)::Or(v5)::v7)
| (Neg(v6)) => (Chset(v9)::Neg(v6)::v7)))
| (Cat(v11,v10)) => (Cat(v11,v10)::v15)
| (Star(v12)) => (Star(v12)::v15)
| (Or(v13)) => (Or(v13)::v15)
| (Neg(v14)) => (Neg(v14)::v15));
fun len_cmp v7 v8 =
case v7
of [] => (case v8
of [] => Equal
| (v2::v1) => Less)
| v6::v5 => (case v8
of [] => Greater
| (v4::v3) => (len_cmp v5 v3));
fun bool_cmp v1 =
(fn v2 =>
if v1
then (if v2
then Equal
else Greater)
else (if v2
then Less
else Equal));
fun list_cmp v7 v8 v9 =
case v8
of [] => (case v9
of [] => Equal
| (v2::v1) => Less)
| v6::v5 => (case v9
of [] => Greater
| (v4::v3) => (case (v7 v6 v4)
of Less => Less
| Equal => (list_cmp v7 v5 v3)
| Greater => Greater));
fun charset_cmp v1 =
(fn v2 => list_cmp bool_cmp (tolist v1) (tolist v2));
fun regexp_comparea v41 =
case v41
of [] => Equal
| v40::v39 => (case v40
of (v38,v37) => (case v38
of (Chset(v7)) => (case v37
of (Chset(v1)) => (case (charset_cmp v7 v1)
of Less => Less
| Equal => (regexp_comparea v39)
| Greater => Greater)
| (Cat(v3,v2)) => Less
| (Star(v4)) => Less
| (Or(v5)) => Less
| (Neg(v6)) => Less)
| (Cat(v15,v14)) => (case v37
of (Chset(v8)) => Greater
| (Cat(v10,v9)) => (regexp_comparea ((v15,v10)::(v14,v9)::v39))
| (Star(v11)) => Less
| (Or(v12)) => Less
| (Neg(v13)) => Less)
| (Star(v22)) => (case v37
of (Chset(v16)) => Greater
| (Cat(v18,v17)) => Greater
| (Star(v19)) => (regexp_comparea ((v22,v19)::v39))
| (Or(v20)) => Less
| (Neg(v21)) => Less)
| (Or(v29)) => (case v37
of (Chset(v23)) => Greater
| (Cat(v25,v24)) => Greater
| (Star(v26)) => Greater
| (Or(v27)) => (case (len_cmp v29 v27)
of Less => Less
| Equal => (regexp_comparea (append (zip v29 v27) v39))
| Greater => Greater)
| (Neg(v28)) => Less)
| (Neg(v36)) => (case v37
of (Chset(v30)) => Greater
| (Cat(v32,v31)) => Greater
| (Star(v33)) => Greater
| (Or(v34)) => Greater
| (Neg(v35)) => (regexp_comparea ((v36,v35)::v39)))));
fun regexp_compare v1 = (fn v2 => regexp_comparea [(v1,v2)]);
fun regexp_leq v1 =
(fn v2 =>
case (regexp_compare v1 v2)
of Less => (0 <= 0)
| Equal => (0 <= 0)
| Greater => (0 < 0));
fun build_char_set v1 = Chset(v1);
val empty_1 = Chset(charset_empty);
val sigma = Chset(charset_full);
val epsilon = Star(Chset(charset_empty));
fun assoc_cat v7 v8 =
case v7
of Chset(v1) => (Cat(Chset(v1),v8))
| Cat(v3,v2) => (Cat(v3,assoc_cat v2 v8))
| Star(v4) => (Cat(Star(v4),v8))
| Or(v5) => (Cat(Or(v5),v8))
| Neg(v6) => (Cat(Neg(v6),v8));
fun build_cat v1 =
(fn v2 =>
if ((v1 = empty_1) orelse (v2 = empty_1))
then empty_1
else (if (v1 = epsilon)
then v2
else (if (v2 = epsilon)
then v1
else (assoc_cat v1 v2))));
fun build_neg v7 =
case v7
of Chset(v1) => (Neg(Chset(v1)))
| Cat(v3,v2) => (Neg(Cat(v3,v2)))
| Star(v4) => (Neg(Star(v4)))
| Or(v5) => (Neg(Or(v5)))
| Neg(v6) => v6;
fun build_star v7 =
case v7
of Chset(v1) => (Star(Chset(v1)))
| Cat(v3,v2) => (Star(Cat(v3,v2)))
| Star(v4) => (Star(v4))
| Or(v5) => (Star(Or(v5)))
| Neg(v6) => (Star(Neg(v6)));
fun flatten_or v9 =
case v9
of [] => []
| v8::v7 => (case v8
of (Chset(v1)) => (Chset(v1)::(flatten_or v7))
| (Cat(v3,v2)) => (Cat(v3,v2)::(flatten_or v7))
| (Star(v4)) => (Star(v4)::(flatten_or v7))
| (Or(v5)) => (append v5 (flatten_or v7))
| (Neg(v6)) => (Neg(v6)::(flatten_or v7)));
fun remove_dups v5 =
case v5
of [] => []
| v4::v3 => (case v3
of [] => [v4]
| (v2::v1) => (if ((regexp_compare v4 v2) = Equal)
then (remove_dups (v2::v1))
else (v4::(remove_dups (v2::v1)))));
fun build_or v14 =
let val v13 =
remove_dups (merge_charsets (qsort regexp_leq (flatten_or v14)))
in
if (member (Neg(empty_1)) v13)
then (Neg(empty_1))
else (case v13
of [] => empty_1
| (v12::v11) => (case v11
of [] => v12
| (v10::v9) => (case v12
of (Chset(v3)) => (case v9
of [] => (if (v3 = charset_empty)
then v10
else (Or([Chset(v3),v10])))
| (v2::v1) => (if (v3 = charset_empty)
then (Or(v10::v2::v1))
else (Or(Chset(v3)::v10::v2::v1))))
| (Cat(v5,v4)) => (Or(v13))
| (Star(v6)) => (Or(v13))
| (Or(v7)) => (Or(v13))
| (Neg(v8)) => (Or(v13)))))
end;
fun haseps v9 =
case v9
of [] => (0 < 0)
| v8::v7 => (case v8
of (Chset(v1)) => (haseps v7)
| (Cat(v3,v2)) => ((haseps (v3::v7)) andalso (haseps (v2::v7)))
| (Star(v4)) => (0 <= 0)
| (Or(v5)) => (haseps (append v5 v7))
| (Neg(v6)) => ((haseps (v6::v7)) = (0 < 0)));
fun hasepsilon v1 = haseps [v1];
fun smart_deriv v10 v9 =
case v9
of Chset(v1) => (if (charset_mem v10 v1)
then epsilon
else empty_1)
| Cat(v4,v3) => (let val v2 = build_cat (smart_deriv v10 v4) v3
in
if (hasepsilon v4)
then (build_or [v2,smart_deriv v10 v3])
else v2
end)
| Star(v5) => (build_cat (smart_deriv v10 v5) (build_star v5))
| Or(v7) => (build_or (map (fn v6 => (smart_deriv v10 v6)) v7))
| Neg(v8) => (build_neg (smart_deriv v10 v8));
fun normalize v8 =
case v8
of Chset(v1) => (build_char_set v1)
| Cat(v3,v2) => (build_cat (normalize v3) (normalize v2))
| Star(v4) => (build_star (normalize v4))
| Or(v6) => (build_or (map (fn v5 => (normalize v5)) v6))
| Neg(v7) => (build_neg (normalize v7));
fun transitions v2 = map (fn v1 => (v1,smart_deriv v1 v2)) alphabet;
fun extend_states v6 v7 v8 v9 =
case v9
of [] => (v6,(v7,v8))
| v5::v4 => (case v5
of (v3,v2) => (case (lookup regexp_compare v2 v7)
of NONE => (extend_states (v6 + 1) (insert regexp_compare v2 v6 v7) ((v3,v6)::v8) v4)
| (SOME(v1)) => (extend_states v6 v7 ((v3,v1)::v8) v4)));
fun build_table v13 =
(fn v14 =>
(fn v15 =>
case (v14,v15)
of (v12,v11) => (case v11
of (v10,v9) => (case v9
of (v8,v7) => (let val v6 =
extend_states v10 v8 [] v13
in
case v6
of (v5,v4) => (case v4
of (v3,v2) => (case (lookup regexp_compare v12 v3)
of NONE => (v5 + 1,(insert regexp_compare v12 v5 v3,(v5,v2)::v7))
| (SOME(v1)) => (v5,(v3,(v1,v2)::v7))))
end)))));
fun insert_regexp v1 = (fn v2 => insert regexp_compare v1 () v2);
fun mem_regexp v1 = (fn v2 => member_1 regexp_compare v1 v2);
fun brz v5 v6 v7 v8 =
case v6
of [] => (let val v1 = (v5,v7)
in
SOME(v1)
end)
| v4::v3 => (if (v8 <= 0)
then NONE
else (if (mem_regexp v4 v5)
then (brz v5 v3 v7 (let val k = v8 - 1
in
if (k < 0)
then 0
else k
end))
else (let val v2 = transitions v4
in
brz (insert_regexp v4 v5) (append (remove_dups (map snd v2)) v3) (build_table v2 v4 v7) (let val k =
v8 - 1
in
if (k < 0)
then 0
else k
end)
end)));
val bigint = 2147483647;
fun get_accepts v6 =
qsort (fn v2 =>
(fn v1 =>
(v2 <= v1))) (foldrwithkey (fn v5 =>
(fn v4 =>
(fn v3 =>
(if (hasepsilon v5)
then (v4::v3)
else v3)))) [] v6);
fun alist_to_vec v5 v6 v7 v8 =
if (v7 <= 0)
then []
else (case v5
of [] => (v6::(alist_to_vec [] v6 (let val k = v7 - 1
in
if (k < 0)
then 0
else k
end) v8))
| (v4::v3) => (case v4
of (v2,v1) => (if (((let val k = v7 - 1
in
if (k < 0)
then 0
else k
end) + 1) <= v8)
then (if (v2 = (let val k =
v8 - ((let val k =
v7 - 1
in
if (k < 0)
then 0
else k
end) + 1)
in
if (k < 0)
then 0
else k
end))
then (v1::(alist_to_vec v3 v6 (let val k =
v7 - 1
in
if (k < 0)
then 0
else k
end) v8))
else (if (v2 < (let val k =
v8 - ((let val k =
v7 - 1
in
if (k < 0)
then 0
else k
end) + 1)
in
if (k < 0)
then 0
else k
end))
then (alist_to_vec v3 v6 ((let val k =
v7 - 1
in
if (k < 0)
then 0
else k
end) + 1) v8)
else (v6::(alist_to_vec ((v2,v1)::v3) v6 (let val k =
v7 - 1
in
if (k < 0)
then 0
else k
end) v8))))
else [])));
fun accepts_to_vector v2 =
(fn v3 => alist_to_vec (map (fn v1 => (v1,0 <= 0)) v2) (0 < 0) v3 v3);
fun table_to_vectors v11 =
map (fn v8 =>
(case v8
of (v7,v6) => (alist_to_vec (qsort (inv_image (fn v2 =>
(fn v1 =>
(v2 <= v1))) fst) (map (fn v5 =>
(case v5
of (v4,v3) => (v4,SOME(v3)))) v6)) NONE (let val k =
alphabet_size - 1
in
if (k < 0)
then 0
else k
end) (let val k =
alphabet_size - 1
in
if (k < 0)
then 0
else k
end)))) (qsort (inv_image (fn v10 =>
(fn v9 =>
(v10 <= v9))) fst) v11);
fun compile_regexp v11 =
let val v10 = normalize v11
val v9 = the (brz empty [v10] (1,(singleton v10 0,[])) bigint)
in
case v9
of (v8,v7) => (case v7
of (v6,v5) => (case v5
of (v4,v3) => (let val v2 =
table_to_vectors v3
val v1 =
accepts_to_vector (get_accepts v4) v6
in
(v4,(v2,v1))
end)))
end;
fun exec_dfa v4 v7 v5 v6 =
case v6
of [] => (el v5 v4)
| v3::v2 => (case (el (Char.ord v3) (el v5 v7))
of NONE => (0 < 0)
| (SOME(v1)) => (exec_dfa v4 v7 v1 v2));
fun regexp_matcher v7 =
let val v6 = compile_regexp v7
in
case v6
of (v5,v4) => (pair_case v4 (fn v3 =>
(fn v2 =>
(let val v1 =
the (lookup regexp_compare (normalize v7) v5)
in
exec_dfa v2 v3 v1
end))))
end;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: regexp.cake
Type: application/octet-stream
Size: 25092 bytes
Desc: not available
URL: <https://lists.cakeml.org/pipermail/users/attachments/20150815/761bb349/attachment-0001.obj>
More information about the Users
mailing list