/
Datalog.elm
710 lines (572 loc) · 23.1 KB
/
Datalog.elm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
module Database.Datalog exposing
( Database, empty, Problem(..), insert, query
, Rule, rule, with, without, filter, planRule
, Filter, eq, gt, lt, not_, or
, Term, var, int, string
)
{-|
@docs Database, empty, Problem, insert, query
@docs Rule, rule, with, without, filter, planRule
@docs Filter, eq, gt, lt, not_, or
@docs Term, var, int, string
-}
import Database exposing (Constant)
import Dict
import Graph exposing (Edge, Graph, Node)
import List.Extra exposing (foldrResult, indexOf)
import Murmur3
{-| A Datalog database. This is a thin wrapper around `Database.Database`;
the constructor is not exposed by this module, so callers build values with
`empty` and `insert`.
-}
type Database
    = Database Database.Database
{-| The starting point for building a database: `Database.empty`, wrapped in
this module's `Database` type.
-}
empty : Database
empty =
    Database.empty
        |> Database
{-| Everything that can go wrong while inserting rows or planning and running
rules:

  - `NeedAtLeastOnePositiveAtom`: the rule body has no `with` clause.
  - `NeedAtLeastOneName`: the rule head does not name any fields.
  - `VariableDoesNotAppearInBody`: a name used in the rule head or in a
    filter is not bound anywhere in the body.
  - `VariableMustAppearInPositiveAtom`: a name used in a `without` clause is
    not bound by any `with` clause.
  - `CannotInsertVariable`: `insert` was given a variable instead of a
    constant.
  - `CannotHaveNegationInRecursiveQuery`: a `without` clause takes part in a
    recursive group of rules.
  - `DatabaseProblem`: a problem reported by the underlying `Database`
    module.

-}
type Problem
    = NeedAtLeastOnePositiveAtom
    | NeedAtLeastOneName
    | VariableDoesNotAppearInBody String
    | VariableMustAppearInPositiveAtom String
    | CannotInsertVariable String
    | CannotHaveNegationInRecursiveQuery
    | DatabaseProblem Database.Problem
{-| Insert one row into the relation called `name`.

Every term in `body` has to be a constant. If a variable is found, nothing is
inserted and the result is `CannotInsertVariable` carrying that variable's
name. Problems reported by `Database.insert` come back wrapped in
`DatabaseProblem`.

-}
insert : String -> List Term -> Database -> Result Problem Database
insert name body (Database db) =
    let
        -- Unwrap a single term, refusing variables.
        toConstant : Term -> List Constant -> Result Problem (List Constant)
        toConstant term constants =
            case term of
                Variable variableName ->
                    Err (CannotInsertVariable variableName)

                Constant constant ->
                    Ok (constant :: constants)

        -- Hand the unwrapped row to the underlying database.
        store : List Constant -> Result Problem Database
        store constants =
            db
                |> Database.insert name constants
                |> Result.mapError DatabaseProblem
                |> Result.map Database
    in
    foldrResult toConstant [] body
        |> Result.andThen store
{-| Run `rules` against the database and return the underlying
`Database.Database` that results from evaluating them.

The steps, as implemented here:

1.  Every rule is planned with `planRule`; a planning problem aborts the
    whole query.
2.  A dependency graph is built with one node per head relation name (no plan
    attached) and one node per rule (plan attached). Node ids are Murmur3
    hashes of the relation name and of `ruleToString rule`, so rules that
    print identically end up sharing one node.
3.  Edges run from the head relation to the rule, and from the rule to each
    relation named in its body, labelled with that body atom's polarity.
4.  The graph is split into strata. Any stratum containing a `Negative` edge
    is rejected with `CannotHaveNegationInRecursiveQuery`.
5.  The strata are folded over with `foldrResult runUntilExhausted`, starting
    from `db`.

-}
query : List Rule -> Database -> Result Problem Database.Database
query rules (Database db) =
    let
        -- Node id for a relation name.
        relationId : String -> Int
        relationId =
            Murmur3.hashString 0

        -- Node id for a single rule.
        ruleId : Rule -> Int
        ruleId =
            ruleToString >> Murmur3.hashString 0

        -- Plan one rule and register both its head relation and the rule
        -- itself as nodes.
        addNodesFor :
            Rule
            -> Dict.Dict Int ( String, Maybe Database.QueryPlan )
            -> Result Problem (Dict.Dict Int ( String, Maybe Database.QueryPlan ))
        addNodesFor ((Rule (Atom headName _) _) as rule_) nodesById =
            planRule rule_
                |> Result.map
                    (\plan ->
                        nodesById
                            |> Dict.insert (relationId headName) ( headName, Nothing )
                            |> Dict.insert (ruleId rule_) ( headName, Just plan )
                    )

        nodes : Result Problem (List (Node ( String, Maybe Database.QueryPlan )))
        nodes =
            foldrResult addNodesFor Dict.empty rules
                |> Result.map
                    (Dict.toList
                        >> List.map (\( id, label ) -> Node id label)
                    )

        -- head relation -> rule, then rule -> every relation in its body.
        ruleEdges : Rule -> List (Edge Negation)
        ruleEdges ((Rule (Atom headName _) bodyAtoms) as rule_) =
            let
                thisRule : Int
                thisRule =
                    ruleId rule_

                dependencyEdge : BodyAtom -> Maybe (Edge Negation)
                dependencyEdge bodyAtom =
                    case bodyAtom of
                        Filter _ ->
                            -- Filters only constrain names that were
                            -- already bound by atoms, so they add no
                            -- dependency of their own.
                            Nothing

                        BodyAtom negation (Atom bodyName _) ->
                            Just (Edge thisRule (relationId bodyName) negation)
            in
            Edge (relationId headName) thisRule Positive
                :: List.filterMap dependencyEdge bodyAtoms

        edges : List (Edge Negation)
        edges =
            List.concatMap ruleEdges rules

        -- Accept a stratum only if none of its edges is a negation.
        rejectNegation :
            Graph ( String, Maybe Database.QueryPlan ) Negation
            -> List (Graph ( String, Maybe Database.QueryPlan ) Negation)
            -> Result Problem (List (Graph ( String, Maybe Database.QueryPlan ) Negation))
        rejectNegation stratum accepted =
            if List.any (.label >> (==) Negative) (Graph.edges stratum) then
                Err CannotHaveNegationInRecursiveQuery

            else
                Ok (stratum :: accepted)

        stratify :
            List (Node ( String, Maybe Database.QueryPlan ))
            -> Result Problem (List (Graph ( String, Maybe Database.QueryPlan ) Negation))
        stratify nodes_ =
            let
                graph : Graph ( String, Maybe Database.QueryPlan ) Negation
                graph =
                    Graph.fromNodesAndEdges nodes_ edges
            in
            case Graph.stronglyConnectedComponents graph of
                Err strata ->
                    foldrResult rejectNegation [] strata

                Ok _ ->
                    -- Presumably `Ok` means the graph has no cycles; the
                    -- whole graph is then evaluated as a single stratum.
                    -- NOTE(review): with `without` clauses in an acyclic
                    -- program this does not order a negated relation
                    -- before the rule that negates it - confirm that is
                    -- safe.
                    Ok [ graph ]
    in
    nodes
        |> Result.andThen stratify
        |> Result.andThen (\strata -> foldrResult runUntilExhausted db strata)
{-| Evaluate every rule in `stratum` repeatedly until no new tuples show up.

The incoming database is passed to `runUntilExhaustedHelp` twice: once as the
first "new" database and once as the accumulated result.

-}
runUntilExhausted :
    Graph ( String, Maybe Database.QueryPlan ) Negation
    -> Database.Database
    -> Result Problem Database.Database
runUntilExhausted stratum db =
    db
        |> runUntilExhaustedHelp stratum db
{-| One round of semi-naive evaluation, repeated until nothing changes.

`db` is the "new" database that the plans read from, and `finalDb` is the
accumulated result. Each rule node in `stratum` is queried against the "new"
database; its result replaces that relation in the "new" database and is
merged into the accumulated one. When a full pass leaves the accumulated
database equal to `finalDb`, that database is returned; otherwise the
function recurses with the two updated databases.

Only reading the freshly produced tuples on each pass is what keeps the joins
small: the older tuples were already joined in previous passes.

-}
runUntilExhaustedHelp :
    Graph ( String, Maybe Database.QueryPlan ) Negation
    -> Database.Database
    -> Database.Database
    -> Result Problem Database.Database
runUntilExhaustedHelp stratum db finalDb =
    let
        -- Only nodes that carry a plan are rules; the others just name a
        -- relation.
        plannedRules : List ( String, Database.QueryPlan )
        plannedRules =
            List.filterMap
                (\node ->
                    case node.label of
                        ( _, Nothing ) ->
                            Nothing

                        ( name, Just plan ) ->
                            Just ( name, plan )
                )
                (Graph.nodes stratum)

        -- Drop the rows that are already stored under `name`, but only if
        -- that relation can be read at all. If the lookup fails for any
        -- reason, the outer join would fail as well, so the plan is used
        -- unchanged.
        onlyNewRows : String -> Database.QueryPlan -> Database.Database -> Database.QueryPlan
        onlyNewRows name plan newDb =
            case Database.read name newDb of
                Err _ ->
                    plan

                Ok _ ->
                    Database.OuterJoin
                        { keep = plan
                        , drop = Database.Read name
                        }

        -- NOTE(review): `replaceRelation` is called once per rule, so when
        -- several rules share a head name the later call presumably
        -- overwrites the earlier rule's rows for this pass - confirm that
        -- is intended.
        step :
            ( String, Database.QueryPlan )
            -> ( Database.Database, Database.Database )
            -> Result Problem ( Database.Database, Database.Database )
        step ( name, plan ) ( newDb, accumulated ) =
            Database.query (onlyNewRows name plan newDb) newDb
                |> Result.andThen
                    (\relation ->
                        Database.mergeRelations name relation accumulated
                            |> Result.map
                                (\merged ->
                                    ( Database.replaceRelation name relation newDb
                                    , merged
                                    )
                                )
                    )
                |> Result.mapError DatabaseProblem
    in
    -- A plain `case` keeps the recursive call in tail position.
    case foldrResult step ( db, finalDb ) plannedRules of
        Err problem ->
            Err problem

        Ok ( nextDb, newFinalDb ) ->
            if newFinalDb == finalDb then
                Ok newFinalDb

            else
                runUntilExhaustedHelp stratum nextDb newFinalDb
{-| A single Datalog rule: a head atom plus the list of body atoms and
filters that were attached with `with`, `without`, and `filter`. Each of
those functions conses onto the body, so the list is stored most-recently-added
first.
-}
type Rule
    = Rule Atom (List BodyAtom)
{-| Begin a new rule. The first argument is the rule's name and the second
lists the names of the fields it exports.

Two things to keep in mind:

  - A rule needs at least one field name.
  - Every field name has to be bound by a [`with`](#with) clause.

Rules that share a name are merged together; the docs for [`with`](#with)
show an example.

-}
rule : String -> List String -> Rule
rule name headVars =
    let
        head : Atom
        head =
            headVars
                |> List.map Variable
                |> Atom name
    in
    Rule head []
{-| Require matching tuples from the given name (TODO: table? rule? named
tuple store?)

Say you have stored some greeks (Socrates, for one). This rule tells you
which of them are mortal:

    rule "mortal" [ "name" ]
        |> with "greek" [ var "name" ]

Recursive rules are fine. Reachability for every node in a graph takes two
rules:

    [ rule "reachable" [ "a", "b" ]
        |> with "link" [ var "a", var "b" ]
    , rule "reachable" [ "a", "c" ]
        |> with "link" [ var "a", var "b" ]
        |> with "reachable" [ var "b", var "c" ]
    ]

Introducing a fresh variable inside a `with`, like `b` in the second rule
above, is fine too.

-}
with : String -> List Term -> Rule -> Rule
with name terms (Rule head body) =
    let
        positiveAtom : BodyAtom
        positiveAtom =
            BodyAtom Positive (Atom name terms)
    in
    Rule head (positiveAtom :: body)
{-| The counterpart of [`with`](#with): throw away any tuples that match the
given names.

It comes with a couple of extra restrictions:

  - A `without` cannot introduce new names. Every name in it has to be used
    in a positive clause, otherwise there would be no way to know which
    values are permissible and we would have to invent them (a big no-no!)
  - A `without` cannot be used recursively, because the outcome could then
    depend on which rule happens to be evaluated first.

If you know another datalog implementation: this is plain negation, and the
restrictions are more or less the usual ones.

This example computes every pair of nodes in a graph that are _not_
reachable from each other:

    [ -- first, define `reachable` as in the example in `with`:
      rule "reachable" [ "a", "b" ]
        |> with "link" [ var "a", var "b" ]
    , rule "reachable" [ "a", "c" ]
        |> with "link" [ var "a", var "b" ]
        |> with "reachable" [ var "b", var "c" ]

    -- next, we need to know what counts as a valid node, so that the
    -- names used in the `without` below are bound by positive clauses
    , rule "node" [ "a" ]
        |> with "link" [ var "a", var "b" ]
    , rule "node" [ "b" ]
        |> with "link" [ var "a", var "b" ]

    -- finally: "two nodes are unreachable if each of them is in `node`
    -- but the pair is not in `reachable`"
    , rule "unreachable" [ "a", "b" ]
        |> with "node" [ var "a" ]
        |> with "node" [ var "b" ]
        |> without "reachable" [ var "a", var "b" ]
    ]

-}
without : String -> List Term -> Rule -> Rule
without name terms (Rule head body) =
    let
        negativeAtom : BodyAtom
        negativeAtom =
            BodyAtom Negative (Atom name terms)
    in
    Rule head (negativeAtom :: body)
{-| Turn a rule into a query plan for the underlying database.

The plan is assembled in four stages:

1.  All positive atoms are joined together on the variable names they share.
2.  Each negative atom is applied as an `OuterJoinOn` that drops matching
    rows.
3.  Each filter is applied as a selection on top.
4.  The result is projected down to the fields named in the rule head.

Problems:

  - `NeedAtLeastOnePositiveAtom` if the body has no positive atom.
  - `VariableMustAppearInPositiveAtom` if a negative atom uses a variable
    that no positive atom binds.
  - `VariableDoesNotAppearInBody` if the head or a filter uses a name that
    the body does not bind.
  - `NeedAtLeastOneName` if the head has no fields.

Constant positions are named `"_"` by `atomToPlan`. That name is only a
placeholder that keeps field indexes aligned, so it is never used as a join
or anti-join field here. Treating it as a shared variable would force the
constant columns of unrelated atoms to be equal, and would make any negative
atom containing a constant fail with `VariableMustAppearInPositiveAtom "_"`.
As a consequence, a user variable literally named `"_"` does not take part in
joins either.

-}
planRule : Rule -> Result Problem Database.QueryPlan
planRule (Rule (Atom _ headTerms) bodyAtoms) =
    let
        -- The name `atomToPlan` gives to positions holding a constant.
        isPlaceholder : String -> Bool
        isPlaceholder name =
            name == "_"

        -- Field index for every real variable name. When a name occurs more
        -- than once, `Dict.fromList` keeps the last occurrence.
        fieldIndexes : List String -> Dict.Dict String Int
        fieldIndexes names =
            names
                |> List.indexedMap (\i field -> ( field, i ))
                |> List.filter (\( field, _ ) -> not (isPlaceholder field))
                |> Dict.fromList

        ( positiveAtoms, negativeAtoms, filters ) =
            List.foldl
                (\bodyAtom ( positiveAtomsSoFar, negativeAtomsSoFar, filtersSoFar ) ->
                    case bodyAtom of
                        BodyAtom Positive atom_ ->
                            ( atom_ :: positiveAtomsSoFar, negativeAtomsSoFar, filtersSoFar )

                        BodyAtom Negative atom_ ->
                            ( positiveAtomsSoFar, atom_ :: negativeAtomsSoFar, filtersSoFar )

                        Filter filter_ ->
                            ( positiveAtomsSoFar, negativeAtomsSoFar, filter_ :: filtersSoFar )
                )
                ( [], [], [] )
                bodyAtoms

        plannedPositiveAtoms : Result Problem ( List String, Database.QueryPlan )
        plannedPositiveAtoms =
            case positiveAtoms of
                [] ->
                    Err NeedAtLeastOnePositiveAtom

                first :: rest ->
                    List.foldl
                        (\nextAtom ( rightNames, rightPlan ) ->
                            let
                                ( leftNames, leftPlan ) =
                                    atomToPlan nextAtom
                            in
                            ( leftNames ++ rightNames
                            , Database.JoinOn
                                { left = leftPlan
                                , right = rightPlan
                                , fields =
                                    -- join on every variable name that
                                    -- appears on both sides
                                    Dict.merge
                                        (\_ _ soFar -> soFar)
                                        (\_ left right soFar -> ( left, right ) :: soFar)
                                        (\_ _ soFar -> soFar)
                                        (fieldIndexes leftNames)
                                        (fieldIndexes rightNames)
                                        []
                                }
                            )
                        )
                        (atomToPlan first)
                        rest
                        |> Ok

        plannedNegativeAtoms : Result Problem ( List String, Database.QueryPlan )
        plannedNegativeAtoms =
            case ( negativeAtoms, plannedPositiveAtoms ) of
                ( [], _ ) ->
                    plannedPositiveAtoms

                ( _, Err _ ) ->
                    plannedPositiveAtoms

                ( _, Ok starter ) ->
                    foldrResult
                        (\nextAtom ( keepNames, keepPlan ) ->
                            let
                                ( dropNames, dropPlan ) =
                                    atomToPlan nextAtom
                            in
                            dropNames
                                -- index first, so that skipping the
                                -- placeholders does not shift the indexes
                                |> List.indexedMap Tuple.pair
                                |> List.filter (\( _, dropName ) -> not (isPlaceholder dropName))
                                |> foldrResult
                                    (\( dropIndex, dropName ) soFar ->
                                        case indexOf dropName keepNames of
                                            Just keepIndex ->
                                                Ok (( keepIndex, dropIndex ) :: soFar)

                                            Nothing ->
                                                Err (VariableMustAppearInPositiveAtom dropName)
                                    )
                                    []
                                |> Result.map
                                    (\fields ->
                                        ( keepNames
                                        , Database.OuterJoinOn
                                            { keep = keepPlan
                                            , drop = dropPlan
                                            , fields = fields
                                            }
                                        )
                                    )
                        )
                        starter
                        negativeAtoms

        planned : Result Problem ( List String, Database.QueryPlan )
        planned =
            case ( filters, plannedNegativeAtoms ) of
                ( [], _ ) ->
                    plannedNegativeAtoms

                ( _, Err _ ) ->
                    plannedNegativeAtoms

                ( _, Ok starter ) ->
                    foldrResult
                        (\nextFilter ( names, plan ) -> filterToPlan nextFilter names plan)
                        starter
                        filters
    in
    Result.andThen
        (\( names, plan ) ->
            if List.isEmpty headTerms then
                Err NeedAtLeastOneName

            else
                headTerms
                    |> foldrResult
                        (\term soFar ->
                            case term of
                                Variable name ->
                                    case indexOf name names of
                                        Just idx ->
                                            Ok (idx :: soFar)

                                        Nothing ->
                                            Err (VariableDoesNotAppearInBody name)

                                Constant _ ->
                                    -- It's fine to just ignore this, since
                                    -- we disallow rules having constants by
                                    -- construction. This will be an
                                    -- unfortunate bug if we ever change
                                    -- that, though! :\
                                    Ok soFar
                        )
                        []
                    |> Result.map (\indexes -> Database.Project indexes plan)
        )
        planned
{-| Render a rule as `head :- body1, body2, ...`. The body atoms are printed
in the order in which they are stored in the rule.
-}
ruleToString : Rule -> String
ruleToString (Rule head body) =
    String.concat
        [ atomToString head
        , " :- "
        , body
            |> List.map bodyAtomToString
            |> String.join ", "
        ]
{-| Polarity of a body atom: `Positive` for atoms added by `with`,
`Negative` for atoms added by `without`. `query` also uses it to label the
edges of the dependency graph.
-}
type Negation
    = Positive
    | Negative
{-| One entry in a rule body: either an atom together with its polarity, or
a filter over names bound elsewhere in the body.
-}
type BodyAtom
    = BodyAtom Negation Atom
    | Filter Filter
{-| Render a single body entry. Negative atoms are prefixed with `not `,
positive atoms are printed as-is, and filters are delegated to
`filterToString`.
-}
bodyAtomToString : BodyAtom -> String
bodyAtomToString bodyAtom =
    case bodyAtom of
        Filter filter_ ->
            filterToString filter_

        BodyAtom Positive atom_ ->
            atomToString atom_

        BodyAtom Negative atom_ ->
            "not " ++ atomToString atom_
{-| A relation name applied to a list of terms, e.g. `link(a, b)`. -}
type Atom
    = Atom String (List Term)
{-| Render an atom as `name(term1, term2, ...)`. -}
atomToString : Atom -> String
atomToString (Atom name terms) =
    String.concat
        [ name
        , "("
        , terms
            |> List.map termToString
            |> String.join ", "
        , ")"
        ]
{-| Plan a single atom. The result is the list of field names, one per term
and in term order, plus a plan that reads the relation.

  - A variable contributes its own name.
  - A constant contributes the placeholder name `"_"` and wraps the plan in
    a selection that requires the field to equal the constant.
  - A variable that occurs more than once in the same atom, as in
    `link(a, a)`, additionally gets a selection requiring the two fields to
    be equal. Without it the repeated name would match any pair of values.
    The name `"_"` is exempt, because constant positions carry that name
    too.

-}
atomToPlan : Atom -> ( List String, Database.QueryPlan )
atomToPlan (Atom name terms) =
    terms
        |> List.indexedMap Tuple.pair
        |> List.foldr
            (\( fieldNum, term ) ( termNames, plan ) ->
                case term of
                    Variable var_ ->
                        ( var_ :: termNames
                        , -- the fold runs right to left, so `termNames`
                          -- holds the names of fields `fieldNum + 1` onwards
                          case indexOf var_ termNames of
                            Just offset ->
                                if var_ == "_" then
                                    plan

                                else
                                    Database.Select
                                        (Database.Predicate
                                            fieldNum
                                            Database.Eq
                                            (Database.Field (fieldNum + 1 + offset))
                                        )
                                        plan

                            Nothing ->
                                plan
                        )

                    Constant constant ->
                        ( "_" :: termNames
                        , plan
                            |> Database.Select
                                (Database.Predicate
                                    fieldNum
                                    Database.Eq
                                    (Database.Constant constant)
                                )
                        )
            )
            ( [], Database.Read name )
{-| A condition on the names bound in a rule body: a comparison between a
name and a term, the negation of a filter, or the disjunction of two
filters.

Note: there is no AND here because it is implicit in the list of conditions
in a rule.

-}
type Filter
    = Predicate String Op Term
    | Not Filter
    | Or Filter Filter
{-| Comparison operators available in a `Predicate` filter: equal, greater
than, and less than. They are created with `eq`, `gt`, and `lt`.
-}
type Op
    = Eq
    | Gt
    | Lt
{-| Attach a filter to a rule. The filter is added to the front of the
rule's body, next to the atoms added by `with` and `without`.
-}
filter : Filter -> Rule -> Rule
filter filter_ (Rule head body) =
    let
        condition : BodyAtom
        condition =
            Filter filter_
    in
    Rule head (condition :: body)
{-| Filter: the value bound to the name `lhs` is equal to `rhs`. -}
eq : String -> Term -> Filter
eq lhs =
    Predicate lhs Eq
{-| Filter: the value bound to the name `lhs` is greater than `rhs`. -}
gt : String -> Term -> Filter
gt lhs =
    Predicate lhs Gt
{-| Filter: the value bound to the name `lhs` is less than `rhs`. -}
lt : String -> Term -> Filter
lt lhs =
    Predicate lhs Lt
{-| Negate a filter. -}
not_ : Filter -> Filter
not_ inner =
    Not inner
{-| Combine two filters so that a row passes when either of them holds. -}
or : Filter -> Filter -> Filter
or left right =
    Or left right
{-| Wrap `plan` in a selection that implements `topFilter`.

`names` lists the field names produced by `plan`, in field order; every name
the filter mentions is looked up in it. The names are returned unchanged
next to the new plan. A name that cannot be found results in
`VariableDoesNotAppearInBody`.

-}
filterToPlan : Filter -> List String -> Database.QueryPlan -> Result Problem ( List String, Database.QueryPlan )
filterToPlan topFilter names plan =
    let
        -- Position of a name among the plan's fields.
        fieldFor : String -> Result Problem Database.Field
        fieldFor name =
            indexOf name names
                |> Result.fromMaybe (VariableDoesNotAppearInBody name)

        -- Right-hand side of a comparison: another field or a constant.
        operandFor : Term -> Result Problem Database.FieldOrConstant
        operandFor term =
            case term of
                Constant constant ->
                    Ok (Database.Constant constant)

                Variable name ->
                    fieldFor name
                        |> Result.map Database.Field

        operatorFor : Op -> Database.Op
        operatorFor op =
            case op of
                Gt ->
                    Database.Gt

                Lt ->
                    Database.Lt

                Eq ->
                    Database.Eq

        selectionFor : Filter -> Result Problem Database.Selection
        selectionFor filter_ =
            case filter_ of
                Or left right ->
                    Result.map2 Database.Or
                        (selectionFor left)
                        (selectionFor right)

                Not inner ->
                    selectionFor inner
                        |> Result.map Database.Not

                Predicate lhs op rhs ->
                    Result.map2
                        (\field operand -> Database.Predicate field (operatorFor op) operand)
                        (fieldFor lhs)
                        (operandFor rhs)
    in
    selectionFor topFilter
        |> Result.map (\selection -> ( names, Database.Select selection plan ))
{-| Render a filter as text.

`not` and `or` are parenthesised so that differently shaped filters never
print the same way. Without the parentheses, `Not (Or a b)` and
`Or (Not a) b` would both print as `not a or b`. That matters because
`query` hashes the output of `ruleToString` to identify a rule, so two
different rules with the same text would be treated as one.

-}
filterToString : Filter -> String
filterToString filter_ =
    case filter_ of
        Predicate lhs op rhs ->
            lhs ++ " " ++ opToString op ++ " " ++ termToString rhs

        Not notFilter ->
            "not (" ++ filterToString notFilter ++ ")"

        Or left right ->
            "(" ++ filterToString left ++ " or " ++ filterToString right ++ ")"
{-| The symbol used when printing a comparison operator. -}
opToString : Op -> String
opToString op =
    case op of
        Lt ->
            "<"

        Gt ->
            ">"

        Eq ->
            "="
{-| One argument of an atom, or the right-hand side of a filter: either a
named variable or a constant value from the `Database` module. Terms are
created with `var`, `int`, and `string`.
-}
type Term
    = Variable String
    | Constant Constant
{-| A variable term with the given name. -}
var : String -> Term
var name =
    Variable name
{-| A constant term holding a string. -}
string : String -> Term
string value =
    Constant (Database.String value)
{-| A constant term holding an integer. -}
int : Int -> Term
int value =
    Constant (Database.Int value)
{-| Render a term as text: variables by name, integers in decimal, and
strings in double quotes.

Backslashes and double quotes inside a string constant are escaped. If they
were not, a single constant containing `", "` would print exactly like two
separate constants. `query` hashes the output of `ruleToString` to identify
a rule, so that ambiguity could make two different rules look the same.
Variable names are emitted verbatim.

-}
termToString : Term -> String
termToString term =
    case term of
        Variable var_ ->
            var_

        Constant (Database.String string_) ->
            let
                -- escape the backslash first, so the backslashes added for
                -- quotes are not escaped a second time
                escaped : String
                escaped =
                    string_
                        |> String.replace "\\" "\\\\"
                        |> String.replace "\"" "\\\""
            in
            "\"" ++ escaped ++ "\""

        Constant (Database.Int int_) ->
            String.fromInt int_