Skip to content

Commit 63d1feb

Browse files
committed
Add POC support for EXPLODE operator and functions over Lists.
1 parent 3ec14ed commit 63d1feb

File tree

8 files changed

+569
-8
lines changed

8 files changed

+569
-8
lines changed

lib/Attean.pm

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,10 @@ currently undefined.
432432

433433

434434
our %global_functions;
435+
436+
=item C<< register_global_function( %uri_to_func ) >>
437+
438+
=cut
435439
sub register_global_function {
436440
my $class = shift;
437441
my %args = @_;
@@ -441,13 +445,20 @@ currently undefined.
441445
}
442446
}
443447

448+
=item C<< get_global_function( $uri ) >>
449+
450+
=cut
444451
sub get_global_function {
445452
my $class = shift;
446453
my $uri = shift;
447454
return $global_functions{ $uri };
448455
}
449456

450457
our %global_aggregates;
458+
459+
=item C<< register_global_aggregate( %uri_to_hash ) >>
460+
461+
=cut
451462
sub register_global_aggregate {
452463
my $class = shift;
453464
my %args = @_;
@@ -457,6 +468,9 @@ currently undefined.
457468
}
458469
}
459470

471+
=item C<< get_global_aggregate( $uri ) >>
472+
473+
=cut
460474
sub get_global_aggregate {
461475
my $class = shift;
462476
my $uri = shift;

lib/Attean/Algebra.pm

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,55 @@ package Attean::Algebra::Extend 0.031 {
421421
}
422422
}
423423

424+
=item * L<Attean::Algebra::Explode>
425+
426+
=cut
427+
428+
package Attean::Algebra::Explode 0.031 {
429+
use AtteanX::SPARQL::Constants;
430+
use AtteanX::SPARQL::Token;
431+
use Moo;
432+
use Types::Standard qw(ConsumerOf);
433+
use namespace::clean;
434+
435+
sub in_scope_variables {
436+
my $self = shift;
437+
my ($child) = @{ $self->children };
438+
my @vars = $child->in_scope_variables;
439+
return Set::Scalar->new(@vars, $self->variable->value)->elements;
440+
}
441+
with 'Attean::API::Algebra', 'Attean::API::UnaryQueryTree';
442+
443+
has 'variable' => (is => 'ro', isa => ConsumerOf['Attean::API::Variable'], required => 1);
444+
has 'expression' => (is => 'ro', isa => ConsumerOf['Attean::API::Expression'], required => 1);
445+
446+
sub algebra_as_string {
447+
my $self = shift;
448+
return sprintf('Explode { %s ← %s }', $self->variable->as_string, $self->expression->as_string);
449+
}
450+
sub tree_attributes { return qw(variable expression) };
451+
sub sparql_tokens {
452+
my $self = shift;
453+
my $explode = AtteanX::SPARQL::Token->keyword('EXPLODE');
454+
my $as = AtteanX::SPARQL::Token->keyword('AS');
455+
my $l = AtteanX::SPARQL::Token->lparen;
456+
my $r = AtteanX::SPARQL::Token->rparen;
457+
my ($child) = @{ $self->children };
458+
my $var = $self->variable;
459+
my $expr = $self->expression;
460+
461+
my @tokens;
462+
push(@tokens, $child->sparql_tokens->elements);
463+
push(@tokens, $explode);
464+
push(@tokens, $l);
465+
push(@tokens, $expr->sparql_tokens->elements);
466+
push(@tokens, $as);
467+
push(@tokens, $var->sparql_tokens->elements);
468+
push(@tokens, $r);
469+
return Attean::ListIterator->new( values => \@tokens, item_type => 'AtteanX::SPARQL::Token' );
470+
}
471+
}
472+
424473
=item * L<Attean::Algebra::Minus>
425474
426475
=cut

lib/Attean/SimpleQueryEvaluator.pm

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ package Attean::SimpleQueryEvaluator 0.031 {
3838
use Moo;
3939
use Encode qw(encode);
4040
use Attean::RDF;
41+
use AtteanX::Functions::CompositeLists;
4142
use LWP::UserAgent;
4243
use Scalar::Util qw(blessed);
4344
use List::Util qw(all any reduce);
@@ -140,7 +141,7 @@ supplied C<< $active_graph >>.
140141
foreach my $var (@extends) {
141142
my $expr = $extends{ $var };
142143
my $val = $expr_eval->evaluate_expression( $expr, $r, $active_graph, \%row_cache );
143-
if ($val->does('Attean::API::Binding')) {
144+
if (blessed($val) and $val->does('Attean::API::Binding')) {
144145
# patterns need to be made ground to be bound as values (e.g. TriplePattern -> Triple)
145146
$val = $val->ground($r);
146147
}
@@ -149,6 +150,38 @@ supplied C<< $active_graph >>.
149150
}
150151
return $r;
151152
});
153+
} elsif ($algebra->isa('Attean::Algebra::Explode')) {
154+
my $expr = $algebra->expression;
155+
my $var = $algebra->variable->value;
156+
157+
my ($child) = @{ $algebra->children };
158+
my $iter = $self->evaluate( $child, $active_graph );
159+
my @results;
160+
while (my $r = $iter->next) {
161+
my %extension;
162+
my %row_cache;
163+
my $val = $expr_eval->evaluate_expression( $expr, $r, $active_graph, \%row_cache );
164+
die 'TypeError' unless ($val->does('Attean::API::Literal'));
165+
my $dt = $val->datatype;
166+
die 'TypeError' unless ($dt->value eq $AtteanX::Functions::CompositeLists::LIST_TYPE_IRI);
167+
my $lex = $val->value;
168+
substr($lex, 0, 1, '');
169+
substr($lex, -1, 1, '');
170+
my $p = Attean->get_parser('SPARQL')->new();
171+
my @nodes = $p->parse_nodes($lex, commas => 1);
172+
foreach my $val (@nodes) {
173+
if ($val->does('Attean::API::Binding')) {
174+
# patterns need to be made ground to be bound as values (e.g. TriplePattern -> Triple)
175+
$val = $val->ground($r);
176+
}
177+
# warn "Explode error: $@" if ($@);
178+
my $new = Attean::Result->new( bindings => { $var => $val } )->join($r) if ($val);
179+
push(@results, $new);
180+
}
181+
}
182+
my %vars = map { $_ => 1 } $iter->variables;
183+
$vars{$var}++;
184+
return Attean::ListIterator->new(variables => [keys %vars], values => \@results, item_type => 'Attean::API::Result');
152185
} elsif ($algebra->isa('Attean::Algebra::Filter')) {
153186
# TODO: Merge adjacent filter evaluation so that they can share a row_cache hash (as is done for Extend above)
154187
my $expr = $algebra->expression;
@@ -485,7 +518,7 @@ supplied C<< $active_graph >>.
485518
my $vars = [map { $_->value } @{ $algebra->variables }];
486519
return Attean::ListIterator->new(variables => $vars, values => $algebra->rows, item_type => 'Attean::API::Result');
487520
}
488-
die "Unimplemented algebra evaluation for: $algebra";
521+
die "Unimplemented simple algebra evaluation for: $algebra";
489522
}
490523

491524

@@ -608,7 +641,8 @@ package Attean::SimpleQueryEvaluator::ExpressionEvaluator 0.031 {
608641
my $active_graph = shift;
609642
my $row_cache = shift || {};
610643
my $impl = $self->impl($expr, $active_graph);
611-
return eval { $impl->($row, row_cache => $row_cache) };
644+
my $result = eval { $impl->($row, row_cache => $row_cache) };
645+
return $result;
612646
}
613647

614648
sub impl {
@@ -726,7 +760,8 @@ package Attean::SimpleQueryEvaluator::ExpressionEvaluator 0.031 {
726760

727761
if ($func eq 'IF') {
728762
my $term = $children[0]->( $r, %args );
729-
return ($term->ebv) ? $children[1]->( $r, %args ) : $children[2]->( $r, %args );
763+
my $ebv = $term->ebv;
764+
return $ebv ? $children[1]->( $r, %args ) : $children[2]->( $r, %args );
730765
} elsif ($func eq 'IN' or $func eq 'NOTIN') {
731766
($true, $false) = ($false, $true) if ($func eq 'NOTIN');
732767
my $child = shift(@children);
@@ -754,6 +789,12 @@ package Attean::SimpleQueryEvaluator::ExpressionEvaluator 0.031 {
754789
my $term = $operands[0]->$pos();
755790
return $term;
756791
} elsif ($func =~ /^([UI]RI)$/) {
792+
my $operand = $operands[0];
793+
if ($operand->does('Attean::API::Literal')) {
794+
if ($operand->datatype->value ne 'http://www.w3.org/2001/XMLSchema#string') {
795+
die "TypeError: ${func} called with a datatyped-literal other than xsd:string";
796+
}
797+
}
757798
my @base = $expr->has_base ? (base => $expr->base) : ();
758799
return $type_classes{$1}->new(value => $operands[0]->value, @base);
759800
} elsif ($func eq 'BNODE') {
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
use v5.14;
2+
use warnings;
3+
use utf8;
4+
5+
=head1 NAME
6+
7+
AtteanX::Functions::CompositeLists - Functions and aggregates to work with composite lists
8+
9+
=head1 VERSION
10+
11+
This document describes AtteanX::Functions::CompositeLists version 0.031
12+
13+
=head1 SYNOPSIS
14+
15+
use v5.14;
16+
use Attean;
17+
18+
=head1 DESCRIPTION
19+
20+
This is a utility package that defines functions and aggregates to work with
21+
composite list datatypes.
22+
23+
=over 4
24+
25+
=cut
26+
27+
package AtteanX::Functions::CompositeLists 0.031 {
28+
29+
use Attean;
30+
use Attean::RDF;
31+
32+
our $LIST_TYPE_IRI = 'http://example.org/List';
33+
34+
=item C<< stringSplit($string, $pattern) >>
35+
36+
=cut
37+
sub stringSplit {
38+
my $string = shift;
39+
my $pattern = shift;
40+
my @parts = map { literal($_) } split(quotemeta($pattern->value), $string->value);
41+
return list_to_lex(@parts);
42+
}
43+
44+
=item C<< listGet($list, $pos) >>
45+
46+
=cut
47+
sub listGet {
48+
my $l = shift;
49+
my $pos = shift;
50+
die 'TypeError' unless ($l->does('Attean::API::Literal'));
51+
my $dt = $l->datatype;
52+
die 'TypeError' unless ($dt->value eq $LIST_TYPE_IRI);
53+
my $lex = $l->value;
54+
substr($lex, 0, 1, '');
55+
substr($lex, -1, 1, '');
56+
my $p = Attean->get_parser('SPARQL')->new();
57+
my @nodes = $p->parse_nodes($lex, commas => 1);
58+
my $i = int($pos->value);
59+
return $nodes[$i];
60+
}
61+
62+
=item C<< sequence($start, $end) >>
63+
64+
=cut
65+
sub sequence {
66+
my $start = 1;
67+
my $end = 1;
68+
if (scalar(@_) == 2) {
69+
$start = shift->numeric_value;
70+
$end = shift->numeric_value;
71+
} else {
72+
$end = shift->numeric_value;
73+
}
74+
my @terms = map { Attean::Literal->integer($_) } ($start .. $end);
75+
my $lex = '(' . join(',', map { $_->ntriples_string } @terms) . ')';
76+
return dtliteral($lex, $LIST_TYPE_IRI);
77+
}
78+
79+
=item C<< lex_to_list($literal) >>
80+
81+
=cut
82+
sub lex_to_list {
83+
my $l = shift;
84+
die 'TypeError' unless ($l->does('Attean::API::Literal'));
85+
my $dt = $l->datatype;
86+
die 'TypeError: not a datatype literal' unless ($dt);
87+
die 'TypeError: Expecting a List but found ' . $dt->value unless ($dt->value eq $LIST_TYPE_IRI);
88+
my $lex = $l->value;
89+
substr($lex, 0, 1, '');
90+
substr($lex, -1, 1, '');
91+
my $p = Attean->get_parser('SPARQL')->new();
92+
my @nodes = $p->parse_nodes($lex, commas => 1);
93+
return @nodes;
94+
}
95+
96+
=item C<< list_to_lex(@terms) >>
97+
98+
=cut
99+
sub list_to_lex {
100+
my @terms = @_;
101+
my $lex = '(' . join(',', map { $_->ntriples_string } @terms) . ')';
102+
return dtliteral($lex, $LIST_TYPE_IRI);
103+
}
104+
105+
=item C<< zip($list, $list) >>
106+
107+
=cut
108+
sub zip {
109+
my $lhs = shift;
110+
my $rhs = shift;
111+
my @lhs_nodes = lex_to_list($lhs);
112+
my @rhs_nodes = lex_to_list($rhs);
113+
die 'zip operands are not the same length' unless (scalar(@lhs_nodes) == scalar(@rhs_nodes));
114+
115+
my @elements;
116+
while (scalar(@lhs_nodes)) {
117+
my @list = (shift(@lhs_nodes), shift(@rhs_nodes));
118+
my $l = list_to_lex(@list);
119+
push(@elements, $l);
120+
}
121+
return list_to_lex(@elements);
122+
}
123+
124+
=item C<< listCreate_agg_start() >>
125+
126+
=cut
127+
sub listCreate_agg_start {
128+
return {
129+
values => []
130+
};
131+
}
132+
133+
=item C<< listCreate_agg_process($thunk, $term) >>
134+
135+
=cut
136+
sub listCreate_agg_process {
137+
my $thunk = shift;
138+
my ($term) = @_;
139+
push(@{ $thunk->{'values' }}, $term);
140+
}
141+
142+
=item C<< listCreate_agg_finalize($thunk) >>
143+
144+
=cut
145+
sub listCreate_agg_finalize {
146+
my $thunk = shift;
147+
my @terms = @{ $thunk->{'values' }};
148+
my $lex = '(' . join(',', map { $_->ntriples_string } @terms) . ')';
149+
return dtliteral($lex, $LIST_TYPE_IRI);
150+
}
151+
152+
=item C<< register() >>
153+
154+
=cut
155+
sub register {
156+
Attean->register_global_function(
157+
'http://example.org/listGet' => \&listGet,
158+
'http://example.org/listCreate' => \&list_to_lex,
159+
'http://example.org/sequence' => \&sequence,
160+
'http://example.org/zip' => \&zip,
161+
'http://example.org/split' => \&stringSplit,
162+
);
163+
164+
Attean->register_global_aggregate(
165+
'http://example.org/listAgg' => {
166+
start => \&listCreate_agg_start,
167+
process => \&listCreate_agg_process,
168+
finalize => \&listCreate_agg_finalize,
169+
},
170+
);
171+
}
172+
}
173+
174+
1;
175+
176+
__END__
177+
178+
=back
179+
180+
=head1 BUGS
181+
182+
Please report any bugs or feature requests to through the GitHub web interface
183+
at L<https://github.com/kasei/attean/issues>.
184+
185+
=head1 SEE ALSO
186+
187+
188+
189+
=head1 AUTHOR
190+
191+
Gregory Todd Williams C<< <gwilliams@cpan.org> >>
192+
193+
=head1 COPYRIGHT
194+
195+
Copyright (c) 2014--2022 Gregory Todd Williams.
196+
This program is free software; you can redistribute it and/or modify it under
197+
the same terms as Perl itself.
198+
199+
=cut

0 commit comments

Comments
 (0)