-
Notifications
You must be signed in to change notification settings - Fork 71
Support for distinct aggregations #161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -109,25 +109,47 @@ object OpaqueOperators extends Strategy { | |
| if (isEncrypted(child) && aggExpressions.forall(expr => expr.isInstanceOf[AggregateExpression])) => | ||
|
|
||
| val aggregateExpressions = aggExpressions.map(expr => expr.asInstanceOf[AggregateExpression]) | ||
|
|
||
| if (groupingExpressions.size == 0) { | ||
| // Global aggregation | ||
| val partialAggregate = EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Partial, planLater(child)) | ||
| val partialOutput = partialAggregate.output | ||
| val (projSchema, tag) = tagForGlobalAggregate(partialOutput) | ||
|
|
||
| EncryptedProjectExec(resultExpressions, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Final, | ||
| EncryptedProjectExec(partialOutput, | ||
| EncryptedSortExec(Seq(SortOrder(tag, Ascending)), true, | ||
| EncryptedProjectExec(projSchema, partialAggregate))))) :: Nil | ||
| } else { | ||
| // Grouping aggregation | ||
| EncryptedProjectExec(resultExpressions, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Final, | ||
| EncryptedSortExec(groupingExpressions.map(_.toAttribute).map(e => SortOrder(e, Ascending)), true, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Partial, | ||
| EncryptedSortExec(groupingExpressions.map(e => SortOrder(e, Ascending)), false, planLater(child)))))) :: Nil | ||
| val (functionsWithDistinct, functionsWithoutDistinct) = aggregateExpressions.partition(_.isDistinct) | ||
|
|
||
| functionsWithDistinct.size match { | ||
| case size if size == 0 => // No distinct aggregate operations | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this |
||
| if (groupingExpressions.size == 0) { | ||
| // Global aggregation | ||
| val partialAggregate = EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Partial, planLater(child)) | ||
| val partialOutput = partialAggregate.output | ||
| val (projSchema, tag) = tagForGlobalAggregate(partialOutput) | ||
|
|
||
| EncryptedProjectExec(resultExpressions, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Final, | ||
| EncryptedProjectExec(partialOutput, | ||
| EncryptedSortExec(Seq(SortOrder(tag, Ascending)), true, | ||
| EncryptedProjectExec(projSchema, partialAggregate))))) :: Nil | ||
| } else { | ||
| // Grouping aggregation | ||
| EncryptedProjectExec(resultExpressions, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Final, | ||
| EncryptedSortExec(groupingExpressions.map(_.toAttribute).map(e => SortOrder(e, Ascending)), true, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Partial, | ||
| EncryptedSortExec(groupingExpressions.map(e => SortOrder(e, Ascending)), false, planLater(child)))))) :: Nil | ||
| } | ||
| case size if size == 1 => // One distinct aggregate operation | ||
| if (groupingExpressions.size == 0) { | ||
| // Global aggregation | ||
| val partialAggregate = EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Partial, planLater(child)) | ||
| val partialOutput = partialAggregate.output | ||
| val (projSchema, tag) = tagForGlobalAggregate(partialOutput) | ||
|
|
||
| EncryptedProjectExec(resultExpressions, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Final, | ||
| EncryptedProjectExec(partialOutput, | ||
| EncryptedSortExec(Seq(SortOrder(tag, Ascending)), true, | ||
| EncryptedProjectExec(projSchema, partialAggregate))))) :: Nil | ||
| } else { | ||
| // Grouping aggregation | ||
| EncryptedProjectExec(resultExpressions, | ||
| EncryptedAggregateExec(groupingExpressions, aggregateExpressions, Complete, | ||
| EncryptedSortExec(groupingExpressions.map(e => SortOrder(e, Ascending)), true, planLater(child)))) :: Nil | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you see what happens when there are multiple distincts? We should catch it here or somewhere else and say that we do not support it.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Getting |
||
| } | ||
|
|
||
| case p @ Union(Seq(left, right)) if isEncrypted(p) => | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -377,6 +377,13 @@ trait OpaqueOperatorTests extends OpaqueTestsBase { self => | |
| .collect.sortBy { case Row(category: String, _) => category } | ||
| } | ||
|
|
||
| testAgainstSpark("aggregate count - distinct") { securityLevel => | ||
| val data = (0 until 32).map{ i => (abc(i), i % 8)}.toSeq | ||
| val words = makeDF(data, securityLevel, "category", "price") | ||
| words.groupBy("category").agg(countDistinct("price").as("distinctPrices")) | ||
| .collect.sortBy { case Row(category: String, _) => category } | ||
| } | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add another test for global distinct aggregation, as well as tests for when the number of distinct items is 0? |
||
| testAgainstSpark("aggregate first") { securityLevel => | ||
| val data = for (i <- 0 until 256) yield (i, abc(i), 1) | ||
| val words = makeDF(data, securityLevel, "id", "category", "price") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was this comment changed? I don't think the new meaning is equivalent to what the code says?