Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1372,19 +1372,30 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
val pattern = children.head.genCode(ctx)

val argListGen = children.tail.map(x => (x.dataType, x.genCode(ctx)))
val argListCode = argListGen.map(_._2.code + "\n")

val argListString = argListGen.foldLeft("")((s, v) => {
val nullSafeString =
val argList = ctx.freshName("argLists")
val numArgLists = argListGen.length
val argListCode = argListGen.zipWithIndex.map { case(v, index) =>
val value =
if (ctx.boxedType(v._1) != ctx.javaType(v._1)) {
// Java primitives get boxed in order to allow null values.
s"(${v._2.isNull}) ? (${ctx.boxedType(v._1)}) null : " +
s"new ${ctx.boxedType(v._1)}(${v._2.value})"
} else {
s"(${v._2.isNull}) ? null : ${v._2.value}"
}
s + "," + nullSafeString
})
s"""
${v._2.code}
$argList[$index] = $value;
"""
}
val argListCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
ctx.splitExpressions(
expressions = argListCode,
funcName = "valueFormatString",
arguments = ("InternalRow", ctx.INPUT_ROW) :: ("Object[]", argList) :: Nil)
} else {
argListCode.mkString("\n")
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you create a splitExpressions in CodegenContext for avoiding the duplicate codes, like

    if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
      ctx.splitExpressions(...)
    } else {
      inputs.mkString("\n")
    }

@kiszk kiszk Nov 26, 2017

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, is it better to do this in this PR? Or, should I create another PR?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to submit a PR for that, but I was waiting for all the PRs related (this one should be the last one) to be merged. Let me know if I can do that or you are doing it. My suggestion would be: can't we just change the existing method to include this check?

@kiszk kiszk Nov 26, 2017

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your comment. After writing the above comment, I thought it would be good to create a separate PR.
Thus, I have just submitted a PR #19821 as WIP before I read your comment. That PR should wait for merging this PR and then I will do rebase for #19821.


val form = ctx.freshName("formatter")
val formatter = classOf[java.util.Formatter].getName
Expand All @@ -1395,10 +1406,11 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
boolean ${ev.isNull} = ${pattern.isNull};
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
${argListCode.mkString}
$stringBuffer $sb = new $stringBuffer();
$formatter $form = new $formatter($sb, ${classOf[Locale].getName}.US);
$form.format(${pattern.value}.toString() $argListString);
Object[] $argList = new Object[$numArgLists];
$argListCodes
$form.format(${pattern.value}.toString(), $argList);
${ev.value} = UTF8String.fromString($sb.toString());
}""")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,14 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
FormatString(Literal("aa%d%s"), 12, Literal.create(null, StringType)), "aa12null")
}

test("SPARK-22603: FormatString should not generate codes beyond 64KB") {
val N = 4500
val args = (1 to N).map(i => Literal.create(i.toString, StringType))
val format = "%s" * N
val expected = (1 to N).map(i => i.toString).mkString
checkEvaluation(FormatString(Literal(format) +: args: _*), expected)
}

test("INSTR") {
val s1 = 'a.string.at(0)
val s2 = 'b.string.at(1)
Expand Down