Skip to content

Commit

Permalink
[SPARK-48296][SQL] Codegen Support for to_xml
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
This PR adds codegen support for the `to_xml` expression.

### Why are the changes needed?
Improve codegen coverage.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
- Added a new UT and ran the existing UTs.
- Pass GA.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #46591 from panbingkun/minor_to_xml.

Lead-authored-by: panbingkun <panbingkun@baidu.com>
Co-authored-by: panbingkun <pbk1982@gmail.com>
Signed-off-by: Kent Yao <yao@apache.org>
  • Loading branch information
2 people authored and yaooqinn committed May 16, 2024
1 parent 3bd845e commit fa83d0f
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.io.CharArrayWriter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode}
import org.apache.spark.sql.catalyst.util.{ArrayData, DropMalformedMode, FailFastMode, FailureSafeParser, GenericArrayData, PermissiveMode}
import org.apache.spark.sql.catalyst.util.TypeUtils._
import org.apache.spark.sql.catalyst.xml.{StaxXmlGenerator, StaxXmlParser, ValidatorUtil, XmlInferSchema, XmlOptions}
Expand Down Expand Up @@ -186,9 +186,6 @@ case class SchemaOfXml(
// XML options built lazily from `options` and the literal "UTC".
// NOTE(review): "UTC" is presumably the default time zone id -- confirm against XmlOptions.
@transient
private lazy val xmlOptions = new XmlOptions(options, "UTC")

// XML factory obtained from `xmlOptions.buildXmlFactory()`; created lazily on first access.
@transient
private lazy val xmlFactory = xmlOptions.buildXmlFactory()

@transient
private lazy val xmlInferSchema = {
if (xmlOptions.parseMode == DropMalformedMode) {
Expand Down Expand Up @@ -266,7 +263,6 @@ case class StructsToXml(
timeZoneId: Option[String] = None)
extends UnaryExpression
with TimeZoneAwareExpression
with CodegenFallback
with ExpectsInputTypes
with NullIntolerant {
override def nullable: Boolean = true
Expand Down Expand Up @@ -328,6 +324,11 @@ case class StructsToXml(

// Evaluates the input value by delegating to `converter`.
override def nullSafeEval(value: Any): Any = converter(value)

/**
 * Generates code for this expression by registering this instance as a codegen
 * reference object and emitting a call to its `nullSafeEval`, with the result
 * cast to `UTF8String`.
 *
 * @param ctx codegen context used to register the reference object
 * @param ev  expression code holder passed through to `defineCodeGen`
 * @return the `ExprCode` produced by `defineCodeGen`
 */
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
  // Name of the reference through which the generated code reaches this instance.
  val selfRef = ctx.addReferenceObj("this", this)
  defineCodeGen(ctx, ev, childValue => s"(UTF8String) $selfRef.nullSafeEval($childValue)")
}

// Declares a single expected input type: StructType.
override def inputTypes: Seq[AbstractDataType] = StructType :: Nil

// Overrides the expression's pretty name with "to_xml".
override def prettyName: String = "to_xml"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.util.Locale

import scala.jdk.CollectionConverters._

import org.apache.spark.sql.execution.WholeStageCodegenExec
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
Expand Down Expand Up @@ -55,7 +56,7 @@ class XmlFunctionsSuite extends QueryTest with SharedSparkSession {
val options = Map("rowTag" -> "foo").asJava

checkAnswer(
df.select(from_xml($"value", schema)),
df.select(from_xml($"value", schema, options)),
Row(Row(1)) :: Nil)
}

Expand Down Expand Up @@ -383,6 +384,22 @@ class XmlFunctionsSuite extends QueryTest with SharedSparkSession {
}
}

// Checks that `to_xml` over a struct column yields an executed plan rooted at
// WholeStageCodegenExec and still returns the expected XML text.
test("SPARK-48296: to_xml - Codegen Support") {
  withTempView("StructsToXmlTable") {
    // One non-nullable int column `a`, then replaced by a struct column wrapping it.
    val intSchema = StructType(Seq(StructField("a", IntegerType, nullable = false)))
    val structDF = spark
      .createDataFrame(Seq(Row(1)).asJava, intSchema)
      .withColumn("a", struct($"a"))
    structDF.createOrReplaceTempView("StructsToXmlTable")

    val result = sql("SELECT to_xml(a) FROM StructsToXmlTable")
    // The root of the executed plan must be a WholeStageCodegenExec node.
    assert(result.queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec])

    val expectedXml =
      s"""|<ROW>
          | <a>1</a>
          |</ROW>""".stripMargin
    checkAnswer(result, Seq(Row(expectedXml)))
  }
}

test("corrupt record column in the middle") {
val schema = new StructType()
.add("a", IntegerType)
Expand Down

0 comments on commit fa83d0f

Please sign in to comment.