Support for Apache Spark 2.4.2: built-in Avro support and Scala 2.12 #189

Open · wants to merge 1 commit into master
2 changes: 1 addition & 1 deletion build.sbt

```diff
@@ -20,7 +20,7 @@ import java.io.File
 import java.nio.file.Files
 import java.nio.file.StandardCopyOption.REPLACE_EXISTING
 
-scalaVersion in ThisBuild := "2.11.8"
+scalaVersion in ThisBuild := "2.12.8"
 
 /*
  **********************************************************************************
```
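Spark 2.4.x publishes artifacts for both Scala 2.11 and 2.12, so the version bump does not have to be a hard cutover. A minimal cross-build sketch, not part of this commit (the 2.11.12 patch version is an assumption):

```scala
// Hypothetical alternative in build.sbt: cross-build while both
// Scala lines are still supported by Spark 2.4.x.
scalaVersion in ThisBuild := "2.12.8"
crossScalaVersions in ThisBuild := Seq("2.11.12", "2.12.8")
```

With this in place, `sbt +test` runs the suite once per Scala version.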
6 changes: 3 additions & 3 deletions project/Dependencies.scala

```diff
@@ -19,7 +19,7 @@ import sbt._
 
 object Dependencies {
   // Versions
-  lazy val sparkVersion = "2.3.0"
+  lazy val sparkVersion = "2.4.2"
   lazy val scalacheckVersion = "1.13.5"
   lazy val junitVersion = "4.12"
   lazy val scalatestVersion = "3.0.5"
@@ -29,7 +29,7 @@ object Dependencies {
     "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
     "org.apache.spark" %% "spark-mllib" % sparkVersion % "provided",
     "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
-    "com.databricks" %% "spark-avro" % "4.0.0"
+    "org.apache.spark" %% "spark-avro" % sparkVersion % "provided"
   )
 
   val breezeDeps = Seq(
@@ -55,7 +55,7 @@ object Dependencies {
     "org.scalactic" %% "scalactic" % scalatestVersion % "test",
     "org.scalatest" %% "scalatest" % scalatestVersion % "test",
     "org.apache.spark" %% "spark-hive" % sparkVersion % "test",
-    "com.holdenkarau" %% "spark-testing-base" % "2.2.0_0.8.0" % "test" excludeAll(
+    "com.holdenkarau" %% "spark-testing-base" % "2.4.2_0.12.0" % "test" excludeAll(
       ExclusionRule(organization = "org.scalacheck"),
       ExclusionRule(organization = "org.scalactic"),
       ExclusionRule(organization = "org.scalatest"),
```
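Because the new spark-avro dependency is scoped "provided", it is not baked into the assembly jar, and unlike spark-sql it also does not ship inside the stock Spark 2.4 distribution: Avro is an external module. One way to supply it at launch time is `--packages`; a hedged sketch in which the main class and jar path are placeholders:

```sh
# Pull the built-in Avro connector from Maven Central at submit time.
spark-submit \
  --packages org.apache.spark:spark-avro_2.12:2.4.2 \
  --class <main.Class> \
  path/to/spark-bench-assembly.jar <args>
```

Dropping the "provided" scope instead would bundle the connector into the assembly jar.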
```diff
@@ -20,7 +20,7 @@ package com.ibm.sparktc.sparkbench.utils
 import org.apache.hadoop.fs.Path
 import org.apache.spark.sql.functions.lit
 import org.apache.spark.sql.{DataFrame, SparkSession}
-import com.databricks.spark.avro._
+import org.apache.spark.sql.avro._
 
 object SparkFuncs {
 
@@ -103,7 +103,7 @@ object SparkFuncs {
       case Formats.parquet => data.write.mode(saveMode).parquet(outputDir)
       case Formats.csv => data.write.mode(saveMode).option("header", "true").csv(outputDir)
       case Formats.orc => data.write.mode(saveMode).orc(outputDir)
-      case Formats.avro => data.write.mode(saveMode).avro(outputDir)
+      case Formats.avro => data.write.mode(saveMode).format("avro").save(outputDir)
       case Formats.json => data.write.mode(saveMode).json(outputDir)
       case Formats.console => data.show()
       case _ => throw new Exception(s"Unrecognized or unspecified save format: $format. " +
@@ -123,7 +123,7 @@ object SparkFuncs {
     inputFormat match {
       case Formats.parquet => spark.read.parquet(inputDir)
       case Formats.orc => spark.read.orc(inputDir)
-      case Formats.avro => spark.read.avro(inputDir)
+      case Formats.avro => spark.read.format("avro").load(inputDir)
       case Formats.json => spark.read.json(inputDir)
       case Formats.csv | _ => spark.read.option("inferSchema", "true").option("header", "true").csv(inputDir) //if unspecified, assume csv
     }
```
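For reference, Spark 2.4's built-in Avro source is addressed by the short name "avro", exactly as the updated match arms do. A minimal round-trip sketch with placeholder paths:

```scala
import org.apache.spark.sql.SparkSession

// Round-trip through the built-in Avro source (Spark 2.4+).
// Input and output paths here are placeholders.
val spark = SparkSession.builder.appName("avro-roundtrip").getOrCreate()

val df = spark.read.format("avro").load("/tmp/people.avro")
df.write.mode("overwrite").format("avro").save("/tmp/people-copy.avro")
```

Note that `format("avro")` needs no import at all; in Spark 2.4 the `org.apache.spark.sql.avro._` import added above supplies the `from_avro`/`to_avro` column functions rather than reader/writer extensions, so it could likely be dropped here.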
2 changes: 1 addition & 1 deletion version.sbt

```diff
@@ -16,4 +16,4 @@
  */
 // assign version to all projects
 // Spark version 2.1.1, spark-bench version 0.2.0
-version in ThisBuild := "2.3.0_0.4.0-RELEASE"
+version in ThisBuild := "2.4.2_0.4.0-RELEASE"
```