Skip to content

Commit

Permalink
[GH-5725] Expose UUID for MOJO2
Browse files Browse the repository at this point in the history
  • Loading branch information
krasinski committed Apr 17, 2024
1 parent 63d4f86 commit 22be47e
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ class H2OMOJOPipelineModelTestSuite extends FunSuite with SparkTestContext with
}
}

test("get UUID") {
  // The prostate MOJO pipeline fixture is expected to expose this exact UUID.
  val expectedUuid = "b448504b-1877-435f-9b09-d911a3388bd7"
  prostateMojoPipeline.getUuid() shouldEqual expectedUuid
}

test("Testing dataset is missing one of the feature columns") {
val schema = prostateTestData.drop("AGE").schema
val rdd = sc.parallelize(Seq(Row("1", "0", "1", "2", "1", "1.4", "0", "6")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ def getWithPredictionInterval(self):

def getScoringBulkSize(self):
    """Return the result of ``getScoringBulkSize()`` called on the wrapped Java object."""
    java_model = self._java_obj
    return java_model.getScoringBulkSize()

def getUuid(self):
    """Return the result of ``getUuid()`` called on the wrapped Java object."""
    java_model = self._java_obj
    return java_model.getUuid()
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,4 @@ def testMojoPipelinePredictionInterval(spark):
assert predictionDF.select("secret_Pressure3pm").distinct().count() == expectedCount
assert predictionDF.select("`secret_Pressure3pm.lower`").distinct().count() == expectedCount
assert predictionDF.select("`secret_Pressure3pm.upper`").distinct().count() == expectedCount
assert mojo.getUuid() == "test_regression_accuracy3_e5169_d71b"
3 changes: 3 additions & 0 deletions r/src/R/ai/h2o/sparkling/ml/models/H2OMOJOPipelineModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,8 @@ H2OMOJOPipelineModel <- setRefClass("H2OMOJOPipelineModel", contains = ("H2OAlgo
},
# Returns the result of invoking "getScoringBulkSize" on the wrapped `jmojo` object.
getScoringBulkSize = function() {
invoke(.self$jmojo, "getScoringBulkSize")
},
# Returns the result of invoking "getUuid" on the wrapped `jmojo` object.
getUuid = function() {
invoke(.self$jmojo, "getUuid")
}
))
1 change: 1 addition & 0 deletions r/src/tests/testthat/testMojoPipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ test_that("test MOJO predicition intervals", {

flattenedContributions <- tidyr::unnest_wider(data = mojoOutput, col = "prediction")
expect_equal(length(colnames(flattenedContributions)), length(colnames(dataset)) + 3)
expect_equal(mojo$getUuid(), "test_regression_accuracy3_e5169_d71b")
})

spark_disconnect(sc)
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import ai.h2o.mojos.runtime.MojoPipeline
import ai.h2o.mojos.runtime.api.{MojoPipelineService, PipelineConfig}
import ai.h2o.mojos.runtime.frame.MojoColumn.Type
import ai.h2o.mojos.runtime.frame.MojoFrame
import ai.h2o.sparkling.ml.params.{H2OAlgorithmMOJOParams, H2OBaseMOJOParams, HasFeatureTypesOnMOJO}
import ai.h2o.sparkling.ml.params.{H2OAlgorithmMOJOParams, H2OBaseMOJOParams, HasFeatureTypesOnMOJO, ParameterConstructorMethods}
import ai.h2o.sparkling.sql.catalyst.encoders.RowEncoder
import com.google.common.collect.Iterators
import org.apache.spark.annotation.DeveloperApi
Expand All @@ -40,10 +40,13 @@ class H2OMOJOPipelineModel(override val uid: String)
with H2OMOJOWritable
with H2OAlgorithmMOJOParams
with H2OBaseMOJOParams
with ParameterConstructorMethods
with HasFeatureTypesOnMOJO {

H2OMOJOPipelineCache.startCleanupThread()

// Parameter holding the MOJO2 UUID; the companion object fills it from `pipelineMojo.getUuid`
// and it is read back through `getUuid()`.
protected final val uuid = stringParam(name = "UUID", doc = "MOJO2 UUID")

// Private parameter used to store the names of the MOJO output columns
// (the sub-columns nested under the output column); read via `getOutputSubCols()`.
protected final val outputSubCols: StringArrayParam =
new StringArrayParam(this, "outputSubCols", "Names of sub-columns under the output column")
Expand Down Expand Up @@ -89,6 +92,8 @@ class H2OMOJOPipelineModel(override val uid: String)
"scoringBulkSize",
"A number of records passed at once to the underlying mojo2 runtime library. Supported only by DriverlessAI MOJO models.")

/**
  * Reads the `uuid` parameter of this model.
  *
  * @return the value currently held by the `uuid` parameter
  */
def getUuid(): String = $(uuid)

/**
  * Reads the `outputSubCols` parameter of this model.
  *
  * @return the value currently held by the `outputSubCols` parameter
  */
def getOutputSubCols(): Array[String] = $(outputSubCols)

/**
  * Name of the contributions column.
  *
  * @return the fixed string "contributions"
  */
def getContributionsCol(): String = {
  "contributions"
}
Expand Down Expand Up @@ -341,6 +346,7 @@ object H2OMOJOPipelineModel extends H2OMOJOReadable[H2OMOJOPipelineModel] with H
val featureTypesMap = featureCols.zip(featureTypeNames).toMap
val outputCols = pipelineMojo.getOutputMeta.getColumns.asScala
model.set(model.featureTypes, featureTypesMap)
model.set(model.uuid, pipelineMojo.getUuid)
model.set(model.outputSubCols, outputCols.map(_.getColumnName).toArray)
model.set(model.outputSubTypes, outputCols.map(_.getColumnType.toString).toArray)
}
Expand Down

0 comments on commit 22be47e

Please sign in to comment.