Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SUPPORT] Hudi COW Encryption #11257

Open
soumilshah1995 opened this issue May 19, 2024 · 0 comments
Open

[SUPPORT] Hudi COW Encryption #11257

soumilshah1995 opened this issue May 19, 2024 · 0 comments
Labels
feature-enquiry issue contains feature enquiries/requests or great improvement ideas

Comments

@soumilshah1995
Copy link

Sample Code

# Import dependencies inside try/except so a missing package is reported
# clearly before any of the pipeline code runs.
try:
    import os
    import sys
    import uuid
    import random
    from datetime import datetime  # the script uses datetime.now() directly

    import pyspark
    from pyspark.sql import SparkSession
    from pyspark import SparkConf, SparkContext
    from faker import Faker
    import pandas as pd  # Pandas is used for pretty printing sample records

    print("Imports loaded ")

except ImportError as e:
    # Re-raise instead of continuing: without these imports the rest of the
    # script would only fail later with a confusing NameError.
    print("error", e)
    raise

# Versions used to resolve the Hudi Spark bundle from Maven.
HUDI_VERSION = '1.0.0-beta1'
SPARK_VERSION = '3.4'

# Point the JVM at a local OpenJDK 11 install, ask spark-submit to pull the
# Hudi bundle package, and make PySpark workers use this interpreter.
os.environ["JAVA_HOME"] = "/opt/homebrew/opt/openjdk@11"
SUBMIT_ARGS = (
    "--packages org.apache.hudi:hudi-spark{spark}-bundle_2.12:{hudi} pyspark-shell"
    .format(spark=SPARK_VERSION, hudi=HUDI_VERSION)
)
os.environ["PYSPARK_SUBMIT_ARGS"] = SUBMIT_ARGS
os.environ['PYSPARK_PYTHON'] = sys.executable

# Spark session
# Build (or reuse) the Spark session with the Hudi SQL extension enabled.
# Kryo serialization is required by Hudi; convertMetastoreParquet is disabled
# so Hudi-managed parquet files are read through Hudi rather than Spark's
# native parquet reader.
spark = SparkSession.builder \
    .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer') \
    .config('spark.sql.extensions', 'org.apache.spark.sql.hudi.HoodieSparkSessionExtension') \
    .config('className', 'org.apache.hudi') \
    .config('spark.sql.hive.convertMetastoreParquet', 'false') \
    .getOrCreate()

# Parquet Modular Encryption setup: key "k1" encrypts the footer and key "k2"
# encrypts the customer_id column.
# NOTE(review): InMemoryKMS is a *test* KMS client shipped only in the
# parquet-hadoop "tests" artifact; it is not bundled with the Hudi Spark
# bundle, which is why the write below fails with
# ClassNotFoundException: org.apache.parquet.crypto.keytools.mocks.InMemoryKMS.
# Confirm the tests jar (or a real KmsClient implementation) is added to the
# classpath via --jars/--packages before enabling these settings.
spark._jsc.hadoopConfiguration().set("parquet.crypto.factory.class", "org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory")
spark._jsc.hadoopConfiguration().set("parquet.encryption.kms.client.class" , "org.apache.parquet.crypto.keytools.mocks.InMemoryKMS")
spark._jsc.hadoopConfiguration().set("parquet.encryption.footer.key", "k1")
spark._jsc.hadoopConfiguration().set("parquet.encryption.column.keys", "k2:customer_id")


# Module-level Faker instance shared by the data generators below.
# (The original `global faker` statement was a no-op at module scope.)
faker = Faker()


def get_customer_data(total_customers=2):
    """Generate ``total_customers`` fake customer records.

    Each record is a dict with a random UUID ``customer_id``,
    Faker-generated name/state/city/email/address, an ISO-8601
    ``created_at`` timestamp, and a random ``salary`` in [30000, 100000].
    """
    customers_array = []
    for _ in range(total_customers):
        customers_array.append({
            "customer_id": str(uuid.uuid4()),
            "name": faker.name(),
            "state": faker.state(),
            "city": faker.city(),
            "email": faker.email(),
            # isoformat() already returns a str; no extra __str__() needed.
            "created_at": datetime.now().isoformat(),
            "address": faker.address(),  # fixed column-name typo ("adqdress")
            "salary": faker.random_int(min=30000, max=100000),
        })
    return customers_array

# `global` is a no-op at module scope, and `order_data_sample_size` was
# declared but never assigned; keep only the value that is actually used.
total_customers = 10000
customer_data = get_customer_data(total_customers=total_customers)

# Build a Spark DataFrame from the generated dicts; dict insertion order is
# stable, so values() lines up with keys() for the schema.
spark_df_customers = spark.createDataFrame(data=[tuple(i.values()) for i in customer_data],
                                           schema=list(customer_data[0].keys()))
spark_df_customers.show(1, truncate=False)
spark_df_customers.printSchema()



def write_to_hudi(spark_df,
                  table_name,
                  db_name,
                  method='upsert',
                  table_type='COPY_ON_WRITE',
                  recordkey='',
                  precombine='',
                  partition_fields='',
                  index_type='BLOOM',
                  base_path='file:///Users/soumilshah/IdeaProjects/SparkProject/tem'
                 ):
    """Append-write a Spark DataFrame to a local Hudi table.

    Parameters
    ----------
    spark_df : DataFrame to write.
    table_name, db_name : name the Hudi table and the target directory.
    method : Hudi write operation ('upsert', 'insert', 'bulk_insert', ...).
    table_type : 'COPY_ON_WRITE' or 'MERGE_ON_READ'.
    recordkey, precombine, partition_fields : Hudi record key, ordering
        (pre-combine) and partitioning fields.
    index_type : Hudi index type; 'RECORD_INDEX' additionally enables the
        metadata-table record index and the configs it requires.
    base_path : root directory for all tables. New optional parameter whose
        default preserves the previously hard-coded location.
    """
    # Fixed: the original path template was missing '=' after 'table_name',
    # producing ".../table_namecustomers" instead of ".../table_name=customers".
    path = f"{base_path}/database={db_name}/table_name={table_name}"

    hudi_options = {
        'hoodie.table.name': table_name,
        'hoodie.datasource.write.table.type': table_type,
        'hoodie.datasource.write.table.name': table_name,
        'hoodie.datasource.write.operation': method,
        'hoodie.datasource.write.recordkey.field': recordkey,
        'hoodie.datasource.write.precombine.field': precombine,
        'hoodie.datasource.write.partitionpath.field': partition_fields,
        'hoodie.index.type': index_type,
    }

    if index_type == 'RECORD_INDEX':
        # The record-level index lives in the metadata table, so the metadata
        # table (and a lock provider for concurrent writers) must be enabled.
        hudi_options.update({
            "hoodie.enable.data.skipping": "true",
            "hoodie.metadata.enable": "true",
            "hoodie.metadata.index.column.stats.enable": "true",
            "hoodie.write.concurrency.mode": "optimistic_concurrency_control",
            "hoodie.write.lock.provider": "org.apache.hudi.client.transaction.lock.InProcessLockProvider",
            "hoodie.metadata.record.index.enable": "true"
        })

    print("\n")
    print(path)
    print("\n")

    spark_df.write.format("hudi"). \
        options(**hudi_options). \
        mode("append"). \
        save(path)


# Persist the generated customers as a Hudi COPY_ON_WRITE table, keyed on
# customer_id, deduplicated by created_at, and partitioned by state.
write_to_hudi(
    spark_df=spark_df_customers,
    table_name="customers",
    db_name="default",
    index_type="BLOOM",
    recordkey="customer_id",
    precombine="created_at",
    partition_fields="state",
)

Error

24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_10 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_7 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_10 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_7 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_2 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_2 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_1 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_8 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_1 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_0 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_0 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_11 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null.
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_11 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 WARN BlockManager: Block rdd_153_8 could not be removed as it was not found on disk or in memory
24/05/19 11:01:48 ERROR SimpleExecutor: Failed consuming records
org.apache.parquet.crypto.ParquetCryptoRuntimeException: Could not instantiate KmsClient class: null
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:350)
	at org.apache.parquet.crypto.keytools.KeyToolkit.lambda$getKmsClient$0(KeyToolkit.java:330)
	at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705)
	at org.apache.parquet.crypto.keytools.KeyToolkit.getKmsClient(KeyToolkit.java:329)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:80)
	at org.apache.parquet.crypto.keytools.FileKeyWrapper.<init>(FileKeyWrapper.java:109)
	at org.apache.parquet.crypto.keytools.PropertiesDrivenCryptoFactory.getFileEncryptionProperties(PropertiesDrivenCryptoFactory.java:134)
	at org.apache.parquet.hadoop.ParquetOutputFormat.createEncryptionProperties(ParquetOutputFormat.java:552)
	at org.apache.parquet.hadoop.ParquetWriter.<init>(ParquetWriter.java:285)
	at org.apache.parquet.hadoop.ParquetWriter$Builder.build(ParquetWriter.java:680)
	at org.apache.hudi.io.storage.HoodieBaseParquetWriter.<init>(HoodieBaseParquetWriter.java:80)
	at org.apache.hudi.io.storage.HoodieAvroParquetWriter.<init>(HoodieAvroParquetWriter.java:54)
	at org.apache.hudi.io.storage.HoodieAvroFileWriterFactory.newParquetFileWriter(HoodieAvroFileWriterFactory.java:69)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriterByFormat(HoodieFileWriterFactory.java:80)
	at org.apache.hudi.io.storage.HoodieFileWriterFactory.getFileWriter(HoodieFileWriterFactory.java:67)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:104)
	at org.apache.hudi.io.HoodieCreateHandle.<init>(HoodieCreateHandle.java:76)
	at org.apache.hudi.io.CreateHandleFactory.create(CreateHandleFactory.java:45)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:101)
	at org.apache.hudi.execution.CopyOnWriteInsertHandler.consume(CopyOnWriteInsertHandler.java:44)
	at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:69)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:78)
	at org.apache.hudi.execution.SparkLazyInsertIterable.computeNext(SparkLazyInsertIterable.java:37)
	at org.apache.hudi.client.utils.LazyIterableIterator.next(LazyIterableIterator.java:119)
	at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:46)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:352)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1552)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1462)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1526)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1349)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:375)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:326)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
	at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
	at org.apache.spark.scheduler.Task.run(Task.scala:139)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1529)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.parquet.hadoop.BadConfigurationException: could not instantiate class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS set in job conf at parquet.encryption.kms.client.class
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:40)
	at org.apache.parquet.crypto.keytools.KeyToolkit.createAndInitKmsClient(KeyToolkit.java:342)
	... 46 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.parquet.crypto.keytools.mocks.InMemoryKMS not found
	at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2592)
	at org.apache.parquet.hadoop.util.ConfigurationUtil.getClassFromConfig(ConfigurationUtil.java:33)
	... 47 more
24/05/19 11:01:48 WARN BlockManager: Putting block rdd_153_4 failed due to exception java.lang.RuntimeException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: 

REF

@codope codope added the feature-enquiry issue contains feature enquiries/requests or great improvement ideas label May 31, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
feature-enquiry issue contains feature enquiries/requests or great improvement ideas
Projects
Status: Awaiting Triage
Development

No branches or pull requests

2 participants