diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
index 9fa6674bf8..4dab136ce0 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
@@ -16,32 +16,37 @@
package com.google.cloud.bigtable.hbase.tools;
import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.bigtable.repackaged.com.google.gson.reflect.TypeToken;
import com.google.cloud.bigtable.hbase.BigtableConfiguration;
import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
import com.google.cloud.bigtable.hbase.tools.ClusterSchemaDefinition.TableSchemaDefinition;
-import com.google.common.annotations.VisibleForTesting;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
+import java.lang.reflect.Type;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.log4j.BasicConfigurator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -52,8 +57,7 @@
* Execute the following command to copy the schema from HBase to Cloud Bigtable:
*
*
- * mvn exec:java \
- * -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-&lt;version&gt;-jar-with-dependencies.jar com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
* -Dhbase.zookeeper.quorum=$ZOOKEEPER_QUORUM \
* -Dhbase.zookeeper.property.clientPort=$ZOOKEEPER_PORT \
* -Dgoogle.bigtable.table.filter=$TABLE_NAME_REGEX \
@@ -69,8 +73,7 @@
* Run the tool from a host that can connect to HBase. Store HBase schema in a file:
*
*
- * mvn exec:java \
- * -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-&lt;version&gt;-jar-with-dependencies.jar com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
* -Dhbase.zookeeper.quorum=$ZOOKEEPER_QUORUM \
* -Dhbase.zookeeper.property.clientPort=$ZOOKEEPER_PORT \
* -Dgoogle.bigtable.table.filter=$TABLE_NAME_REGEX \
@@ -81,8 +84,7 @@
* Bigtable using the schema file:
*
*
- * mvn exec:java \
- * -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-&lt;version&gt;-jar-with-dependencies.jar com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
* -Dgoogle.bigtable.input.filepath=$SCHEMA_FILE_PATH \
* -Dgoogle.bigtable.project.id=$PROJECT_ID \
* -Dgoogle.bigtable.instance.id=$INSTANCE_ID
@@ -98,23 +100,25 @@ public class HBaseSchemaTranslator {
public static final String INPUT_FILE_KEY = "google.bigtable.input.filepath";
public static final String OUTPUT_FILE_KEY = "google.bigtable.output.filepath";
public static final String TABLE_NAME_FILTER_KEY = "google.bigtable.table.filter";
+ public static final String SCHEMA_MAPPING_FILEPATH = "google.bigtable.schema.mapping.filepath";
private static final Logger LOG = LoggerFactory.getLogger(HBaseSchemaTranslator.class);
private final SchemaReader schemaReader;
+ private final SchemaTransformer schemaTransformer;
private final SchemaWriter schemaWriter;
- // TODO Add a schemaOverrider
@VisibleForTesting
static class SchemaTranslationOptions {
- String projectId;
- String instanceId;
- String zookeeperQuorum;
- Integer zookeeperPort;
- String inputFilePath;
- String outputFilePath;
- String tableNameFilter;
+ @Nullable String projectId;
+ @Nullable String instanceId;
+ @Nullable String zookeeperQuorum;
+ @Nullable Integer zookeeperPort;
+ @Nullable String inputFilePath;
+ @Nullable String outputFilePath;
+ @Nullable String tableNameFilter;
+ @Nullable String schemaMappingFilePath;
@VisibleForTesting
SchemaTranslationOptions() {}
@@ -160,6 +164,7 @@ public static SchemaTranslationOptions loadOptionsFromSystemProperties() {
}
options.tableNameFilter = System.getProperty(TABLE_NAME_FILTER_KEY);
+ options.schemaMappingFilePath = System.getProperty(SCHEMA_MAPPING_FILEPATH);
// Ensure that the options are set properly
// TODO It is possible to validate the options without creating the object, but its less
@@ -175,8 +180,7 @@ public static SchemaTranslationOptions loadOptionsFromSystemProperties() {
}
/** Interface for reading HBase schema. */
- interface SchemaReader {
-
+ private interface SchemaReader {
ClusterSchemaDefinition readSchema() throws IOException;
}
@@ -184,6 +188,7 @@ interface SchemaReader {
* Reads HBase schema from a JSON file. JSON file should be representation of a {@link
* ClusterSchemaDefinition} object.
*/
+ @VisibleForTesting
static class FileBasedSchemaReader implements SchemaReader {
private final String schemaFilePath;
@@ -200,6 +205,7 @@ public ClusterSchemaDefinition readSchema() throws IOException {
}
/** Reads the HBase schema by connecting to an HBase cluster. */
+ @VisibleForTesting
static class HBaseSchemaReader implements SchemaReader {
private final String tableFilterPattern;
@@ -245,14 +251,17 @@ private byte[][] getSplits(TableName table) throws IOException {
return new byte[0][];
}
- byte[][] splits = new byte[regions.size()][];
- int i = 0;
+ List&lt;byte[]&gt; splits = new ArrayList&lt;&gt;();
for (HRegionInfo region : regions) {
- splits[i] = region.getStartKey();
- i++;
+ if (Arrays.equals(region.getStartKey(), HConstants.EMPTY_START_ROW)) {
+ // CBT client does not accept an empty row as a split.
+ continue;
+ }
+ splits.add(region.getStartKey());
}
- LOG.debug("Found {} splits for table {}.", splits.length, table.getNameAsString());
- return splits;
+
+ LOG.debug("Found {} splits for table {}.", splits.size(), table.getNameAsString());
+ return splits.toArray(new byte[0][]);
}
@Override
@@ -273,7 +282,7 @@ public ClusterSchemaDefinition readSchema() throws IOException {
/**
* Interface for writing the HBase schema represented by a {@link ClusterSchemaDefinition} object.
*/
- interface SchemaWriter {
+ private interface SchemaWriter {
void writeSchema(ClusterSchemaDefinition schemaDefinition) throws IOException;
}
@@ -282,6 +291,7 @@ interface SchemaWriter {
* Writes the HBase schema into a file. File contains the JSON representation of the {@link
* ClusterSchemaDefinition} object.
*/
+ @VisibleForTesting
static class FileBasedSchemaWriter implements SchemaWriter {
private final String outputFilePath;
@@ -304,6 +314,7 @@ public void writeSchema(ClusterSchemaDefinition schemaDefinition) throws IOExcep
* Creates tables in Cloud Bigtable based on the schema provided by the {@link
* ClusterSchemaDefinition} object.
*/
+ @VisibleForTesting
static class BigtableSchemaWriter implements SchemaWriter {
private final Admin btAdmin;
@@ -353,6 +364,15 @@ public HBaseSchemaTranslator(SchemaTranslationOptions options) throws IOExceptio
options.zookeeperQuorum, options.zookeeperPort, options.tableNameFilter);
}
+ if (options.schemaMappingFilePath != null) {
+
+ this.schemaTransformer =
+ JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(
+ options.schemaMappingFilePath);
+ } else {
+ this.schemaTransformer = new NoopSchemaTransformer();
+ }
+
if (options.outputFilePath != null) {
this.schemaWriter = new FileBasedSchemaWriter(options.outputFilePath);
} else {
@@ -360,16 +380,108 @@ public HBaseSchemaTranslator(SchemaTranslationOptions options) throws IOExceptio
}
}
+ /**
+ * Transforms the {@link ClusterSchemaDefinition} read by {@link SchemaReader} before writing it
+ * to {@link SchemaWriter}.
+ */
+ private interface SchemaTransformer {
+
+ ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema)
+ throws IOException, DeserializationException;
+ }
+
+ /** No-op implementation of @{@link SchemaTransformer}. Returns the original schema definition. */
+ private static class NoopSchemaTransformer implements SchemaTransformer {
+
+ @Override
+ public ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema) {
+ return originalSchema;
+ }
+ }
+
+ /**
+ * Transforms the @{@link ClusterSchemaDefinition} based on a provided JSON map. It can rename
+ * tables before writing them to {@link SchemaWriter}.
+ *
+ * JSON map should look like { "SourceTable": "DestinationTable",
+ * "sourceTable-2":"DestinationTable-2"}
+ */
+ @VisibleForTesting
+ static class JsonBasedSchemaTransformer implements SchemaTransformer {
+
+ // Map from old-tableName -> new-tableName
+ @VisibleForTesting Map&lt;String, String&gt; tableNameMappings;
+
+ @VisibleForTesting
+ JsonBasedSchemaTransformer(Map&lt;String, String&gt; tableNameMappings) {
+ this.tableNameMappings = tableNameMappings;
+ LOG.info("Creating SchemaTransformer with schema mapping: {}", tableNameMappings);
+ }
+
+ public static JsonBasedSchemaTransformer newSchemaTransformerFromJsonFile(
+ String mappingFilePath) throws IOException {
+
+ Map&lt;String, String&gt; tableNameMappings = null;
+ Type mapType = new TypeToken