diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml
index c5ec2086e7..9ff785eec9 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-shaded/pom.xml
@@ -304,6 +304,10 @@ limitations under the License.
                     <shadedPattern>com.google.bigtable.repackaged.org.codehaus</shadedPattern>
                   </relocation>
+                  <relocation>
+                    <pattern>org.checkerframework</pattern>
+                    <shadedPattern>com.google.bigtable.repackaged.org.checkerframework</shadedPattern>
+                  </relocation>
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml
index f515ceda85..f906ff5de9 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/pom.xml
@@ -51,7 +51,7 @@
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
-      <version>30.0-jre</version>
+      <version>30.1.1-android</version>
     </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
@@ -80,6 +80,11 @@
       <version>4.13.2</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>javax.annotation</groupId>
+      <artifactId>javax.annotation-api</artifactId>
+      <version>1.3.2</version>
+    </dependency>
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
index 9fa6674bf8..4dab136ce0 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/main/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslator.java
@@ -16,32 +16,37 @@
 package com.google.cloud.bigtable.hbase.tools;
 
 import com.google.bigtable.repackaged.com.google.api.core.InternalApi;
+import com.google.bigtable.repackaged.com.google.common.annotations.VisibleForTesting;
 import com.google.bigtable.repackaged.com.google.common.base.Preconditions;
 import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.bigtable.repackaged.com.google.gson.reflect.TypeToken;
 import com.google.cloud.bigtable.hbase.BigtableConfiguration;
 import com.google.cloud.bigtable.hbase.BigtableOptionsFactory;
 import com.google.cloud.bigtable.hbase.tools.ClusterSchemaDefinition.TableSchemaDefinition;
-import com.google.common.annotations.VisibleForTesting;
 import java.io.File;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.Writer;
+import java.lang.reflect.Type;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import javax.annotation.Nullable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.log4j.BasicConfigurator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -52,8 +57,7 @@ *
  * <p>Execute the following command to copy the schema from HBase to Cloud Bigtable:
  *
  * <pre>
- *   mvn exec:java \
- *  -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar \
  *  -Dhbase.zookeeper.quorum=$ZOOKEEPER_QUORUM \
  *  -Dhbase.zookeeper.property.clientPort=$ZOOKEEPER_PORT \
  *  -Dgoogle.bigtable.table.filter=$TABLE_NAME_REGEX \
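+ *  -Dgoogle.bigtable.schema.mapping.filepath=$SCHEMA_MAPPING_FILE \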
@@ -69,8 +73,7 @@
  *
  * <p>Run the tool from a host that can connect to HBase. Store HBase schema in a file:
  *
  * <pre>
- *   mvn exec:java \
- *  -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar \
  *  -Dhbase.zookeeper.quorum=$ZOOKEEPER_QUORUM \
  *  -Dhbase.zookeeper.property.clientPort=$ZOOKEEPER_PORT \
  *  -Dgoogle.bigtable.table.filter=$TABLE_NAME_REGEX \
@@ -81,8 +84,7 @@
  * Bigtable using the schema file:
  *
  * <pre>
- *   mvn exec:java \
- *  -Dexec.mainClass=com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator \
+ * java -jar bigtable-hbase-1.x-tools-<version>-jar-with-dependencies.jar \
  *  -Dgoogle.bigtable.input.filepath=$SCHEMA_FILE_PATH \
  *  -Dgoogle.bigtable.project.id=$PROJECT_ID \
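+ *  -Dgoogle.bigtable.schema.mapping.filepath=$SCHEMA_MAPPING_FILE \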
  *  -Dgoogle.bigtable.instance.id=$INSTANCE_ID
@@ -98,23 +100,25 @@ public class HBaseSchemaTranslator {
   public static final String INPUT_FILE_KEY = "google.bigtable.input.filepath";
   public static final String OUTPUT_FILE_KEY = "google.bigtable.output.filepath";
   public static final String TABLE_NAME_FILTER_KEY = "google.bigtable.table.filter";
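+  // Optional: path to a JSON file mapping source table names to destination table names.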
+  public static final String SCHEMA_MAPPING_FILEPATH = "google.bigtable.schema.mapping.filepath";
 
   private static final Logger LOG = LoggerFactory.getLogger(HBaseSchemaTranslator.class);
 
   private final SchemaReader schemaReader;
+  private final SchemaTransformer schemaTransformer;
   private final SchemaWriter schemaWriter;
-  // TODO Add a schemaOverrider
 
   @VisibleForTesting
   static class SchemaTranslationOptions {
 
-    String projectId;
-    String instanceId;
-    String zookeeperQuorum;
-    Integer zookeeperPort;
-    String inputFilePath;
-    String outputFilePath;
-    String tableNameFilter;
+    @Nullable String projectId;
+    @Nullable String instanceId;
+    @Nullable String zookeeperQuorum;
+    @Nullable Integer zookeeperPort;
+    @Nullable String inputFilePath;
+    @Nullable String outputFilePath;
+    @Nullable String tableNameFilter;
+    @Nullable String schemaMappingFilePath;
 
     @VisibleForTesting
     SchemaTranslationOptions() {}
@@ -160,6 +164,7 @@ public static SchemaTranslationOptions loadOptionsFromSystemProperties() {
       }
 
       options.tableNameFilter = System.getProperty(TABLE_NAME_FILTER_KEY);
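+      // Optional: remains null when no schema mapping file is supplied.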
+      options.schemaMappingFilePath = System.getProperty(SCHEMA_MAPPING_FILEPATH);
 
       // Ensure that the options are set properly
       // TODO It is possible to validate the options without creating the object, but its less
@@ -175,8 +180,7 @@ public static SchemaTranslationOptions loadOptionsFromSystemProperties() {
   }
 
   /** Interface for reading HBase schema. */
-  interface SchemaReader {
-
+  private interface SchemaReader {
     ClusterSchemaDefinition readSchema() throws IOException;
   }
 
@@ -184,6 +188,7 @@ interface SchemaReader {
   * Reads HBase schema from a JSON file. The JSON file should be a representation of a {@link
    * ClusterSchemaDefinition} object.
    */
+  @VisibleForTesting
   static class FileBasedSchemaReader implements SchemaReader {
 
     private final String schemaFilePath;
@@ -200,6 +205,7 @@ public ClusterSchemaDefinition readSchema() throws IOException {
   }
 
   /** Reads the HBase schema by connecting to an HBase cluster. */
+  @VisibleForTesting
   static class HBaseSchemaReader implements SchemaReader {
 
     private final String tableFilterPattern;
@@ -245,14 +251,17 @@ private byte[][] getSplits(TableName table) throws IOException {
         return new byte[0][];
       }
 
-      byte[][] splits = new byte[regions.size()][];
-      int i = 0;
+      List<byte[]> splits = new ArrayList<>();
       for (HRegionInfo region : regions) {
-        splits[i] = region.getStartKey();
-        i++;
+        if (Arrays.equals(region.getStartKey(), HConstants.EMPTY_START_ROW)) {
+          // CBT client does not accept an empty row as a split.
+          continue;
+        }
+        splits.add(region.getStartKey());
       }
-      LOG.debug("Found {} splits for table {}.", splits.length, table.getNameAsString());
-      return splits;
+
+      LOG.debug("Found {} splits for table {}.", splits.size(), table.getNameAsString());
+      return splits.toArray(new byte[0][]);
     }
 
     @Override
@@ -273,7 +282,7 @@ public ClusterSchemaDefinition readSchema() throws IOException {
   /**
    * Interface for writing the HBase schema represented by a {@link ClusterSchemaDefinition} object.
    */
-  interface SchemaWriter {
+  private interface SchemaWriter {
 
     void writeSchema(ClusterSchemaDefinition schemaDefinition) throws IOException;
   }
@@ -282,6 +291,7 @@ interface SchemaWriter {
    * Writes the HBase schema into a file. File contains the JSON representation of the {@link
    * ClusterSchemaDefinition} object.
    */
+  @VisibleForTesting
   static class FileBasedSchemaWriter implements SchemaWriter {
 
     private final String outputFilePath;
@@ -304,6 +314,7 @@ public void writeSchema(ClusterSchemaDefinition schemaDefinition) throws IOExcep
    * Creates tables in Cloud Bigtable based on the schema provided by the {@link
    * ClusterSchemaDefinition} object.
    */
+  @VisibleForTesting
   static class BigtableSchemaWriter implements SchemaWriter {
 
     private final Admin btAdmin;
@@ -353,6 +364,15 @@ public HBaseSchemaTranslator(SchemaTranslationOptions options) throws IOExceptio
               options.zookeeperQuorum, options.zookeeperPort, options.tableNameFilter);
     }
 
+    if (options.schemaMappingFilePath != null) {
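+      // A schema mapping file was supplied; rename tables according to its JSON map.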
+      this.schemaTransformer =
+          JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(
+              options.schemaMappingFilePath);
+    } else {
+      this.schemaTransformer = new NoopSchemaTransformer();
+    }
+
     if (options.outputFilePath != null) {
       this.schemaWriter = new FileBasedSchemaWriter(options.outputFilePath);
     } else {
@@ -360,16 +380,108 @@ public HBaseSchemaTranslator(SchemaTranslationOptions options) throws IOExceptio
     }
   }
 
+  /**
+   * Transforms the {@link ClusterSchemaDefinition} read by {@link SchemaReader} before writing it
+   * to {@link SchemaWriter}.
+   */
+  private interface SchemaTransformer {
+
+    ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema)
+        throws IOException, DeserializationException;
+  }
+
+  /** No-op implementation of {@link SchemaTransformer}. Returns the original schema definition. */
+  private static class NoopSchemaTransformer implements SchemaTransformer {
+
+    @Override
+    public ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema) {
+      return originalSchema;
+    }
+  }
+
+  /**
+   * Transforms the {@link ClusterSchemaDefinition} based on a provided JSON map. It can rename
+   * tables before writing them to {@link SchemaWriter}.
+   *
+   * <pre>
+   * JSON map should look like { "SourceTable": "DestinationTable",
+   * "sourceTable-2":"DestinationTable-2"}
+   * </pre>
+   */
+  @VisibleForTesting
+  static class JsonBasedSchemaTransformer implements SchemaTransformer {
+
+    // Map from old-tableName -> new-tableName
+    @VisibleForTesting Map<String, String> tableNameMappings;
+
+    @VisibleForTesting
+    JsonBasedSchemaTransformer(Map<String, String> tableNameMappings) {
+      this.tableNameMappings = tableNameMappings;
+      LOG.info("Creating SchemaTransformer with schema mapping: {}", tableNameMappings);
+    }
+
+    public static JsonBasedSchemaTransformer newSchemaTransformerFromJsonFile(
+        String mappingFilePath) throws IOException {
+
+      Map<String, String> tableNameMappings = null;
+      Type mapType = new TypeToken<Map<String, String>>() {}.getType();
+
+      try (Reader jsonReader = new FileReader(mappingFilePath)) {
+        tableNameMappings = new Gson().fromJson(jsonReader, mapType);
+      }
+
+      if (tableNameMappings == null) {
+        throw new IllegalStateException(
+            "SchemaMapping file does not contain valid schema mappings");
+      }
+
+      return new JsonBasedSchemaTransformer(tableNameMappings);
+    }
+
+    @Override
+    public ClusterSchemaDefinition transform(ClusterSchemaDefinition originalSchema)
+        throws DeserializationException, IOException {
+      ClusterSchemaDefinition transformedSchema = new ClusterSchemaDefinition();
+
+      // Apply the transformations.
+      for (TableSchemaDefinition tableSchemaDefinition : originalSchema.tableSchemaDefinitions) {
+        String newTableName = tableSchemaDefinition.name;
+        HTableDescriptor tableDescriptor = tableSchemaDefinition.getHbaseTableDescriptor();
+        HTableDescriptor newTableDescriptor = tableDescriptor;
+
+        // Override the table name if it is present in the mapping file.
+        if (tableNameMappings.containsKey(newTableName)) {
+          newTableName = tableNameMappings.get(newTableName);
+          // Rename the table and copy all the other configs, including the column families.
+          newTableDescriptor =
+              new HTableDescriptor(TableName.valueOf(newTableName), tableDescriptor);
+          LOG.info("Renaming table {} to {}.", tableSchemaDefinition.name, newTableName);
+        }
+
+        // Finalize the transformed schema.
+        transformedSchema.addTableSchemaDefinition(
+            newTableDescriptor, tableSchemaDefinition.splits);
+      }
+      return transformedSchema;
+    }
+  }
+
   @VisibleForTesting
   HBaseSchemaTranslator(SchemaReader schemaReader, SchemaWriter schemaWriter) {
+    this(schemaReader, schemaWriter, new NoopSchemaTransformer());
+  }
+
+  @VisibleForTesting
+  HBaseSchemaTranslator(
+      SchemaReader schemaReader, SchemaWriter schemaWriter, SchemaTransformer schemaTransformer) {
     this.schemaReader = schemaReader;
     this.schemaWriter = schemaWriter;
+    this.schemaTransformer = schemaTransformer;
   }
 
-  public void translate() throws IOException {
+  public void translate() throws IOException, DeserializationException {
     ClusterSchemaDefinition schemaDefinition = schemaReader.readSchema();
     LOG.info("Read schema with {} tables.", schemaDefinition.tableSchemaDefinitions.size());
-    this.schemaWriter.writeSchema(schemaDefinition);
+
+    this.schemaWriter.writeSchema(schemaTransformer.transform(schemaDefinition));
   }
 
   /*
@@ -418,9 +530,13 @@ private static void usage(final String errorMsg) {
     System.err.println(
         "  Additionally, you can filter tables to create when using HBase as source");
     System.err.println("  -D " + TABLE_NAME_FILTER_KEY + "=<table name regex>");
+    System.err.println(
+        "  Optionally, the tables can be renamed by providing a JSON map. Example JSON "
+            + "{\"source-table\": \"destination-table\", \"namespace:source-table2\": \"namespace-destination-table2\"}.");
+    System.err.println("  -D " + SCHEMA_MAPPING_FILEPATH + "=/schema/mapping/file/path.json");
   }
 
-  public static void main(String[] args) throws IOException {
+  public static void main(String[] args) throws IOException, DeserializationException {
     // Configure the logger.
     BasicConfigurator.configure();
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java
new file mode 100644
index 0000000000..9858fdddb1
--- /dev/null
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTransformerTest.java
@@ -0,0 +1,268 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.hbase.tools;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import com.google.bigtable.repackaged.com.google.gson.Gson;
+import com.google.bigtable.repackaged.com.google.gson.JsonObject;
+import com.google.cloud.bigtable.hbase.tools.ClusterSchemaDefinition.TableSchemaDefinition;
+import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.JsonBasedSchemaTransformer;
+import com.google.common.collect.ImmutableMap;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class HBaseSchemaTransformerTest {
+
+  @Rule public TemporaryFolder tempFolder = new TemporaryFolder();
+
+  private String schemaFilePath;
+  private JsonBasedSchemaTransformer schemaTransformer;
+
+  @Before
+  public void before() throws IOException {
+    schemaFilePath = tempFolder.newFile().getPath();
+  }
+
+  ///////////////////////////////// Invalid cases //////////////////////////////////////
+  @Test
+  public void testBadJson() throws IOException {
+
+    // Setup bad JSON
+    FileWriter fw = new FileWriter(schemaFilePath);
+    fw.write("random JSON string");
+    // Execute the test method
+    try {
+      schemaTransformer =
+          JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFilePath);
+      fail("Expected IllegalStateException due to bad JSON.");
+    } catch (IllegalStateException e) {
+      e.printStackTrace();
+    } catch (Exception e) {
+      fail("Expected IllegalStateException due to bad JSON but found " + e.toString());
+    }
+  }
+
+  //////////////////////////////////////// Happy cases ////////////////////////////////////
+  @Test
+  public void testParseJsonTables() throws IOException {
+    // Setup the schema file
+    JsonObject jsonObject = new JsonObject();
+    jsonObject.addProperty("old-table", "new-table");
+    // Make sure that the whole table name is matched and not just prefix
+    jsonObject.addProperty("old-table-2", "random-table-2");
+    try (Writer writer = new FileWriter(schemaFilePath)) {
+      new Gson().toJson(jsonObject, writer);
+    }
+
+    schemaTransformer = JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFilePath);
+
+    // Validate
+    Map<String, String> expectedTableMapping =
+        ImmutableMap.of("old-table", "new-table", "old-table-2", "random-table-2");
+    assertEquals(expectedTableMapping, schemaTransformer.tableNameMappings);
+  }
+
+  @Test
+  public void testParseJsonEmpty() throws IOException {
+    // Setup the schema file
+    JsonObject jsonObject = new JsonObject();
+    try (Writer writer = new FileWriter(schemaFilePath)) {
+      new Gson().toJson(jsonObject, writer);
+    }
+
+    // Parse schema file
+    schemaTransformer = JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFilePath);
+    assertEquals(0, schemaTransformer.tableNameMappings.size());
+  }
+
+  @Test
+  public void testTransformWithNormalTable() throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer
+    Map<String, String> schemaMapping = new HashMap<>();
+    // No-op transform: source and destination names are identical.
+    schemaMapping.put("test-table1", "test-table1");
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+
+    // Verify transformation
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).name,
+        schemaDefinition.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor(),
+        schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+
+  @Test
+  public void testTransformWithEmptyMappings() throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer
+    Map<String, String> schemaMapping = new HashMap<>();
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+
+    // Verify transformation
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).name,
+        schemaDefinition.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor(),
+        schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+
+  @Test
+  public void testTransformWithExtraMapping() throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("test-table1", "new-table1");
+    // Extra mapping that matches no full table name (only a prefix) and should be ignored.
+    schemaMapping.put("test", "new-table2");
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+    TableSchemaDefinition expectedTableSchema =
+        new TableSchemaDefinition(
+            new HTableDescriptor(
+                TableName.valueOf("new-table1"),
+                schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
+            tableSchemaDefinition.splits);
+
+    // Verify transformation
+    Assert.assertEquals("new-table1", transformedClusterSchema.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        expectedTableSchema.getHbaseTableDescriptor(),
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+
+  @Test
+  public void testTransformWithinCustomNamespaceTable()
+      throws DeserializationException, IOException {
+
+    // Setup a cluster schema
+    ClusterSchemaDefinition schemaDefinition = new ClusterSchemaDefinition();
+    TableSchemaDefinition tableSchemaDefinition = new TableSchemaDefinition();
+    tableSchemaDefinition.name = "ns:test-table1";
+    tableSchemaDefinition.splits = new byte[3][];
+    tableSchemaDefinition.splits[0] = HConstants.EMPTY_BYTE_ARRAY;
+    tableSchemaDefinition.splits[1] = "first-split".getBytes();
+    tableSchemaDefinition.splits[2] = "second-split".getBytes();
+    HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf("ns", "test-table1"));
+    HColumnDescriptor columnDescriptor = new HColumnDescriptor("cf");
+    columnDescriptor.setMaxVersions(2).setTimeToLive(1000);
+    tableDescriptor.addFamily(columnDescriptor);
+    tableSchemaDefinition.tableDescriptor = tableDescriptor.toByteArray();
+    schemaDefinition.tableSchemaDefinitions.add(tableSchemaDefinition);
+
+    // Setup a transformer that renames the namespaced table.
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("ns:test-table1", "new-table1");
+    JsonBasedSchemaTransformer transformer = new JsonBasedSchemaTransformer(schemaMapping);
+
+    ClusterSchemaDefinition transformedClusterSchema = transformer.transform(schemaDefinition);
+    TableSchemaDefinition expectedTableSchema =
+        new TableSchemaDefinition(
+            new HTableDescriptor(
+                TableName.valueOf("new-table1"),
+                schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
+            tableSchemaDefinition.splits);
+
+    // Verify transformation
+    Assert.assertEquals("new-table1", transformedClusterSchema.tableSchemaDefinitions.get(0).name);
+    Assert.assertEquals(
+        expectedTableSchema.getHbaseTableDescriptor(),
+        transformedClusterSchema.tableSchemaDefinitions.get(0).getHbaseTableDescriptor());
+    Assert.assertEquals(
+        transformedClusterSchema.tableSchemaDefinitions.get(0).splits,
+        schemaDefinition.tableSchemaDefinitions.get(0).splits);
+  }
+}
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java
index 55c7d30133..61301deed8 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslationOptionsTest.java
@@ -106,12 +106,14 @@ public void parseSchemaTranslationOptionsHbaseToFile() {
     System.setProperty(HBaseSchemaTranslator.ZOOKEEPER_PORT_KEY, "1080");
     System.setProperty(HBaseSchemaTranslator.TABLE_NAME_FILTER_KEY, "hbase-.*");
     System.setProperty(HBaseSchemaTranslator.OUTPUT_FILE_KEY, "/tmp/output");
+    System.setProperty(HBaseSchemaTranslator.SCHEMA_MAPPING_FILEPATH, "/tmp/schema_mapping.json");
 
     SchemaTranslationOptions options = SchemaTranslationOptions.loadOptionsFromSystemProperties();
     Assert.assertEquals("localhost", options.zookeeperQuorum);
     Assert.assertEquals((Integer) 1080, options.zookeeperPort);
     Assert.assertEquals("/tmp/output", options.outputFilePath);
     Assert.assertEquals("hbase-.*", options.tableNameFilter);
+    Assert.assertEquals("/tmp/schema_mapping.json", options.schemaMappingFilePath);
   }
 
   @Test
diff --git a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java
index 6e8eaf8f46..eb8bcc3c1e 100644
--- a/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java
+++ b/bigtable-hbase-1.x-parent/bigtable-hbase-1.x-tools/src/test/java/com/google/cloud/bigtable/hbase/tools/HBaseSchemaTranslatorTest.java
@@ -15,6 +15,7 @@
  */
 package com.google.cloud.bigtable.hbase.tools;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 import static org.mockito.ArgumentMatchers.eq;
 
@@ -23,11 +24,15 @@
 import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.FileBasedSchemaReader;
 import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.FileBasedSchemaWriter;
 import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.HBaseSchemaReader;
+import com.google.cloud.bigtable.hbase.tools.HBaseSchemaTranslator.JsonBasedSchemaTransformer;
 import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -41,6 +46,7 @@
 import org.junit.rules.TemporaryFolder;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
+import org.mockito.ArgumentCaptor;
 import org.mockito.Mock;
 import org.mockito.Mockito;
 import org.mockito.junit.MockitoJUnit;
@@ -148,7 +154,9 @@ public void testTranslateFromHBaseToBigtable() throws IOException, Deserializati
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // First split is EMPTY_BYTE_ARRAY which gets filtered as CBT does not support empty
+            // splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor()),
@@ -158,6 +166,58 @@ public void testTranslateFromHBaseToBigtable() throws IOException, Deserializati
     Mockito.verify(hbaseAdmin).listTables("test.*");
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table1"));
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table2"));
   }
 
+  @Test
+  public void testTranslateFromHBaseToBigtableWithTransformation()
+      throws IOException, DeserializationException {
+    // Setup
+    Mockito.when(hbaseAdmin.listTables(eq("test.*"))).thenReturn(getTables());
+    Mockito.when(hbaseAdmin.getTableRegions(eq(TableName.valueOf("test-table1"))))
+        .thenReturn(getRegions(0));
+    Mockito.when(hbaseAdmin.getTableRegions(eq(TableName.valueOf("test-table2"))))
+        .thenReturn(getRegions(1));
+    ArgumentCaptor<HTableDescriptor> tableCaptor = ArgumentCaptor.forClass(HTableDescriptor.class);
+
+    Map<String, String> schemaMapping = new HashMap<>();
+    schemaMapping.put("test-table1", "new-test-table1");
+    // No-op renaming
+    schemaMapping.put("test-table2", "test-table2");
+    // Not found in the schema, should get discarded.
+    schemaMapping.put("test", "new-test");
+
+    HBaseSchemaTranslator translator =
+        new HBaseSchemaTranslator(
+            new HBaseSchemaReader(hbaseAdmin, "test.*"),
+            new BigtableSchemaWriter(btAdmin),
+            new JsonBasedSchemaTransformer(schemaMapping));
+
+    // Call
+    translator.translate();
+
+    // Verify
+    Mockito.verify(btAdmin)
+        .createTable(
+            tableCaptor.capture(),
+            // First split is EMPTY_BYTE_ARRAY which gets filtered as CBT does not support empty
+            // splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
+    Mockito.verify(btAdmin)
+        .createTable(
+            tableCaptor.capture(), eq(schemaDefinition.tableSchemaDefinitions.get(1).splits));
+    HTableDescriptor transformedTable = tableCaptor.getAllValues().get(0);
+    assertEquals("new-test-table1", transformedTable.getNameAsString());
+    assertEquals("cf", transformedTable.getColumnFamilies()[0].getNameAsString());
+    // Cloud Bigtable only uses the GC policies from the HColumnDescriptor object.
+    assertEquals(2, transformedTable.getColumnFamilies()[0].getMaxVersions());
+    assertEquals(1000, transformedTable.getColumnFamilies()[0].getTimeToLive());
+    assertEquals(
+        schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor(),
+        tableCaptor.getAllValues().get(1));
+
+    Mockito.verify(hbaseAdmin).listTables("test.*");
+    Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table1"));
+    Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table2"));
+  }
+
   @Test
   public void testTranslateHBaseToBigtableViaFile() throws IOException, DeserializationException {
     // Setup
@@ -186,7 +246,9 @@ public void testTranslateHBaseToBigtableViaFile() throws IOException, Deserializ
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // First split is EMPTY_BYTE_ARRAY which gets filtered as CBT does not support empty
+            // splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor()),
@@ -262,7 +324,9 @@ public void testBigtableCreateTableFails() throws IOException, DeserializationEx
         .when(btAdmin)
         .createTable(
            eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // First split is EMPTY_BYTE_ARRAY which gets filtered as CBT does not support empty
+            // splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
 
     HBaseSchemaTranslator translator =
         new HBaseSchemaTranslator(
@@ -282,16 +346,41 @@ public void testBigtableCreateTableFails() throws IOException, DeserializationEx
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(0).getHbaseTableDescriptor()),
-            eq(schemaDefinition.tableSchemaDefinitions.get(0).splits));
+            // First split is EMPTY_BYTE_ARRAY which gets filtered as CBT does not support empty
+            // splits.
+            eq(Arrays.copyOfRange(schemaDefinition.tableSchemaDefinitions.get(0).splits, 1, 3)));
+    // Validate that translator calls createTable for test-table-2 even after creation of
+    // test-table1 failed.
     Mockito.verify(btAdmin)
         .createTable(
             eq(schemaDefinition.tableSchemaDefinitions.get(1).getHbaseTableDescriptor()),
             eq(schemaDefinition.tableSchemaDefinitions.get(1).splits));
     Mockito.verify(hbaseAdmin).listTables(".*");
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table1"));
-    // Validate that translator calls createTable for test-table-2 even after creation of
-    // test-table1 failed.
     Mockito.verify(hbaseAdmin).getTableRegions(TableName.valueOf("test-table2"));
   }
+
+  @Test
+  public void testHBaseSchemaTranslatorCreationFails()
+      throws IOException, DeserializationException {
+    // Setup
+    File schemaFile = tempFolder.newFile("schema.json");
+    FileWriter fileWriter = new FileWriter(schemaFile.getPath());
+    fileWriter.write("{This is invalid JSON}");
+
+    // Try creating the schema translator with invalid JSON, expect it to fail.
+    try {
+      HBaseSchemaTranslator translator =
+          new HBaseSchemaTranslator(
+              new HBaseSchemaReader(hbaseAdmin, ".*"),
+              new BigtableSchemaWriter(btAdmin),
+              JsonBasedSchemaTransformer.newSchemaTransformerFromJsonFile(schemaFile.getPath()));
+      fail("Expected IllegalStateException here.");
+    } catch (IllegalStateException e) {
+      // Expected.
+    } catch (Exception e) {
+      fail("Expected IllegalStateException but found: " + e.toString());
+    }
+  }
 }